Re: [PATCH v7 3/5] HV: Reshuffle start_cpus and start_cpu


Eddie Dong
 

-----Original Message-----
From: acrn-dev@... [mailto:acrn-dev@...]
On Behalf Of Kaige Fu
Sent: Friday, April 19, 2019 2:11 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH v7 3/5] HV: Reshuffle start_cpus and start_cpu

This patch makes the following changes:
- Add one parameter 'mask' to start_cpus for later use.
- Set cpu state as INVALID instead of dead loop when fail to start cpu.
- Panic when there are any failures when start cpus in init_cpu_post and
host_enter_s3.

Signed-off-by: Kaige Fu <kaige.fu@...>
---
hypervisor/arch/x86/cpu.c | 47 +++++++++++++++++++++++--------
hypervisor/arch/x86/pm.c | 13 ++++++++-
hypervisor/include/arch/x86/cpu.h | 3 +-
3 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c index
9c504cad..589faa99 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -164,6 +164,9 @@ void init_cpu_pre(uint16_t pcpu_id_args)
cpu_set_current_state(pcpu_id, PCPU_STATE_INITIALIZING); }

+static uint64_t ap_mask;
+static uint16_t ap_index;
+
void init_cpu_post(uint16_t pcpu_id)
{
#ifdef STACK_PROTECTOR
@@ -229,7 +232,15 @@ void init_cpu_post(uint16_t pcpu_id)

/* Start all secondary cores */
startup_paddr = prepare_trampoline();
- start_cpus();
+
+ /* Skip BSP */
+ for (ap_index = 1U; ap_index < phys_cpu_num; ap_index++) {
+ bitmap_set_nolock(ap_index, &ap_mask);
+ }
Do we need this loop? This is ((1UL << phys_cpu_num) -1 ) & !(1UL <<0). Not?

+
+ if (!start_cpus(ap_mask)) {
+ panic("Failed to start all secondary cores!");
+ }

ASSERT(get_cpu_id() == BOOT_CPU_ID, "");
} else {
@@ -287,32 +298,46 @@ static void start_cpu(uint16_t pcpu_id)

/* Check to see if expected CPU is actually up */
if (!is_pcpu_active(pcpu_id)) {
- /* Print error */
- pr_fatal("Secondary CPUs failed to come up");
-
- /* Error condition - loop endlessly for now */
- do {
- } while (1);
+ pr_fatal("Secondary CPU%hu failed to come up", pcpu_id);
+ cpu_set_current_state(pcpu_id, PCPU_STATE_INVALID);
I am questioning the need of the flag.

As if one PCPU fails to start, resume fails or boot fails. We should explicitly do this. This is what panic does now.
Do we need to handle this additional state with this assumption?

}
}

-void start_cpus(void)
+
+/**
+ * @brief Start all cpus if the bit is set in mask except itself
+ *
+ * @param[in] mask mask bits of cpus which should be started
+ *
+ * @return true if all cpus set in mask are started
+ * @return false if there are any cpus set in mask aren't started */
+bool start_cpus(uint64_t mask)
{
uint16_t i;
+ uint16_t pcpu_id = get_cpu_id();
+ uint64_t need_start_mask = mask;
+ uint64_t expected_start_mask = mask;

/* secondary cpu start up will wait for pcpu_sync -> 0UL */
atomic_store64(&pcpu_sync, 1UL);

- for (i = 0U; i < phys_cpu_num; i++) {
- if (get_cpu_id() == i) {
- continue;
+ i = ffs64(need_start_mask);
+ while (i != INVALID_BIT_INDEX) {
This style is fine too. But, I am wondering if following style is better?

For (i = ffs64(need_start_mask); i != INVALID_BIT_INDEX; i = ffs64(need_start_mask)) {

Please check from MISRAC p.o.v.


+ bitmap_clear_nolock(i, &need_start_mask);
+
+ if (pcpu_id == i) {
+ continue; /* Avoid start itself */
}

start_cpu(i);
+ i = ffs64(need_start_mask);
}

/* Trigger event to allow secondary CPUs to continue */
atomic_store64(&pcpu_sync, 0UL);
+
+ return ((pcpu_active_bitmap & expected_start_mask) ==
+expected_start_mask);
Here we can directly use mask, since this is readonly. This way, we can avoid use of both need_start_mask/ expected_start_mask.

}

void stop_cpus(void)
diff --git a/hypervisor/arch/x86/pm.c b/hypervisor/arch/x86/pm.c index
3a0d1b2c..2f27565a 100644
--- a/hypervisor/arch/x86/pm.c
+++ b/hypervisor/arch/x86/pm.c
@@ -4,6 +4,7 @@
*/

#include <types.h>
+ #include <bits.h>
#include <acrn_common.h>
#include <default_acpi_info.h>
#include <platform_acpi_info.h>
@@ -140,7 +141,10 @@ void do_acpi_s3(struct pm_s_state_data
*sstate_data, uint32_t pm1a_cnt_val, uint

void host_enter_s3(struct pm_s_state_data *sstate_data, uint32_t
pm1a_cnt_val, uint32_t pm1b_cnt_val) {
+ uint16_t pcpu_nums = get_pcpu_nums();
uint64_t pmain_entry_saved;
+ uint64_t mask;
+ uint16_t i;

stac();

@@ -185,6 +189,13 @@ void host_enter_s3(struct pm_s_state_data
*sstate_data, uint32_t pm1a_cnt_val, u
write_trampoline_sym(main_entry, pmain_entry_saved);
clac();

+ /* Skip BSP */
+ for (i = 1U; i < pcpu_nums; i++) {
+ bitmap_set_nolock(i, &mask);
+ }
+
Maybe not necessary. See above comments.
BTW, we may have a bug here: if one PCPU is already offline, this code seems to be not correct.
Anyway, this is not an issue of this patch, we can decouple from this one. Please check after this patch.

/* online all APs again */
- start_cpus();
+ if (!start_cpus(mask)) {
+ panic("Failed to start all APs!");
+ }
}
diff --git a/hypervisor/include/arch/x86/cpu.h
b/hypervisor/include/arch/x86/cpu.h
index 13c2fd9e..8f25ca5d 100644
--- a/hypervisor/include/arch/x86/cpu.h
+++ b/hypervisor/include/arch/x86/cpu.h
@@ -250,6 +250,7 @@ enum pcpu_boot_state {
PCPU_STATE_RUNNING,
PCPU_STATE_HALTED,
PCPU_STATE_DEAD,
+ PCPU_STATE_INVALID,
};

/* Function prototypes */
@@ -259,7 +260,7 @@ void trampoline_start16(void); void
load_cpu_state_data(void); void init_cpu_pre(uint16_t pcpu_id_args); void
init_cpu_post(uint16_t pcpu_id); -void start_cpus(void);
+bool start_cpus(uint64_t mask);
void stop_cpus(void);
void wait_sync_change(uint64_t *sync, uint64_t wake_sync);

--
2.20.0


Join acrn-dev@lists.projectacrn.org to automatically receive all group messages.