[PATCH v7 4/5] HV: Reshuffle the stop_cpus API


Kaige Fu
 

This patch makes the following changes:
- Add one parameter 'mask' for later use.
- Set pcpu state as INVALID when fail to offline cpu.
- Panic if stop_cpus() fails in host_enter_s3.

Signed-off-by: Kaige Fu <kaige.fu@...>
---
hypervisor/arch/x86/cpu.c | 69 +++++++++++++++++++------------
hypervisor/arch/x86/pm.c | 15 ++++---
hypervisor/include/arch/x86/cpu.h | 2 +-
3 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index 589faa99..76ee5991 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -340,46 +340,60 @@ bool start_cpus(uint64_t mask)
return ((pcpu_active_bitmap & expected_start_mask) == expected_start_mask);
}

-void stop_cpus(void)
+/**
+ * @brief Stop the cpus whose bits are set in mask, excluding the calling cpu itself
+ *
+ * @param[in] mask mask bits of cpus which should be stopped
+ *
+ * @return true if all cpus set in mask are stopped
+ * @return false if any cpu set in mask is not stopped
+ */
+bool stop_cpus(uint64_t mask)
{
- uint16_t pcpu_id, expected_up;
+ bool ret = true;
+ uint16_t index;
uint32_t timeout;
+ uint16_t pcpu_id = get_cpu_id();
+ uint64_t need_offline_mask = mask;
+ uint64_t expected_offline_mask = mask;
+
+ index = ffs64(need_offline_mask);
+ while (index != INVALID_BIT_INDEX) {
+ bitmap_clear_nolock(index, &need_offline_mask);

- for (pcpu_id = 0U; pcpu_id < phys_cpu_num; pcpu_id++) {
- if (get_cpu_id() == pcpu_id) { /* avoid offline itself */
+ /* Avoid offlining the calling cpu itself */
+ if (pcpu_id == index) {
continue;
}

- make_pcpu_offline(pcpu_id);
+ make_pcpu_offline(index);
+ index = ffs64(need_offline_mask);
}

- expected_up = 1U;
timeout = CPU_DOWN_TIMEOUT;
- while ((atomic_load16(&up_count) != expected_up) && (timeout != 0U)) {
- /* Delay 10us */
+ while (((pcpu_active_bitmap & expected_offline_mask) != 0UL) && (timeout != 0U)) {
udelay(10U);
-
- /* Decrement timeout value */
timeout -= 10U;
}

- if (atomic_load16(&up_count) != expected_up) {
- pr_fatal("Can't make all APs offline");
-
- /* if partial APs is down, it's not easy to recover
- * per our current implementation (need make up dead
- * APs one by one), just print error mesage and dead
- * loop here.
- *
- * FIXME:
- * We need to refine here to handle the AP offline
- * failure for release/debug version. Ideally, we should
- * define how to handle general unrecoverable error and
- * follow it here.
- */
- do {
- } while (1);
+ /*
+ * A timeout may occur if an NMI/SMI targets the cpu we are trying to offline during
+ * the offline process. In that case, mark the cpu as INVALID and return false to
+ * the caller.
+ */
+ index = ffs64(expected_offline_mask);
+ while (index != INVALID_BIT_INDEX) {
+ if (per_cpu(boot_state, index) != PCPU_STATE_DEAD) {
+ pr_fatal("Failed to offline cpu%hu", index);
+ cpu_set_current_state(index, PCPU_STATE_INVALID);
+ ret = false;
+ }
+
+ bitmap_clear_nolock(index, &expected_offline_mask);
+ index = ffs64(expected_offline_mask);
}
+
+ return ret;
}

void cpu_do_idle(void)
@@ -398,13 +412,14 @@ void cpu_dead(void)
int32_t halt = 1;
uint16_t pcpu_id = get_cpu_id();

- if (bitmap_test_and_clear_lock(pcpu_id, &pcpu_active_bitmap)) {
+ if (bitmap_test(pcpu_id, &pcpu_active_bitmap)) {
/* clean up native stuff */
vmx_off();
cache_flush_invalidate_all();

/* Set state to show CPU is dead */
cpu_set_current_state(pcpu_id, PCPU_STATE_DEAD);
+ bitmap_clear_nolock(pcpu_id, &pcpu_active_bitmap);

/* Halt the CPU */
do {
diff --git a/hypervisor/arch/x86/pm.c b/hypervisor/arch/x86/pm.c
index 2f27565a..5fe1e2e5 100644
--- a/hypervisor/arch/x86/pm.c
+++ b/hypervisor/arch/x86/pm.c
@@ -152,8 +152,16 @@ void host_enter_s3(struct pm_s_state_data *sstate_data, uint32_t pm1a_cnt_val, u
*(sstate_data->wake_vector_32) = (uint32_t)get_trampoline_start16_paddr();

clac();
+
+ /* Skip BSP */
+ for (i = 1U; i < pcpu_nums; i++) {
+ bitmap_set_nolock(i, &mask);
+ }
+
/* offline all APs */
- stop_cpus();
+ if (!stop_cpus(mask)) {
+ panic("Failed to offline all APs!");
+ }

stac();
/* Save default main entry and we will restore it after
@@ -189,11 +197,6 @@ void host_enter_s3(struct pm_s_state_data *sstate_data, uint32_t pm1a_cnt_val, u
write_trampoline_sym(main_entry, pmain_entry_saved);
clac();

- /* Skip BSP */
- for (i = 1U; i < pcpu_nums; i++) {
- bitmap_set_nolock(i, &mask);
- }
-
/* online all APs again */
if (!start_cpus(mask)) {
panic("Failed to start all APs!");
diff --git a/hypervisor/include/arch/x86/cpu.h b/hypervisor/include/arch/x86/cpu.h
index 8f25ca5d..fdae2ab8 100644
--- a/hypervisor/include/arch/x86/cpu.h
+++ b/hypervisor/include/arch/x86/cpu.h
@@ -261,7 +261,7 @@ void load_cpu_state_data(void);
void init_cpu_pre(uint16_t pcpu_id_args);
void init_cpu_post(uint16_t pcpu_id);
bool start_cpus(uint64_t mask);
-void stop_cpus(void);
+bool stop_cpus(uint64_t mask);
void wait_sync_change(uint64_t *sync, uint64_t wake_sync);

#define CPU_SEG_READ(seg, result_ptr) \
--
2.20.0

Join acrn-dev@lists.projectacrn.org to automatically receive all group messages.