[PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.


Zhao, Yuanyuan
 

If a shared pCPU enters C-State by a vCPU runs on it,
pPCU may can't wake on time when other vCPUs use it. So add a
new guest_flag 'GUEST_FLAG_OWN_PCPU' which used to tell hypervisor
if a VM exclusively owns the physical CPUs. Mwait will always not
be supported on a VM does not own pcpu.This patch does this by:
1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v3->v4:
1. Set percpu related cpuid -> set_vcpuid_entries.
2. HIDE UWAIT/UMONITOR to shared vcpu.
3. inject #GP -> inject #UD.

v2->v3:
1.mwait_vmexit_handler -> mwait_monitor_vmexit_handler.
2.own_pcpu -> guest_flag(GUEST_FLAG_OWN_PCPU)
3.Check own_pcpu when set vm_arch.vm_mwait_cap.

v1->v2:
1.add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 +++++++++++++++
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++
hypervisor/include/public/acrn_common.h | 1 +
6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..3fd4befcb 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -538,6 +538,13 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
+ case 0x05U:
+ if(!vm->arch_vm.vm_mwait_cap) {
+ init_vcpuid_entry(i, 0U, CPUID_CHECK_SUBLEAF, &entry);
+ entry.ecx &= ~(CPUID_ECX_MWAIT | CPUID_ECX_MWAIT_INT);
+ result = set_vcpuid_entry(vm, &entry);
+ }
+ break;
case 0x07U:
init_vcpuid_entry(i, 0U, CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@@ -547,6 +554,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
+ if(!vm->arch_vm.vm_mwait_cap) {
+ entry.ecx &= ~CPUID_ECX_WAITPKG;
+ }

#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@@ -635,6 +645,11 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_VMX;
}

+ /* mask MONINTOR/MWAIT for shared cpu */
+ if (!vcpu->vm->arch_vm.vm_mwait_cap) {
+ *ecx &= ~CPUID_ECX_MONITOR;
+ }
+
/* set Hypervisor Present Bit */
*ecx |= CPUID_ECX_HV;

diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c
index bdd31d3fa..7747e8ccd 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = (has_monitor_cap() && (vm_config->guest_flags & GUEST_FLAG_OWN_PCPU) != 0U);
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..2dee7285c 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
{
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,9 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = vcpu->vm->arch_vm.vm_mwait_cap ? msr_read(MSR_IA32_MISC_ENABLE) :
+ (msr_read(MSR_IA32_MISC_ENABLE) & (~MSR_IA32_MISC_ENABLE_MONITOR_ENA));
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U);
@@ -311,6 +314,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32);
@@ -333,6 +343,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}

+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
+ }
+
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS;
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..7adb17dec 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
@@ -151,11 +152,11 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = {
@@ -287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu)
+{
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_ud(vcpu);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu)
{
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx ",
diff --git a/hypervisor/include/arch/x86/asm/cpuid.h b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..95e63f034 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@@ -88,6 +90,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
+/* CPUID.07H:ECX.WAITPKG */
+#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */
diff --git a/hypervisor/include/public/acrn_common.h b/hypervisor/include/public/acrn_common.h
index fee71a655..b10e88ff7 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -62,6 +62,7 @@
#define GUEST_FLAG_TEE (1UL << 9U) /* Whether the VM is TEE VM */
#define GUEST_FLAG_REE (1UL << 10U) /* Whether the VM is REE VM */
#define GUEST_FLAG_PMU_PASSTHROUGH (1UL << 11U) /* Whether PMU is passed through */
+#define GUEST_FLAG_OWN_PCPU (1UL << 12U) /* Whether the VM own pcpu assigned */


/* TODO: We may need to get this addr from guest ACPI instead of hardcode here */
--
2.25.1


Eddie Dong
 

-----Original Message-----
From: Yuanyuan Zhao <yuanyuan.zhao@...>
Sent: Wednesday, September 7, 2022 1:19 AM
To: Dong, Eddie <eddie.dong@...>; Mao, Junjie
<junjie.mao@...>; Li, Fei1 <fei1.li@...>; acrn-
dev@...
Cc: yuanyuan.zhao@...
Subject: [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State by a vCPU runs on it, pPCU may can't wake on
time when other vCPUs use it. So add a new guest_flag
W/ 2nd thinking, why a shared PCPU cannot be waken up? I thought it is still ok.

'GUEST_FLAG_OWN_PCPU' which used to tell hypervisor if a VM exclusively
owns the physical CPUs. Mwait will always not be supported on a VM does not
own pcpu.This patch does this by:
Restrict the use of MWAIT passthru is fine, even completely remove it.
But we the current solution doesn't support runtime change of the setting, right?

The rest is fine.

1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v3->v4:
1. Set percpu related cpuid -> set_vcpuid_entries.
2. HIDE UWAIT/UMONITOR to shared vcpu.
3. inject #GP -> inject #UD.

v2->v3:
1.mwait_vmexit_handler -> mwait_monitor_vmexit_handler.
2.own_pcpu -> guest_flag(GUEST_FLAG_OWN_PCPU) 3.Check own_pcpu
when set vm_arch.vm_mwait_cap.

v1->v2:
1.add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 +++++++++++++++
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++
hypervisor/include/public/acrn_common.h | 1 +
6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c
b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..3fd4befcb 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -538,6 +538,13 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
+ case 0x05U:
+ if(!vm->arch_vm.vm_mwait_cap) {
+ init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
+ entry.ecx &= ~(CPUID_ECX_MWAIT
| CPUID_ECX_MWAIT_INT);
+ result = set_vcpuid_entry(vm,
&entry);
+ }
+ break;
case 0x07U:
init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@@ -547,6 +554,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
+ if(!vm->arch_vm.vm_mwait_cap) {
+ entry.ecx &=
~CPUID_ECX_WAITPKG;
+ }

#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@@ -635,6 +645,11 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu,
uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_VMX;
}

+ /* mask MONINTOR/MWAIT for shared cpu */
+ if (!vcpu->vm->arch_vm.vm_mwait_cap) {
+ *ecx &= ~CPUID_ECX_MONITOR;
+ }
+
/* set Hypervisor Present Bit */
*ecx |= CPUID_ECX_HV;

diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index bdd31d3fa..7747e8ccd 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t
pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = (has_monitor_cap() &&
+(vm_config->guest_flags & GUEST_FLAG_OWN_PCPU) != 0U);
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c
b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..2dee7285c 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu-
arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,9 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = vcpu->vm->arch_vm.vm_mwait_cap ?
msr_read(MSR_IA32_MISC_ENABLE) :
+ (msr_read(MSR_IA32_MISC_ENABLE) &
(~MSR_IA32_MISC_ENABLE_MONITOR_ENA));
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -311,6
+314,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT |
VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); @@ -
333,6 +343,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}

+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
+ }
+
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS; diff --git
a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..7adb17dec 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu
*vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu); static
int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t
init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */ static const struct
vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@ -151,11
+152,11 @@ static const struct vm_exit_dispatch
dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -
287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu) {
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_ud(vcpu);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx
", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h
b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..95e63f034 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@@ -88,6 +90,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
+/* CPUID.07H:ECX.WAITPKG */
+#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */
diff --git a/hypervisor/include/public/acrn_common.h
b/hypervisor/include/public/acrn_common.h
index fee71a655..b10e88ff7 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -62,6 +62,7 @@
#define GUEST_FLAG_TEE (1UL << 9U) /*
Whether the VM is TEE VM */
#define GUEST_FLAG_REE (1UL << 10U) /*
Whether the VM is REE VM */
#define GUEST_FLAG_PMU_PASSTHROUGH (1UL << 11U) /* Whether
PMU is passed through */
+#define GUEST_FLAG_OWN_PCPU (1UL << 12U) /* Whether the VM
own pcpu assigned */


/* TODO: We may need to get this addr from guest ACPI instead of hardcode
here */
--
2.25.1


Zhao, Yuanyuan
 

On 2022/9/9 0:12, Dong, Eddie wrote:

-----Original Message-----
From: Yuanyuan Zhao <yuanyuan.zhao@...>
Sent: Wednesday, September 7, 2022 1:19 AM
To: Dong, Eddie <eddie.dong@...>; Mao, Junjie
<junjie.mao@...>; Li, Fei1 <fei1.li@...>; acrn-
dev@...
Cc: yuanyuan.zhao@...
Subject: [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State by a vCPU runs on it, pPCU may can't wake on
time when other vCPUs use it. So add a new guest_flag
W/ 2nd thinking, why a shared PCPU cannot be waken up? I thought it is still ok.
I think this patch is for the correctness, because MONITOR/MWAIT will not releay enter C-state in VM.

And MONITOR/MWAIT is also used to 'play_dead', then 100% of vcpu will
be used by sys.

So should this patch disable MONITOR/MWAIT to all VM?



'GUEST_FLAG_OWN_PCPU' which used to tell hypervisor if a VM exclusively
owns the physical CPUs. Mwait will always not be supported on a VM does not
own pcpu.This patch does this by:
Restrict the use of MWAIT passthru is fine, even completely remove it.
But we the current solution doesn't support runtime change of the setting, right?
Yes, runtime change is not supported.
The rest is fine.

1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v3->v4:
1. Set percpu related cpuid -> set_vcpuid_entries.
2. HIDE UWAIT/UMONITOR to shared vcpu.
3. inject #GP -> inject #UD.

v2->v3:
1.mwait_vmexit_handler -> mwait_monitor_vmexit_handler.
2.own_pcpu -> guest_flag(GUEST_FLAG_OWN_PCPU) 3.Check own_pcpu
when set vm_arch.vm_mwait_cap.

v1->v2:
1.add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 +++++++++++++++
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++
hypervisor/include/public/acrn_common.h | 1 +
6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c
b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..3fd4befcb 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -538,6 +538,13 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
+ case 0x05U:
+ if(!vm->arch_vm.vm_mwait_cap) {
+ init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
+ entry.ecx &= ~(CPUID_ECX_MWAIT
| CPUID_ECX_MWAIT_INT);
+ result = set_vcpuid_entry(vm,
&entry);
+ }
+ break;
case 0x07U:
init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@@ -547,6 +554,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
+ if(!vm->arch_vm.vm_mwait_cap) {
+ entry.ecx &=
~CPUID_ECX_WAITPKG;
+ }

#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@@ -635,6 +645,11 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu,
uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_VMX;
}

+ /* mask MONINTOR/MWAIT for shared cpu */
+ if (!vcpu->vm->arch_vm.vm_mwait_cap) {
+ *ecx &= ~CPUID_ECX_MONITOR;
+ }
+
/* set Hypervisor Present Bit */
*ecx |= CPUID_ECX_HV;

diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index bdd31d3fa..7747e8ccd 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t
pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = (has_monitor_cap() &&
+(vm_config->guest_flags & GUEST_FLAG_OWN_PCPU) != 0U);
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c
b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..2dee7285c 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu-
arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,9 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = vcpu->vm->arch_vm.vm_mwait_cap ?
msr_read(MSR_IA32_MISC_ENABLE) :
+ (msr_read(MSR_IA32_MISC_ENABLE) &
(~MSR_IA32_MISC_ENABLE_MONITOR_ENA));
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -311,6
+314,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT |
VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); @@ -
333,6 +343,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}

+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
+ }
+
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS; diff --git
a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..7adb17dec 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu
*vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu); static
int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t
init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */ static const struct
vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@ -151,11
+152,11 @@ static const struct vm_exit_dispatch
dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -
287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu) {
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_ud(vcpu);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx
", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h
b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..95e63f034 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@@ -88,6 +90,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
+/* CPUID.07H:ECX.WAITPKG */
+#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */
diff --git a/hypervisor/include/public/acrn_common.h
b/hypervisor/include/public/acrn_common.h
index fee71a655..b10e88ff7 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -62,6 +62,7 @@
#define GUEST_FLAG_TEE (1UL << 9U) /*
Whether the VM is TEE VM */
#define GUEST_FLAG_REE (1UL << 10U) /*
Whether the VM is REE VM */
#define GUEST_FLAG_PMU_PASSTHROUGH (1UL << 11U) /* Whether
PMU is passed through */
+#define GUEST_FLAG_OWN_PCPU (1UL << 12U) /* Whether the VM
own pcpu assigned */


/* TODO: We may need to get this addr from guest ACPI instead of hardcode
here */
--
2.25.1


Zhao, Yuanyuan
 

On 2022/9/9 0:12, Eddie Dong wrote:

-----Original Message-----
From: Yuanyuan Zhao <yuanyuan.zhao@...>
Sent: Wednesday, September 7, 2022 1:19 AM
To: Dong, Eddie <eddie.dong@...>; Mao, Junjie
<junjie.mao@...>; Li, Fei1 <fei1.li@...>; acrn-
dev@...
Cc: yuanyuan.zhao@...
Subject: [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State by a vCPU runs on it, pPCU may can't wake on
time when other vCPUs use it. So add a new guest_flag
W/ 2nd thinking, why a shared PCPU cannot be waken up? I thought it is still ok.
Yes, I tried on tgl that pCPU can be waken up by hypervisor schedule timer.

But in SDM, it said that:
"In addition, an external interrupt causes the processor to exit the implementation-dependent-optimized state
either (1) if the interrupt would be delivered to software (e.g., as it would be if HLT had been executed instead of
MWAIT); or (2) if ECX[0] = 1. Software can execute MWAIT with ECX[0] = 1 only if CPUID.05H:ECX[bit 1] = 1.
(Implementation-specific conditions may result in an interrupt causing the processor to exit the implementationdependent-optimized state even if interrupts are masked and ECX[0] = 0.)"
And in SDM 25.3:
"If the “MWAIT exiting” VM-execution control is 0, MWAIT operates normally if one of the following are true:
(1) ECX[0] is 0; (2) RFLAGS.IF = 1;"

Whether MWAIT be blocked when 'ECX[0] = 0' is implementation specific, so i think it's better to hide the MWAIT and trap instruction.
That can prevent malicious attacks by block pCPU that may happen on some platforms.

'GUEST_FLAG_OWN_PCPU' which used to tell hypervisor if a VM exclusively
owns the physical CPUs. Mwait will always not be supported on a VM does not
own pcpu.This patch does this by:
Restrict the use of MWAIT passthru is fine, even completely remove it.
But we the current solution doesn't support runtime change of the setting, right?
Yes, run time change is not supported.

The rest is fine.

1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v3->v4:
1. Set percpu related cpuid -> set_vcpuid_entries.
2. HIDE UWAIT/UMONITOR to shared vcpu.
3. inject #GP -> inject #UD.

v2->v3:
1.mwait_vmexit_handler -> mwait_monitor_vmexit_handler.
2.own_pcpu -> guest_flag(GUEST_FLAG_OWN_PCPU) 3.Check own_pcpu
when set vm_arch.vm_mwait_cap.

v1->v2:
1.add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 +++++++++++++++
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++
hypervisor/include/public/acrn_common.h | 1 +
6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c
b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..3fd4befcb 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -538,6 +538,13 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
+ case 0x05U:
+ if(!vm->arch_vm.vm_mwait_cap) {
+ init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
+ entry.ecx &= ~(CPUID_ECX_MWAIT
| CPUID_ECX_MWAIT_INT);
+ result = set_vcpuid_entry(vm,
&entry);
+ }
+ break;
case 0x07U:
init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@@ -547,6 +554,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
+ if(!vm->arch_vm.vm_mwait_cap) {
+ entry.ecx &=
~CPUID_ECX_WAITPKG;
+ }

#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@@ -635,6 +645,11 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu,
uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_VMX;
}

+ /* mask MONINTOR/MWAIT for shared cpu */
+ if (!vcpu->vm->arch_vm.vm_mwait_cap) {
+ *ecx &= ~CPUID_ECX_MONITOR;
+ }
+
/* set Hypervisor Present Bit */
*ecx |= CPUID_ECX_HV;

diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index bdd31d3fa..7747e8ccd 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t
pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = (has_monitor_cap() &&
+(vm_config->guest_flags & GUEST_FLAG_OWN_PCPU) != 0U);
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c
b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..2dee7285c 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu-
arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,9 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = vcpu->vm->arch_vm.vm_mwait_cap ?
msr_read(MSR_IA32_MISC_ENABLE) :
+ (msr_read(MSR_IA32_MISC_ENABLE) &
(~MSR_IA32_MISC_ENABLE_MONITOR_ENA));
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -311,6
+314,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT |
VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); @@ -
333,6 +343,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}

+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
+ }
+
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS; diff --git
a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..7adb17dec 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu
*vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu); static
int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t
init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */ static const struct
vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@ -151,11
+152,11 @@ static const struct vm_exit_dispatch
dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -
287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu) {
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_ud(vcpu);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx
", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h
b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..95e63f034 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@@ -88,6 +90,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
+/* CPUID.07H:ECX.WAITPKG */
+#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */
diff --git a/hypervisor/include/public/acrn_common.h
b/hypervisor/include/public/acrn_common.h
index fee71a655..b10e88ff7 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -62,6 +62,7 @@
#define GUEST_FLAG_TEE (1UL << 9U) /*
Whether the VM is TEE VM */
#define GUEST_FLAG_REE (1UL << 10U) /*
Whether the VM is REE VM */
#define GUEST_FLAG_PMU_PASSTHROUGH (1UL << 11U) /* Whether
PMU is passed through */
+#define GUEST_FLAG_OWN_PCPU (1UL << 12U) /* Whether the VM
own pcpu assigned */


/* TODO: We may need to get this addr from guest ACPI instead of hardcode
here */
--
2.25.1


Eddie Dong
 

-----Original Message-----
From: Zhao, Yuanyuan <yuanyuan.zhao@...>
Sent: Thursday, September 29, 2022 2:04 AM
To: Dong, Eddie <eddie.dong@...>; acrn-dev@...;
Mao, Junjie <junjie.mao@...>; Li, Fei1 <fei1.li@...>
Subject: Re: [acrn-dev] [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-
state.

On 2022/9/9 0:12, Eddie Dong wrote:


-----Original Message-----
From: Yuanyuan Zhao <yuanyuan.zhao@...>
Sent: Wednesday, September 7, 2022 1:19 AM
To: Dong, Eddie <eddie.dong@...>; Mao, Junjie
<junjie.mao@...>; Li, Fei1 <fei1.li@...>; acrn-
dev@...
Cc: yuanyuan.zhao@...
Subject: [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State by a vCPU runs on it, pPCU may can't
wake on time when other vCPUs use it. So add a new guest_flag
W/ 2nd thinking, why a shared PCPU cannot be waken up? I thought it is still
ok.
Yes, I tried on tgl that pCPU can be waken up by hypervisor schedule timer.

But in SDM, it said that:
"In addition, an external interrupt causes the processor to exit the
implementation-dependent-optimized state either (1) if the interrupt would
be delivered to software (e.g., as it would be if HLT had been executed instead
of MWAIT); or (2) if ECX[0] = 1. Software can execute MWAIT with ECX[0] = 1
only if CPUID.05H:ECX[bit 1] = 1.
(Implementation-specific conditions may result in an interrupt causing the
processor to exit the implementationdependent-optimized state even if
interrupts are masked and ECX[0] = 0.)"
And in SDM 25.3:
"If the “MWAIT exiting” VM-execution control is 0, MWAIT operates normally
if one of the following are true:
(1) ECX[0] is 0; (2) RFLAGS.IF = 1;"

Whether MWAIT be blocked when 'ECX[0] = 0' is implementation specific, so i
NO. This is saying about entering MWAIT state (implementation specific), not about
Leaving MWAIT state.

This is not a problem IMO. Please check with Li Fei ..

think it's better to hide the MWAIT and trap instruction.
That can prevent malicious attacks by block pCPU that may happen on some
platforms.

'GUEST_FLAG_OWN_PCPU' which used to tell hypervisor if a VM
exclusively owns the physical CPUs. Mwait will always not be
supported on a VM does not own pcpu.This patch does this by:
Restrict the use of MWAIT passthru is fine, even completely remove it.
But we the current solution doesn't support runtime change of the setting,
right?

Yes, run time change is not supported.


The rest is fine.

1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v3->v4:
1. Set percpu related cpuid -> set_vcpuid_entries.
2. HIDE UWAIT/UMONITOR to shared vcpu.
3. inject #GP -> inject #UD.

v2->v3:
1.mwait_vmexit_handler -> mwait_monitor_vmexit_handler.
2.own_pcpu -> guest_flag(GUEST_FLAG_OWN_PCPU) 3.Check own_pcpu
when
set vm_arch.vm_mwait_cap.

v1->v2:
1.add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 +++++++++++++++
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++
hypervisor/include/public/acrn_common.h | 1 +
6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c
b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..3fd4befcb 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -538,6 +538,13 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
+ case 0x05U:
+ if(!vm->arch_vm.vm_mwait_cap) {
+ init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
+ entry.ecx &= ~(CPUID_ECX_MWAIT
| CPUID_ECX_MWAIT_INT);
+ result = set_vcpuid_entry(vm,
&entry);
+ }
+ break;
case 0x07U:
init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@@ -547,6 +554,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
+ if(!vm->arch_vm.vm_mwait_cap) {
+ entry.ecx &=
~CPUID_ECX_WAITPKG;
+ }

#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@@ -635,6 +645,11 @@ static void guest_cpuid_01h(struct acrn_vcpu
*vcpu, uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_VMX;
}

+ /* mask MONINTOR/MWAIT for shared cpu */
+ if (!vcpu->vm->arch_vm.vm_mwait_cap) {
+ *ecx &= ~CPUID_ECX_MONITOR;
+ }
+
/* set Hypervisor Present Bit */
*ecx |= CPUID_ECX_HV;

diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index bdd31d3fa..7747e8ccd 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t
pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = (has_monitor_cap() &&
+(vm_config->guest_flags & GUEST_FLAG_OWN_PCPU) != 0U);
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c
b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..2dee7285c 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu-
arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,9 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = vcpu->vm->arch_vm.vm_mwait_cap ?
msr_read(MSR_IA32_MISC_ENABLE) :
+ (msr_read(MSR_IA32_MISC_ENABLE) &
(~MSR_IA32_MISC_ENABLE_MONITOR_ENA));
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -311,6
+314,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT |
VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); @@ -
333,6 +343,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}

+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
+ }
+
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS; diff --git
a/hypervisor/arch/x86/guest/vmexit.c
b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..7adb17dec 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu
*vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu
*vcpu);
+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu
+*vcpu);

/* VM Dispatch table for Exit condition handling */ static const
struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@
-151,11
+152,11 @@ static const struct vm_exit_dispatch
dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -
287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu) {
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_ud(vcpu);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at
0x%016lx ", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h
b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..95e63f034 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@@ -88,6 +90,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
+/* CPUID.07H:ECX.WAITPKG */
+#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */
diff --git a/hypervisor/include/public/acrn_common.h
b/hypervisor/include/public/acrn_common.h
index fee71a655..b10e88ff7 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -62,6 +62,7 @@
#define GUEST_FLAG_TEE (1UL << 9U) /*
Whether the VM is TEE VM */
#define GUEST_FLAG_REE (1UL << 10U) /*
Whether the VM is REE VM */
#define GUEST_FLAG_PMU_PASSTHROUGH (1UL << 11U) /* Whether
PMU is passed through */
+#define GUEST_FLAG_OWN_PCPU (1UL << 12U) /* Whether the VM
own pcpu assigned */


/* TODO: We may need to get this addr from guest ACPI instead of
hardcode here */
--
2.25.1





Li, Fei1
 

On 2022-09-30 at 02:08:25 +0800, Dong, Eddie wrote:


-----Original Message-----
From: Zhao, Yuanyuan <yuanyuan.zhao@...>
Sent: Thursday, September 29, 2022 2:04 AM
To: Dong, Eddie <eddie.dong@...>; acrn-dev@...;
Mao, Junjie <junjie.mao@...>; Li, Fei1 <fei1.li@...>
Subject: Re: [acrn-dev] [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-
state.

On 2022/9/9 0:12, Eddie Dong wrote:


-----Original Message-----
From: Yuanyuan Zhao <yuanyuan.zhao@...>
Sent: Wednesday, September 7, 2022 1:19 AM
To: Dong, Eddie <eddie.dong@...>; Mao, Junjie
<junjie.mao@...>; Li, Fei1 <fei1.li@...>; acrn-
dev@...
Cc: yuanyuan.zhao@...
Subject: [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State by a vCPU runs on it, pPCU may can't
wake on time when other vCPUs use it. So add a new guest_flag
W/ 2nd thinking, why a shared PCPU cannot be waken up? I thought it is still
ok.
Yes, I tried on tgl that pCPU can be waken up by hypervisor schedule timer.

But in SDM, it said that:
"In addition, an external interrupt causes the processor to exit the
implementation-dependent-optimized state either (1) if the interrupt would
be delivered to software (e.g., as it would be if HLT had been executed instead
of MWAIT); or (2) if ECX[0] = 1. Software can execute MWAIT with ECX[0] = 1
only if CPUID.05H:ECX[bit 1] = 1.
(Implementation-specific conditions may result in an interrupt causing the
processor to exit the implementationdependent-optimized state even if
interrupts are masked and ECX[0] = 0.)"
And in SDM 25.3:
"If the “MWAIT exiting” VM-execution control is 0, MWAIT operates normally
if one of the following are true:
(1) ECX[0] is 0; (2) RFLAGS.IF = 1;"

Whether MWAIT be blocked when 'ECX[0] = 0' is implementation specific, so i
NO. This is saying about entering MWAIT state (implementation specific), not about
Leaving MWAIT state.

This is not a problem IMO. Please check with Li Fei ..
MWAIT Extension Register (ECX)
Bits Description
0 Treat interrupts as break events even if masked (e.g., even if EFLAGS.IF=0).
May be set only if CPUID.05H:ECX[bit 1] = 1.

Hi Eddie
IMHO, ECX[0] = 0 means masked interrupts may not be treated as break events of Mwait.
However, this descriptes the native behaviour.

To us, it's OK if the masked interrupts (exterrupt interrupts in root mode) could
cause VM exits even the logical CPU in the states entered using the MWAIT instructions.

I think it's ture. I only see exterrupt interrupts can't cause VM exits if the
logical CPU in shutdown state when the “external-interrupt exiting” VM-execution
control is 1.

Thanks.

think it's better to hide the MWAIT and trap instruction.
That can prevent malicious attacks by block pCPU that may happen on some
platforms.

'GUEST_FLAG_OWN_PCPU' which used to tell hypervisor if a VM
exclusively owns the physical CPUs. Mwait will always not be
supported on a VM does not own pcpu.This patch does this by:
Restrict the use of MWAIT passthru is fine, even completely remove it.
But we the current solution doesn't support runtime change of the setting,
right?

Yes, run time change is not supported.


The rest is fine.

1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v3->v4:
1. Set percpu related cpuid -> set_vcpuid_entries.
2. HIDE UWAIT/UMONITOR to shared vcpu.
3. inject #GP -> inject #UD.

v2->v3:
1.mwait_vmexit_handler -> mwait_monitor_vmexit_handler.
2.own_pcpu -> guest_flag(GUEST_FLAG_OWN_PCPU) 3.Check own_pcpu
when
set vm_arch.vm_mwait_cap.

v1->v2:
1.add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 +++++++++++++++
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++
hypervisor/include/public/acrn_common.h | 1 +
6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c
b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..3fd4befcb 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -538,6 +538,13 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
+ case 0x05U:
+ if(!vm->arch_vm.vm_mwait_cap) {
+ init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
+ entry.ecx &= ~(CPUID_ECX_MWAIT
| CPUID_ECX_MWAIT_INT);
+ result = set_vcpuid_entry(vm,
&entry);
+ }
+ break;
case 0x07U:
init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@@ -547,6 +554,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
+ if(!vm->arch_vm.vm_mwait_cap) {
+ entry.ecx &=
~CPUID_ECX_WAITPKG;
+ }

#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@@ -635,6 +645,11 @@ static void guest_cpuid_01h(struct acrn_vcpu
*vcpu, uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_VMX;
}

+ /* mask MONINTOR/MWAIT for shared cpu */
+ if (!vcpu->vm->arch_vm.vm_mwait_cap) {
+ *ecx &= ~CPUID_ECX_MONITOR;
+ }
+
/* set Hypervisor Present Bit */
*ecx |= CPUID_ECX_HV;

diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index bdd31d3fa..7747e8ccd 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t
pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = (has_monitor_cap() &&
+(vm_config->guest_flags & GUEST_FLAG_OWN_PCPU) != 0U);
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c
b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..2dee7285c 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu-
arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,9 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = vcpu->vm->arch_vm.vm_mwait_cap ?
msr_read(MSR_IA32_MISC_ENABLE) :
+ (msr_read(MSR_IA32_MISC_ENABLE) &
(~MSR_IA32_MISC_ENABLE_MONITOR_ENA));
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -311,6
+314,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT |
VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); @@ -
333,6 +343,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}

+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
+ }
+
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS; diff --git
a/hypervisor/arch/x86/guest/vmexit.c
b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..7adb17dec 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu
*vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu
*vcpu);
+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu
+*vcpu);

/* VM Dispatch table for Exit condition handling */ static const
struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@
-151,11
+152,11 @@ static const struct vm_exit_dispatch
dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -
287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu) {
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_ud(vcpu);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at
0x%016lx ", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h
b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..95e63f034 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@@ -88,6 +90,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
+/* CPUID.07H:ECX.WAITPKG */
+#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */
diff --git a/hypervisor/include/public/acrn_common.h
b/hypervisor/include/public/acrn_common.h
index fee71a655..b10e88ff7 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -62,6 +62,7 @@
#define GUEST_FLAG_TEE (1UL << 9U) /*
Whether the VM is TEE VM */
#define GUEST_FLAG_REE (1UL << 10U) /*
Whether the VM is REE VM */
#define GUEST_FLAG_PMU_PASSTHROUGH (1UL << 11U) /* Whether
PMU is passed through */
+#define GUEST_FLAG_OWN_PCPU (1UL << 12U) /* Whether the VM
own pcpu assigned */


/* TODO: We may need to get this addr from guest ACPI instead of
hardcode here */
--
2.25.1