Re: [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.


Eddie Dong
 

-----Original Message-----
From: Zhao, Yuanyuan <yuanyuan.zhao@...>
Sent: Thursday, September 29, 2022 2:04 AM
To: Dong, Eddie <eddie.dong@...>; acrn-dev@...;
Mao, Junjie <junjie.mao@...>; Li, Fei1 <fei1.li@...>
Subject: Re: [acrn-dev] [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-
state.

On 2022/9/9 0:12, Eddie Dong wrote:


-----Original Message-----
From: Yuanyuan Zhao <yuanyuan.zhao@...>
Sent: Wednesday, September 7, 2022 1:19 AM
To: Dong, Eddie <eddie.dong@...>; Mao, Junjie
<junjie.mao@...>; Li, Fei1 <fei1.li@...>; acrn-
dev@...
Cc: yuanyuan.zhao@...
Subject: [PATCH v4 1/2] hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State by a vCPU runs on it, pPCU may can't
wake on time when other vCPUs use it. So add a new guest_flag
W/ 2nd thinking, why a shared PCPU cannot be waken up? I thought it is still
ok.
Yes, I tried on tgl that pCPU can be waken up by hypervisor schedule timer.

But in SDM, it said that:
"In addition, an external interrupt causes the processor to exit the
implementation-dependent-optimized state either (1) if the interrupt would
be delivered to software (e.g., as it would be if HLT had been executed instead
of MWAIT); or (2) if ECX[0] = 1. Software can execute MWAIT with ECX[0] = 1
only if CPUID.05H:ECX[bit 1] = 1.
(Implementation-specific conditions may result in an interrupt causing the
processor to exit the implementationdependent-optimized state even if
interrupts are masked and ECX[0] = 0.)"
And in SDM 25.3:
"If the “MWAIT exiting” VM-execution control is 0, MWAIT operates normally
if one of the following are true:
(1) ECX[0] is 0; (2) RFLAGS.IF = 1;"

Whether MWAIT be blocked when 'ECX[0] = 0' is implementation specific, so i
NO. This is saying about entering MWAIT state (implementation specific), not about
Leaving MWAIT state.

This is not a problem IMO. Please check with Li Fei ..

think it's better to hide the MWAIT and trap instruction.
That can prevent malicious attacks by block pCPU that may happen on some
platforms.

'GUEST_FLAG_OWN_PCPU' which used to tell hypervisor if a VM
exclusively owns the physical CPUs. Mwait will always not be
supported on a VM does not own pcpu.This patch does this by:
Restrict the use of MWAIT passthru is fine, even completely remove it.
But we the current solution doesn't support runtime change of the setting,
right?

Yes, run time change is not supported.


The rest is fine.

1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v3->v4:
1. Set percpu related cpuid -> set_vcpuid_entries.
2. HIDE UWAIT/UMONITOR to shared vcpu.
3. inject #GP -> inject #UD.

v2->v3:
1.mwait_vmexit_handler -> mwait_monitor_vmexit_handler.
2.own_pcpu -> guest_flag(GUEST_FLAG_OWN_PCPU) 3.Check own_pcpu
when
set vm_arch.vm_mwait_cap.

v1->v2:
1.add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 +++++++++++++++
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++
hypervisor/include/public/acrn_common.h | 1 +
6 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c
b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..3fd4befcb 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -538,6 +538,13 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
+ case 0x05U:
+ if(!vm->arch_vm.vm_mwait_cap) {
+ init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
+ entry.ecx &= ~(CPUID_ECX_MWAIT
| CPUID_ECX_MWAIT_INT);
+ result = set_vcpuid_entry(vm,
&entry);
+ }
+ break;
case 0x07U:
init_vcpuid_entry(i, 0U,
CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@@ -547,6 +554,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
+ if(!vm->arch_vm.vm_mwait_cap) {
+ entry.ecx &=
~CPUID_ECX_WAITPKG;
+ }

#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@@ -635,6 +645,11 @@ static void guest_cpuid_01h(struct acrn_vcpu
*vcpu, uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_VMX;
}

+ /* mask MONINTOR/MWAIT for shared cpu */
+ if (!vcpu->vm->arch_vm.vm_mwait_cap) {
+ *ecx &= ~CPUID_ECX_MONITOR;
+ }
+
/* set Hypervisor Present Bit */
*ecx |= CPUID_ECX_HV;

diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index bdd31d3fa..7747e8ccd 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t
pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = (has_monitor_cap() &&
+(vm_config->guest_flags & GUEST_FLAG_OWN_PCPU) != 0U);
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c
b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..2dee7285c 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu-
arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,9 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = vcpu->vm->arch_vm.vm_mwait_cap ?
msr_read(MSR_IA32_MISC_ENABLE) :
+ (msr_read(MSR_IA32_MISC_ENABLE) &
(~MSR_IA32_MISC_ENABLE_MONITOR_ENA));
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -311,6
+314,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT |
VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); @@ -
333,6 +343,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}

+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
+ }
+
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS; diff --git
a/hypervisor/arch/x86/guest/vmexit.c
b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..7adb17dec 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu
*vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu
*vcpu);
+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu
+*vcpu);

/* VM Dispatch table for Exit condition handling */ static const
struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@
-151,11
+152,11 @@ static const struct vm_exit_dispatch
dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_monitor_vmexit_handler },
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -
287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu) {
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_ud(vcpu);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at
0x%016lx ", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h
b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..95e63f034 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@@ -88,6 +90,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
+/* CPUID.07H:ECX.WAITPKG */
+#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */
diff --git a/hypervisor/include/public/acrn_common.h
b/hypervisor/include/public/acrn_common.h
index fee71a655..b10e88ff7 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -62,6 +62,7 @@
#define GUEST_FLAG_TEE (1UL << 9U) /*
Whether the VM is TEE VM */
#define GUEST_FLAG_REE (1UL << 10U) /*
Whether the VM is REE VM */
#define GUEST_FLAG_PMU_PASSTHROUGH (1UL << 11U) /* Whether
PMU is passed through */
+#define GUEST_FLAG_OWN_PCPU (1UL << 12U) /* Whether the VM
own pcpu assigned */


/* TODO: We may need to get this addr from guest ACPI instead of
hardcode here */
--
2.25.1




Join acrn-dev@lists.projectacrn.org to automatically receive all group messages.