Re: [PATCH] hv: prevent VMs make pCPU enter C-state.


Zhao, Yuanyuan
 

On 2022/7/22 4:17, Dong, Eddie wrote:
I thought what you did is to remove the MONITOR cap from guest, and make sure VM-exit to MONITOR execution of guest.
This is fine. Please make the commit message clear.
Ok, i will change it to:
"Remove the MONITOR capability from VMs which share pCPU with others. For these VMs, execution of MONITOR and MWAIT will cause a VM-exit."

Also, we need to make it conditionally: for dedicated core case, we do want to let vCPU execute MONITOR and enter deep C state.
OK. I will check if the VM is Prelaunched VM or RTVM before hide it.


-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Zhao, Yuanyuan
Sent: Thursday, July 21, 2022 5:37 AM
To: Mao, Junjie <junjie.mao@...>; Li, Fei1 <fei1.li@...>; acrn-
dev@...
Cc: yuanyuan.zhao@...
Subject: [acrn-dev] [PATCH] hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State because of a vCPU runs on it, pPCU may can't
wake on time when other vCPUs use it. So hide the mwait from the user.

This patch this by:
1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 ++++++++++++++-
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 10 ++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 2 ++
4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c
b/hypervisor/arch/x86/guest/vcpuid.c
index 01a89228d..548a76b22 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -492,7 +492,7 @@ static int32_t set_vcpuid_extended_function(struct
acrn_vm *vm)

static inline bool is_percpu_related(uint32_t leaf) {
- return ((leaf == 0x1U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf ==
0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
+ return ((leaf == 0x1U) || (leaf == 0x5U) || (leaf == 0xbU) || (leaf ==
+0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) ||
+(leaf == 0x1aU));
}

int32_t set_vcpuid_entries(struct acrn_vm *vm) @@ -699,6 +699,15 @@
static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t
*ebx
}
}

+static void guest_cpuid_05h(__unused struct acrn_vcpu *vcpu, uint32_t
+*eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
+
+ cpuid_subleaf(0x05U, *ecx, eax, ebx, ecx, edx);
+
+ *ecx &= ~CPUID_ECX_MWAIT;
+ *ecx &= ~CPUID_ECX_MWAIT_INT;
+}
+
static void guest_cpuid_0bh(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t
*ebx, uint32_t *ecx, uint32_t *edx) {
/* Forward host cpu topology to the guest, guest will know the native
platform information such as host cpu topology here */ @@ -831,6 +840,10
@@ void guest_cpuid(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx,
uint32_t
guest_cpuid_01h(vcpu, eax, ebx, ecx, edx);
break;

+ case 0x05U:
+ guest_cpuid_05h(vcpu, eax, ebx, ecx, edx);
+ break;
+
case 0x0bU:
guest_cpuid_0bh(vcpu, eax, ebx, ecx, edx);
break;
diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c
index aff477abb..6d02a244c 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t
pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = false;
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c
b/hypervisor/arch/x86/guest/vmcs.c
index dcf648ee1..6dda5c926 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu-
arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,11 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu,
uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = msr_read(MSR_IA32_MISC_ENABLE);
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value64 &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA;
+ }
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -293,7
+298,8 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 = check_vmx_ctrl(MSR_IA32_VMX_PROCBASED_CTLS,
VMX_PROCBASED_CTLS_TSC_OFF |
VMX_PROCBASED_CTLS_TPR_SHADOW |
VMX_PROCBASED_CTLS_IO_BITMAP |
VMX_PROCBASED_CTLS_MSR_BITMAP |
- VMX_PROCBASED_CTLS_HLT |
VMX_PROCBASED_CTLS_SECONDARY | VMX_PROCBASED_CTLS_TERTIARY);
+ VMX_PROCBASED_CTLS_HLT |
VMX_PROCBASED_CTLS_SECONDARY | VMX_PROCBASED_CTLS_TERTIARY |
+ VMX_PROCBASED_CTLS_MWAIT |
VMX_PROCBASED_CTLS_MONITOR);

/*Disable VM_EXIT for CR3 access*/
value32 &= ~(VMX_PROCBASED_CTLS_CR3_LOAD |
VMX_PROCBASED_CTLS_CR3_STORE); diff --git
a/hypervisor/include/arch/x86/asm/cpuid.h
b/hypervisor/include/arch/x86/asm/cpuid.h
index 5279310bb..462d28ffe 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
--
2.25.1



Join acrn-dev@lists.projectacrn.org to automatically receive all group messages.