[PATCH] hv: prevent VMs from making a pCPU enter C-state.


Zhao, Yuanyuan
 

If a shared pCPU enters a C-state because of a vCPU running on it,
the pCPU may not wake up in time when other vCPUs need to use it. So
hide MWAIT from the user.

This patch does this by:
1. Clearing vCPUID.05H:ECX[bit 0,1] and vCPUID.01H:ECX[bit 3].
2. Clearing MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trapping the 'MONITOR' and 'MWAIT' instructions.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 15 ++++++++++++++-
hypervisor/arch/x86/guest/vm.c | 2 +-
hypervisor/arch/x86/guest/vmcs.c | 10 ++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 2 ++
4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c b/hypervisor/arch/x86/guest/vcpuid.c
index 01a89228d..548a76b22 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -492,7 +492,7 @@ static int32_t set_vcpuid_extended_function(struct acrn_vm *vm)

static inline bool is_percpu_related(uint32_t leaf)
{
- return ((leaf == 0x1U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
+ return ((leaf == 0x1U) || (leaf == 0x5U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU)); /* 0x5 added: MONITOR/MWAIT leaf, handled by guest_cpuid_05h() */
}

int32_t set_vcpuid_entries(struct acrn_vm *vm)
@@ -699,6 +699,15 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx
}
}

+static void guest_cpuid_05h(__unused struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+ /* Read leaf 05H via cpuid_subleaf() using the sub-leaf passed in *ecx, then clear the MWAIT extension bits in ECX. */
+ uint32_t hidden_bits = CPUID_ECX_MWAIT | CPUID_ECX_MWAIT_INT;
+
+ cpuid_subleaf(0x05U, *ecx, eax, ebx, ecx, edx);
+ *ecx &= ~hidden_bits;
+}
+
static void guest_cpuid_0bh(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
/* Forward host cpu topology to the guest, guest will know the native platform information such as host cpu topology here */
@@ -831,6 +840,10 @@ void guest_cpuid(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t
guest_cpuid_01h(vcpu, eax, ebx, ecx, edx);
break;

+ case 0x05U:
+ guest_cpuid_05h(vcpu, eax, ebx, ecx, edx);
+ break;
+
case 0x0bU:
guest_cpuid_0bh(vcpu, eax, ebx, ecx, edx);
break;
diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c
index aff477abb..6d02a244c 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -704,7 +704,7 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);

vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
- vm->arch_vm.vm_mwait_cap = has_monitor_cap();
+ vm->arch_vm.vm_mwait_cap = false;
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index dcf648ee1..6dda5c926 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,7 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
{
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +54,11 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = msr_read(MSR_IA32_MISC_ENABLE);
+ if(!vcpu->vm->arch_vm.vm_mwait_cap) {
+ value64 &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA;
+ }
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U);
@@ -293,7 +298,8 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 = check_vmx_ctrl(MSR_IA32_VMX_PROCBASED_CTLS,
VMX_PROCBASED_CTLS_TSC_OFF | VMX_PROCBASED_CTLS_TPR_SHADOW |
VMX_PROCBASED_CTLS_IO_BITMAP | VMX_PROCBASED_CTLS_MSR_BITMAP |
- VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_SECONDARY | VMX_PROCBASED_CTLS_TERTIARY);
+ VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_SECONDARY | VMX_PROCBASED_CTLS_TERTIARY |
+ VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR);

/*Disable VM_EXIT for CR3 access*/
value32 &= ~(VMX_PROCBASED_CTLS_CR3_LOAD | VMX_PROCBASED_CTLS_CR3_STORE);
diff --git a/hypervisor/include/arch/x86/asm/cpuid.h b/hypervisor/include/arch/x86/asm/cpuid.h
index 5279310bb..462d28ffe 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U) /* CPUID.05H:ECX[0]: MONITOR/MWAIT extensions enumerated */
+#define CPUID_ECX_MWAIT_INT (1U<<1U) /* CPUID.05H:ECX[1]: interrupts as MWAIT break-events even when disabled */
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
--
2.25.1

Join acrn-dev@lists.projectacrn.org to automatically receive all group messages.