[PATCH v2 1/2] hv: prevent VMs make pCPU enter C-state.


Zhao, Yuanyuan
 

On 2022/8/26 16:30, Li, Fei1 wrote:
If a shared pCPU enters C-State because of a vCPU runs on it, pPCU may can't wake on time when other vCPUs use it. So hide the mwait from the user.

This patch this by:
1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.
Please refine your commit message.
hv: prevent VMs make pCPU enter C-state.

If a shared pCPU enters C-State because of a vCPU runs on it,
pPCU may can't wake on time when other vCPUs use it. So add a
new flag 'own_pcpu' which used to tell hypervisor is a VM
exclusively owns the physical CPUs. Mwait will always not be
supported on a VM without 'own_pcpu'.This patch do this by:
1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.


v1->v2:
Add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 17 ++++++++++++++++-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 2 ++
hypervisor/include/arch/x86/asm/vm_config.h | 1 +
5 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..6e3cc1cb8 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -492,7 +492,7 @@ static int32_t set_vcpuid_extended_function(struct acrn_vm *vm)
static inline bool is_percpu_related(uint32_t leaf) {
- return ((leaf == 0x1U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
+ return ((leaf == 0x1U) || (leaf == 0x5U) || (leaf == 0xbU) || (leaf ==
+0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) ||
+(leaf == 0x1aU));
}

int32_t set_vcpuid_entries(struct acrn_vm *vm) @@ -699,6 +699,17 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx
}
}

+static void guest_cpuid_05h(__unused struct acrn_vcpu *vcpu, uint32_t
+*eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+
+ cpuid_subleaf(0x05U, *ecx, eax, ebx, ecx, edx);
+ if(!vm_config->own_pcpu) {
+ *ecx &= ~CPUID_ECX_MWAIT;
+ *ecx &= ~CPUID_ECX_MWAIT_INT;
+ }
+}
+
+
static void guest_cpuid_0bh(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
/* Forward host cpu topology to the guest, guest will know the native platform information such as host cpu topology here */ @@ -831,6 +842,10 @@ void guest_cpuid(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t
guest_cpuid_01h(vcpu, eax, ebx, ecx, edx);
break;

+ case 0x05U:
+ guest_cpuid_05h(vcpu, eax, ebx, ecx, edx);
+ break;
+
case 0x0bU:
guest_cpuid_0bh(vcpu, eax, ebx, ecx, edx);
break;
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..8def48a71 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,8 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+ uint64_t value64 = 0UL;
pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +55,11 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = msr_read(MSR_IA32_MISC_ENABLE);
+ if(!vcpu->vm->arch_vm.vm_mwait_cap || !vm_config->own_pcpu) {
I think we'd better to disable vm_mwait_cap in this case and only checking vm_mwait_cap .
I also considered about it.
For vm_mwait_cap is a feature of arch_vm, shall it be affected by user definition?



+ value64 &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA;
+ }
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -261,6 +267,7 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
uint32_t value32;
uint64_t value64;
struct acrn_vm *vm = vcpu->vm;
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);

/* Log messages to show initializing VMX execution controls */
pr_dbg("Initialize execution control "); @@ -311,6 +318,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vm_config->own_pcpu) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..ce8336bad 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@ -151,11 +152,11 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu) {
mwait_monitor_vmexit_handler ?
OK, update next version.
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_gp(vcpu, 0U);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx ", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..0d7be0f23 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
diff --git a/hypervisor/include/arch/x86/asm/vm_config.h b/hypervisor/include/arch/x86/asm/vm_config.h
index ef80701d6..7d0b205ea 100644
--- a/hypervisor/include/arch/x86/asm/vm_config.h
+++ b/hypervisor/include/arch/x86/asm/vm_config.h
@@ -167,6 +167,7 @@ struct acrn_vm_config {
*/
uint32_t vm_prio; /* The priority for VM vCPU scheduling */
uint16_t companion_vm_id; /* The companion VM id for this VM */
+ bool own_pcpu; /* The VM exclusively owns physical CPUs */
What's the difference between CPU pass-thru and exclusively owns physical CPUs ?
Pass-throughed CPUs can't be used by other dynamic VM.
Exclusively owns physical CPUs can be use by other VM, but there may cause risk in some case. By now the risk is MWAIT.

struct acrn_vm_mem_config memory; /* memory configuration of VM */
struct epc_section epc; /* EPC memory configuration of VM */
uint16_t pci_dev_num; /* indicate how many PCI devices in VM */
--
2.25.1


Li, Fei1
 

If a shared pCPU enters C-State because of a vCPU runs on it, pPCU may can't wake on time when other vCPUs use it. So hide the mwait from the user.

This patch this by:
1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.
Please refine your commit message.
v1->v2:
Add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 17 ++++++++++++++++-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 2 ++
hypervisor/include/arch/x86/asm/vm_config.h | 1 +
5 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..6e3cc1cb8 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -492,7 +492,7 @@ static int32_t set_vcpuid_extended_function(struct acrn_vm *vm)
static inline bool is_percpu_related(uint32_t leaf) {
- return ((leaf == 0x1U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
+ return ((leaf == 0x1U) || (leaf == 0x5U) || (leaf == 0xbU) || (leaf ==
+0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) ||
+(leaf == 0x1aU));
}

int32_t set_vcpuid_entries(struct acrn_vm *vm) @@ -699,6 +699,17 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx
}
}

+static void guest_cpuid_05h(__unused struct acrn_vcpu *vcpu, uint32_t
+*eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+
+ cpuid_subleaf(0x05U, *ecx, eax, ebx, ecx, edx);
+ if(!vm_config->own_pcpu) {
+ *ecx &= ~CPUID_ECX_MWAIT;
+ *ecx &= ~CPUID_ECX_MWAIT_INT;
+ }
+}
+
+
static void guest_cpuid_0bh(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) {
/* Forward host cpu topology to the guest, guest will know the native platform information such as host cpu topology here */ @@ -831,6 +842,10 @@ void guest_cpuid(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t
guest_cpuid_01h(vcpu, eax, ebx, ecx, edx);
break;

+ case 0x05U:
+ guest_cpuid_05h(vcpu, eax, ebx, ecx, edx);
+ break;
+
case 0x0bU:
guest_cpuid_0bh(vcpu, eax, ebx, ecx, edx);
break;
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..8def48a71 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,8 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3, {
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+ uint64_t value64 = 0UL;
pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +55,11 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = msr_read(MSR_IA32_MISC_ENABLE);
+ if(!vcpu->vm->arch_vm.vm_mwait_cap || !vm_config->own_pcpu) {
I think we'd better to disable vm_mwait_cap in this case and only checking vm_mwait_cap .
+ value64 &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA;
+ }
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U); @@ -261,6 +267,7 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
uint32_t value32;
uint64_t value64;
struct acrn_vm *vm = vcpu->vm;
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);

/* Log messages to show initializing VMX execution controls */
pr_dbg("Initialize execution control "); @@ -311,6 +318,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vm_config->own_pcpu) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..ce8336bad 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@ -151,11 +152,11 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu) {
mwait_monitor_vmexit_handler ?
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_gp(vcpu, 0U);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) {
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx ", diff --git a/hypervisor/include/arch/x86/asm/cpuid.h b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..0d7be0f23 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
diff --git a/hypervisor/include/arch/x86/asm/vm_config.h b/hypervisor/include/arch/x86/asm/vm_config.h
index ef80701d6..7d0b205ea 100644
--- a/hypervisor/include/arch/x86/asm/vm_config.h
+++ b/hypervisor/include/arch/x86/asm/vm_config.h
@@ -167,6 +167,7 @@ struct acrn_vm_config {
*/
uint32_t vm_prio; /* The priority for VM vCPU scheduling */
uint16_t companion_vm_id; /* The companion VM id for this VM */
+ bool own_pcpu; /* The VM exclusively owns physical CPUs */
What's the difference between CPU pass-thru and exclusively owns physical CPUs ?
struct acrn_vm_mem_config memory; /* memory configuration of VM */
struct epc_section epc; /* EPC memory configuration of VM */
uint16_t pci_dev_num; /* indicate how many PCI devices in VM */
--
2.25.1


Zhao, Yuanyuan
 

If a shared pCPU enters C-State because of a vCPU runs on it,
pPCU may can't wake on time when other vCPUs use it. So hide the
mwait from the user.

This patch this by:
1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v1->v2:
Add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 17 ++++++++++++++++-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 2 ++
hypervisor/include/arch/x86/asm/vm_config.h | 1 +
5 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..6e3cc1cb8 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -492,7 +492,7 @@ static int32_t set_vcpuid_extended_function(struct acrn_vm *vm)

static inline bool is_percpu_related(uint32_t leaf)
{
- return ((leaf == 0x1U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
+ return ((leaf == 0x1U) || (leaf == 0x5U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
}

int32_t set_vcpuid_entries(struct acrn_vm *vm)
@@ -699,6 +699,17 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx
}
}

+static void guest_cpuid_05h(__unused struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+
+ cpuid_subleaf(0x05U, *ecx, eax, ebx, ecx, edx);
+ if(!vm_config->own_pcpu) {
+ *ecx &= ~CPUID_ECX_MWAIT;
+ *ecx &= ~CPUID_ECX_MWAIT_INT;
+ }
+}
+
static void guest_cpuid_0bh(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
/* Forward host cpu topology to the guest, guest will know the native platform information such as host cpu topology here */
@@ -831,6 +842,10 @@ void guest_cpuid(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t
guest_cpuid_01h(vcpu, eax, ebx, ecx, edx);
break;

+ case 0x05U:
+ guest_cpuid_05h(vcpu, eax, ebx, ecx, edx);
+ break;
+
case 0x0bU:
guest_cpuid_0bh(vcpu, eax, ebx, ecx, edx);
break;
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..8def48a71 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,8 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
{
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +55,11 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = msr_read(MSR_IA32_MISC_ENABLE);
+ if(!vcpu->vm->arch_vm.vm_mwait_cap || !vm_config->own_pcpu) {
+ value64 &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA;
+ }
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U);
@@ -261,6 +267,7 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
uint32_t value32;
uint64_t value64;
struct acrn_vm *vm = vcpu->vm;
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);

/* Log messages to show initializing VMX execution controls */
pr_dbg("Initialize execution control ");
@@ -311,6 +318,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vm_config->own_pcpu) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32);
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..ce8336bad 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
@@ -151,11 +152,11 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = {
@@ -287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu)
+{
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_gp(vcpu, 0U);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu)
{
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx ",
diff --git a/hypervisor/include/arch/x86/asm/cpuid.h b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..0d7be0f23 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
diff --git a/hypervisor/include/arch/x86/asm/vm_config.h b/hypervisor/include/arch/x86/asm/vm_config.h
index ef80701d6..7d0b205ea 100644
--- a/hypervisor/include/arch/x86/asm/vm_config.h
+++ b/hypervisor/include/arch/x86/asm/vm_config.h
@@ -167,6 +167,7 @@ struct acrn_vm_config {
*/
uint32_t vm_prio; /* The priority for VM vCPU scheduling */
uint16_t companion_vm_id; /* The companion VM id for this VM */
+ bool own_pcpu; /* The VM exclusively owns physical CPUs */
struct acrn_vm_mem_config memory; /* memory configuration of VM */
struct epc_section epc; /* EPC memory configuration of VM */
uint16_t pci_dev_num; /* indicate how many PCI devices in VM */
--
2.25.1


Zhao, Yuanyuan
 

If a shared pCPU enters C-State because of a vCPU runs on it,
pPCU may can't wake on time when other vCPUs use it. So hide the
mwait from the user.

This patch this by:
1. Clear vCPUID.05H.EXC[bit 0,1] and vCPUID.01H:ECX.[bit 3].
2. Clear MSR_IA32_MISC_ENABLE_MONITOR_ENA.
3. Trap instruction 'MONITOR' and 'MWAIT'.

v1->v2:
Add own_pcpu.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
hypervisor/arch/x86/guest/vcpuid.c | 17 ++++++++++++++++-
hypervisor/arch/x86/guest/vmcs.c | 16 +++++++++++++++-
hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++++--
hypervisor/include/arch/x86/asm/cpuid.h | 2 ++
hypervisor/include/arch/x86/asm/vm_config.h | 1 +
5 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpuid.c b/hypervisor/arch/x86/guest/vcpuid.c
index 580098719..6e3cc1cb8 100644
--- a/hypervisor/arch/x86/guest/vcpuid.c
+++ b/hypervisor/arch/x86/guest/vcpuid.c
@@ -492,7 +492,7 @@ static int32_t set_vcpuid_extended_function(struct acrn_vm *vm)

static inline bool is_percpu_related(uint32_t leaf)
{
- return ((leaf == 0x1U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
+ return ((leaf == 0x1U) || (leaf == 0x5U) || (leaf == 0xbU) || (leaf == 0xdU) || (leaf == 0x19U) || (leaf == 0x80000001U) || (leaf == 0x2U) || (leaf == 0x1aU));
}

int32_t set_vcpuid_entries(struct acrn_vm *vm)
@@ -699,6 +699,17 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx
}
}

+static void guest_cpuid_05h(__unused struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+
+ cpuid_subleaf(0x05U, *ecx, eax, ebx, ecx, edx);
+ if(!vm_config->own_pcpu) {
+ *ecx &= ~CPUID_ECX_MWAIT;
+ *ecx &= ~CPUID_ECX_MWAIT_INT;
+ }
+}
+
static void guest_cpuid_0bh(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
/* Forward host cpu topology to the guest, guest will know the native platform information such as host cpu topology here */
@@ -831,6 +842,10 @@ void guest_cpuid(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t
guest_cpuid_01h(vcpu, eax, ebx, ecx, edx);
break;

+ case 0x05U:
+ guest_cpuid_05h(vcpu, eax, ebx, ecx, edx);
+ break;
+
case 0x0bU:
guest_cpuid_0bh(vcpu, eax, ebx, ecx, edx);
break;
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index 684507d7d..8def48a71 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -25,6 +25,8 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
{
struct guest_cpu_context *ctx = &vcpu->arch.contexts[vcpu->arch.cur_context];
struct ext_context *ectx = &ctx->ext_ctx;
+ struct acrn_vm_config *vm_config = get_vm_config(vcpu->vm->vm_id);
+ uint64_t value64 = 0UL;

pr_dbg("%s,cr0:0x%lx, cr4:0x%lx.", __func__, cr0, cr4);

@@ -53,7 +55,11 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);

/* init guest ia32_misc_enable value for guest read */
- vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE));
+ value64 = msr_read(MSR_IA32_MISC_ENABLE);
+ if(!vcpu->vm->arch_vm.vm_mwait_cap || !vm_config->own_pcpu) {
+ value64 &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA;
+ }
+ vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, value64);

/* fixed values */
exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, 0U);
@@ -261,6 +267,7 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
uint32_t value32;
uint64_t value64;
struct acrn_vm *vm = vcpu->vm;
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);

/* Log messages to show initializing VMX execution controls */
pr_dbg("Initialize execution control ");
@@ -311,6 +318,13 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}

+ /*
+ * Enable VM_EXIT for mwait for VM which may share pcpu.
+ */
+ if(!vm_config->own_pcpu) {
+ value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR;
+ }
+
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32);
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 2888278aa..ce8336bad 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
@@ -151,11 +152,11 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
- .handler = unhandled_vmexit_handler},
+ .handler = mwait_vmexit_handler},
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = {
@@ -287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}

+static int32_t mwait_vmexit_handler(struct acrn_vcpu *vcpu)
+{
+ pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
+ exec_vmread(VMX_GUEST_RIP));
+
+ vcpu_inject_gp(vcpu, 0U);
+
+ return 0;
+}
+
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu)
{
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx ",
diff --git a/hypervisor/include/arch/x86/asm/cpuid.h b/hypervisor/include/arch/x86/asm/cpuid.h
index 491758bab..0d7be0f23 100644
--- a/hypervisor/include/arch/x86/asm/cpuid.h
+++ b/hypervisor/include/arch/x86/asm/cpuid.h
@@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
+#define CPUID_ECX_MWAIT (1U<<0U)
+#define CPUID_ECX_MWAIT_INT (1U<<0U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
diff --git a/hypervisor/include/arch/x86/asm/vm_config.h b/hypervisor/include/arch/x86/asm/vm_config.h
index ef80701d6..7d0b205ea 100644
--- a/hypervisor/include/arch/x86/asm/vm_config.h
+++ b/hypervisor/include/arch/x86/asm/vm_config.h
@@ -167,6 +167,7 @@ struct acrn_vm_config {
*/
uint32_t vm_prio; /* The priority for VM vCPU scheduling */
uint16_t companion_vm_id; /* The companion VM id for this VM */
+ bool own_pcpu; /* The VM exclusively owns physical CPUs */
struct acrn_vm_mem_config memory; /* memory configuration of VM */
struct epc_section epc; /* EPC memory configuration of VM */
uint16_t pci_dev_num; /* indicate how many PCI devices in VM */
--
2.25.1