Re: [PATCH V7 1/5] hv: vCAT: initialize vCAT MSRs during vmcs init


Dongsheng Zhang
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Eddie Dong
Sent: Sunday, October 24, 2021 8:00 PM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V7 1/5] hv: vCAT: initialize vCAT MSRs during
vmcs init



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Dongsheng Zhang
Sent: Sunday, October 24, 2021 5:39 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V7 1/5] hv: vCAT: initialize vCAT MSRs
during vmcs init

From: dongshen <dongsheng.x.zhang@...>

Initialize vCBM MSR

Initialize vCLOSID MSR

Add some vCAT functions:
Retrieve max_vcbm and max_pcbm
Check if vCAT is configured or not for the VM
Map vclosid to pclosid
write_vclosid: vCLOSID MSR write handler
write_vcbm: vCBM MSR write handler

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/Makefile | 3 +
hypervisor/arch/x86/guest/vcat.c | 257
+++++++++++++++++++
hypervisor/arch/x86/guest/vm.c | 10 +
hypervisor/arch/x86/guest/vmsr.c | 15 +-
hypervisor/arch/x86/rdt.c | 8 +
hypervisor/include/arch/x86/asm/guest/vcat.h | 16 ++
hypervisor/include/arch/x86/asm/guest/vm.h | 1 +
hypervisor/include/arch/x86/asm/rdt.h | 1 +
8 files changed, 309 insertions(+), 2 deletions(-) create mode
100644 hypervisor/arch/x86/guest/vcat.c create mode 100644
hypervisor/include/arch/x86/asm/guest/vcat.h

diff --git a/hypervisor/Makefile b/hypervisor/Makefile index
50bb2891c..a5c02d115 100644
--- a/hypervisor/Makefile
+++ b/hypervisor/Makefile
@@ -330,6 +330,9 @@ VP_DM_C_SRCS += arch/x86/guest/vmx_io.c
VP_DM_C_SRCS += arch/x86/guest/instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/lock_instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/vm_reset.c
+ifeq ($(CONFIG_VCAT_ENABLED),y)
+VP_DM_C_SRCS += arch/x86/guest/vcat.c
+endif
VP_DM_C_SRCS += common/ptdev.c

# virtual platform trusty
diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
new file mode 100644
index 000000000..4d1e06697
--- /dev/null
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#include <types.h>
+#include <errno.h>
+#include <logmsg.h>
+#include <asm/cpufeatures.h>
+#include <asm/cpuid.h>
+#include <asm/rdt.h>
+#include <asm/lib/bits.h>
+#include <asm/board.h>
+#include <asm/vm_config.h>
+#include <asm/msr.h>
+#include <asm/guest/vcpu.h>
+#include <asm/guest/vm.h>
+#include <asm/guest/vcat.h>
+#include <asm/per_cpu.h>
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l2_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L2)->num_closids > 0U); }
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l3_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L3)->num_closids > 0U); }
+
+/**
+ * @brief Return number of vCLOSIDs of vm
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM */
uint16_t
+vcat_get_num_vclosids(const struct acrn_vm *vm) {
+ uint16_t num_vclosids = 0U;
+
+ if (is_vcat_configured(vm)) {
+ /*
+ * For performance and simplicity, here number of vCLOSIDs
(num_vclosids) is set
+ * equal to the number of pCLOSIDs assigned to this VM
(get_vm_config(vm->vm_id)->num_pclosids).
+ * But technically, we do not have to make such an
assumption. For
example,
+ * Hypervisor could implement CLOSID context switch, then
number
of vCLOSIDs
+ * can be greater than the number of pCLOSIDs assigned. etc.
+ */
+ num_vclosids = get_vm_config(vm->vm_id)->num_pclosids;
+ }
+
+ return num_vclosids;
+}
+
+/**
+ * @brief Map vCLOSID to pCLOSID
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre (get_vm_config(vm->vm_id)->pclosids != NULL) && (vclosid <
+get_vm_config(vm->vm_id)->num_pclosids)
+ */
+static uint16_t vclosid_to_pclosid(const struct acrn_vm *vm, uint16_t
+vclosid) {
+ ASSERT(vclosid < vcat_get_num_vclosids(vm), "vclosid is out of
+range!");
+
+ /*
+ * pclosids points to an array of assigned pCLOSIDs
+ * Use vCLOSID as the index into the pclosids array, returning the
corresponding pCLOSID
+ *
+ * Note that write_vcbm_msr() calls vclosid_to_pclosid() indirectly, in
write_vcbm_msr(),
+ * the is_l2_vcbm_msr()/is_l3_vcbm_msr() calls ensure that vclosid is
always less than
+ * get_vm_config(vm->vm_id)->num_pclosids, so vclosid is always an
array index within bound here
+ */
+ return get_vm_config(vm->vm_id)->pclosids[vclosid];
+}
+
+/**
+ * @brief Return max_pcbm of vm
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */ static
+uint64_t get_max_pcbm(const struct acrn_vm *vm, int res) {
+ /*
+ * max_pcbm/CLOS_MASK is defined in scenario file and is a
contiguous
bitmask starting
+ * at bit position low (the lowest assigned physical cache way) and
ending at position
+ * high (the highest assigned physical cache way, inclusive). As CBM
only
allows
+ * contiguous '1' combinations, so max_pcbm essentially is a bitmask
that selects/covers
+ * all the physical cache ways assigned to the VM.
+ *
+ * For illustrative purpose, here we assume that we have the two
functions
+ * GENMASK() and BIT() defined as follows:
+ * GENMASK(high, low): create a contiguous bitmask starting at bit
position low and
+ * ending at position high, inclusive.
+ * BIT(n): create a bitmask with bit n set.
+ *
+ * max_pcbm, min_pcbm, max_vcbm, min_vcbm and the
relationship
between them
+ * can be expressed as:
+ * max_pcbm = GENMASK(high, low)
+ * min_pcbm = BIT(low)
+ *
+ * max_vcbm = GENMASK(high - low, 0)
+ * min_vcbm = BIT(0)
+ * vcbm_len = bitmap_weight(max_pcbm) = high - low + 1
+ *
+ * pcbm to vcbm (mask off the unwanted bits to prevent erroneous
mask values):
+ * vcbm = (pcbm & max_pcbm) >> low
+ *
+ * vcbm to pcbm:
+ * pcbm = (vcbm & max_vcbm) << low
+ *
+ * max_pcbm will be mapped to max_vcbm
+ * min_pcbm will be mapped to min_vcbm
+ */
+ uint64_t max_pcbm = 0UL;
+
+ if (is_l2_vcat_configured(vm) && (res == RDT_RESOURCE_L2)) {
+ max_pcbm = get_vm_config(vm->vm_id)->max_l2_pcbm;
+ } else if (is_l3_vcat_configured(vm) && (res == RDT_RESOURCE_L3)) {
+ max_pcbm = get_vm_config(vm->vm_id)->max_l3_pcbm;
+ }
+
+ return max_pcbm;
+}
+
/**
 * @brief Return vcbm_len (number of virtual cache ways) of the given VM
 *
 * @pre vm != NULL
 */
uint16_t vcat_get_vcbm_len(const struct acrn_vm *vm, int res)
{
	/* vcbm_len equals the number of bits set in max_pcbm */
	return bitmap_weight(get_max_pcbm(vm, res));
}
+
/**
 * @brief Return max_vcbm of the given VM
 *
 * Returns 0 when the resource is not vCAT-configured for this VM
 * (i.e. when max_pcbm is 0).
 *
 * @pre vm != NULL
 */
static uint64_t vcat_get_max_vcbm(const struct acrn_vm *vm, int res)
{
	uint64_t max_pcbm = get_max_pcbm(vm, res);
	uint64_t max_vcbm = 0UL;

	/*
	 * Guard the max_pcbm == 0 case explicitly: ffs64(0) is expected to
	 * return INVALID_BIT_INDEX (verify against asm/lib/bits.h), and
	 * right-shifting a uint64_t by that amount (>= 64) is undefined
	 * behavior. The original code relied on callers ignoring the result
	 * when max_pcbm is 0; returning 0 directly avoids the UB.
	 */
	if (max_pcbm != 0UL) {
		/* Position low: the first set bit (lowest assigned physical way) */
		uint16_t low = ffs64(max_pcbm);

		/* Right-shift by low so the mask starts at bit 0 */
		max_vcbm = max_pcbm >> low;
	}

	return max_vcbm;
}
+
+/**
+ * @brief vCBM MSR write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */ static
+int32_t write_vcbm(__unused struct acrn_vcpu *vcpu, __unused
uint32_t
+vmsr, __unused uint64_t val, __unused int res) {
+ /* TODO: this is going to be implemented in a subsequent commit,
will
perform the following actions:
+ * write vCBM
+ * vmsr to pmsr and vcbm to pcbm
+ * write pCBM
+ */
+ return -EFAULT;
+}
+
+/**
+ * @brief vCLOSID MSR write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ int32_t
+write_vclosid(struct acrn_vcpu *vcpu, uint64_t val) {
+ int32_t ret = -EACCES;
+
+ if (is_vcat_configured(vcpu->vm)) {
+ uint32_t vclosid = (uint32_t)((val >> 32U) & 0xFFFFFFFFUL);
+
+ /*
+ * Validity check on val:
+ * Bits 31:10 (0xFFFFFC00) of val are reserved and must be
written
with zeros
+ * Bits 63:32: vclosid must be within range
+ */
+ if (((val & 0xFFFFFC00UL) == 0UL) && (vclosid <
(uint32_t)vcat_get_num_vclosids(vcpu->vm))) {
If guest write non-zero to bit 0..9, what should happen?
Don: it will be allowed. I thought about this when preparing the patch and guessed you might challenge me. The RMID bits should be 0 in this case: since the monitoring capability is not exposed, the guest
should not set them to a non-zero value, so we could also check that the RMID bits are all 0s here if we take that into consideration. I can change this to disallow changing the RMID.



+ uint16_t pclosid;
+
+ /* Write the new vCLOSID value */
+ vcpu_set_guest_msr(vcpu, MSR_IA32_PQR_ASSOC,
val);
+
+ pclosid = vclosid_to_pclosid(vcpu->vm,
(uint16_t)vclosid);
+ /*
+ * Write the new pCLOSID value to the guest msr
area
+ *
+ * The prepare_auto_msr_area() function has already
initialized the vcpu->arch.msr_area.
+ * Here we only need to update the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value field,
+ * all other vcpu->arch.msr_area fields remains
unchanged at
runtime.
+ */
+
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
+clos2pqr_msr(pclosid);
+
+ ret = 0;
+ } else {
+ ret = -EINVAL;
+ }
+ }
+
+ return ret;
Why want to return both EACCES & EINVAL?
Don:
My intention:
EACCES: is_vcat_configured()=false, vcat is not enabled for this vm.
EINVAL: is_vcat_configured()=true, but validity checking failed.
But actually the caller function does not differentiate the error code when injecting #GP, see the code in hv_main.c:
/* Dispatch handler */
ret = vmexit_handler(vcpu);
if (ret < 0) {
pr_fatal("dispatch VM exit handler failed for reason"
" %d, ret = %d!", vcpu->arch.exit_reason, ret);
vcpu_inject_gp(vcpu, 0U);
continue;



+}
+
+/**
+ * @brief Initialize vCBM MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static void
+init_vcbms(struct acrn_vcpu *vcpu, int res, uint32_t msr_base) {
+ uint64_t max_vcbm = vcat_get_max_vcbm(vcpu->vm, res);
+
+ if (max_vcbm != 0UL) {
+ uint32_t vmsr;
+ /* num_vcbm_msrs = num_vclosids */
+ uint16_t num_vcbm_msrs = vcat_get_num_vclosids(vcpu-
vm);
+
+ /*
+ * For each vCBM MSR, its initial vCBM is set to max_vcbm,
+ * a bitmask with vcbm_len bits (from 0 to vcbm_len - 1,
inclusive)
+ * set to 1 and all other bits set to 0.
+ *
+ * As CBM only allows contiguous '1' combinations, so
max_vcbm
essentially
+ * is a bitmask that selects all the virtual cache ways assigned
to
the VM.
+ * It covers all the virtual cache ways the guest VM may
access, i.e.
the
+ * superset bitmask.
+ */
+ for (vmsr = msr_base; vmsr < (msr_base + num_vcbm_msrs);
vmsr++) {
+ /* Write vCBM MSR */
+ (void)write_vcbm(vcpu, vmsr, max_vcbm, res);
+ }
+ }
+}
+
+/**
+ * @brief Initialize vCAT MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ void
init_vcat_msrs(struct
+acrn_vcpu *vcpu) {
+ if (is_vcat_configured(vcpu->vm)) {
+ init_vcbms(vcpu, RDT_RESOURCE_L2,
MSR_IA32_L2_MASK_BASE);
+
+ init_vcbms(vcpu, RDT_RESOURCE_L3,
MSR_IA32_L3_MASK_BASE);
+
+ (void)write_vclosid(vcpu, 0U);
+ }
+}
diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index 2181e4a30..013a24857 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -151,6 +151,16 @@ bool is_nvmx_configured(const struct acrn_vm
*vm)
return ((vm_config->guest_flags & GUEST_FLAG_NVMX_ENABLED) !=
0U); }

+/**
+ * @pre vm != NULL && vm_config != NULL && vm->vmid <
CONFIG_MAX_VM_NUM
+*/ bool is_vcat_configured(const struct acrn_vm *vm) {
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
+
+ return ((vm_config->guest_flags & GUEST_FLAG_VCAT_ENABLED) !=
0U); }
+
/**
* @brief VT-d PI posted mode can possibly be used for PTDEVs assigned
* to this VM if platform supports VT-d PI AND lapic passthru is not
configured diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index a6b26196e..f5e611fcb 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -22,6 +22,7 @@
#include <asm/tsc.h>
#include <trace.h>
#include <logmsg.h>
+#include <asm/guest/vcat.h>

#define INTERCEPT_DISABLE (0U)
#define INTERCEPT_READ (1U << 0U)
@@ -338,8 +339,10 @@ static void prepare_auto_msr_area(struct
acrn_vcpu *vcpu)

vcpu_clos = cfg->pclosids[vcpu->vcpu_id%cfg-
num_pclosids];

- /* RDT: only load/restore MSR_IA32_PQR_ASSOC when hv
and
guest have different settings */
- if (vcpu_clos != hv_clos) {
+ /* RDT: only load/restore MSR_IA32_PQR_ASSOC when hv
and
guest have different settings
+ * vCAT: always load/restore MSR_IA32_PQR_ASSOC
+ */
+ if (is_vcat_configured(vcpu->vm) || (vcpu_clos != hv_clos)) {

vcpu-
arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].msr_index =
MSR_IA32_PQR_ASSOC;

vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(vcpu_clos);

vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].msr_index
=
MSR_IA32_PQR_ASSOC; @@ -371,6 +374,14 @@ void
init_emulated_msrs(struct acrn_vcpu *vcpu)
}

vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL, val64);
+
+#ifdef CONFIG_VCAT_ENABLED
+ /*
+ * init_vcat_msrs() will overwrite the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value
+ * set by prepare_auto_msr_area()
+ */
+ init_vcat_msrs(vcpu);
+#endif
Putting inside init_emulated_msrs() or init_msr_emulation() is much better

I remember we already concluded it, not?
Don: you misread it? yes, it is indeed in init_emulated_msrs(), see complete code:
void init_emulated_msrs(struct acrn_vcpu *vcpu)
{
uint64_t val64 = 0UL;

/* MSR_IA32_FEATURE_CONTROL */
if (is_nvmx_configured(vcpu->vm)) {
/* currently support VMX outside SMX only */
val64 |= MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX;
}

val64 |= MSR_IA32_FEATURE_CONTROL_LOCK;
if (is_vsgx_supported(vcpu->vm->vm_id)) {
val64 |= MSR_IA32_FEATURE_CONTROL_SGX_GE;
}

vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL, val64);

#ifdef CONFIG_VCAT_ENABLED
/*
* init_vcat_msrs() will overwrite the vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value
* set by prepare_auto_msr_area()
*/
init_vcat_msrs(vcpu);
#endif
}


}

#ifdef CONFIG_VCAT_ENABLED
diff --git a/hypervisor/arch/x86/rdt.c b/hypervisor/arch/x86/rdt.c index
f6dc960f9..b21fd5209 100644
--- a/hypervisor/arch/x86/rdt.c
+++ b/hypervisor/arch/x86/rdt.c
@@ -60,6 +60,14 @@ static struct rdt_info
res_cap_info[RDT_NUM_RESOURCES] = {
},
};

+/*
+ * @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2 || res ==
+RDT_RESOURCE_MBA */ const struct rdt_info *get_rdt_res_cap_info(int
+res) {
+ return &res_cap_info[res];
+}
+
/*
* @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcat.h
b/hypervisor/include/arch/x86/asm/guest/vcat.h
new file mode 100644
index 000000000..6afd8abc2
--- /dev/null
+++ b/hypervisor/include/arch/x86/asm/guest/vcat.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef VCAT_H_
+#define VCAT_H_
+
+#include <asm/guest/vm.h>
+
+uint16_t vcat_get_vcbm_len(const struct acrn_vm *vm, int res); void
+init_vcat_msrs(struct acrn_vcpu *vcpu);
+
+#endif /* VCAT_H_ */
+
diff --git a/hypervisor/include/arch/x86/asm/guest/vm.h
b/hypervisor/include/arch/x86/asm/guest/vm.h
index 9a6171be0..2cb06c027 100644
--- a/hypervisor/include/arch/x86/asm/guest/vm.h
+++ b/hypervisor/include/arch/x86/asm/guest/vm.h
@@ -256,6 +256,7 @@ void vrtc_init(struct acrn_vm *vm); bool
is_lapic_pt_configured(const struct acrn_vm *vm); bool is_rt_vm(const
struct acrn_vm *vm); bool is_nvmx_configured(const struct acrn_vm *vm);
+bool is_vcat_configured(const struct acrn_vm *vm);
bool is_pi_capable(const struct acrn_vm *vm); bool has_rt_vm(void);
struct acrn_vm *get_highest_severity_vm(bool runtime); diff --git
a/hypervisor/include/arch/x86/asm/rdt.h
b/hypervisor/include/arch/x86/asm/rdt.h
index f6c4448e2..95e149fcb 100644
--- a/hypervisor/include/arch/x86/asm/rdt.h
+++ b/hypervisor/include/arch/x86/asm/rdt.h
@@ -47,5 +47,6 @@ void init_rdt_info(void); void setup_clos(uint16_t
pcpu_id); uint64_t clos2pqr_msr(uint16_t clos); bool
is_platform_rdt_capable(void);
+const struct rdt_info *get_rdt_res_cap_info(int res);

#endif /* RDT_H */
--
2.25.1







Join acrn-dev@lists.projectacrn.org to automatically receive all group messages.