Date   

Re: [PATCH V6 8/8] hv: vCAT: propagate vCBM to other vCPUs that share cache with vcpu

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Thursday, October 21, 2021 1:21 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 8/8] hv: vCAT: propagate vCBM to other
vCPUs that share cache with vcpu



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 6:11 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 8/8] hv: vCAT: propagate vCBM to
other vCPUs that share cache with vcpu



-----Original Message-----
From: acrn-dev@...
<acrn-dev@...> On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:42 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 8/8] hv: vCAT: propagate vCBM to other
vCPUs that share cache with vcpu

From: dongshen <dongsheng.x.zhang@...>

Move the nearest_pow2() and get_cache_shift() functions from
hypercall.c to cpu_caps.c Store L2/L3 cat id shift in struct
cpuinfo_x86

Implement the propagate_vcbm() function:
Set vCBM to to all the vCPUs that share cache with vcpu
to mimic hardware CAT behavior

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/cpu_caps.c | 55
+++++++++++++++++++
hypervisor/arch/x86/guest/vcat.c | 62
+++++++++++++++++++++-
hypervisor/common/hypercall.c | 50 ++---------------
hypervisor/include/arch/x86/asm/cpu_caps.h | 4 ++
4 files changed, 122 insertions(+), 49 deletions(-)

diff --git a/hypervisor/arch/x86/cpu_caps.c
b/hypervisor/arch/x86/cpu_caps.c index 89fd559b1..33ea3aa36 100644
--- a/hypervisor/arch/x86/cpu_caps.c
+++ b/hypervisor/arch/x86/cpu_caps.c
@@ -16,6 +16,7 @@
#include <errno.h>
#include <logmsg.h>
#include <asm/guest/vmcs.h>
+#include <asm/lib/bits.h>

/* TODO: add more capability per requirement */
/* APICv features */
@@ -322,6 +323,58 @@ static uint64_t get_address_mask(uint8_t limit)
return ((1UL << limit) - 1UL) & PAGE_MASK; }

+/*
+ * nearest_pow2(n) is the nearest power of 2 integer that is not
+less than n
+ * The last (most significant) bit set of (n*2-1) matches the above
+definition */ static uint32_t nearest_pow2(uint32_t n) {
+ uint32_t p = n;
+
+ if (n >= 2U) {
+ p = fls32(2U*n - 1U);
+ }
+
+ return p;
+}
+
+static void get_cat_id_shift(uint32_t *l2_cat_id_shift, uint32_t
+*l3_cat_id_shift) {
What does cat id mean?
Or are u defining a new term cache-id-shift?
Cannot understand.

I guess you are saying: cache_id? Cache_type? Cache_level? I am lost
Don: I mean l2/l3 cache id
If cache ID is an official term, why you need to invent a new name?


+ uint32_t subleaf;
+
+ *l2_cat_id_shift = 0U;
+ *l3_cat_id_shift = 0U;
+
+ for (subleaf = 0U;; subleaf++) {
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t cache_type, cache_level, id, shift;
+
+ cpuid_subleaf(0x4U, subleaf, &eax, &ebx, &ecx, &edx);
+
+ cache_type = eax & 0x1fU;
+ cache_level = (eax >> 5U) & 0x7U;
+
+ /* Intel SDM Vol 2, CPUID 04H:
+ * EAX: bits 25 - 14: Maximum number of addressable IDs for
logical
processors sharing this cache.
+ * The nearest power-of-2 integer that is not smaller than (1
+
EAX[25:14]) is the number of unique
+ * initial APIC IDs reserved for addressing different logical
processors sharing this cache
+ */
+ id = (eax >> 14U) & 0xfffU;
+ shift = nearest_pow2(id + 1U);
+
+ /* No more caches */
+ if ((cache_type == 0U) || (cache_type >= 4U)) {
+ break;
+ }
+
+ if (cache_level == 2U) {
+ *l2_cat_id_shift = shift;
+ } else if (cache_level == 3U) {
+ *l3_cat_id_shift = shift;
+ }
+ }
+}
+
void init_pcpu_capabilities(void)
{
uint32_t eax, unused;
@@ -385,6 +438,8 @@ void init_pcpu_capabilities(void)
get_address_mask(boot_cpu_data.phys_bits);
}

+ get_cat_id_shift(&boot_cpu_data.l2_cat_id_shift,
+&boot_cpu_data.l3_cat_id_shift);
+
detect_pcpu_cap();
}

diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
index 1dda6b61f..9e302b25e 100644
--- a/hypervisor/arch/x86/guest/vcat.c
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -410,6 +410,55 @@ static uint32_t vmsr_to_pmsr(const struct
acrn_vm
*vm, uint32_t vmsr, int res)
return pmsr;
}

+static void get_vcat_id_shift(uint32_t *l2_shift, uint32_t *l3_shift) {
+ struct cpuinfo_x86 *cpu_info = get_pcpu_info();
+
+ /* Assume that virtual cat id shift is equal to physical cat id
+shift for now
*/
+ *l2_shift = cpu_info->l2_cat_id_shift;
+ *l3_shift = cpu_info->l3_cat_id_shift; }
+
+/**
+ * @brief Propagate vCBM to other vCPUs that share cache with vcpu
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static void
+propagate_vcbm(struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t val) {
+ uint16_t i;
+ struct acrn_vcpu *tmp_vcpu;
+ uint32_t l2_shift, l3_shift, l2_id, l3_id;
+ struct acrn_vm *vm = vcpu->vm;
+ uint32_t apicid = vlapic_get_apicid(vcpu_vlapic(vcpu));
+
+ get_vcat_id_shift(&l2_shift, &l3_shift);


+ /* Intel SDM Vol 2, CPUID 04H:
+ * EAX: bits 25 - 14: Maximum number of addressable IDs for
+logical
processors sharing this cache.
+ *
+ * l2_shift/l3_shift: the nearest power-of-2 integer that is not
+smaller
than (1 + EAX[25:14])
+ * is the number of unique initial APIC IDs reserved for
+addressing
different logical processors
+ * sharing this cache
+ */
+ l2_id = apicid >> l2_shift;
+ l3_id = apicid >> l3_shift;
+
+ /*
+ * Determine which logical processors share an MSR (for instance
local
+ * to a core, or shared across multiple cores) by checking if they
+have the
same
+ * L2/L3 cache id
+ */
+ foreach_vcpu(i, vm, tmp_vcpu) {
+ uint32_t tmp_apicid =
vlapic_get_apicid(vcpu_vlapic(tmp_vcpu));
+ uint32_t tmp_l2_id = tmp_apicid >> l2_shift;
+ uint32_t tmp_l3_id = tmp_apicid >> l3_shift;
+
This patch is becoming worse than previous version.

We want to propagate among VCPUs with same cache ID. All what we
compare is among virtual cache ID.
However this code is comparing between P and V.
Don: this is following our email discussion, it is compare V to V:
Maybe we should add one API to get virtual cache shift in vcat.c, like
get_vcache_shift, you can make it call get_cache_shift directly.
And get_cache_shift should be one low level API, maybe cpu_caps.c should be
a good place?

get_vcat_id_shift(): returns the virtual cache shift (internally we assume
virtual cache shift is equal to physical cache shift), so it is just a wrapper.
Rollback to previous version, and carefully fix the comment of previous version.






+ if ((is_l2_vcbm_msr(vm, vmsr) && (l2_id == tmp_l2_id))
+ || (is_l3_vcbm_msr(vm, vmsr) && (l3_id ==
tmp_l3_id))) {
+ vcpu_set_guest_msr(tmp_vcpu, vmsr, val);
+ }
+ }
+}
+
static void write_pcbm(uint32_t pmsr, uint64_t pcbm) {
/* Preserve reserved bits, and only set the pCBM bits */ @@ -461,8
+510,17 @@ static int32_t write_vcbm(struct acrn_vcpu *vcpu,
+uint32_t
vmsr, uint64_t val, i
uint32_t pmsr;
uint64_t pcbm;

- /* Write vCBM first */
- vcpu_set_guest_msr(vcpu, vmsr, masked_vcbm | (val &
0xFFFFFFFF00000000UL));
+ /*
+ * Write vCBM first:
+ * The L2 mask MSRs are scoped at the same level as the L2
cache
(similarly,
+ * the L3 mask MSRs are scoped at the same level as the L3
cache).
+ *
+ * For example, the MSR_IA32_L3_MASK_n MSRs are scoped
at
socket level, which means if
+ * we program MSR_IA32_L3_MASK_n on one cpu and the
same
MSR_IA32_L3_MASK_n on all other cpus
+ * of the same socket will also get the change!
+ * Set vcbm to all the vCPUs that share cache with vcpu to
mimic
this hardware behavior.
+ */
+ propagate_vcbm(vcpu, vmsr, masked_vcbm | (val &
+0xFFFFFFFF00000000UL));

/* Write pCBM: */
pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res); diff --git
a/hypervisor/common/hypercall.c b/hypervisor/common/hypercall.c
index
fb19fe4a0..3f5134d45 100644
--- a/hypervisor/common/hypercall.c
+++ b/hypervisor/common/hypercall.c
@@ -133,52 +133,6 @@ int32_t hcall_get_api_version(struct acrn_vcpu
*vcpu, __unused struct acrn_vm *t
return copy_to_gpa(vcpu->vm, &version, param1, sizeof(version));
}

-/*
- * nearest_pow2(n) is the nearest power of 2 integer that is not
less than n
- * The last (most significant) bit set of (n*2-1) matches the above
definition
- */
-static uint32_t nearest_pow2(uint32_t n) -{
- uint32_t p = n;
-
- if (n >= 2U) {
- p = fls32(2U*n - 1U);
- }
-
- return p;
-}
-
-static void get_cache_shift(uint32_t *l2_shift, uint32_t *l3_shift) -{
- uint32_t subleaf;
-
- *l2_shift = 0U;
- *l3_shift = 0U;
-
- for (subleaf = 0U;; subleaf++) {
- uint32_t eax, ebx, ecx, edx;
- uint32_t cache_type, cache_level, id, shift;
-
- cpuid_subleaf(0x4U, subleaf, &eax, &ebx, &ecx, &edx);
-
- cache_type = eax & 0x1fU;
- cache_level = (eax >> 5U) & 0x7U;
- id = (eax >> 14U) & 0xfffU;
- shift = nearest_pow2(id + 1U);
-
- /* No more caches */
- if ((cache_type == 0U) || (cache_type >= 4U)) {
- break;
- }
-
- if (cache_level == 2U) {
- *l2_shift = shift;
- } else if (cache_level == 3U) {
- *l3_shift = shift;
- }
- }
-}
-
/**
* @brief Get basic platform information.
*
@@ -204,8 +158,10 @@ int32_t hcall_get_platform_info(struct
acrn_vcpu *vcpu, __unused struct acrn_vm
if (ret == 0) {
uint16_t i;
uint16_t pcpu_nums = get_pcpu_nums();
+ struct cpuinfo_x86 *cpu_info = get_pcpu_info();

- get_cache_shift(&pi.hw.l2_cat_shift, &pi.hw.l3_cat_shift);
+ pi.hw.l2_cat_shift = cpu_info->l2_cat_id_shift;
+ pi.hw.l3_cat_shift = cpu_info->l3_cat_id_shift;

for (i = 0U; i < min(pcpu_nums,
ACRN_PLATFORM_LAPIC_IDS_MAX);
i++) {
pi.hw.lapic_ids[i] = per_cpu(lapic_id, i); diff --git
a/hypervisor/include/arch/x86/asm/cpu_caps.h
b/hypervisor/include/arch/x86/asm/cpu_caps.h
index 4f679f209..c22e306a0 100644
--- a/hypervisor/include/arch/x86/asm/cpu_caps.h
+++ b/hypervisor/include/arch/x86/asm/cpu_caps.h
@@ -43,6 +43,10 @@ struct cpuinfo_x86 {
uint64_t physical_address_mask;
uint32_t cpuid_leaves[FEATURE_WORDS];
char model_name[64];
+ /* Right-shift count that will allow software to extract part of
+APIC ID to
distinguish L2 CAT ID */
+ uint32_t l2_cat_id_shift;
+ /* Right-shift count that will allow software to extract part of
+APIC ID to
distinguish L3 CAT ID */
+ uint32_t l3_cat_id_shift;
};

bool has_monitor_cap(void);
--
2.25.1











Re: [PATCH V6 7/8] hv: vCAT: implementing the vCAT MSRs write handler

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Thursday, October 21, 2021 1:13 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 7/8] hv: vCAT: implementing the vCAT
MSRs write handler



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 5:53 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 7/8] hv: vCAT: implementing the vCAT
MSRs write handler



-----Original Message-----
From: acrn-dev@...
<acrn-dev@...> On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:42 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 7/8] hv: vCAT: implementing the vCAT
MSRs write handler

From: dongshen <dongsheng.x.zhang@...>

Implement the write_vcat_msr() function to handle the
MSR_IA32_PQR_ASSOC and MSR_IA32_type_MASK_n vCAT MSRs write
request.

Several vCAT P2V (physical to virtual) and V2P (virtual to physical)
mappings
exist:

struct acrn_vm_config *vm_config = get_vm_config(vm_id)

max_pcbm = vm_config->max_type_pcbm (type: l2 or l3)
mask_shift = ffs64(max_pcbm)

vclosid = vmsr - MSR_IA32_type_MASK_0
pclosid = vm_config->pclosids[vclosid]

pmsr = MSR_IA32_type_MASK_0 + pclosid
pcbm = vcbm << mask_shift
vcbm = pcbm >> mask_shift

Where
MSR_IA32_type_MASK_n: L2 or L3 mask msr address for CLOSIDn,
from
0C90H through 0D8FH (inclusive).

max_pcbm: a bitmask that selects all the physical cache ways
assigned to the VM

vclosid: virtual CLOSID, always starts from 0

pclosid: corresponding physical CLOSID for a given vclosid

vmsr: virtual msr address, passed to vCAT handlers by the
caller functions rdmsr_vmexit_handler()/wrmsr_vmexit_handler()

pmsr: physical msr address

vcbm: virtual CBM, passed to vCAT handlers by the
caller functions rdmsr_vmexit_handler()/wrmsr_vmexit_handler()

pcbm: physical CBM

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/guest/vcat.c | 100
+++++++++++++++++--
hypervisor/arch/x86/guest/vmsr.c | 9 ++
hypervisor/include/arch/x86/asm/guest/vcat.h | 1 +
3 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
index 03e9a3cc9..1dda6b61f 100644
--- a/hypervisor/arch/x86/guest/vcat.c
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -365,6 +365,21 @@ int32_t read_vcat_msr(const struct acrn_vcpu
*vcpu, uint32_t vmsr, uint64_t *rva
return ret;
}

+/**
+ * @brief Map vCBM to pCBM
+ *
+ * @pre vm != NULL
+ */
+static uint64_t vcbm_to_pcbm(const struct acrn_vm *vm, uint64_t
+vcbm, int res) {
+ uint64_t max_pcbm = get_max_pcbm(vm, res);
+
+ /* Find the position low (the first bit set) in max_pcbm */
+ uint16_t low = ffs64(max_pcbm);
+
+ return vcbm << low;
+}
+
/**
* @brief Map vMSR address (abbreviated as vmsr) to corresponding
pMSR address (abbreviated as pmsr)
* Each vMSR or pMSR is identified by a 32-bit integer @@ -395,19
+410,70 @@ static uint32_t vmsr_to_pmsr(const struct acrn_vm *vm,
uint32_t vmsr, int res)
return pmsr;
}

+static void write_pcbm(uint32_t pmsr, uint64_t pcbm) {
+ /* Preserve reserved bits, and only set the pCBM bits */
+ uint64_t pmsr_value = (msr_read(pmsr) & 0xFFFFFFFF00000000UL)
|
pcbm;
+
+ msr_write(pmsr, pmsr_value);
+}
+
+/* Check if bitmask is contiguous:
+ * All (and only) contiguous '1' combinations are allowed (e.g.
+FFFFH, 0FF0H, 003CH, etc.) */ static bool
+is_contiguous_bit_set(uint64_t
+bitmask) {
+ bool ret = false;
+ uint64_t tmp64 = bitmask;
+
+ if (tmp64 != 0UL) {
+ while ((tmp64 & 1UL) == 0UL) {
+ tmp64 >>= 1U;
+ }
+
+ while ((tmp64 & 1UL) != 0UL) {
+ tmp64 >>= 1U;
+ }
+
+ if (tmp64 == 0UL) {
+ ret = true;
+ }
+ }
That is too ugly.

We can use bit-scan instruction to find the highest set-bit (say
high), and the lowest set bit (say low) Then the contiguous value must
be: (1<<(high+1)) --
(1<<low)
Don: no, I do not think your algo is right, we need to check if the 1s are
contiguous.
Did you try it?

Return ( tmp64 == ((1<<(high+1)) - (1 << low));


Can you rethink on this?


+
+ return ret;
+}
+
/**
* @brief vCBM MSR write handler
*
* @pre vcpu != NULL && vcpu->vm != NULL
*/
-static int32_t write_vcbm(__unused struct acrn_vcpu *vcpu, __unused
uint32_t vmsr, __unused uint64_t val, __unused int res)
+static int32_t write_vcbm(struct acrn_vcpu *vcpu, uint32_t vmsr,
+uint64_t val, int res)
{
- /* TODO: this is going to be implemented in a subsequent commit,
will
perform the following actions:
- * write vCBM
- * vmsr_to_pmsr and vcbm_to_pcbm
- * write pCBM
+ int32_t ret = -EINVAL;
+ /*
+ * vcbm set bits should only be in the range of [0, vcbm_len)
(vcat_get_max_vcbm),
+ * so mask with vcat_get_max_vcbm to prevent erroneous vCBM
value
*/
- return -EFAULT;
+ uint64_t masked_vcbm = val & vcat_get_max_vcbm(vcpu->vm, res);
If SW sets bits beyond hardware supported value, I thought we should
inject #GP.
Not?
Don: yes, just return non-zero value (= -EINVAL) here if not contiguous to
upper caller, then eventually in hv_main.c, it will inject a GP to guest after
vmext_handler(), see below:
ret = vmexit_handler(vcpu);
if (ret < 0) {
pr_fatal("dispatch VM exit handler failed for reason"
" %d, ret = %d!", vcpu->arch.exit_reason, ret);
vcpu_inject_gp(vcpu, 0U);
continue;
}
I am saying you missed the check of case when the set-value is beyond the allowed scope, but you are talking is_contiguous_bit_set.

That is 2 different thing and we need to make sure both conditions are met.


+
+ if (is_contiguous_bit_set(masked_vcbm)) {
+ uint32_t pmsr;
+ uint64_t pcbm;
+
+ /* Write vCBM first */
+ vcpu_set_guest_msr(vcpu, vmsr, masked_vcbm | (val &
+0xFFFFFFFF00000000UL));
+
Didn't understand why we not directly set guest value here.
Don: we can, but if the vcbm is not within range: vcbm set bits should only be
in the range of [0, vcbm_len) (vcat_get_max_vcbm), Then what should we do
That is the scope check in above comments. We must do both.

here, return an error code to upper caller (will #GP), or just use masked off
vcbm (masked_vcbm) and set it to guest? I choose the latter one here
Here we are setting the V value. Please exactly follow what SDM says.
vcpu_set_guest_msr is setting V value, whatever the guest write, set it or inject #GP.



+ /* Write pCBM: */
+ pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res);
+ pcbm = vcbm_to_pcbm(vcpu->vm, masked_vcbm, res);
+ write_pcbm(pmsr, pcbm);
+
+ ret = 0;
+ }
+
+ /* Return non-zero to vmexit_handler if vcbm is not contiguous,
+which
will result in #GP injected to guest */
+ return ret;
}

/**
@@ -444,6 +510,28 @@ static int32_t write_vclosid(struct acrn_vcpu
*vcpu, uint64_t val)
return 0;
}

+/**
+ * @brief vCAT MSRs write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ int32_t
+write_vcat_msr(struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t val) {
+ int32_t ret = -EACCES;
+
+ if (is_vcat_configured(vcpu->vm)) {
+ if (vmsr == MSR_IA32_PQR_ASSOC) {
+ ret = write_vclosid(vcpu, val);
Please move this to caller side to avoid double switch/if-else...
This one happens frequently at runtime.
Don: yes we can, but move this to caller code (vmsr.c) will make the logic hard
and requires more code.
I didn't see any hard part. Only thing we need to do is to expose write_vclosid publically.

I would try to avoid that if I have the choice.


+ } else if (is_l2_vcbm_msr(vcpu->vm, vmsr)) {
+ ret = write_vcbm(vcpu, vmsr, val,
RDT_RESOURCE_L2);
+ } else if (is_l3_vcbm_msr(vcpu->vm, vmsr)) {
+ ret = write_vcbm(vcpu, vmsr, val,
RDT_RESOURCE_L3);
+ }
+ }
+
+ return ret;
+}
+
/**
* @brief Initialize vCBM MSRs
*
diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index 06825d211..412388106 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -1092,6 +1092,15 @@ int32_t wrmsr_vmexit_handler(struct
acrn_vcpu
*vcpu)
}
break;
}
+#ifdef CONFIG_VCAT_ENABLED
+ case MSR_IA32_L2_MASK_BASE ... (MSR_IA32_L2_MASK_BASE +
NUM_VCAT_L2_MSRS - 1U):
+ case MSR_IA32_L3_MASK_BASE ... (MSR_IA32_L3_MASK_BASE +
NUM_VCAT_L3_MSRS - 1U):
+ case MSR_IA32_PQR_ASSOC:
+ {
+ err = write_vcat_msr(vcpu, msr, v);
+ break;
+ }
+#endif
default:
{
if (is_x2apic_msr(msr)) {
diff --git a/hypervisor/include/arch/x86/asm/guest/vcat.h
b/hypervisor/include/arch/x86/asm/guest/vcat.h
index 16ae0802b..8effa7604 100644
--- a/hypervisor/include/arch/x86/asm/guest/vcat.h
+++ b/hypervisor/include/arch/x86/asm/guest/vcat.h
@@ -16,6 +16,7 @@ void init_vcat_msrs(struct acrn_vcpu *vcpu);
uint16_t vcat_get_num_vclosids(const struct acrn_vm *vm); uint64_t
vcat_pcbm_to_vcbm(const struct acrn_vm *vm, uint64_t pcbm, int res);
int32_t read_vcat_msr(const struct acrn_vcpu *vcpu, uint32_t vmsr,
uint64_t *rval);
+int32_t write_vcat_msr(struct acrn_vcpu *vcpu, uint32_t vmsr,
+uint64_t val);

#endif /* VCAT_H_ */

--
2.25.1











Re: [PATCH V6 3/8] hv: vCAT: initialize the emulated_guest_msrs array for CAT msrs during platform initialization

Eddie Dong
 

I remember we don't need these 2 MACROs. Not?
Don: we need these above 2 MACROs, otherwise, #define
NUM_GUEST_MSRS(see below) will be too complicated (nested #ifdef),
too
ugly
I didn't catch. NUM_GUEST_MSRS only depends on NUM_VCAT_MSRS. It
never
use above 2 MACROs.
Don: yes, you are right, they can be removed. Only NUM_VCAT_MSRS needs
to be kept.
Be careful to not regression..

As if the above comments are fixed, I am fine.

BTW, I strongly suggest you to merge the acked patched to avoid regression and reduce the review code size.
It take too many times.


Re: [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs during vmcs init

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Thursday, October 21, 2021 1:04 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs during
vmcs init



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 5:25 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs
during vmcs init



-----Original Message-----
From: acrn-dev@...
<acrn-dev@...> On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:41 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs
during vmcs init

From: dongshen <dongsheng.x.zhang@...>

Initialize vCBM MSR

Initialize vCLOSID MSR

Add some vCAT functions:
Retrieve max_vcbm
Check if vCAT is configured or not for the VM Map vclosid to
pclosid
write_vclosid: vCLOSID MSR write handler
write_vcbm: vCBM MSR write handler

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/Makefile | 3 +
hypervisor/arch/x86/guest/vcat.c | 408
+++++++++++++++++++
hypervisor/arch/x86/guest/vm.c | 10 +
hypervisor/arch/x86/guest/vmsr.c | 15 +-
hypervisor/arch/x86/rdt.c | 8 +
hypervisor/include/arch/x86/asm/guest/vcat.h | 18 +
hypervisor/include/arch/x86/asm/guest/vm.h | 1 +
hypervisor/include/arch/x86/asm/rdt.h | 1 +
8 files changed, 462 insertions(+), 2 deletions(-) create mode
100644 hypervisor/arch/x86/guest/vcat.c create mode 100644
hypervisor/include/arch/x86/asm/guest/vcat.h

diff --git a/hypervisor/Makefile b/hypervisor/Makefile index
50bb2891c..a5c02d115 100644
--- a/hypervisor/Makefile
+++ b/hypervisor/Makefile
@@ -330,6 +330,9 @@ VP_DM_C_SRCS += arch/x86/guest/vmx_io.c
VP_DM_C_SRCS += arch/x86/guest/instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/lock_instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/vm_reset.c
+ifeq ($(CONFIG_VCAT_ENABLED),y)
+VP_DM_C_SRCS += arch/x86/guest/vcat.c endif
VP_DM_C_SRCS += common/ptdev.c

# virtual platform trusty
diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
new file mode 100644
index 000000000..49968b8d9
--- /dev/null
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#include <types.h>
+#include <errno.h>
+#include <logmsg.h>
+#include <asm/cpufeatures.h>
+#include <asm/cpuid.h>
+#include <asm/rdt.h>
+#include <asm/lib/bits.h>
+#include <asm/board.h>
+#include <asm/vm_config.h>
+#include <asm/msr.h>
+#include <asm/guest/vcpu.h>
+#include <asm/guest/vm.h>
+#include <asm/guest/vcat.h>
+#include <asm/per_cpu.h>
+
+/*
+ * List of acronyms used here:
+ *
+ * - CAT:
+ * Cache Allocation Technology
+ *
+ *- vCAT:
+ * Virtual CAT
+ *
+ *- MSRs:
+ * Machine Specific Registers, each MSR is identified by a 32-bit
integer.
+ *
+ *- pMSR:
+ * physical MSR
+ *
+ *- vMSR:
+ * virtual MSR
+ *
+ *- COS/CLOS:
+ * Class of Service. Also mean COS MSRs
+ *
+ *- CLOSID:
+ * Each CLOS has a number ID, ranges from 0 to COS_MAX
+ *
+ *- CLOSIDn:
+ * Each CLOS has a number ID denoted by n
+ *
+ *- COS_MAX:
+ * Max number of COS MSRs. ACRN uses the smallest number of
+ * CLOSIDs of all supported resources as COS_MAX to have
+consistent
+ * allocation
+ *
+-* pCLOSID:
+ * Physical CLOSID
+ *
+ *- vCLOSID:
+ * Virtual CLOSID
+ *
+ *- MSR_IA32_type_MASK_n
+ * type: L2 or L3
+ * One CAT (CBM) MSR, where n corresponds to a number (CLOSIDn)
+ *
+ *- CBM:
+ * Capacity bitmask (cache bit mask), specifies which region of
+cache
+ * can be filled into, all (and only) contiguous '1' combinations
+are allowed
+ *
+ *- pCBM:
+ * Physical CBM
+ *
+ *- pCBM length (pcbm_len):
+ * pcbm_len is calculated by `bitmap_weight(max_pcbm)`
+ * indicates number of bits set in max_pcbm
+ *
+ *- max_pcbm (maximum physical cache space assigned to VM):
+ * max_pcbm is a contiguous capacity bitmask (CBM) starting at bit
+position low
+ * (the lowest assigned physical cache way) and ending at position
+high
+ * (the highest assigned physical cache way, inclusive).
+ * As CBM only allows contiguous '1' combinations, so max_pcbm
+essentially
+ * is a bitmask that selects/covers all the physical cache ways
+assigned to
the VM.
+ *
+ * Example:
+ * pcbm_len=20
+ * max_pcbm=0xfffff
+ *
+ *- CLOS_MASK/max_pcbm: (maximum assigned/reserved physical cache
+space)
+ * vCAT is built on top of RDT, vCAT on ACRN is enabled by
+configuring the FEATURES/RDT
+ * and vm sub-sections of the scenario XML file as in the below
example:
+ * <RDT>
+ * <RDT_ENABLED>y</RDT_ENABLED>
+ * <CDP_ENABLED>n</CDP_ENABLED>
+ * <VCAT_ENABLED>y</VCAT_ENABLED>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * /RDT>
+ *
+ * <vm id="0">
+ * <guest_flags>
+ <guest_flag>GUEST_FLAG_VCAT_ENABLED</guest_flag>
+ </guest_flags>
+ * <clos>
+ * <vcpu_clos>3</vcpu_clos>
+ * <vcpu_clos>4</vcpu_clos>
+ * <vcpu_clos>5</vcpu_clos>
+ * <vcpu_clos>6</vcpu_clos>
+ * <vcpu_clos>7</vcpu_clos>
+ * </clos>
+ * </vm>
+ *
+ * <vm id="1">
+ * <clos>
+ * <vcpu_clos>1</vcpu_clos>
+ * <vcpu_clos>2</vcpu_clos>
+ * </clos>
+ * </vm>
+ *
+ * vm_configurations.c (generated by config-tools) with the above
+vCAT
config:
+ *
+ * static uint16_t vm0_vcpu_clos[5U] = {3U, 4U, 5U, 6U, 7U};
+ * static uint16_t vm1_vcpu_clos[2U] = {1U, 2U};
+ *
+ * struct acrn_vm_config vm_configs[CONFIG_MAX_VM_NUM] = {
+ * {
+ * .guest_flags = (GUEST_FLAG_VCAT_ENABLED),
+ * .pclosids = vm0_vcpu_clos,
+ * .num_pclosids = 5U,
+ * .max_l3_pcbm = 0xff800U,
+ * },
+ * {
+ * .pclosids = vm1_vcpu_clos,
+ * .num_pclosids = 2U,
+ * },
+ * };
+ *
+ * Config CLOS_MASK/max_pcbm per pCLOSID:
+ * vCAT is enabled by setting both RDT_ENABLED and
VCAT_ENABLED
to 'y',
+ * then specify the GUEST_FLAG_VCAT_ENABLED guest flag for the
desired VMs.
+ * Each CLOS_MASK (a.k.a. max_pcbm) setting corresponds to a
pCLOSID and
+ * specifies the allocated portion (ways) of cache.
+ * For example, if COS_MAX is 7, then 8 CLOS_MASK settings need to
be
in place
+ * where each setting corresponds to a pCLOSID starting from 0.
+ * Each CLOS_MASK may or may not overlap with the CLOS_MASK of
another pCLOSID depending
+ * on whether overlapped or isolated bitmask is desired for
particular
performance
+ * consideration.
+ *
+ * Assign pCLOSIDs per VM
+ * Assign the desired pCLOSIDs to each VM in the vm/clos section of
the
scenario file
+ * by defining the vcpu_clos settings.
+ * All pCLOSIDs should be configured with the same pCBM
(max_pcbm)
to simplify vCAT
+ * config and ensure vCAT capability symmetry across cpus of the VM.
In
the above example,
+ * pCLOSIDs 3 to 7 are all configured with the same pCBM value
0xff800,
which
+ * means a total of 9 physical cache ways have been reserved for all
the
cpus
+ * belonging to VM0
+ *
+ *- vCBM:
+ * Virtual CBM
+ *
+ *- vCBM length (vcbm_len):
+ * max number of bits to set for vCBM.
+ * vcbm_len is set equal to pcbm_len
+ * vCBM length is reported to guest VMs by using vCPUID (EAX=10H)
+ *
+ *- max_vcbm (maximum virtual cache space):
+ * Fully open vCBM (all ones bitmask), max vCBM is calculated
+ * by `(1 << vcbm_len) - 1`
+ *
+ * Usually, vCLOSID0 is associated with the fully open vCBM to
+access all assigned virtual caches */
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l2_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L2)->num_closids > 0U); }
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l3_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L3)->num_closids > 0U); }
+
+/**
+ * @brief Return number of vCLOSIDs of this VM
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM */
uint16_t
+vcat_get_num_vclosids(const struct acrn_vm *vm) {
+ uint16_t num_vclosids = 0U;
+
+ if (is_vcat_configured(vm)) {
+ /*
+ * For performance and simplicity, here number of vCLOSIDs
(num_vclosids) is set
+ * equal to the number of pCLOSIDs assigned to this VM
(get_vm_config(vm->vm_id)->num_pclosids).
+ * But technically, we do not have to make such an
assumption. For
example,
+ * Hypervisor could implement CLOSID context switch, then
number
of vCLOSIDs
+ * can be greater than the number of pCLOSIDs assigned. etc.
+ */
+ num_vclosids = get_vm_config(vm->vm_id)->num_pclosids;
+ }
+
+ return num_vclosids;
+}
+
+/**
+ * @brief Map vCLOSID to pCLOSID
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre (get_vm_config(vm->vm_id)->pclosids != NULL) && (vclosid <
+get_vm_config(vm->vm_id)->num_pclosids)
+ */
+static uint16_t vclosid_to_pclosid(const struct acrn_vm *vm,
+uint16_t
+vclosid) {
+ ASSERT(vclosid < vcat_get_num_vclosids(vm), "vclosid is out of
+range!");
+
+ /*
+ * pclosids points to an array of assigned pCLOSIDs
+ * Use vCLOSID as the index into the pclosids array, returning the
corresponding pCLOSID
+ *
+ * Note that write_vcat_msr() calls vclosid_to_pclosid()
+indirectly, in
write_vcat_msr(),
+ * the is_l2_vcbm_msr()/is_l3_vcbm_msr() calls ensure that vclosid
+is
always less than
+ * get_vm_config(vm->vm_id)->num_pclosids, so vclosid is always
an
array index within bound here
+ */
+ return get_vm_config(vm->vm_id)->pclosids[vclosid];
+}
+
+/**
+ * @brief Return the max_pcbm of this VM.
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */
static
+uint64_t get_max_pcbm(const struct acrn_vm *vm, int res) {
+ uint64_t max_pcbm = 0UL;
+
+ if (is_l2_vcat_configured(vm) && (res == RDT_RESOURCE_L2)) {
+ max_pcbm = get_vm_config(vm->vm_id)->max_l2_pcbm;
+ } else if (is_l3_vcat_configured(vm) && (res == RDT_RESOURCE_L3))
{
+ max_pcbm = get_vm_config(vm->vm_id)->max_l3_pcbm;
+ }
+
+ return max_pcbm;
+}
+
+/**
+ * @brief Retrieve vcbm_len of vm
+ * @pre vm != NULL
+ */
+uint16_t vcat_get_vcbm_len(const struct acrn_vm *vm, int res) {
+ /* vcbm_len = pcbm_len */
This comment is incorrect.
The rational here is that set-bits in max_l2_pcbm stands for bits of
cbm the guest sees, + a shift.
Don: yes, I understand the rational, but "bitmap_weight(get_max_pcbm(vm,
res));" here does just return the vcbm_len?, looks yes to me, do not
understand why the comment is incorrect.
Pcbm is the physical platform supported CBM. Its supported length is much larger than the bits the guest is assigned.

Say, A platform CBM has 8 bits, we assign 4 bits to (VCAT) VM0, another 4 bits to (VCAT) VM1.
Pcbm_len is 8, while vcbm_len is 4. They should not be same.




+ return bitmap_weight(get_max_pcbm(vm, res)); }
+
+/**
+ * @brief Retrieve max_vcbm of vm
+ * @pre vm != NULL
+ */
+static uint64_t vcat_get_max_vcbm(const struct acrn_vm *vm, int res) {
+ uint16_t vcbm_len = vcat_get_vcbm_len(vm, res);
+ uint64_t max_vcbm = 0UL;
If " set-bits in max_l2_pcbm stands for bits of cbm the guest sees, + a shift."
Is true,
Ideally max_vcbm should be max_pcbm >> shift.

Shift can be easily calculated by the number of 0 in LSB side.

The current implementation is not wrong, but very indirect.
Don: I can change the code

+
+ if (vcbm_len != 0U) {
+ max_vcbm = (1U << vcbm_len) - 1U;
+ }
+
+ return max_vcbm;
+}
+
+/**
The following comments may be too long. Explain the hard-to-understand
acronyms here only, and those easy-to-be-confused ones.
Don: we can rename
It is less an issue in this API though I comment here. But more an issue at the beginning.
It is not an issue of rename. You spent 150+ Line of text to make the comment at very beginning, but most of them are very obvious.




+ * @brief Map vMSR address (abbreviated as vmsr) to corresponding
pMSR
+address (abbreviated as pmsr)
+ * Each vMSR or pMSR is identified by a 32-bit integer
+ *
+ * @pre vm != NULL
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */
static
+uint32_t vmsr_to_pmsr(const struct acrn_vm *vm, uint32_t vmsr, int
+res) {
+ uint32_t pmsr = vmsr;
+ uint16_t vclosid;
+
+ switch (res) {
+ case RDT_RESOURCE_L2:
+ vclosid = vmsr - MSR_IA32_L2_MASK_BASE;
+ pmsr = MSR_IA32_L2_MASK_BASE + vclosid_to_pclosid(vm,
vclosid);
+ break;
+
+ case RDT_RESOURCE_L3:
+ vclosid = vmsr - MSR_IA32_L3_MASK_BASE;
+ pmsr = MSR_IA32_L3_MASK_BASE + vclosid_to_pclosid(vm,
vclosid);
+ break;
+
+ default:
+ break;
+ }
+
+ return pmsr;
+}
+
+/**
+ * @brief vCBM MSR write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static int32_t
+write_vcbm(__unused struct acrn_vcpu *vcpu, __unused uint32_t vmsr,
+__unused uint64_t val, __unused int res) {
+ /* TODO: this is going to be implemented in a subsequent commit,
will
perform the following actions:
+ * write vCBM
+ * vmsr_to_pmsr and vcbm_to_pcbm
+ * write pCBM
+ */
+ return -EFAULT;
+}
+
+/**
+ * @brief vCLOSID MSR write handler
+
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static int32_t
+write_vclosid(struct acrn_vcpu *vcpu, uint64_t val) {
+ uint16_t vclosid, pclosid;
+
We need to check the reserve bits (or higher than MAX RMID) of
PQR_ASSOC too.
Inject #GP if it violates.
Don: we are not exposing the cat monitoring cap to guest, so we can skip
checking the reserved bits (or higher than MAX RMID) here?
In either cases, curtain bits are reserved bits, and we need to check and inject #GP, except SDM says something special.

But indeed here we should check if vclosid is out of range, e.g. it should be
less that vcat_get_num_vclosids(vcpu->vm).
Of course too.



+ /* Write the new vCLOSID value */
+ vcpu_set_guest_msr(vcpu, MSR_IA32_PQR_ASSOC, val);
+
+ vclosid = (uint16_t)((val >> 32U) & 0xFFFFFFFFUL);
Why ">>32U"?
And why &0xfff...?
Don: closid should be within uint16_t, as COS_MAX reported by cupid 10h is of
16 bits (see SDM vol 3).
SDM Vol3 Figure 17-34 provides 32 bits to this field, as if we checked the validity of CLOS, we didn't need to apply this kind of additional rule.
It is already covered by validity check.


For IA32_PQR_ASSOC msr:
COS occupies the bits from 31 to 63, but it still should be within 16 bits
ranges, so there is some confusion here.
That is not. As if we checked validity of the COS, it is fine. CPUID value is from HV, its value should follow "validity" rule too.


+ pclosid = vclosid_to_pclosid(vcpu->vm, vclosid);
+ /*
+ * Write the new pCLOSID value to the guest msr area
+ *
+ * The prepare_auto_msr_area() function has already initialized
+the
vcpu->arch.msr_area
+ * as follows:
+ * vcpu_clos = cfg->pclosids[vcpu->vcpu_id%cfg->num_pclosids]
+ *
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].msr_index
=
MSR_IA32_PQR_ASSOC
+ *
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(vcpu_clos)
+ * vcpu-
arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].msr_index
= MSR_IA32_PQR_ASSOC
+ * vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].value
=
clos2pqr_msr(hv_clos)
+ * vcpu->arch.msr_area.count = 1
+ *
+ * So here we only need to update the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value field,
+ * all other vcpu->arch.msr_area fields remains unchanged at
runtime.
+ */
+ vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
+clos2pqr_msr(pclosid);
+
Where is definition of clos2pqr_msr()?
Don: in rdt.c:
uint64_t clos2pqr_msr(uint16_t clos)
{
uint64_t pqr_assoc;

pqr_assoc = msr_read(MSR_IA32_PQR_ASSOC);
pqr_assoc = (pqr_assoc & 0xffffffffUL) | ((uint64_t)clos << 32U);

return pqr_assoc;
}
I see, This is fine.



+ return 0;
+}
+
+/**
+ * @brief Initialize vCBM MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static void
+init_vcbms(struct acrn_vcpu *vcpu, int res, uint32_t msr_base) {
+ uint64_t max_vcbm = vcat_get_max_vcbm(vcpu->vm, res);
+
+ if (max_vcbm != 0UL) {
+ uint32_t vmsr;
+ /* num_vcbm_msrs = num_vclosids */
+ uint16_t num_vcbm_msrs = vcat_get_num_vclosids(vcpu-
vm);
+
+ /*
+ * For each vCBM MSR, its initial vCBM is set to max_vcbm,
+ * a bitmask with vcbm_len bits (from 0 to vcbm_len - 1,
inclusive)
+ * set to 1 and all other bits set to 0.
+ *
+ * As CBM only allows contiguous '1' combinations, so
max_vcbm
essentially
+ * is a bitmask that selects all the virtual cache ways assigned
to
the VM.
+ * It covers all the virtual cache ways the guest VM may
access, i.e.
the
+ * superset bitmask.
+ */
+ for (vmsr = msr_base; vmsr < (msr_base + num_vcbm_msrs);
vmsr++) {
+ uint32_t pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res);
+ /* Set initial vMSR value: copy reserved bits from
corresponding pMSR, and set vCBM to max_vcbm */
Why we need to emulate the reserved bit? Normally all the reserved
bits should be zero.
Does SDM says the reserved bits must be none-zero for CBM?
Don: no, I see the SDM does not say that reserved bits must be non-zero, so I
will assume they are 0s and will change the code
But this code is not assuming they are 0s? If they are 0s, simply assign 0.



+ uint64_t val = (msr_read(pmsr) &
0xFFFFFFFF00000000UL) |
max_vcbm;
+
+ /* Write vCBM MSR */
+ (void)write_vcbm(vcpu, vmsr, val, res);
+ }
+ }
+}
+
+/**
+ * @brief Initialize vCAT MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ void
init_vcat_msrs(struct
+acrn_vcpu *vcpu) {
+ if (is_vcat_configured(vcpu->vm)) {
+ init_vcbms(vcpu, RDT_RESOURCE_L2,
MSR_IA32_L2_MASK_BASE);
+
+ init_vcbms(vcpu, RDT_RESOURCE_L3,
MSR_IA32_L3_MASK_BASE);
+
+ (void)write_vclosid(vcpu, clos2pqr_msr(0U));
Shouldn't we write 0 as power on initial vCLOSID?
Don: yes, the clos2pqr_msr(0U) means set the initial vclodis to 0
No. clos2pqr_msr is something about physical stuff. It reads physical values...
Here if it is for vCLOSID, we don't need this one.

It is simply: (0 << 32U) | RMID per SDM says.
You are mixing V and P almost everywhere...


+ }
+}
diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index 66f60d289..a7848743a 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -151,6 +151,16 @@ bool is_nvmx_configured(const struct acrn_vm
*vm)
return ((vm_config->guest_flags &
GUEST_FLAG_NVMX_ENABLED) != 0U);
}

+/**
+ * @pre vm != NULL && vm_config != NULL && vm->vmid <
CONFIG_MAX_VM_NUM
+*/ bool is_vcat_configured(const struct acrn_vm *vm) {
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
+
+ return ((vm_config->guest_flags &
GUEST_FLAG_VCAT_ENABLED) !=
0U); }
+
/**
* @brief VT-d PI posted mode can possibly be used for PTDEVs
assigned
* to this VM if platform supports VT-d PI AND lapic passthru is
not configured diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index 971e0b8fb..a3a1a8705 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -22,6 +22,7 @@
#include <asm/tsc.h>
#include <trace.h>
#include <logmsg.h>
+#include <asm/guest/vcat.h>

#define INTERCEPT_DISABLE (0U)
#define INTERCEPT_READ (1U << 0U)
@@ -338,8 +339,10 @@ static void prepare_auto_msr_area(struct
acrn_vcpu *vcpu)
It seems you changed the implementation of this API from V5.
Why?


It seems we are not converging...
Don: the only diff in V6 here is that I added is_vcat_configured () to the
following line:
if (is_vcat_configured(vcpu->vm) || (vcpu_clos != hv_clos)) {
Where is the change of:
uint16_t vcpu_clos = cfg->clos[vcpu->vcpu_id]; --> uint16_t vcpu_clos = cfg->clos[vcpu->vcpu_id % num_closids];

I do remember it exist in a certain version.





vcpu_clos = cfg->pclosids[vcpu->vcpu_id%cfg- num_pclosids];

- /* RDT: only load/restore MSR IA32_PQR_ASSOC when hv
and
guest have different settings */
- if (vcpu_clos != hv_clos) {
+ /* RDT: only load/restore MSR_IA32_PQR_ASSOC when hv
and
guest have different settings
+ * vCAT: always load/restore MSR_IA32_PQR_ASSOC
+ */
+ if (is_vcat_configured(vcpu->vm) || (vcpu_clos != hv_clos)) {

vcpu-
arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].msr_index =
MSR_IA32_PQR_ASSOC;

vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(vcpu_clos);

vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].msr_index
=
MSR_IA32_PQR_ASSOC; @@ -371,6 +374,14 @@ void
init_emulated_msrs(struct acrn_vcpu *vcpu)
}

vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL, val64);
+
+#ifdef CONFIG_VCAT_ENABLED
+ /*
+ * init_vcat_msrs() will overwrite the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value
+ * set by prepare_auto_msr_area()
+ */
+ init_vcat_msrs(vcpu);
+#endif
}

#ifdef CONFIG_VCAT_ENABLED
diff --git a/hypervisor/arch/x86/rdt.c b/hypervisor/arch/x86/rdt.c
index
f6dc960f9..b21fd5209 100644
--- a/hypervisor/arch/x86/rdt.c
+++ b/hypervisor/arch/x86/rdt.c
@@ -60,6 +60,14 @@ static struct rdt_info
res_cap_info[RDT_NUM_RESOURCES] = {
},
};

+/*
+ * @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2 || res
==
+RDT_RESOURCE_MBA */ const struct rdt_info
+*get_rdt_res_cap_info(int
+res) {
+ return &res_cap_info[res];
+}
+
/*
* @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcat.h
b/hypervisor/include/arch/x86/asm/guest/vcat.h
new file mode 100644
index 000000000..6d8e587c4
--- /dev/null
+++ b/hypervisor/include/arch/x86/asm/guest/vcat.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef VCAT_H_
+#define VCAT_H_
+
+#include <asm/guest/vm.h>
+
+bool is_l2_vcat_configured(const struct acrn_vm *vm); bool
+is_l3_vcat_configured(const struct acrn_vm *vm); uint16_t
+vcat_get_vcbm_len(const struct acrn_vm *vm, int res); void
+init_vcat_msrs(struct acrn_vcpu *vcpu);
+
+#endif /* VCAT_H_ */
+
diff --git a/hypervisor/include/arch/x86/asm/guest/vm.h
b/hypervisor/include/arch/x86/asm/guest/vm.h
index 9a6171be0..2cb06c027 100644
--- a/hypervisor/include/arch/x86/asm/guest/vm.h
+++ b/hypervisor/include/arch/x86/asm/guest/vm.h
@@ -256,6 +256,7 @@ void vrtc_init(struct acrn_vm *vm); bool
is_lapic_pt_configured(const struct acrn_vm *vm); bool
is_rt_vm(const struct acrn_vm *vm); bool is_nvmx_configured(const
struct acrn_vm *vm);
+bool is_vcat_configured(const struct acrn_vm *vm);
bool is_pi_capable(const struct acrn_vm *vm); bool
has_rt_vm(void); struct acrn_vm *get_highest_severity_vm(bool
runtime); diff --git a/hypervisor/include/arch/x86/asm/rdt.h
b/hypervisor/include/arch/x86/asm/rdt.h
index f6c4448e2..95e149fcb 100644
--- a/hypervisor/include/arch/x86/asm/rdt.h
+++ b/hypervisor/include/arch/x86/asm/rdt.h
@@ -47,5 +47,6 @@ void init_rdt_info(void); void
setup_clos(uint16_t pcpu_id); uint64_t clos2pqr_msr(uint16_t clos);
bool is_platform_rdt_capable(void);
+const struct rdt_info *get_rdt_res_cap_info(int res);

#endif /* RDT_H */
--
2.25.1











Re: [PATCH V6 7/8] hv: vCAT: implementing the vCAT MSRs write handler

Dongsheng Zhang
 

+static int32_t write_vcbm(struct acrn_vcpu *vcpu, uint32_t vmsr,
+uint64_t val, int res)
{
- /* TODO: this is going to be implemented in a subsequent commit,
will
perform the following actions:
- * write vCBM
- * vmsr_to_pmsr and vcbm_to_pcbm
- * write pCBM
+ int32_t ret = -EINVAL;
+ /*
+ * vcbm set bits should only be in the range of [0, vcbm_len)
(vcat_get_max_vcbm),
+ * so mask with vcat_get_max_vcbm to prevent erroneous vCBM
value
*/
- return -EFAULT;
+ uint64_t masked_vcbm = val & vcat_get_max_vcbm(vcpu->vm, res);
If SW sets bits beyond hardware supported value, I thought we should
inject #GP.
Yes, we should inject #GP when the SW sets bits beyond hardware supported value, or when the vcbm is not contiguous,
I already added the code for these 2 cases.


Re: [PATCH V6 3/8] hv: vCAT: initialize the emulated_guest_msrs array for CAT msrs during platform initialization

Dongsheng Zhang
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 5:21 PM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform initialization



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Dongsheng Zhang
Sent: Thursday, October 21, 2021 12:44 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform initialization



-----Original Message-----
From: acrn-dev@...
<acrn-dev@...> On Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 4:32 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform
initialization



-----Original Message-----
From: acrn-dev@...
<acrn-dev@...> On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:41 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform
initialization

From: dongshen <dongsheng.x.zhang@...>

Initialize the emulated_guest_msrs[] array at runtime for
MSR_IA32_type_MASK_n and MSR_IA32_PQR_ASSOC msrs, there is no
good way
to do this initialization statically at build time

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/cpu.c | 4 +
hypervisor/arch/x86/guest/vmsr.c | 88
+++++++++++++++++++-
hypervisor/include/arch/x86/asm/guest/vcpu.h | 19 ++++-
hypervisor/include/arch/x86/asm/msr.h | 1 +
4 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index
e1ee4770e..18c33c7d1 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -186,6 +186,10 @@ void init_pcpu_pre(bool is_bsp)
panic("System IOAPIC info is incorrect!");
}

+#ifdef CONFIG_VCAT_ENABLED
+ init_intercepted_cat_msr_list(); #endif
+
#ifdef CONFIG_RDT_ENABLED
init_rdt_info();
#endif
diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index e83c3069c..971e0b8fb 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -28,7 +28,7 @@
#define INTERCEPT_WRITE (1U << 1U)
#define INTERCEPT_READ_WRITE (INTERCEPT_READ |
INTERCEPT_WRITE)

-static const uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
+static uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
/*
* MSRs that trusty may touch and need isolation between secure
and
normal world
* This may include MSR_IA32_STAR, MSR_IA32_LSTAR,
MSR_IA32_FMASK,
@@ -79,6 +79,24 @@ static const uint32_t
emulated_guest_msrs[NUM_GUEST_MSRS] = { #ifdef
CONFIG_NVMX_ENABLED
LIST_OF_VMX_MSRS,
#endif
+
+ /* The following range of elements are reserved for vCAT usage
+and
are
+ * initialized dynamically by init_intercepted_cat_msr_list()
+during
platform initialization:
+ * [(NUM_GUEST_MSRS - NUM_VCAT_MSRS) ...
(NUM_GUEST_MSRS
-
1)] = {
+ * The following layout of each CAT MSR entry is determined by
catmsr_to_index_of_emulated_msr():
+ * MSR_IA32_L3_MASK_BASE,
+ * MSR_IA32_L3_MASK_BASE + 1,
+ * ...
+ * MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS - 1,
+ *
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS,
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS + 1,
+ * ...
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS - 1,
+ *
+ * MSR_IA32_PQR_ASSOC + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS
+ * }
+ */
};

static const uint32_t mtrr_msrs[] = { @@ -355,6 +373,74 @@ void
init_emulated_msrs(struct acrn_vcpu *vcpu)
vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL,
val64); }

+#ifdef CONFIG_VCAT_ENABLED
+/**
+ * @brief Map CAT MSR address to zero based index
+ *
+ * @pre ((msr >= MSR_IA32_L3_MASK_BASE) && msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))
+ * || ((msr >= MSR_IA32_L2_MASK_BASE) && msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))
+ * || (msr == MSR_IA32_PQR_ASSOC)
+ */
+static uint32_t cat_msr_to_index_of_emulated_msr(uint32_t msr) {
+ uint32_t index = 0U;
+
+ /* L3 MSRs indices assignment for MSR_IA32_L3_MASK_BASE ~
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS):
+ * 0
+ * 1
+ * ...
+ * (NUM_VCAT_L3_MSRS - 1)
+ *
+ * L2 MSRs indices assignment:
+ * NUM_VCAT_L3_MSRS
+ * ...
+ * NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS - 1
+
+ * PQR index assignment for MSR_IA32_PQR_ASSOC:
+ * NUM_VCAT_L3_MSRS
+ */
+
+ if ((msr >= MSR_IA32_L3_MASK_BASE) && (msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))) {
+ index = msr - MSR_IA32_L3_MASK_BASE;
+ } else if ((msr >= MSR_IA32_L2_MASK_BASE) && (msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))) {
+ index = msr - MSR_IA32_L2_MASK_BASE +
NUM_VCAT_L3_MSRS;
+ } else if (msr == MSR_IA32_PQR_ASSOC) {
+ index = NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS;
+ } else {
+ ASSERT(false, "invalid CAT msr address");
+ }
+
+ return index;
+}
+
+static void init_cat_msr_entry(uint32_t msr) {
+ /* Get index into the emulated_guest_msrs[] table for a given
+CAT MSR
*/
+ uint32_t index = cat_msr_to_index_of_emulated_msr(msr) +
+NUM_GUEST_MSRS - NUM_VCAT_MSRS;
Do you mean NUM_WORLD_MSRS + NUM_COMMON_MSRS +
NUM_VMX_MSRS +
cat_msr_to_index_of_emulated_msr(msr)?
Don: yes, I mean that

If you make a separate MACRO for " NUM_WORLD_MSRS +
NUM_COMMON_MSRS +
NUM_VMX_MSRS" to stand for the start offset of CAT MSRs, that is
fine too.
Don: maybe better to avoid another separate MACRO
It helps to read, and avoid potential issue if we add more MSRs to the list.
Don: sure, will add a MACRO


+
+ emulated_guest_msrs[index] = msr; }
+
+/* Init emulated_guest_msrs[] dynamically for CAT MSRs */ void
+init_intercepted_cat_msr_list(void)
+{
+ uint32_t msr;
+
+ /* MSR_IA32_L2_MASK_n MSRs */
+ for (msr = MSR_IA32_L2_MASK_BASE; msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_L3_MASK_n MSRs */
+ for (msr = MSR_IA32_L3_MASK_BASE; msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_PQR_ASSOC */
+ init_cat_msr_entry(MSR_IA32_PQR_ASSOC);
+}
+#endif
+
/**
* @pre vcpu != NULL
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcpu.h
b/hypervisor/include/arch/x86/asm/guest/vcpu.h
index 65ec52203..e251fb4cf 100644
--- a/hypervisor/include/arch/x86/asm/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/asm/guest/vcpu.h
@@ -29,6 +29,7 @@
#include <asm/guest/instr_emul.h> #include <asm/guest/nested.h>
#include <asm/vmx.h>
+#include <asm/vm_config.h>

/**
* @brief vcpu
@@ -173,12 +174,26 @@ enum reset_mode;

#define NUM_WORLD_MSRS 2U
#define NUM_COMMON_MSRS 23U
+
+#ifdef CONFIG_VCAT_ENABLED
+#define NUM_VCAT_L2_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+#define NUM_VCAT_L3_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+/* L2/L3 mask MSRs plus MSR_IA32_PQR_ASSOC */
+#define NUM_VCAT_MSRS (NUM_VCAT_L2_MSRS +
NUM_VCAT_L3_MSRS + 1U)
Why we need +1? Number of VCAT MSR should be L2 + L3.
Don: the comment above this line says it is "/* L2/L3 mask MSRs plus
MSR_IA32_PQR_ASSOC "
So both L2+L3+ PQR
I see.


+#else
+#define NUM_VCAT_L2_MSRS 0U
+#define NUM_VCAT_L3_MSRS 0U
I remember we don't need these 2 MACROs. Not?
Don: we need these above 2 MACROs, otherwise, #define
NUM_GUEST_MSRS(see below) will be too complicated (nested #ifdef),
too
ugly
I didn't catch. NUM_GUEST_MSRS only depends on NUM_VCAT_MSRS. It
never use above 2 MACROs.
Don: yes, you are right, they can be removed. Only NUM_VCAT_MSRS needs to be kept.




+#define NUM_VCAT_MSRS 0U
+#endif
+
+/* For detailed layout of the emulated guest MSRs, see
+emulated_guest_msrs[NUM_GUEST_MSRS] in vmsr.c */
#ifdef CONFIG_NVMX_ENABLED
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS + NUM_VCAT_MSRS)
#else
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VCAT_MSRS)
#endif

+
#define EOI_EXIT_BITMAP_SIZE 256U

struct guest_cpu_context {
diff --git a/hypervisor/include/arch/x86/asm/msr.h
b/hypervisor/include/arch/x86/asm/msr.h
index 9c9b56bf7..6556267f5 100644
--- a/hypervisor/include/arch/x86/asm/msr.h
+++ b/hypervisor/include/arch/x86/asm/msr.h
@@ -617,6 +617,7 @@ static inline bool is_x2apic_msr(uint32_t msr)
struct acrn_vcpu;

void init_msr_emulation(struct acrn_vcpu *vcpu);
+void init_intercepted_cat_msr_list(void);
uint32_t vmsr_get_guest_msr_index(uint32_t msr); void
update_msr_bitmap_x2apic_apicv(struct acrn_vcpu *vcpu); void
update_msr_bitmap_x2apic_passthru(struct acrn_vcpu *vcpu);
--
2.25.1














Re: [PATCH V6 3/8] hv: vCAT: initialize the emulated_guest_msrs array for CAT msrs during platform initialization

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Thursday, October 21, 2021 12:44 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform initialization



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 4:32 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform initialization



-----Original Message-----
From: acrn-dev@...
<acrn-dev@...> On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:41 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform
initialization

From: dongshen <dongsheng.x.zhang@...>

Initialize the emulated_guest_msrs[] array at runtime for
MSR_IA32_type_MASK_n and MSR_IA32_PQR_ASSOC msrs, there is no
good way
to do this initialization statically at build time

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/cpu.c | 4 +
hypervisor/arch/x86/guest/vmsr.c | 88
+++++++++++++++++++-
hypervisor/include/arch/x86/asm/guest/vcpu.h | 19 ++++-
hypervisor/include/arch/x86/asm/msr.h | 1 +
4 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index
e1ee4770e..18c33c7d1 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -186,6 +186,10 @@ void init_pcpu_pre(bool is_bsp)
panic("System IOAPIC info is incorrect!");
}

+#ifdef CONFIG_VCAT_ENABLED
+ init_intercepted_cat_msr_list();
+#endif
+
#ifdef CONFIG_RDT_ENABLED
init_rdt_info();
#endif
diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index e83c3069c..971e0b8fb 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -28,7 +28,7 @@
#define INTERCEPT_WRITE (1U << 1U)
#define INTERCEPT_READ_WRITE (INTERCEPT_READ |
INTERCEPT_WRITE)

-static const uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
+static uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
/*
* MSRs that trusty may touch and need isolation between secure
and
normal world
* This may include MSR_IA32_STAR, MSR_IA32_LSTAR,
MSR_IA32_FMASK,
@@ -79,6 +79,24 @@ static const uint32_t
emulated_guest_msrs[NUM_GUEST_MSRS] = { #ifdef
CONFIG_NVMX_ENABLED
LIST_OF_VMX_MSRS,
#endif
+
+ /* The following range of elements are reserved for vCAT usage and
are
+ * initialized dynamically by init_intercepted_cat_msr_list()
+during
platform initialization:
+ * [(NUM_GUEST_MSRS - NUM_VCAT_MSRS) ...
(NUM_GUEST_MSRS
-
1)] = {
+ * The following layout of each CAT MSR entry is determined by
catmsr_to_index_of_emulated_msr():
+ * MSR_IA32_L3_MASK_BASE,
+ * MSR_IA32_L3_MASK_BASE + 1,
+ * ...
+ * MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS - 1,
+ *
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS,
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS + 1,
+ * ...
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS - 1,
+ *
+ * MSR_IA32_PQR_ASSOC + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS
+ * }
+ */
};

static const uint32_t mtrr_msrs[] = { @@ -355,6 +373,74 @@ void
init_emulated_msrs(struct acrn_vcpu *vcpu)
vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL,
val64); }

+#ifdef CONFIG_VCAT_ENABLED
+/**
+ * @brief Map CAT MSR address to zero based index
+ *
+ * @pre ((msr >= MSR_IA32_L3_MASK_BASE) && msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))
+ * || ((msr >= MSR_IA32_L2_MASK_BASE) && msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))
+ * || (msr == MSR_IA32_PQR_ASSOC)
+ */
+static uint32_t cat_msr_to_index_of_emulated_msr(uint32_t msr) {
+ uint32_t index = 0U;
+
+ /* L3 MSRs indices assignment for MSR_IA32_L3_MASK_BASE ~
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS):
+ * 0
+ * 1
+ * ...
+ * (NUM_VCAT_L3_MSRS - 1)
+ *
+ * L2 MSRs indices assignment:
+ * NUM_VCAT_L3_MSRS
+ * ...
+ * NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS - 1
+
+ * PQR index assignment for MSR_IA32_PQR_ASSOC:
+ * NUM_VCAT_L3_MSRS
+ */
+
+ if ((msr >= MSR_IA32_L3_MASK_BASE) && (msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))) {
+ index = msr - MSR_IA32_L3_MASK_BASE;
+ } else if ((msr >= MSR_IA32_L2_MASK_BASE) && (msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))) {
+ index = msr - MSR_IA32_L2_MASK_BASE +
NUM_VCAT_L3_MSRS;
+ } else if (msr == MSR_IA32_PQR_ASSOC) {
+ index = NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS;
+ } else {
+ ASSERT(false, "invalid CAT msr address");
+ }
+
+ return index;
+}
+
+static void init_cat_msr_entry(uint32_t msr) {
+ /* Get index into the emulated_guest_msrs[] table for a given CAT
+MSR
*/
+ uint32_t index = cat_msr_to_index_of_emulated_msr(msr) +
+NUM_GUEST_MSRS - NUM_VCAT_MSRS;
Do you mean NUM_WORLD_MSRS + NUM_COMMON_MSRS +
NUM_VMX_MSRS +
cat_msr_to_index_of_emulated_msr(msr)?
Don: yes, I mean that

If you make a separate MACRO for " NUM_WORLD_MSRS +
NUM_COMMON_MSRS +
NUM_VMX_MSRS" to stand for the start offset of CAT MSRs, that is fine
too.
Don: maybe better to avoid another separate MACRO
It helps to read, and avoid potential issue if we add more MSRs to the list.


+
+ emulated_guest_msrs[index] = msr;
+}
+
+/* Init emulated_guest_msrs[] dynamically for CAT MSRs */ void
+init_intercepted_cat_msr_list(void)
+{
+ uint32_t msr;
+
+ /* MSR_IA32_L2_MASK_n MSRs */
+ for (msr = MSR_IA32_L2_MASK_BASE; msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_L3_MASK_n MSRs */
+ for (msr = MSR_IA32_L3_MASK_BASE; msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_PQR_ASSOC */
+ init_cat_msr_entry(MSR_IA32_PQR_ASSOC);
+}
+#endif
+
/**
* @pre vcpu != NULL
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcpu.h
b/hypervisor/include/arch/x86/asm/guest/vcpu.h
index 65ec52203..e251fb4cf 100644
--- a/hypervisor/include/arch/x86/asm/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/asm/guest/vcpu.h
@@ -29,6 +29,7 @@
#include <asm/guest/instr_emul.h>
#include <asm/guest/nested.h>
#include <asm/vmx.h>
+#include <asm/vm_config.h>

/**
* @brief vcpu
@@ -173,12 +174,26 @@ enum reset_mode;

#define NUM_WORLD_MSRS 2U
#define NUM_COMMON_MSRS 23U
+
+#ifdef CONFIG_VCAT_ENABLED
+#define NUM_VCAT_L2_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+#define NUM_VCAT_L3_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+/* L2/L3 mask MSRs plus MSR_IA32_PQR_ASSOC */
+#define NUM_VCAT_MSRS (NUM_VCAT_L2_MSRS +
NUM_VCAT_L3_MSRS + 1U)
Why we need +1? Number of VCAT MSR should be L2 + L3.
Don: the comment above this line says it is "/* L2/L3 mask MSRs plus
MSR_IA32_PQR_ASSOC "
So both L2+L3+ PQR
I see.


+#else
+#define NUM_VCAT_L2_MSRS 0U
+#define NUM_VCAT_L3_MSRS 0U
I remember we don't need these 2 MACROs. Not?
Don: we need these above 2 MACROs, otherwise, #define
NUM_GUEST_MSRS(see below) will be too complicated (nested #ifdef), too
ugly
I didn't catch. NUM_GUEST_MSRS only depends on NUM_VCAT_MSRS. It never use above 2 MACROs.



+#define NUM_VCAT_MSRS 0U
+#endif
+
+/* For detailed layout of the emulated guest MSRs, see
+emulated_guest_msrs[NUM_GUEST_MSRS] in vmsr.c */
#ifdef CONFIG_NVMX_ENABLED
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS + NUM_VCAT_MSRS)
#else
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VCAT_MSRS)
#endif

+
#define EOI_EXIT_BITMAP_SIZE 256U

struct guest_cpu_context {
diff --git a/hypervisor/include/arch/x86/asm/msr.h
b/hypervisor/include/arch/x86/asm/msr.h
index 9c9b56bf7..6556267f5 100644
--- a/hypervisor/include/arch/x86/asm/msr.h
+++ b/hypervisor/include/arch/x86/asm/msr.h
@@ -617,6 +617,7 @@ static inline bool is_x2apic_msr(uint32_t msr)
struct acrn_vcpu;

void init_msr_emulation(struct acrn_vcpu *vcpu);
+void init_intercepted_cat_msr_list(void);
uint32_t vmsr_get_guest_msr_index(uint32_t msr); void
update_msr_bitmap_x2apic_apicv(struct acrn_vcpu *vcpu); void
update_msr_bitmap_x2apic_passthru(struct acrn_vcpu *vcpu);
--
2.25.1











Re: [PATCH V6 8/8] hv: vCAT: propagate vCBM to other vCPUs that share cache with vcpu

Dongsheng Zhang
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 6:11 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 8/8] hv: vCAT: propagate vCBM to other
vCPUs that share cache with vcpu



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:42 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 8/8] hv: vCAT: propagate vCBM to other
vCPUs that share cache with vcpu

From: dongshen <dongsheng.x.zhang@...>

Move the nearest_pow2() and get_cache_shift() functions from
hypercall.c to cpu_caps.c Store L2/L3 cat id shift in struct
cpuinfo_x86

Implement the propagate_vcbm() function:
Set vCBM to to all the vCPUs that share cache with vcpu
to mimic hardware CAT behavior

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/cpu_caps.c | 55
+++++++++++++++++++
hypervisor/arch/x86/guest/vcat.c | 62
+++++++++++++++++++++-
hypervisor/common/hypercall.c | 50 ++---------------
hypervisor/include/arch/x86/asm/cpu_caps.h | 4 ++
4 files changed, 122 insertions(+), 49 deletions(-)

diff --git a/hypervisor/arch/x86/cpu_caps.c
b/hypervisor/arch/x86/cpu_caps.c index 89fd559b1..33ea3aa36 100644
--- a/hypervisor/arch/x86/cpu_caps.c
+++ b/hypervisor/arch/x86/cpu_caps.c
@@ -16,6 +16,7 @@
#include <errno.h>
#include <logmsg.h>
#include <asm/guest/vmcs.h>
+#include <asm/lib/bits.h>

/* TODO: add more capability per requirement */
/* APICv features */
@@ -322,6 +323,58 @@ static uint64_t get_address_mask(uint8_t limit)
return ((1UL << limit) - 1UL) & PAGE_MASK; }

+/*
+ * nearest_pow2(n) is the nearest power of 2 integer that is not less
+than n
+ * The last (most significant) bit set of (n*2-1) matches the above
+definition */ static uint32_t nearest_pow2(uint32_t n) {
+ uint32_t p = n;
+
+ if (n >= 2U) {
+ p = fls32(2U*n - 1U);
+ }
+
+ return p;
+}
+
+static void get_cat_id_shift(uint32_t *l2_cat_id_shift, uint32_t
+*l3_cat_id_shift) {
What does cat id mean?
Or are u defining a new term cache-id-shift?
Cannot understand.

I guess you are saying: cache_id? Cache_type? Cache_level? I am lost
Don: I mean l2/l3 cache id


+ uint32_t subleaf;
+
+ *l2_cat_id_shift = 0U;
+ *l3_cat_id_shift = 0U;
+
+ for (subleaf = 0U;; subleaf++) {
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t cache_type, cache_level, id, shift;
+
+ cpuid_subleaf(0x4U, subleaf, &eax, &ebx, &ecx, &edx);
+
+ cache_type = eax & 0x1fU;
+ cache_level = (eax >> 5U) & 0x7U;
+
+ /* Intel SDM Vol 2, CPUID 04H:
+ * EAX: bits 25 - 14: Maximum number of addressable IDs for
logical
processors sharing this cache.
+ * The nearest power-of-2 integer that is not smaller than (1
+
EAX[25:14]) is the number of unique
+ * initial APIC IDs reserved for addressing different logical
processors sharing this cache
+ */
+ id = (eax >> 14U) & 0xfffU;
+ shift = nearest_pow2(id + 1U);
+
+ /* No more caches */
+ if ((cache_type == 0U) || (cache_type >= 4U)) {
+ break;
+ }
+
+ if (cache_level == 2U) {
+ *l2_cat_id_shift = shift;
+ } else if (cache_level == 3U) {
+ *l3_cat_id_shift = shift;
+ }
+ }
+}
+
void init_pcpu_capabilities(void)
{
uint32_t eax, unused;
@@ -385,6 +438,8 @@ void init_pcpu_capabilities(void)
get_address_mask(boot_cpu_data.phys_bits);
}

+ get_cat_id_shift(&boot_cpu_data.l2_cat_id_shift,
+&boot_cpu_data.l3_cat_id_shift);
+
detect_pcpu_cap();
}

diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
index 1dda6b61f..9e302b25e 100644
--- a/hypervisor/arch/x86/guest/vcat.c
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -410,6 +410,55 @@ static uint32_t vmsr_to_pmsr(const struct
acrn_vm
*vm, uint32_t vmsr, int res)
return pmsr;
}

+static void get_vcat_id_shift(uint32_t *l2_shift, uint32_t *l3_shift) {
+ struct cpuinfo_x86 *cpu_info = get_pcpu_info();
+
+ /* Assume that virtual cat id shift is equal to physical cat id
+shift for now
*/
+ *l2_shift = cpu_info->l2_cat_id_shift;
+ *l3_shift = cpu_info->l3_cat_id_shift; }
+
+/**
+ * @brief Propagate vCBM to other vCPUs that share cache with vcpu
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static void
+propagate_vcbm(struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t val) {
+ uint16_t i;
+ struct acrn_vcpu *tmp_vcpu;
+ uint32_t l2_shift, l3_shift, l2_id, l3_id;
+ struct acrn_vm *vm = vcpu->vm;
+ uint32_t apicid = vlapic_get_apicid(vcpu_vlapic(vcpu));
+
+ get_vcat_id_shift(&l2_shift, &l3_shift);


+ /* Intel SDM Vol 2, CPUID 04H:
+ * EAX: bits 25 - 14: Maximum number of addressable IDs for logical
processors sharing this cache.
+ *
+ * l2_shift/l3_shift: the nearest power-of-2 integer that is not
+smaller
than (1 + EAX[25:14])
+ * is the number of unique initial APIC IDs reserved for addressing
different logical processors
+ * sharing this cache
+ */
+ l2_id = apicid >> l2_shift;
+ l3_id = apicid >> l3_shift;
+
+ /*
+ * Determine which logical processors share an MSR (for instance
local
+ * to a core, or shared across multiple cores) by checking if they
+have the
same
+ * L2/L3 cache id
+ */
+ foreach_vcpu(i, vm, tmp_vcpu) {
+ uint32_t tmp_apicid =
vlapic_get_apicid(vcpu_vlapic(tmp_vcpu));
+ uint32_t tmp_l2_id = tmp_apicid >> l2_shift;
+ uint32_t tmp_l3_id = tmp_apicid >> l3_shift;
+
This patch is becoming worse than previous version.

We want to propagate among VCPUs with same cache ID. All what we
compare is among virtual cache ID.
However this code is comparing between P and V.
Don: this is following our email discussion, it is compare V to V:
Maybe we should add one API to get virtual cache shift in vcat.c, like get_vcache_shift, you can make it call get_cache_shift directly.
And get_cache_shift should be one low level API, maybe cpu_caps.c should be a good place?

get_vcat_id_shift(): returns the virtual cache shift (internally we assume virtual cache shift is equal to physical cache shift), so it is just
a wrapper.




+ if ((is_l2_vcbm_msr(vm, vmsr) && (l2_id == tmp_l2_id))
+ || (is_l3_vcbm_msr(vm, vmsr) && (l3_id ==
tmp_l3_id))) {
+ vcpu_set_guest_msr(tmp_vcpu, vmsr, val);
+ }
+ }
+}
+
static void write_pcbm(uint32_t pmsr, uint64_t pcbm) {
/* Preserve reserved bits, and only set the pCBM bits */ @@ -461,8
+510,17 @@ static int32_t write_vcbm(struct acrn_vcpu *vcpu, uint32_t
vmsr, uint64_t val, i
uint32_t pmsr;
uint64_t pcbm;

- /* Write vCBM first */
- vcpu_set_guest_msr(vcpu, vmsr, masked_vcbm | (val &
0xFFFFFFFF00000000UL));
+ /*
+ * Write vCBM first:
+ * The L2 mask MSRs are scoped at the same level as the L2
cache
(similarly,
+ * the L3 mask MSRs are scoped at the same level as the L3
cache).
+ *
+ * For example, the MSR_IA32_L3_MASK_n MSRs are scoped
at
socket level, which means if
+ * we program MSR_IA32_L3_MASK_n on one cpu and the
same
MSR_IA32_L3_MASK_n on all other cpus
+ * of the same socket will also get the change!
+ * Set vcbm to all the vCPUs that share cache with vcpu to
mimic
this hardware behavior.
+ */
+ propagate_vcbm(vcpu, vmsr, masked_vcbm | (val &
+0xFFFFFFFF00000000UL));

/* Write pCBM: */
pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res); diff --git
a/hypervisor/common/hypercall.c b/hypervisor/common/hypercall.c index
fb19fe4a0..3f5134d45 100644
--- a/hypervisor/common/hypercall.c
+++ b/hypervisor/common/hypercall.c
@@ -133,52 +133,6 @@ int32_t hcall_get_api_version(struct acrn_vcpu
*vcpu, __unused struct acrn_vm *t
return copy_to_gpa(vcpu->vm, &version, param1, sizeof(version)); }

-/*
- * nearest_pow2(n) is the nearest power of 2 integer that is not less
than n
- * The last (most significant) bit set of (n*2-1) matches the above
definition
- */
-static uint32_t nearest_pow2(uint32_t n) -{
- uint32_t p = n;
-
- if (n >= 2U) {
- p = fls32(2U*n - 1U);
- }
-
- return p;
-}
-
-static void get_cache_shift(uint32_t *l2_shift, uint32_t *l3_shift) -{
- uint32_t subleaf;
-
- *l2_shift = 0U;
- *l3_shift = 0U;
-
- for (subleaf = 0U;; subleaf++) {
- uint32_t eax, ebx, ecx, edx;
- uint32_t cache_type, cache_level, id, shift;
-
- cpuid_subleaf(0x4U, subleaf, &eax, &ebx, &ecx, &edx);
-
- cache_type = eax & 0x1fU;
- cache_level = (eax >> 5U) & 0x7U;
- id = (eax >> 14U) & 0xfffU;
- shift = nearest_pow2(id + 1U);
-
- /* No more caches */
- if ((cache_type == 0U) || (cache_type >= 4U)) {
- break;
- }
-
- if (cache_level == 2U) {
- *l2_shift = shift;
- } else if (cache_level == 3U) {
- *l3_shift = shift;
- }
- }
-}
-
/**
* @brief Get basic platform information.
*
@@ -204,8 +158,10 @@ int32_t hcall_get_platform_info(struct acrn_vcpu
*vcpu, __unused struct acrn_vm
if (ret == 0) {
uint16_t i;
uint16_t pcpu_nums = get_pcpu_nums();
+ struct cpuinfo_x86 *cpu_info = get_pcpu_info();

- get_cache_shift(&pi.hw.l2_cat_shift, &pi.hw.l3_cat_shift);
+ pi.hw.l2_cat_shift = cpu_info->l2_cat_id_shift;
+ pi.hw.l3_cat_shift = cpu_info->l3_cat_id_shift;

for (i = 0U; i < min(pcpu_nums,
ACRN_PLATFORM_LAPIC_IDS_MAX);
i++) {
pi.hw.lapic_ids[i] = per_cpu(lapic_id, i); diff --git
a/hypervisor/include/arch/x86/asm/cpu_caps.h
b/hypervisor/include/arch/x86/asm/cpu_caps.h
index 4f679f209..c22e306a0 100644
--- a/hypervisor/include/arch/x86/asm/cpu_caps.h
+++ b/hypervisor/include/arch/x86/asm/cpu_caps.h
@@ -43,6 +43,10 @@ struct cpuinfo_x86 {
uint64_t physical_address_mask;
uint32_t cpuid_leaves[FEATURE_WORDS];
char model_name[64];
+ /* Right-shift count that will allow software to extract part of
+APIC ID to
distinguish L2 CAT ID */
+ uint32_t l2_cat_id_shift;
+ /* Right-shift count that will allow software to extract part of
+APIC ID to
distinguish L3 CAT ID */
+ uint32_t l3_cat_id_shift;
};

bool has_monitor_cap(void);
--
2.25.1








Re: [PATCH V6 7/8] hv: vCAT: implementing the vCAT MSRs write handler

Dongsheng Zhang
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 5:53 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 7/8] hv: vCAT: implementing the vCAT
MSRs write handler



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:42 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 7/8] hv: vCAT: implementing the vCAT
MSRs write handler

From: dongshen <dongsheng.x.zhang@...>

Implement the write_vcat_msr() function to handle the
MSR_IA32_PQR_ASSOC and MSR_IA32_type_MASK_n vCAT MSRs write
request.

Several vCAT P2V (physical to virtual) and V2P (virtual to physical)
mappings
exist:

struct acrn_vm_config *vm_config = get_vm_config(vm_id)

max_pcbm = vm_config->max_type_pcbm (type: l2 or l3)
mask_shift = ffs64(max_pcbm)

vclosid = vmsr - MSR_IA32_type_MASK_0
pclosid = vm_config->pclosids[vclosid]

pmsr = MSR_IA32_type_MASK_0 + pclosid
pcbm = vcbm << mask_shift
vcbm = pcbm >> mask_shift

Where
MSR_IA32_type_MASK_n: L2 or L3 mask msr address for CLOSIDn, from
0C90H through 0D8FH (inclusive).

max_pcbm: a bitmask that selects all the physical cache ways
assigned to the VM

vclosid: virtual CLOSID, always starts from 0

pclosid: corresponding physical CLOSID for a given vclosid

vmsr: virtual msr address, passed to vCAT handlers by the
caller functions rdmsr_vmexit_handler()/wrmsr_vmexit_handler()

pmsr: physical msr address

vcbm: virtual CBM, passed to vCAT handlers by the
caller functions rdmsr_vmexit_handler()/wrmsr_vmexit_handler()

pcbm: physical CBM

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/guest/vcat.c | 100
+++++++++++++++++--
hypervisor/arch/x86/guest/vmsr.c | 9 ++
hypervisor/include/arch/x86/asm/guest/vcat.h | 1 +
3 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
index 03e9a3cc9..1dda6b61f 100644
--- a/hypervisor/arch/x86/guest/vcat.c
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -365,6 +365,21 @@ int32_t read_vcat_msr(const struct acrn_vcpu
*vcpu, uint32_t vmsr, uint64_t *rva
return ret;
}

+/**
+ * @brief Map vCBM to pCBM
+ *
+ * @pre vm != NULL
+ */
+static uint64_t vcbm_to_pcbm(const struct acrn_vm *vm, uint64_t vcbm,
+int res) {
+ uint64_t max_pcbm = get_max_pcbm(vm, res);
+
+ /* Find the position low (the first bit set) in max_pcbm */
+ uint16_t low = ffs64(max_pcbm);
+
+ return vcbm << low;
+}
+
/**
* @brief Map vMSR address (abbreviated as vmsr) to corresponding
pMSR address (abbreviated as pmsr)
* Each vMSR or pMSR is identified by a 32-bit integer @@ -395,19
+410,70 @@ static uint32_t vmsr_to_pmsr(const struct acrn_vm *vm,
uint32_t vmsr, int res)
return pmsr;
}

+static void write_pcbm(uint32_t pmsr, uint64_t pcbm) {
+ /* Preserve reserved bits, and only set the pCBM bits */
+ uint64_t pmsr_value = (msr_read(pmsr) & 0xFFFFFFFF00000000UL) |
pcbm;
+
+ msr_write(pmsr, pmsr_value);
+}
+
+/* Check if bitmask is contiguous:
+ * All (and only) contiguous '1' combinations are allowed (e.g.
+FFFFH, 0FF0H, 003CH, etc.) */ static bool
+is_contiguous_bit_set(uint64_t
+bitmask) {
+ bool ret = false;
+ uint64_t tmp64 = bitmask;
+
+ if (tmp64 != 0UL) {
+ while ((tmp64 & 1UL) == 0UL) {
+ tmp64 >>= 1U;
+ }
+
+ while ((tmp64 & 1UL) != 0UL) {
+ tmp64 >>= 1U;
+ }
+
+ if (tmp64 == 0UL) {
+ ret = true;
+ }
+ }
That is too ugly.

We can use bit-scan instruction to find the highest set-bit (say high), and the
lowest set bit (say low) Then the contiguous value must be: (1<<(high+1)) --
(1<<low)
Don: no, I do not think your algo is right, we need to check if the 1s are contiguous.
Can you rethink on this?


+
+ return ret;
+}
+
/**
* @brief vCBM MSR write handler
*
* @pre vcpu != NULL && vcpu->vm != NULL
*/
-static int32_t write_vcbm(__unused struct acrn_vcpu *vcpu, __unused
uint32_t vmsr, __unused uint64_t val, __unused int res)
+static int32_t write_vcbm(struct acrn_vcpu *vcpu, uint32_t vmsr,
+uint64_t val, int res)
{
- /* TODO: this is going to be implemented in a subsequent commit,
will
perform the following actions:
- * write vCBM
- * vmsr_to_pmsr and vcbm_to_pcbm
- * write pCBM
+ int32_t ret = -EINVAL;
+ /*
+ * vcbm set bits should only be in the range of [0, vcbm_len)
(vcat_get_max_vcbm),
+ * so mask with vcat_get_max_vcbm to prevent erroneous vCBM
value
*/
- return -EFAULT;
+ uint64_t masked_vcbm = val & vcat_get_max_vcbm(vcpu->vm, res);
If SW sets bits beyond hardware supported value, I thought we should inject
#GP.
Not?
Don: yes, just return non-zero value (= -EINVAL) here if not contiguous to upper caller,
then eventually in hv_main.c, it will inject a GP to guest after vmext_handler(), see below:
ret = vmexit_handler(vcpu);
if (ret < 0) {
pr_fatal("dispatch VM exit handler failed for reason"
" %d, ret = %d!", vcpu->arch.exit_reason, ret);
vcpu_inject_gp(vcpu, 0U);
continue;
}


+
+ if (is_contiguous_bit_set(masked_vcbm)) {
+ uint32_t pmsr;
+ uint64_t pcbm;
+
+ /* Write vCBM first */
+ vcpu_set_guest_msr(vcpu, vmsr, masked_vcbm | (val &
+0xFFFFFFFF00000000UL));
+
Didn't understand why we not directly set guest value here.
Don: we can, but if the vcbm is not within range: vcbm set bits should only be in the range of [0, vcbm_len) (vcat_get_max_vcbm),
Then what should we do here, return an error code to upper caller (will #GP), or just use masked off vcbm (masked_vcbm) and set it to guest? I choose the latter one here


+ /* Write pCBM: */
+ pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res);
+ pcbm = vcbm_to_pcbm(vcpu->vm, masked_vcbm, res);
+ write_pcbm(pmsr, pcbm);
+
+ ret = 0;
+ }
+
+ /* Return non-zero to vmexit_handler if vcbm is not contiguous,
+which
will result in #GP injected to guest */
+ return ret;
}

/**
@@ -444,6 +510,28 @@ static int32_t write_vclosid(struct acrn_vcpu
*vcpu, uint64_t val)
return 0;
}

+/**
+ * @brief vCAT MSRs write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ int32_t
+write_vcat_msr(struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t val) {
+ int32_t ret = -EACCES;
+
+ if (is_vcat_configured(vcpu->vm)) {
+ if (vmsr == MSR_IA32_PQR_ASSOC) {
+ ret = write_vclosid(vcpu, val);
Please move this to caller side to avoid double switch/if-else...
This one happens frequently at runtime.
Don: yes we can, but move this to caller code (vmsr.c) will make the logic hard and requires more code.
I would try to avoid that if I have the choice.


+ } else if (is_l2_vcbm_msr(vcpu->vm, vmsr)) {
+ ret = write_vcbm(vcpu, vmsr, val,
RDT_RESOURCE_L2);
+ } else if (is_l3_vcbm_msr(vcpu->vm, vmsr)) {
+ ret = write_vcbm(vcpu, vmsr, val,
RDT_RESOURCE_L3);
+ }
+ }
+
+ return ret;
+}
+
/**
* @brief Initialize vCBM MSRs
*
diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index 06825d211..412388106 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -1092,6 +1092,15 @@ int32_t wrmsr_vmexit_handler(struct
acrn_vcpu
*vcpu)
}
break;
}
+#ifdef CONFIG_VCAT_ENABLED
+ case MSR_IA32_L2_MASK_BASE ... (MSR_IA32_L2_MASK_BASE +
NUM_VCAT_L2_MSRS - 1U):
+ case MSR_IA32_L3_MASK_BASE ... (MSR_IA32_L3_MASK_BASE +
NUM_VCAT_L3_MSRS - 1U):
+ case MSR_IA32_PQR_ASSOC:
+ {
+ err = write_vcat_msr(vcpu, msr, v);
+ break;
+ }
+#endif
default:
{
if (is_x2apic_msr(msr)) {
diff --git a/hypervisor/include/arch/x86/asm/guest/vcat.h
b/hypervisor/include/arch/x86/asm/guest/vcat.h
index 16ae0802b..8effa7604 100644
--- a/hypervisor/include/arch/x86/asm/guest/vcat.h
+++ b/hypervisor/include/arch/x86/asm/guest/vcat.h
@@ -16,6 +16,7 @@ void init_vcat_msrs(struct acrn_vcpu *vcpu);
uint16_t vcat_get_num_vclosids(const struct acrn_vm *vm); uint64_t
vcat_pcbm_to_vcbm(const struct acrn_vm *vm, uint64_t pcbm, int res);
int32_t read_vcat_msr(const struct acrn_vcpu *vcpu, uint32_t vmsr,
uint64_t *rval);
+int32_t write_vcat_msr(struct acrn_vcpu *vcpu, uint32_t vmsr,
+uint64_t val);

#endif /* VCAT_H_ */

--
2.25.1








Re: [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs during vmcs init

Dongsheng Zhang
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 5:25 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs during
vmcs init



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:41 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs
during vmcs init

From: dongshen <dongsheng.x.zhang@...>

Initialize vCBM MSR

Initialize vCLOSID MSR

Add some vCAT functions:
Retrieve max_vcbm
Check if vCAT is configured or not for the VM Map vclosid to pclosid
write_vclosid: vCLOSID MSR write handler
write_vcbm: vCBM MSR write handler

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/Makefile | 3 +
hypervisor/arch/x86/guest/vcat.c | 408
+++++++++++++++++++
hypervisor/arch/x86/guest/vm.c | 10 +
hypervisor/arch/x86/guest/vmsr.c | 15 +-
hypervisor/arch/x86/rdt.c | 8 +
hypervisor/include/arch/x86/asm/guest/vcat.h | 18 +
hypervisor/include/arch/x86/asm/guest/vm.h | 1 +
hypervisor/include/arch/x86/asm/rdt.h | 1 +
8 files changed, 462 insertions(+), 2 deletions(-) create mode
100644 hypervisor/arch/x86/guest/vcat.c create mode 100644
hypervisor/include/arch/x86/asm/guest/vcat.h

diff --git a/hypervisor/Makefile b/hypervisor/Makefile index
50bb2891c..a5c02d115 100644
--- a/hypervisor/Makefile
+++ b/hypervisor/Makefile
@@ -330,6 +330,9 @@ VP_DM_C_SRCS += arch/x86/guest/vmx_io.c
VP_DM_C_SRCS += arch/x86/guest/instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/lock_instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/vm_reset.c
+ifeq ($(CONFIG_VCAT_ENABLED),y)
+VP_DM_C_SRCS += arch/x86/guest/vcat.c endif
VP_DM_C_SRCS += common/ptdev.c

# virtual platform trusty
diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
new file mode 100644
index 000000000..49968b8d9
--- /dev/null
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#include <types.h>
+#include <errno.h>
+#include <logmsg.h>
+#include <asm/cpufeatures.h>
+#include <asm/cpuid.h>
+#include <asm/rdt.h>
+#include <asm/lib/bits.h>
+#include <asm/board.h>
+#include <asm/vm_config.h>
+#include <asm/msr.h>
+#include <asm/guest/vcpu.h>
+#include <asm/guest/vm.h>
+#include <asm/guest/vcat.h>
+#include <asm/per_cpu.h>
+
+/*
+ * List of acronyms used here:
+ *
+ * - CAT:
+ * Cache Allocation Technology
+ *
+ *- vCAT:
+ * Virtual CAT
+ *
+ *- MSRs:
+ * Machine Specific Registers, each MSR is identified by a 32-bit integer.
+ *
+ *- pMSR:
+ * physical MSR
+ *
+ *- vMSR:
+ * virtual MSR
+ *
+ *- COS/CLOS:
+ * Class of Service. Also mean COS MSRs
+ *
+ *- CLOSID:
+ * Each CLOS has a number ID, ranges from 0 to COS_MAX
+ *
+ *- CLOSIDn:
+ * Each CLOS has a number ID denoted by n
+ *
+ *- COS_MAX:
+ * Max number of COS MSRs. ACRN uses the smallest number of
+ * CLOSIDs of all supported resources as COS_MAX to have consistent
+ * allocation
+ *
+-* pCLOSID:
+ * Physical CLOSID
+ *
+ *- vCLOSID:
+ * Virtual CLOSID
+ *
+ *- MSR_IA32_type_MASK_n
+ * type: L2 or L3
+ * One CAT (CBM) MSR, where n corresponds to a number (CLOSIDn)
+ *
+ *- CBM:
+ * Capacity bitmask (cache bit mask), specifies which region of cache
+ * can be filled into, all (and only) contiguous '1' combinations are
+allowed
+ *
+ *- pCBM:
+ * Physical CBM
+ *
+ *- pCBM length (pcbm_len):
+ * pcbm_len is calculated by `bitmap_weight(max_pcbm)`
+ * indicates number of bits set in max_pcbm
+ *
+ *- max_pcbm (maximum physical cache space assigned to VM):
+ * max_pcbm is a contiguous capacity bitmask (CBM) starting at bit
+position low
+ * (the lowest assigned physical cache way) and ending at position
+high
+ * (the highest assigned physical cache way, inclusive).
+ * As CBM only allows contiguous '1' combinations, so max_pcbm
+essentially
+ * is a bitmask that selects/covers all the physical cache ways assigned to
the VM.
+ *
+ * Example:
+ * pcbm_len=20
+ * max_pcbm=0xfffff
+ *
+ *- CLOS_MASK/max_pcbm: (maximum assigned/reserved physical cache
+space)
+ * vCAT is built on top of RDT, vCAT on ACRN is enabled by configuring
+the FEATURES/RDT
+ * and vm sub-sections of the scenario XML file as in the below example:
+ * <RDT>
+ * <RDT_ENABLED>y</RDT_ENABLED>
+ * <CDP_ENABLED>n</CDP_ENABLED>
+ * <VCAT_ENABLED>y</VCAT_ENABLED>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * /RDT>
+ *
+ * <vm id="0">
+ * <guest_flags>
+ <guest_flag>GUEST_FLAG_VCAT_ENABLED</guest_flag>
+ </guest_flags>
+ * <clos>
+ * <vcpu_clos>3</vcpu_clos>
+ * <vcpu_clos>4</vcpu_clos>
+ * <vcpu_clos>5</vcpu_clos>
+ * <vcpu_clos>6</vcpu_clos>
+ * <vcpu_clos>7</vcpu_clos>
+ * </clos>
+ * </vm>
+ *
+ * <vm id="1">
+ * <clos>
+ * <vcpu_clos>1</vcpu_clos>
+ * <vcpu_clos>2</vcpu_clos>
+ * </clos>
+ * </vm>
+ *
+ * vm_configurations.c (generated by config-tools) with the above vCAT
config:
+ *
+ * static uint16_t vm0_vcpu_clos[5U] = {3U, 4U, 5U, 6U, 7U};
+ * static uint16_t vm1_vcpu_clos[2U] = {1U, 2U};
+ *
+ * struct acrn_vm_config vm_configs[CONFIG_MAX_VM_NUM] = {
+ * {
+ * .guest_flags = (GUEST_FLAG_VCAT_ENABLED),
+ * .pclosids = vm0_vcpu_clos,
+ * .num_pclosids = 5U,
+ * .max_l3_pcbm = 0xff800U,
+ * },
+ * {
+ * .pclosids = vm1_vcpu_clos,
+ * .num_pclosids = 2U,
+ * },
+ * };
+ *
+ * Config CLOS_MASK/max_pcbm per pCLOSID:
+ * vCAT is enabled by setting both RDT_ENABLED and VCAT_ENABLED
to 'y',
+ * then specify the GUEST_FLAG_VCAT_ENABLED guest flag for the
desired VMs.
+ * Each CLOS_MASK (a.k.a. max_pcbm) setting corresponds to a
pCLOSID and
+ * specifies the allocated portion (ways) of cache.
+ * For example, if COS_MAX is 7, then 8 CLOS_MASK settings need to be
in place
+ * where each setting corresponds to a pCLOSID starting from 0.
+ * Each CLOS_MASK may or may not overlap with the CLOS_MASK of
another pCLOSID depending
+ * on whether overlapped or isolated bitmask is desired for particular
performance
+ * consideration.
+ *
+ * Assign pCLOSIDs per VM
+ * Assign the desired pCLOSIDs to each VM in the vm/clos section of the
scenario file
+ * by defining the vcpu_clos settings.
+ * All pCLOSIDs should be configured with the same pCBM (max_pcbm)
to simplify vCAT
+ * config and ensure vCAT capability symmetry across cpus of the VM. In
the above example,
+ * pCLOSIDs 3 to 7 are all configured with the same pCBM value 0xff800,
which
+ * means a total of 9 physical cache ways have been reserved for all the
cpus
+ * belonging to VM0
+ *
+ *- vCBM:
+ * Virtual CBM
+ *
+ *- vCBM length (vcbm_len):
+ * max number of bits to set for vCBM.
+ * vcbm_len is set equal to pcbm_len
+ * vCBM length is reported to guest VMs by using vCPUID (EAX=10H)
+ *
+ *- max_vcbm (maximum virtual cache space):
+ * Fully open vCBM (all ones bitmask), max vCBM is calculated
+ * by `(1 << vcbm_len) - 1`
+ *
+ * Usually, vCLOSID0 is associated with the fully open vCBM to access
+all assigned virtual caches */
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l2_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L2)->num_closids > 0U); }
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l3_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L3)->num_closids > 0U); }
+
+/**
+ * @brief Return number of vCLOSIDs of this VM
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM */
uint16_t
+vcat_get_num_vclosids(const struct acrn_vm *vm) {
+ uint16_t num_vclosids = 0U;
+
+ if (is_vcat_configured(vm)) {
+ /*
+ * For performance and simplicity, here number of vCLOSIDs
(num_vclosids) is set
+ * equal to the number of pCLOSIDs assigned to this VM
(get_vm_config(vm->vm_id)->num_pclosids).
+ * But technically, we do not have to make such an
assumption. For
example,
+ * Hypervisor could implement CLOSID context switch, then
number
of vCLOSIDs
+ * can be greater than the number of pCLOSIDs assigned. etc.
+ */
+ num_vclosids = get_vm_config(vm->vm_id)->num_pclosids;
+ }
+
+ return num_vclosids;
+}
+
+/**
+ * @brief Map vCLOSID to pCLOSID
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre (get_vm_config(vm->vm_id)->pclosids != NULL) && (vclosid <
+get_vm_config(vm->vm_id)->num_pclosids)
+ */
+static uint16_t vclosid_to_pclosid(const struct acrn_vm *vm, uint16_t
+vclosid) {
+ ASSERT(vclosid < vcat_get_num_vclosids(vm), "vclosid is out of
+range!");
+
+ /*
+ * pclosids points to an array of assigned pCLOSIDs
+ * Use vCLOSID as the index into the pclosids array, returning the
corresponding pCLOSID
+ *
+ * Note that write_vcat_msr() calls vclosid_to_pclosid() indirectly, in
write_vcat_msr(),
+ * the is_l2_vcbm_msr()/is_l3_vcbm_msr() calls ensure that vclosid is
always less than
+ * get_vm_config(vm->vm_id)->num_pclosids, so vclosid is always an
array index within bound here
+ */
+ return get_vm_config(vm->vm_id)->pclosids[vclosid];
+}
+
+/**
+ * @brief Return the max_pcbm of this VM.
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */ static
+uint64_t get_max_pcbm(const struct acrn_vm *vm, int res) {
+ uint64_t max_pcbm = 0UL;
+
+ if (is_l2_vcat_configured(vm) && (res == RDT_RESOURCE_L2)) {
+ max_pcbm = get_vm_config(vm->vm_id)->max_l2_pcbm;
+ } else if (is_l3_vcat_configured(vm) && (res == RDT_RESOURCE_L3)) {
+ max_pcbm = get_vm_config(vm->vm_id)->max_l3_pcbm;
+ }
+
+ return max_pcbm;
+}
+
+/**
+ * @brief Retrieve vcbm_len of vm
+ * @pre vm != NULL
+ */
+uint16_t vcat_get_vcbm_len(const struct acrn_vm *vm, int res) {
+ /* vcbm_len = pcbm_len */
This comment is incorrect.
The rational here is that set-bits in max_l2_pcbm stands for bits of cbm the
guest sees, + a shift.
Don: yes, I understand the rational, but "bitmap_weight(get_max_pcbm(vm, res));" here does just return the vcbm_len?, looks yes to me, do not understand
why the comment is incorrect.



+ return bitmap_weight(get_max_pcbm(vm, res)); }
+
+/**
+ * @brief Retrieve max_vcbm of vm
+ * @pre vm != NULL
+ */
+static uint64_t vcat_get_max_vcbm(const struct acrn_vm *vm, int res) {
+ uint16_t vcbm_len = vcat_get_vcbm_len(vm, res);
+ uint64_t max_vcbm = 0UL;
If " set-bits in max_l2_pcbm stands for bits of cbm the guest sees, + a shift."
Is true,
Ideally max_vcbm should be max_pcbm >> shift.

Shift can be easily calculated by the number of 0 in LSB side.

The current implementation is not wrong, but very indirect.
Don: I can change the code

+
+ if (vcbm_len != 0U) {
+ max_vcbm = (1U << vcbm_len) - 1U;
+ }
+
+ return max_vcbm;
+}
+
+/**
The following comments may be too long. Explain the hard-to-understand
acronyms here only, and those easy-to-be-confused ones.
Don: we can rename

+ * @brief Map vMSR address (abbreviated as vmsr) to corresponding
pMSR
+address (abbreviated as pmsr)
+ * Each vMSR or pMSR is identified by a 32-bit integer
+ *
+ * @pre vm != NULL
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */ static
+uint32_t vmsr_to_pmsr(const struct acrn_vm *vm, uint32_t vmsr, int res)
+{
+ uint32_t pmsr = vmsr;
+ uint16_t vclosid;
+
+ switch (res) {
+ case RDT_RESOURCE_L2:
+ vclosid = vmsr - MSR_IA32_L2_MASK_BASE;
+ pmsr = MSR_IA32_L2_MASK_BASE + vclosid_to_pclosid(vm,
vclosid);
+ break;
+
+ case RDT_RESOURCE_L3:
+ vclosid = vmsr - MSR_IA32_L3_MASK_BASE;
+ pmsr = MSR_IA32_L3_MASK_BASE + vclosid_to_pclosid(vm,
vclosid);
+ break;
+
+ default:
+ break;
+ }
+
+ return pmsr;
+}
+
+/**
+ * @brief vCBM MSR write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static int32_t
+write_vcbm(__unused struct acrn_vcpu *vcpu, __unused uint32_t vmsr,
+__unused uint64_t val, __unused int res) {
+ /* TODO: this is going to be implemented in a subsequent commit,
will
perform the following actions:
+ * write vCBM
+ * vmsr_to_pmsr and vcbm_to_pcbm
+ * write pCBM
+ */
+ return -EFAULT;
+}
+
+/**
+ * @brief vCLOSID MSR write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static int32_t
+write_vclosid(struct acrn_vcpu *vcpu, uint64_t val) {
+ uint16_t vclosid, pclosid;
+
We need to check the reserve bits (or higher than MAX RMID) of PQR_ASSOC
too.
Inject #GP if it violates.
Don: we are not exposing the cat monitoring cap to guest, so we can skip checking the reserved bits (or higher than MAX RMID) here?
But indeed here we should check if vclosid is out of range, e.g. it should be less that vcat_get_num_vclosids(vcpu->vm).


+ /* Write the new vCLOSID value */
+ vcpu_set_guest_msr(vcpu, MSR_IA32_PQR_ASSOC, val);
+
+ vclosid = (uint16_t)((val >> 32U) & 0xFFFFFFFFUL);
Why ">>32U"?
And why &0xfff...?
Don: closid should be within uint16_t, as COS_MAX reported by cupid 10h is of 16 bits (see SDM vol 3).

For IA32_PQR_ASSOC msr:
COS occupies the bits from 31 to 63, but it still should be within 16 bits ranges, so there is some confusion here.

+ pclosid = vclosid_to_pclosid(vcpu->vm, vclosid);
+ /*
+ * Write the new pCLOSID value to the guest msr area
+ *
+ * The prepare_auto_msr_area() function has already initialized the
vcpu->arch.msr_area
+ * as follows:
+ * vcpu_clos = cfg->pclosids[vcpu->vcpu_id%cfg->num_pclosids]
+ *
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].msr_index =
MSR_IA32_PQR_ASSOC
+ * vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(vcpu_clos)
+ * vcpu-
arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].msr_index
= MSR_IA32_PQR_ASSOC
+ * vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(hv_clos)
+ * vcpu->arch.msr_area.count = 1
+ *
+ * So here we only need to update the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value field,
+ * all other vcpu->arch.msr_area fields remains unchanged at
runtime.
+ */
+ vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
+clos2pqr_msr(pclosid);
+
Where is definition of clos2pqr_msr()?
Don: in rdt.c:
uint64_t clos2pqr_msr(uint16_t clos)
{
uint64_t pqr_assoc;

pqr_assoc = msr_read(MSR_IA32_PQR_ASSOC);
pqr_assoc = (pqr_assoc & 0xffffffffUL) | ((uint64_t)clos << 32U);

return pqr_assoc;
}


+ return 0;
+}
+
+/**
+ * @brief Initialize vCBM MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static void
+init_vcbms(struct acrn_vcpu *vcpu, int res, uint32_t msr_base) {
+ uint64_t max_vcbm = vcat_get_max_vcbm(vcpu->vm, res);
+
+ if (max_vcbm != 0UL) {
+ uint32_t vmsr;
+ /* num_vcbm_msrs = num_vclosids */
+ uint16_t num_vcbm_msrs = vcat_get_num_vclosids(vcpu-
vm);
+
+ /*
+ * For each vCBM MSR, its initial vCBM is set to max_vcbm,
+ * a bitmask with vcbm_len bits (from 0 to vcbm_len - 1,
inclusive)
+ * set to 1 and all other bits set to 0.
+ *
+ * As CBM only allows contiguous '1' combinations, so
max_vcbm
essentially
+ * is a bitmask that selects all the virtual cache ways assigned
to
the VM.
+ * It covers all the virtual cache ways the guest VM may
access, i.e.
the
+ * superset bitmask.
+ */
+ for (vmsr = msr_base; vmsr < (msr_base + num_vcbm_msrs);
vmsr++) {
+ uint32_t pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res);
+ /* Set initial vMSR value: copy reserved bits from
corresponding pMSR, and set vCBM to max_vcbm */
Why we need to emulate the reserved bit? Normally all the reserved bits
should be zero.
Does SDM says the reserved bits must be none-zero for CBM?
Don: no, I see the SDM does not say that reserved bits must be non-zero, so I will assume they are 0s and will change the code


+ uint64_t val = (msr_read(pmsr) &
0xFFFFFFFF00000000UL) |
max_vcbm;
+
+ /* Write vCBM MSR */
+ (void)write_vcbm(vcpu, vmsr, val, res);
+ }
+ }
+}
+
+/**
+ * @brief Initialize vCAT MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ void
init_vcat_msrs(struct
+acrn_vcpu *vcpu) {
+ if (is_vcat_configured(vcpu->vm)) {
+ init_vcbms(vcpu, RDT_RESOURCE_L2,
MSR_IA32_L2_MASK_BASE);
+
+ init_vcbms(vcpu, RDT_RESOURCE_L3,
MSR_IA32_L3_MASK_BASE);
+
+ (void)write_vclosid(vcpu, clos2pqr_msr(0U));
Shouldn't we write 0 as power on initial vCLOSID?
Don: yes, the clos2pqr_msr(0U) means set the initial vclodis to 0

+ }
+}
diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index 66f60d289..a7848743a 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -151,6 +151,16 @@ bool is_nvmx_configured(const struct acrn_vm
*vm)
return ((vm_config->guest_flags & GUEST_FLAG_NVMX_ENABLED) !=
0U); }

+/**
+ * @pre vm != NULL && vm_config != NULL && vm->vmid <
CONFIG_MAX_VM_NUM
+*/ bool is_vcat_configured(const struct acrn_vm *vm) {
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
+
+ return ((vm_config->guest_flags & GUEST_FLAG_VCAT_ENABLED) !=
0U); }
+
/**
* @brief VT-d PI posted mode can possibly be used for PTDEVs assigned
* to this VM if platform supports VT-d PI AND lapic passthru is not
configured diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index 971e0b8fb..a3a1a8705 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -22,6 +22,7 @@
#include <asm/tsc.h>
#include <trace.h>
#include <logmsg.h>
+#include <asm/guest/vcat.h>

#define INTERCEPT_DISABLE (0U)
#define INTERCEPT_READ (1U << 0U)
@@ -338,8 +339,10 @@ static void prepare_auto_msr_area(struct
acrn_vcpu *vcpu)
It seems you changed the implementation of this API from V5.
Why?


It seems we are not converging...
Don: the only diff in V6 here is that I added is_vcat_configured () to the following line:
if (is_vcat_configured(vcpu->vm) || (vcpu_clos != hv_clos)) {



vcpu_clos = cfg->pclosids[vcpu->vcpu_id%cfg-
num_pclosids];

- /* RDT: only load/restore MSR IA32_PQR_ASSOC when hv
and
guest have different settings */
- if (vcpu_clos != hv_clos) {
+ /* RDT: only load/restore MSR_IA32_PQR_ASSOC when hv
and
guest have different settings
+ * vCAT: always load/restore MSR_IA32_PQR_ASSOC
+ */
+ if (is_vcat_configured(vcpu->vm) || (vcpu_clos != hv_clos)) {

vcpu-
arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].msr_index =
MSR_IA32_PQR_ASSOC;

vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(vcpu_clos);

vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].msr_index
=
MSR_IA32_PQR_ASSOC; @@ -371,6 +374,14 @@ void
init_emulated_msrs(struct acrn_vcpu *vcpu)
}

vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL, val64);
+
+#ifdef CONFIG_VCAT_ENABLED
+ /*
+ * init_vcat_msrs() will overwrite the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value
+ * set by prepare_auto_msr_area()
+ */
+ init_vcat_msrs(vcpu);
+#endif
}

#ifdef CONFIG_VCAT_ENABLED
diff --git a/hypervisor/arch/x86/rdt.c b/hypervisor/arch/x86/rdt.c index
f6dc960f9..b21fd5209 100644
--- a/hypervisor/arch/x86/rdt.c
+++ b/hypervisor/arch/x86/rdt.c
@@ -60,6 +60,14 @@ static struct rdt_info
res_cap_info[RDT_NUM_RESOURCES] = {
},
};

+/*
+ * @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2 || res ==
+RDT_RESOURCE_MBA */ const struct rdt_info *get_rdt_res_cap_info(int
+res) {
+ return &res_cap_info[res];
+}
+
/*
* @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcat.h
b/hypervisor/include/arch/x86/asm/guest/vcat.h
new file mode 100644
index 000000000..6d8e587c4
--- /dev/null
+++ b/hypervisor/include/arch/x86/asm/guest/vcat.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef VCAT_H_
+#define VCAT_H_
+
+#include <asm/guest/vm.h>
+
+bool is_l2_vcat_configured(const struct acrn_vm *vm); bool
+is_l3_vcat_configured(const struct acrn_vm *vm); uint16_t
+vcat_get_vcbm_len(const struct acrn_vm *vm, int res); void
+init_vcat_msrs(struct acrn_vcpu *vcpu);
+
+#endif /* VCAT_H_ */
+
diff --git a/hypervisor/include/arch/x86/asm/guest/vm.h
b/hypervisor/include/arch/x86/asm/guest/vm.h
index 9a6171be0..2cb06c027 100644
--- a/hypervisor/include/arch/x86/asm/guest/vm.h
+++ b/hypervisor/include/arch/x86/asm/guest/vm.h
@@ -256,6 +256,7 @@ void vrtc_init(struct acrn_vm *vm); bool
is_lapic_pt_configured(const struct acrn_vm *vm); bool is_rt_vm(const
struct acrn_vm *vm); bool is_nvmx_configured(const struct acrn_vm *vm);
+bool is_vcat_configured(const struct acrn_vm *vm);
bool is_pi_capable(const struct acrn_vm *vm); bool has_rt_vm(void);
struct acrn_vm *get_highest_severity_vm(bool runtime); diff --git
a/hypervisor/include/arch/x86/asm/rdt.h
b/hypervisor/include/arch/x86/asm/rdt.h
index f6c4448e2..95e149fcb 100644
--- a/hypervisor/include/arch/x86/asm/rdt.h
+++ b/hypervisor/include/arch/x86/asm/rdt.h
@@ -47,5 +47,6 @@ void init_rdt_info(void); void setup_clos(uint16_t
pcpu_id); uint64_t clos2pqr_msr(uint16_t clos); bool
is_platform_rdt_capable(void);
+const struct rdt_info *get_rdt_res_cap_info(int res);

#endif /* RDT_H */
--
2.25.1








Re: [PATCH V6 3/8] hv: vCAT: initialize the emulated_guest_msrs array for CAT msrs during platform initialization

Dongsheng Zhang
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Eddie Dong
Sent: Wednesday, October 20, 2021 4:32 AM
To: acrn-dev@...
Subject: Re: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform initialization



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:41 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform initialization

From: dongshen <dongsheng.x.zhang@...>

Initialize the emulated_guest_msrs[] array at runtime for
MSR_IA32_type_MASK_n and MSR_IA32_PQR_ASSOC msrs, there is no
good way
to do this initialization statically at build time

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/cpu.c | 4 +
hypervisor/arch/x86/guest/vmsr.c | 88
+++++++++++++++++++-
hypervisor/include/arch/x86/asm/guest/vcpu.h | 19 ++++-
hypervisor/include/arch/x86/asm/msr.h | 1 +
4 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index
e1ee4770e..18c33c7d1 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -186,6 +186,10 @@ void init_pcpu_pre(bool is_bsp)
panic("System IOAPIC info is incorrect!");
}

+#ifdef CONFIG_VCAT_ENABLED
+ init_intercepted_cat_msr_list();
+#endif
+
#ifdef CONFIG_RDT_ENABLED
init_rdt_info();
#endif
diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index e83c3069c..971e0b8fb 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -28,7 +28,7 @@
#define INTERCEPT_WRITE (1U << 1U)
#define INTERCEPT_READ_WRITE (INTERCEPT_READ |
INTERCEPT_WRITE)

-static const uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
+static uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
/*
* MSRs that trusty may touch and need isolation between secure
and
normal world
* This may include MSR_IA32_STAR, MSR_IA32_LSTAR,
MSR_IA32_FMASK,
@@ -79,6 +79,24 @@ static const uint32_t
emulated_guest_msrs[NUM_GUEST_MSRS] = { #ifdef
CONFIG_NVMX_ENABLED
LIST_OF_VMX_MSRS,
#endif
+
+ /* The following range of elements are reserved for vCAT usage and
are
+ * initialized dynamically by init_intercepted_cat_msr_list() during
platform initialization:
+ * [(NUM_GUEST_MSRS - NUM_VCAT_MSRS) ... (NUM_GUEST_MSRS
-
1)] = {
+ * The following layout of each CAT MSR entry is determined by
catmsr_to_index_of_emulated_msr():
+ * MSR_IA32_L3_MASK_BASE,
+ * MSR_IA32_L3_MASK_BASE + 1,
+ * ...
+ * MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS - 1,
+ *
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS,
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS + 1,
+ * ...
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS - 1,
+ *
+ * MSR_IA32_PQR_ASSOC + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS
+ * }
+ */
};

static const uint32_t mtrr_msrs[] = { @@ -355,6 +373,74 @@ void
init_emulated_msrs(struct acrn_vcpu *vcpu)
vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL, val64); }

+#ifdef CONFIG_VCAT_ENABLED
+/**
+ * @brief Map CAT MSR address to zero based index
+ *
+ * @pre ((msr >= MSR_IA32_L3_MASK_BASE) && msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))
+ * || ((msr >= MSR_IA32_L2_MASK_BASE) && msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))
+ * || (msr == MSR_IA32_PQR_ASSOC)
+ */
+static uint32_t cat_msr_to_index_of_emulated_msr(uint32_t msr) {
+ uint32_t index = 0U;
+
+ /* L3 MSRs indices assignment for MSR_IA32_L3_MASK_BASE ~
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS):
+ * 0
+ * 1
+ * ...
+ * (NUM_VCAT_L3_MSRS - 1)
+ *
+ * L2 MSRs indices assignment:
+ * NUM_VCAT_L3_MSRS
+ * ...
+ * NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS - 1
+
+ * PQR index assignment for MSR_IA32_PQR_ASSOC:
+ * NUM_VCAT_L3_MSRS
+ */
+
+ if ((msr >= MSR_IA32_L3_MASK_BASE) && (msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))) {
+ index = msr - MSR_IA32_L3_MASK_BASE;
+ } else if ((msr >= MSR_IA32_L2_MASK_BASE) && (msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))) {
+ index = msr - MSR_IA32_L2_MASK_BASE +
NUM_VCAT_L3_MSRS;
+ } else if (msr == MSR_IA32_PQR_ASSOC) {
+ index = NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS;
+ } else {
+ ASSERT(false, "invalid CAT msr address");
+ }
+
+ return index;
+}
+
+static void init_cat_msr_entry(uint32_t msr) {
+ /* Get index into the emulated_guest_msrs[] table for a given CAT
+MSR
*/
+ uint32_t index = cat_msr_to_index_of_emulated_msr(msr) +
+NUM_GUEST_MSRS - NUM_VCAT_MSRS;
Do you mean NUM_WORLD_MSRS + NUM_COMMON_MSRS +
NUM_VMX_MSRS + cat_msr_to_index_of_emulated_msr(msr)?
Don: yes, I mean that

If you make a separate MACRO for " NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS" to stand for the start offset of
CAT MSRs, that is fine too.
Don: maybe better to avoid another separate MACRO

+
+ emulated_guest_msrs[index] = msr;
+}
+
+/* Init emulated_guest_msrs[] dynamically for CAT MSRs */ void
+init_intercepted_cat_msr_list(void)
+{
+ uint32_t msr;
+
+ /* MSR_IA32_L2_MASK_n MSRs */
+ for (msr = MSR_IA32_L2_MASK_BASE; msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_L3_MASK_n MSRs */
+ for (msr = MSR_IA32_L3_MASK_BASE; msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_PQR_ASSOC */
+ init_cat_msr_entry(MSR_IA32_PQR_ASSOC);
+}
+#endif
+
/**
* @pre vcpu != NULL
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcpu.h
b/hypervisor/include/arch/x86/asm/guest/vcpu.h
index 65ec52203..e251fb4cf 100644
--- a/hypervisor/include/arch/x86/asm/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/asm/guest/vcpu.h
@@ -29,6 +29,7 @@
#include <asm/guest/instr_emul.h>
#include <asm/guest/nested.h>
#include <asm/vmx.h>
+#include <asm/vm_config.h>

/**
* @brief vcpu
@@ -173,12 +174,26 @@ enum reset_mode;

#define NUM_WORLD_MSRS 2U
#define NUM_COMMON_MSRS 23U
+
+#ifdef CONFIG_VCAT_ENABLED
+#define NUM_VCAT_L2_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+#define NUM_VCAT_L3_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+/* L2/L3 mask MSRs plus MSR_IA32_PQR_ASSOC */
+#define NUM_VCAT_MSRS (NUM_VCAT_L2_MSRS +
NUM_VCAT_L3_MSRS + 1U)
Why we need +1? Number of VCAT MSR should be L2 + L3.
Don: the comment above this line says it is "/* L2/L3 mask MSRs plus MSR_IA32_PQR_ASSOC "
So both L2+L3+ PQR

+#else
+#define NUM_VCAT_L2_MSRS 0U
+#define NUM_VCAT_L3_MSRS 0U
I remember we don't need these 2 MACROs. Not?
Don: we need these above 2 MACROs, otherwise, #define NUM_GUEST_MSRS(see below) will be too complicated
(nested #ifdef), too ugly


+#define NUM_VCAT_MSRS 0U
+#endif
+
+/* For detailed layout of the emulated guest MSRs, see
+emulated_guest_msrs[NUM_GUEST_MSRS] in vmsr.c */
#ifdef CONFIG_NVMX_ENABLED
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS + NUM_VCAT_MSRS) #else
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VCAT_MSRS)
#endif

+
#define EOI_EXIT_BITMAP_SIZE 256U

struct guest_cpu_context {
diff --git a/hypervisor/include/arch/x86/asm/msr.h
b/hypervisor/include/arch/x86/asm/msr.h
index 9c9b56bf7..6556267f5 100644
--- a/hypervisor/include/arch/x86/asm/msr.h
+++ b/hypervisor/include/arch/x86/asm/msr.h
@@ -617,6 +617,7 @@ static inline bool is_x2apic_msr(uint32_t msr)
struct acrn_vcpu;

void init_msr_emulation(struct acrn_vcpu *vcpu);
+void init_intercepted_cat_msr_list(void);
uint32_t vmsr_get_guest_msr_index(uint32_t msr); void
update_msr_bitmap_x2apic_apicv(struct acrn_vcpu *vcpu); void
update_msr_bitmap_x2apic_passthru(struct acrn_vcpu *vcpu);
--
2.25.1








Re: [PATCH V6 8/8] hv: vCAT: propagate vCBM to other vCPUs that share cache with vcpu

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:42 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 8/8] hv: vCAT: propagate vCBM to other vCPUs
that share cache with vcpu

From: dongshen <dongsheng.x.zhang@...>

Move the nearest_pow2() and get_cache_shift() functions from hypercall.c to
cpu_caps.c Store L2/L3 cat id shift in struct cpuinfo_x86

Implement the propagate_vcbm() function:
Set vCBM to to all the vCPUs that share cache with vcpu
to mimic hardware CAT behavior

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/cpu_caps.c | 55
+++++++++++++++++++
hypervisor/arch/x86/guest/vcat.c | 62
+++++++++++++++++++++-
hypervisor/common/hypercall.c | 50 ++---------------
hypervisor/include/arch/x86/asm/cpu_caps.h | 4 ++
4 files changed, 122 insertions(+), 49 deletions(-)

diff --git a/hypervisor/arch/x86/cpu_caps.c
b/hypervisor/arch/x86/cpu_caps.c index 89fd559b1..33ea3aa36 100644
--- a/hypervisor/arch/x86/cpu_caps.c
+++ b/hypervisor/arch/x86/cpu_caps.c
@@ -16,6 +16,7 @@
#include <errno.h>
#include <logmsg.h>
#include <asm/guest/vmcs.h>
+#include <asm/lib/bits.h>

/* TODO: add more capability per requirement */
/* APICv features */
@@ -322,6 +323,58 @@ static uint64_t get_address_mask(uint8_t limit)
return ((1UL << limit) - 1UL) & PAGE_MASK; }

+/*
+ * nearest_pow2(n) is the nearest power of 2 integer that is not less
+than n
+ * The last (most significant) bit set of (n*2-1) matches the above
+definition */ static uint32_t nearest_pow2(uint32_t n) {
+ uint32_t p = n;
+
+ if (n >= 2U) {
+ p = fls32(2U*n - 1U);
+ }
+
+ return p;
+}
+
+static void get_cat_id_shift(uint32_t *l2_cat_id_shift, uint32_t
+*l3_cat_id_shift) {
What does cat id mean?
Or are u defining a new term cache-id-shift?
Cannot understand.

I guess you are saying: cache_id? Cache_type? Cache_level? I am lost


+ uint32_t subleaf;
+
+ *l2_cat_id_shift = 0U;
+ *l3_cat_id_shift = 0U;
+
+ for (subleaf = 0U;; subleaf++) {
+ uint32_t eax, ebx, ecx, edx;
+ uint32_t cache_type, cache_level, id, shift;
+
+ cpuid_subleaf(0x4U, subleaf, &eax, &ebx, &ecx, &edx);
+
+ cache_type = eax & 0x1fU;
+ cache_level = (eax >> 5U) & 0x7U;
+
+ /* Intel SDM Vol 2, CPUID 04H:
+ * EAX: bits 25 - 14: Maximum number of addressable IDs for logical
processors sharing this cache.
+ * The nearest power-of-2 integer that is not smaller than (1 +
EAX[25:14]) is the number of unique
+ * initial APIC IDs reserved for addressing different logical
processors sharing this cache
+ */
+ id = (eax >> 14U) & 0xfffU;
+ shift = nearest_pow2(id + 1U);
+
+ /* No more caches */
+ if ((cache_type == 0U) || (cache_type >= 4U)) {
+ break;
+ }
+
+ if (cache_level == 2U) {
+ *l2_cat_id_shift = shift;
+ } else if (cache_level == 3U) {
+ *l3_cat_id_shift = shift;
+ }
+ }
+}
+
void init_pcpu_capabilities(void)
{
uint32_t eax, unused;
@@ -385,6 +438,8 @@ void init_pcpu_capabilities(void)
get_address_mask(boot_cpu_data.phys_bits);
}

+ get_cat_id_shift(&boot_cpu_data.l2_cat_id_shift,
+&boot_cpu_data.l3_cat_id_shift);
+
detect_pcpu_cap();
}

diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
index 1dda6b61f..9e302b25e 100644
--- a/hypervisor/arch/x86/guest/vcat.c
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -410,6 +410,55 @@ static uint32_t vmsr_to_pmsr(const struct acrn_vm
*vm, uint32_t vmsr, int res)
return pmsr;
}

+static void get_vcat_id_shift(uint32_t *l2_shift, uint32_t *l3_shift) {
+ struct cpuinfo_x86 *cpu_info = get_pcpu_info();
+
+ /* Assume that virtual cat id shift is equal to physical cat id shift for now
*/
+ *l2_shift = cpu_info->l2_cat_id_shift;
+ *l3_shift = cpu_info->l3_cat_id_shift; }
+
+/**
+ * @brief Propagate vCBM to other vCPUs that share cache with vcpu
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static void
+propagate_vcbm(struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t val) {
+ uint16_t i;
+ struct acrn_vcpu *tmp_vcpu;
+ uint32_t l2_shift, l3_shift, l2_id, l3_id;
+ struct acrn_vm *vm = vcpu->vm;
+ uint32_t apicid = vlapic_get_apicid(vcpu_vlapic(vcpu));
+
+ get_vcat_id_shift(&l2_shift, &l3_shift);
???


+ /* Intel SDM Vol 2, CPUID 04H:
+ * EAX: bits 25 - 14: Maximum number of addressable IDs for logical
processors sharing this cache.
+ *
+ * l2_shift/l3_shift: the nearest power-of-2 integer that is not smaller
than (1 + EAX[25:14])
+ * is the number of unique initial APIC IDs reserved for addressing
different logical processors
+ * sharing this cache
+ */
+ l2_id = apicid >> l2_shift;
+ l3_id = apicid >> l3_shift;
+
+ /*
+ * Determine which logical processors share an MSR (for instance local
+ * to a core, or shared across multiple cores) by checking if they have the
same
+ * L2/L3 cache id
+ */
+ foreach_vcpu(i, vm, tmp_vcpu) {
+ uint32_t tmp_apicid = vlapic_get_apicid(vcpu_vlapic(tmp_vcpu));
+ uint32_t tmp_l2_id = tmp_apicid >> l2_shift;
+ uint32_t tmp_l3_id = tmp_apicid >> l3_shift;
+
This patch is becoming worse than previous version.

We want to propagate among VCPUs with same cache ID. All what we compare is among virtual cache ID.
However this code is comparing between P and V.



+ if ((is_l2_vcbm_msr(vm, vmsr) && (l2_id == tmp_l2_id))
+ || (is_l3_vcbm_msr(vm, vmsr) && (l3_id == tmp_l3_id))) {
+ vcpu_set_guest_msr(tmp_vcpu, vmsr, val);
+ }
+ }
+}
+
static void write_pcbm(uint32_t pmsr, uint64_t pcbm) {
/* Preserve reserved bits, and only set the pCBM bits */ @@ -461,8
+510,17 @@ static int32_t write_vcbm(struct acrn_vcpu *vcpu, uint32_t
vmsr, uint64_t val, i
uint32_t pmsr;
uint64_t pcbm;

- /* Write vCBM first */
- vcpu_set_guest_msr(vcpu, vmsr, masked_vcbm | (val &
0xFFFFFFFF00000000UL));
+ /*
+ * Write vCBM first:
+ * The L2 mask MSRs are scoped at the same level as the L2 cache
(similarly,
+ * the L3 mask MSRs are scoped at the same level as the L3 cache).
+ *
+ * For example, the MSR_IA32_L3_MASK_n MSRs are scoped at
socket level, which means if
+ * we program MSR_IA32_L3_MASK_n on one cpu and the same
MSR_IA32_L3_MASK_n on all other cpus
+ * of the same socket will also get the change!
+ * Set vcbm to all the vCPUs that share cache with vcpu to mimic
this hardware behavior.
+ */
+ propagate_vcbm(vcpu, vmsr, masked_vcbm | (val &
+0xFFFFFFFF00000000UL));

/* Write pCBM: */
pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res); diff --git
a/hypervisor/common/hypercall.c b/hypervisor/common/hypercall.c index
fb19fe4a0..3f5134d45 100644
--- a/hypervisor/common/hypercall.c
+++ b/hypervisor/common/hypercall.c
@@ -133,52 +133,6 @@ int32_t hcall_get_api_version(struct acrn_vcpu
*vcpu, __unused struct acrn_vm *t
return copy_to_gpa(vcpu->vm, &version, param1, sizeof(version)); }

-/*
- * nearest_pow2(n) is the nearest power of 2 integer that is not less than n
- * The last (most significant) bit set of (n*2-1) matches the above definition
- */
-static uint32_t nearest_pow2(uint32_t n) -{
- uint32_t p = n;
-
- if (n >= 2U) {
- p = fls32(2U*n - 1U);
- }
-
- return p;
-}
-
-static void get_cache_shift(uint32_t *l2_shift, uint32_t *l3_shift) -{
- uint32_t subleaf;
-
- *l2_shift = 0U;
- *l3_shift = 0U;
-
- for (subleaf = 0U;; subleaf++) {
- uint32_t eax, ebx, ecx, edx;
- uint32_t cache_type, cache_level, id, shift;
-
- cpuid_subleaf(0x4U, subleaf, &eax, &ebx, &ecx, &edx);
-
- cache_type = eax & 0x1fU;
- cache_level = (eax >> 5U) & 0x7U;
- id = (eax >> 14U) & 0xfffU;
- shift = nearest_pow2(id + 1U);
-
- /* No more caches */
- if ((cache_type == 0U) || (cache_type >= 4U)) {
- break;
- }
-
- if (cache_level == 2U) {
- *l2_shift = shift;
- } else if (cache_level == 3U) {
- *l3_shift = shift;
- }
- }
-}
-
/**
* @brief Get basic platform information.
*
@@ -204,8 +158,10 @@ int32_t hcall_get_platform_info(struct acrn_vcpu
*vcpu, __unused struct acrn_vm
if (ret == 0) {
uint16_t i;
uint16_t pcpu_nums = get_pcpu_nums();
+ struct cpuinfo_x86 *cpu_info = get_pcpu_info();

- get_cache_shift(&pi.hw.l2_cat_shift, &pi.hw.l3_cat_shift);
+ pi.hw.l2_cat_shift = cpu_info->l2_cat_id_shift;
+ pi.hw.l3_cat_shift = cpu_info->l3_cat_id_shift;

for (i = 0U; i < min(pcpu_nums, ACRN_PLATFORM_LAPIC_IDS_MAX);
i++) {
pi.hw.lapic_ids[i] = per_cpu(lapic_id, i); diff --git
a/hypervisor/include/arch/x86/asm/cpu_caps.h
b/hypervisor/include/arch/x86/asm/cpu_caps.h
index 4f679f209..c22e306a0 100644
--- a/hypervisor/include/arch/x86/asm/cpu_caps.h
+++ b/hypervisor/include/arch/x86/asm/cpu_caps.h
@@ -43,6 +43,10 @@ struct cpuinfo_x86 {
uint64_t physical_address_mask;
uint32_t cpuid_leaves[FEATURE_WORDS];
char model_name[64];
+ /* Right-shift count that will allow software to extract part of APIC ID to
distinguish L2 CAT ID */
+ uint32_t l2_cat_id_shift;
+ /* Right-shift count that will allow software to extract part of APIC ID to
distinguish L3 CAT ID */
+ uint32_t l3_cat_id_shift;
};

bool has_monitor_cap(void);
--
2.25.1





Re: [PATCH V6 7/8] hv: vCAT: implementing the vCAT MSRs write handler

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:42 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 7/8] hv: vCAT: implementing the vCAT MSRs
write handler

From: dongshen <dongsheng.x.zhang@...>

Implement the write_vcat_msr() function to handle the
MSR_IA32_PQR_ASSOC and MSR_IA32_type_MASK_n vCAT MSRs write
request.

Several vCAT P2V (physical to virtual) and V2P (virtual to physical) mappings
exist:

struct acrn_vm_config *vm_config = get_vm_config(vm_id)

max_pcbm = vm_config->max_type_pcbm (type: l2 or l3)
mask_shift = ffs64(max_pcbm)

vclosid = vmsr - MSR_IA32_type_MASK_0
pclosid = vm_config->pclosids[vclosid]

pmsr = MSR_IA32_type_MASK_0 + pclosid
pcbm = vcbm << mask_shift
vcbm = pcbm >> mask_shift

Where
MSR_IA32_type_MASK_n: L2 or L3 mask msr address for CLOSIDn, from
0C90H through 0D8FH (inclusive).

max_pcbm: a bitmask that selects all the physical cache ways assigned to
the VM

vclosid: virtual CLOSID, always starts from 0

pclosid: corresponding physical CLOSID for a given vclosid

vmsr: virtual msr address, passed to vCAT handlers by the
caller functions rdmsr_vmexit_handler()/wrmsr_vmexit_handler()

pmsr: physical msr address

vcbm: virtual CBM, passed to vCAT handlers by the
caller functions rdmsr_vmexit_handler()/wrmsr_vmexit_handler()

pcbm: physical CBM

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/guest/vcat.c | 100
+++++++++++++++++--
hypervisor/arch/x86/guest/vmsr.c | 9 ++
hypervisor/include/arch/x86/asm/guest/vcat.h | 1 +
3 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
index 03e9a3cc9..1dda6b61f 100644
--- a/hypervisor/arch/x86/guest/vcat.c
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -365,6 +365,21 @@ int32_t read_vcat_msr(const struct acrn_vcpu
*vcpu, uint32_t vmsr, uint64_t *rva
return ret;
}

+/**
+ * @brief Map vCBM to pCBM
+ *
+ * @pre vm != NULL
+ */
+static uint64_t vcbm_to_pcbm(const struct acrn_vm *vm, uint64_t vcbm,
+int res) {
+ uint64_t max_pcbm = get_max_pcbm(vm, res);
+
+ /* Find the position low (the first bit set) in max_pcbm */
+ uint16_t low = ffs64(max_pcbm);
+
+ return vcbm << low;
+}
+
/**
* @brief Map vMSR address (abbreviated as vmsr) to corresponding pMSR
address (abbreviated as pmsr)
* Each vMSR or pMSR is identified by a 32-bit integer @@ -395,19 +410,70
@@ static uint32_t vmsr_to_pmsr(const struct acrn_vm *vm, uint32_t vmsr,
int res)
return pmsr;
}

+static void write_pcbm(uint32_t pmsr, uint64_t pcbm) {
+ /* Preserve reserved bits, and only set the pCBM bits */
+ uint64_t pmsr_value = (msr_read(pmsr) & 0xFFFFFFFF00000000UL) |
pcbm;
+
+ msr_write(pmsr, pmsr_value);
+}
+
+/* Check if bitmask is contiguous:
+ * All (and only) contiguous '1' combinations are allowed (e.g. FFFFH,
+0FF0H, 003CH, etc.) */ static bool is_contiguous_bit_set(uint64_t
+bitmask) {
+ bool ret = false;
+ uint64_t tmp64 = bitmask;
+
+ if (tmp64 != 0UL) {
+ while ((tmp64 & 1UL) == 0UL) {
+ tmp64 >>= 1U;
+ }
+
+ while ((tmp64 & 1UL) != 0UL) {
+ tmp64 >>= 1U;
+ }
+
+ if (tmp64 == 0UL) {
+ ret = true;
+ }
+ }
That is too ugly.

We can use bit-scan instruction to find the highest set-bit (say high), and the lowest set bit (say low)
Then the contiguous value must be: (1<<(high+1)) -- (1<<low)


+
+ return ret;
+}
+
/**
* @brief vCBM MSR write handler
*
* @pre vcpu != NULL && vcpu->vm != NULL
*/
-static int32_t write_vcbm(__unused struct acrn_vcpu *vcpu, __unused
uint32_t vmsr, __unused uint64_t val, __unused int res)
+static int32_t write_vcbm(struct acrn_vcpu *vcpu, uint32_t vmsr,
+uint64_t val, int res)
{
- /* TODO: this is going to be implemented in a subsequent commit, will
perform the following actions:
- * write vCBM
- * vmsr_to_pmsr and vcbm_to_pcbm
- * write pCBM
+ int32_t ret = -EINVAL;
+ /*
+ * vcbm set bits should only be in the range of [0, vcbm_len)
(vcat_get_max_vcbm),
+ * so mask with vcat_get_max_vcbm to prevent erroneous vCBM value
*/
- return -EFAULT;
+ uint64_t masked_vcbm = val & vcat_get_max_vcbm(vcpu->vm, res);
If SW sets bits beyond hardware supported value, I thought we should inject #GP.
Not?

+
+ if (is_contiguous_bit_set(masked_vcbm)) {
+ uint32_t pmsr;
+ uint64_t pcbm;
+
+ /* Write vCBM first */
+ vcpu_set_guest_msr(vcpu, vmsr, masked_vcbm | (val &
+0xFFFFFFFF00000000UL));
+
Didn't understand why we not directly set guest value here.

+ /* Write pCBM: */
+ pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res);
+ pcbm = vcbm_to_pcbm(vcpu->vm, masked_vcbm, res);
+ write_pcbm(pmsr, pcbm);
+
+ ret = 0;
+ }
+
+ /* Return non-zero to vmexit_handler if vcbm is not contiguous, which
will result in #GP injected to guest */
+ return ret;
}

/**
@@ -444,6 +510,28 @@ static int32_t write_vclosid(struct acrn_vcpu *vcpu,
uint64_t val)
return 0;
}

+/**
+ * @brief vCAT MSRs write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ int32_t
+write_vcat_msr(struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t val) {
+ int32_t ret = -EACCES;
+
+ if (is_vcat_configured(vcpu->vm)) {
+ if (vmsr == MSR_IA32_PQR_ASSOC) {
+ ret = write_vclosid(vcpu, val);
Please move this to caller side to avoid double switch/if-else...
This one happens frequently at runtime.

+ } else if (is_l2_vcbm_msr(vcpu->vm, vmsr)) {
+ ret = write_vcbm(vcpu, vmsr, val, RDT_RESOURCE_L2);
+ } else if (is_l3_vcbm_msr(vcpu->vm, vmsr)) {
+ ret = write_vcbm(vcpu, vmsr, val, RDT_RESOURCE_L3);
+ }
+ }
+
+ return ret;
+}
+
/**
* @brief Initialize vCBM MSRs
*
diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index 06825d211..412388106 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -1092,6 +1092,15 @@ int32_t wrmsr_vmexit_handler(struct acrn_vcpu
*vcpu)
}
break;
}
+#ifdef CONFIG_VCAT_ENABLED
+ case MSR_IA32_L2_MASK_BASE ... (MSR_IA32_L2_MASK_BASE +
NUM_VCAT_L2_MSRS - 1U):
+ case MSR_IA32_L3_MASK_BASE ... (MSR_IA32_L3_MASK_BASE +
NUM_VCAT_L3_MSRS - 1U):
+ case MSR_IA32_PQR_ASSOC:
+ {
+ err = write_vcat_msr(vcpu, msr, v);
+ break;
+ }
+#endif
default:
{
if (is_x2apic_msr(msr)) {
diff --git a/hypervisor/include/arch/x86/asm/guest/vcat.h
b/hypervisor/include/arch/x86/asm/guest/vcat.h
index 16ae0802b..8effa7604 100644
--- a/hypervisor/include/arch/x86/asm/guest/vcat.h
+++ b/hypervisor/include/arch/x86/asm/guest/vcat.h
@@ -16,6 +16,7 @@ void init_vcat_msrs(struct acrn_vcpu *vcpu); uint16_t
vcat_get_num_vclosids(const struct acrn_vm *vm); uint64_t
vcat_pcbm_to_vcbm(const struct acrn_vm *vm, uint64_t pcbm, int res);
int32_t read_vcat_msr(const struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t
*rval);
+int32_t write_vcat_msr(struct acrn_vcpu *vcpu, uint32_t vmsr, uint64_t
+val);

#endif /* VCAT_H_ */

--
2.25.1





Re: [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs during vmcs init

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:41 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 4/8] hv: vCAT: initialize vCAT MSRs during vmcs
init

From: dongshen <dongsheng.x.zhang@...>

Initialize vCBM MSR

Initialize vCLOSID MSR

Add some vCAT functions:
Retrieve max_vcbm
Check if vCAT is configured or not for the VM Map vclosid to pclosid
write_vclosid: vCLOSID MSR write handler
write_vcbm: vCBM MSR write handler

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/Makefile | 3 +
hypervisor/arch/x86/guest/vcat.c | 408
+++++++++++++++++++
hypervisor/arch/x86/guest/vm.c | 10 +
hypervisor/arch/x86/guest/vmsr.c | 15 +-
hypervisor/arch/x86/rdt.c | 8 +
hypervisor/include/arch/x86/asm/guest/vcat.h | 18 +
hypervisor/include/arch/x86/asm/guest/vm.h | 1 +
hypervisor/include/arch/x86/asm/rdt.h | 1 +
8 files changed, 462 insertions(+), 2 deletions(-) create mode 100644
hypervisor/arch/x86/guest/vcat.c create mode 100644
hypervisor/include/arch/x86/asm/guest/vcat.h

diff --git a/hypervisor/Makefile b/hypervisor/Makefile index
50bb2891c..a5c02d115 100644
--- a/hypervisor/Makefile
+++ b/hypervisor/Makefile
@@ -330,6 +330,9 @@ VP_DM_C_SRCS += arch/x86/guest/vmx_io.c
VP_DM_C_SRCS += arch/x86/guest/instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/lock_instr_emul.c VP_DM_C_SRCS +=
arch/x86/guest/vm_reset.c
+ifeq ($(CONFIG_VCAT_ENABLED),y)
+VP_DM_C_SRCS += arch/x86/guest/vcat.c
+endif
VP_DM_C_SRCS += common/ptdev.c

# virtual platform trusty
diff --git a/hypervisor/arch/x86/guest/vcat.c
b/hypervisor/arch/x86/guest/vcat.c
new file mode 100644
index 000000000..49968b8d9
--- /dev/null
+++ b/hypervisor/arch/x86/guest/vcat.c
@@ -0,0 +1,408 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#include <types.h>
+#include <errno.h>
+#include <logmsg.h>
+#include <asm/cpufeatures.h>
+#include <asm/cpuid.h>
+#include <asm/rdt.h>
+#include <asm/lib/bits.h>
+#include <asm/board.h>
+#include <asm/vm_config.h>
+#include <asm/msr.h>
+#include <asm/guest/vcpu.h>
+#include <asm/guest/vm.h>
+#include <asm/guest/vcat.h>
+#include <asm/per_cpu.h>
+
+/*
+ * List of acronyms used here:
+ *
+ * - CAT:
+ * Cache Allocation Technology
+ *
+ *- vCAT:
+ * Virtual CAT
+ *
+ *- MSRs:
+ * Machine Specific Registers, each MSR is identified by a 32-bit integer.
+ *
+ *- pMSR:
+ * physical MSR
+ *
+ *- vMSR:
+ * virtual MSR
+ *
+ *- COS/CLOS:
+ * Class of Service. Also mean COS MSRs
+ *
+ *- CLOSID:
+ * Each CLOS has a number ID, ranges from 0 to COS_MAX
+ *
+ *- CLOSIDn:
+ * Each CLOS has a number ID denoted by n
+ *
+ *- COS_MAX:
+ * Max number of COS MSRs. ACRN uses the smallest number of
+ * CLOSIDs of all supported resources as COS_MAX to have consistent
+ * allocation
+ *
+-* pCLOSID:
+ * Physical CLOSID
+ *
+ *- vCLOSID:
+ * Virtual CLOSID
+ *
+ *- MSR_IA32_type_MASK_n
+ * type: L2 or L3
+ * One CAT (CBM) MSR, where n corresponds to a number (CLOSIDn)
+ *
+ *- CBM:
+ * Capacity bitmask (cache bit mask), specifies which region of cache
+ * can be filled into, all (and only) contiguous '1' combinations are
+allowed
+ *
+ *- pCBM:
+ * Physical CBM
+ *
+ *- pCBM length (pcbm_len):
+ * pcbm_len is calculated by `bitmap_weight(max_pcbm)`
+ * indicates number of bits set in max_pcbm
+ *
+ *- max_pcbm (maximum physical cache space assigned to VM):
+ * max_pcbm is a contiguous capacity bitmask (CBM) starting at bit
+position low
+ * (the lowest assigned physical cache way) and ending at position
+high
+ * (the highest assigned physical cache way, inclusive).
+ * As CBM only allows contiguous '1' combinations, so max_pcbm
+essentially
+ * is a bitmask that selects/covers all the physical cache ways assigned to
the VM.
+ *
+ * Example:
+ * pcbm_len=20
+ * max_pcbm=0xfffff
+ *
+ *- CLOS_MASK/max_pcbm: (maximum assigned/reserved physical cache
+space)
+ * vCAT is built on top of RDT, vCAT on ACRN is enabled by configuring
+the FEATURES/RDT
+ * and vm sub-sections of the scenario XML file as in the below example:
+ * <RDT>
+ * <RDT_ENABLED>y</RDT_ENABLED>
+ * <CDP_ENABLED>n</CDP_ENABLED>
+ * <VCAT_ENABLED>y</VCAT_ENABLED>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0x7ff</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * <CLOS_MASK>0xff800</CLOS_MASK>
+ * /RDT>
+ *
+ * <vm id="0">
+ * <guest_flags>
+ <guest_flag>GUEST_FLAG_VCAT_ENABLED</guest_flag>
+ </guest_flags>
+ * <clos>
+ * <vcpu_clos>3</vcpu_clos>
+ * <vcpu_clos>4</vcpu_clos>
+ * <vcpu_clos>5</vcpu_clos>
+ * <vcpu_clos>6</vcpu_clos>
+ * <vcpu_clos>7</vcpu_clos>
+ * </clos>
+ * </vm>
+ *
+ * <vm id="1">
+ * <clos>
+ * <vcpu_clos>1</vcpu_clos>
+ * <vcpu_clos>2</vcpu_clos>
+ * </clos>
+ * </vm>
+ *
+ * vm_configurations.c (generated by config-tools) with the above vCAT
config:
+ *
+ * static uint16_t vm0_vcpu_clos[5U] = {3U, 4U, 5U, 6U, 7U};
+ * static uint16_t vm1_vcpu_clos[2U] = {1U, 2U};
+ *
+ * struct acrn_vm_config vm_configs[CONFIG_MAX_VM_NUM] = {
+ * {
+ * .guest_flags = (GUEST_FLAG_VCAT_ENABLED),
+ * .pclosids = vm0_vcpu_clos,
+ * .num_pclosids = 5U,
+ * .max_l3_pcbm = 0xff800U,
+ * },
+ * {
+ * .pclosids = vm1_vcpu_clos,
+ * .num_pclosids = 2U,
+ * },
+ * };
+ *
+ * Config CLOS_MASK/max_pcbm per pCLOSID:
+ * vCAT is enabled by setting both RDT_ENABLED and VCAT_ENABLED
to 'y',
+ * then specify the GUEST_FLAG_VCAT_ENABLED guest flag for the
desired VMs.
+ * Each CLOS_MASK (a.k.a. max_pcbm) setting corresponds to a
pCLOSID and
+ * specifies the allocated portion (ways) of cache.
+ * For example, if COS_MAX is 7, then 8 CLOS_MASK settings need to be
in place
+ * where each setting corresponds to a pCLOSID starting from 0.
+ * Each CLOS_MASK may or may not overlap with the CLOS_MASK of
another pCLOSID depending
+ * on whether overlapped or isolated bitmask is desired for particular
performance
+ * consideration.
+ *
+ * Assign pCLOSIDs per VM
+ * Assign the desired pCLOSIDs to each VM in the vm/clos section of the
scenario file
+ * by defining the vcpu_clos settings.
+ * All pCLOSIDs should be configured with the same pCBM (max_pcbm)
to simplify vCAT
+ * config and ensure vCAT capability symmetry across cpus of the VM. In
the above example,
+ * pCLOSIDs 3 to 7 are all configured with the same pCBM value 0xff800,
which
+ * means a total of 9 physical cache ways have been reserved for all the
cpus
+ * belonging to VM0
+ *
+ *- vCBM:
+ * Virtual CBM
+ *
+ *- vCBM length (vcbm_len):
+ * max number of bits to set for vCBM.
+ * vcbm_len is set equal to pcbm_len
+ * vCBM length is reported to guest VMs by using vCPUID (EAX=10H)
+ *
+ *- max_vcbm (maximum virtual cache space):
+ * Fully open vCBM (all ones bitmask), max vCBM is calculated
+ * by `(1 << vcbm_len) - 1`
+ *
+ * Usually, vCLOSID0 is associated with the fully open vCBM to access
+all assigned virtual caches */
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l2_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L2)->num_closids > 0U); }
+
+/**
+ * @pre vm != NULL
+ */
+bool is_l3_vcat_configured(const struct acrn_vm *vm) {
+ return is_vcat_configured(vm) &&
+(get_rdt_res_cap_info(RDT_RESOURCE_L3)->num_closids > 0U); }
+
+/**
+ * @brief Return number of vCLOSIDs of this VM
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM */
uint16_t
+vcat_get_num_vclosids(const struct acrn_vm *vm) {
+ uint16_t num_vclosids = 0U;
+
+ if (is_vcat_configured(vm)) {
+ /*
+ * For performance and simplicity, here number of vCLOSIDs
(num_vclosids) is set
+ * equal to the number of pCLOSIDs assigned to this VM
(get_vm_config(vm->vm_id)->num_pclosids).
+ * But technically, we do not have to make such an assumption. For
example,
+ * Hypervisor could implement CLOSID context switch, then number
of vCLOSIDs
+ * can be greater than the number of pCLOSIDs assigned. etc.
+ */
+ num_vclosids = get_vm_config(vm->vm_id)->num_pclosids;
+ }
+
+ return num_vclosids;
+}
+
+/**
+ * @brief Map vCLOSID to pCLOSID
+ *
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre (get_vm_config(vm->vm_id)->pclosids != NULL) && (vclosid <
+get_vm_config(vm->vm_id)->num_pclosids)
+ */
+static uint16_t vclosid_to_pclosid(const struct acrn_vm *vm, uint16_t
+vclosid) {
+ ASSERT(vclosid < vcat_get_num_vclosids(vm), "vclosid is out of
+range!");
+
+ /*
+ * pclosids points to an array of assigned pCLOSIDs
+ * Use vCLOSID as the index into the pclosids array, returning the
corresponding pCLOSID
+ *
+ * Note that write_vcat_msr() calls vclosid_to_pclosid() indirectly, in
write_vcat_msr(),
+ * the is_l2_vcbm_msr()/is_l3_vcbm_msr() calls ensure that vclosid is
always less than
+ * get_vm_config(vm->vm_id)->num_pclosids, so vclosid is always an
array index within bound here
+ */
+ return get_vm_config(vm->vm_id)->pclosids[vclosid];
+}
+
+/**
+ * @brief Return the max_pcbm of this VM.
+ * @pre vm != NULL && vm->vm_id < CONFIG_MAX_VM_NUM
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */ static
+uint64_t get_max_pcbm(const struct acrn_vm *vm, int res) {
+ uint64_t max_pcbm = 0UL;
+
+ if (is_l2_vcat_configured(vm) && (res == RDT_RESOURCE_L2)) {
+ max_pcbm = get_vm_config(vm->vm_id)->max_l2_pcbm;
+ } else if (is_l3_vcat_configured(vm) && (res == RDT_RESOURCE_L3)) {
+ max_pcbm = get_vm_config(vm->vm_id)->max_l3_pcbm;
+ }
+
+ return max_pcbm;
+}
+
+/**
+ * @brief Retrieve vcbm_len of vm
+ * @pre vm != NULL
+ */
+uint16_t vcat_get_vcbm_len(const struct acrn_vm *vm, int res) {
+ /* vcbm_len = pcbm_len */
This comment is incorrect.
The rational here is that set-bits in max_l2_pcbm stands for bits of cbm the guest sees, + a shift.



+ return bitmap_weight(get_max_pcbm(vm, res)); }
+
+/**
+ * @brief Retrieve max_vcbm of vm
+ * @pre vm != NULL
+ */
+static uint64_t vcat_get_max_vcbm(const struct acrn_vm *vm, int res) {
+ uint16_t vcbm_len = vcat_get_vcbm_len(vm, res);
+ uint64_t max_vcbm = 0UL;
If " set-bits in max_l2_pcbm stands for bits of cbm the guest sees, + a shift." Is true,
Ideally max_vcbm should be max_pcbm >> shift.

Shift can be easily calculated by the number of 0 in LSB side.

The current implementation is not wrong, but very indirect.

+
+ if (vcbm_len != 0U) {
+ max_vcbm = (1U << vcbm_len) - 1U;
+ }
+
+ return max_vcbm;
+}
+
+/**
The following comments may be too long. Explain the hard-to-understand acronyms here only, and those easy-to-be-confused ones.

+ * @brief Map vMSR address (abbreviated as vmsr) to corresponding pMSR
+address (abbreviated as pmsr)
+ * Each vMSR or pMSR is identified by a 32-bit integer
+ *
+ * @pre vm != NULL
+ * @pre res == RDT_RESOURCE_L2 || res == RDT_RESOURCE_L3 */ static
+uint32_t vmsr_to_pmsr(const struct acrn_vm *vm, uint32_t vmsr, int res)
+{
+ uint32_t pmsr = vmsr;
+ uint16_t vclosid;
+
+ switch (res) {
+ case RDT_RESOURCE_L2:
+ vclosid = vmsr - MSR_IA32_L2_MASK_BASE;
+ pmsr = MSR_IA32_L2_MASK_BASE + vclosid_to_pclosid(vm,
vclosid);
+ break;
+
+ case RDT_RESOURCE_L3:
+ vclosid = vmsr - MSR_IA32_L3_MASK_BASE;
+ pmsr = MSR_IA32_L3_MASK_BASE + vclosid_to_pclosid(vm,
vclosid);
+ break;
+
+ default:
+ break;
+ }
+
+ return pmsr;
+}
+
+/**
+ * @brief vCBM MSR write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static int32_t
+write_vcbm(__unused struct acrn_vcpu *vcpu, __unused uint32_t vmsr,
+__unused uint64_t val, __unused int res) {
+ /* TODO: this is going to be implemented in a subsequent commit, will
perform the following actions:
+ * write vCBM
+ * vmsr_to_pmsr and vcbm_to_pcbm
+ * write pCBM
+ */
+ return -EFAULT;
+}
+
+/**
+ * @brief vCLOSID MSR write handler
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static int32_t
+write_vclosid(struct acrn_vcpu *vcpu, uint64_t val) {
+ uint16_t vclosid, pclosid;
+
We need to check the reserve bits (or higher than MAX RMID) of PQR_ASSOC too.
Inject #GP if it violates.

+ /* Write the new vCLOSID value */
+ vcpu_set_guest_msr(vcpu, MSR_IA32_PQR_ASSOC, val);
+
+ vclosid = (uint16_t)((val >> 32U) & 0xFFFFFFFFUL);
Why ">>32U"?
And why &0xfff...?

+ pclosid = vclosid_to_pclosid(vcpu->vm, vclosid);
+ /*
+ * Write the new pCLOSID value to the guest msr area
+ *
+ * The prepare_auto_msr_area() function has already initialized the
vcpu->arch.msr_area
+ * as follows:
+ * vcpu_clos = cfg->pclosids[vcpu->vcpu_id%cfg->num_pclosids]
+ *
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].msr_index =
MSR_IA32_PQR_ASSOC
+ * vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(vcpu_clos)
+ * vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].msr_index
= MSR_IA32_PQR_ASSOC
+ * vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(hv_clos)
+ * vcpu->arch.msr_area.count = 1
+ *
+ * So here we only need to update the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value field,
+ * all other vcpu->arch.msr_area fields remains unchanged at runtime.
+ */
+ vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
+clos2pqr_msr(pclosid);
+
Where is definition of clos2pqr_msr()?

+ return 0;
+}
+
+/**
+ * @brief Initialize vCBM MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ static void
+init_vcbms(struct acrn_vcpu *vcpu, int res, uint32_t msr_base) {
+ uint64_t max_vcbm = vcat_get_max_vcbm(vcpu->vm, res);
+
+ if (max_vcbm != 0UL) {
+ uint32_t vmsr;
+ /* num_vcbm_msrs = num_vclosids */
+ uint16_t num_vcbm_msrs = vcat_get_num_vclosids(vcpu->vm);
+
+ /*
+ * For each vCBM MSR, its initial vCBM is set to max_vcbm,
+ * a bitmask with vcbm_len bits (from 0 to vcbm_len - 1, inclusive)
+ * set to 1 and all other bits set to 0.
+ *
+ * As CBM only allows contiguous '1' combinations, so max_vcbm
essentially
+ * is a bitmask that selects all the virtual cache ways assigned to
the VM.
+ * It covers all the virtual cache ways the guest VM may access, i.e.
the
+ * superset bitmask.
+ */
+ for (vmsr = msr_base; vmsr < (msr_base + num_vcbm_msrs);
vmsr++) {
+ uint32_t pmsr = vmsr_to_pmsr(vcpu->vm, vmsr, res);
+ /* Set initial vMSR value: copy reserved bits from
corresponding pMSR, and set vCBM to max_vcbm */
Why we need to emulate the reserved bit? Normally all the reserved bits should be zero.
Does SDM says the reserved bits must be none-zero for CBM?

+ uint64_t val = (msr_read(pmsr) & 0xFFFFFFFF00000000UL) |
max_vcbm;
+
+ /* Write vCBM MSR */
+ (void)write_vcbm(vcpu, vmsr, val, res);
+ }
+ }
+}
+
+/**
+ * @brief Initialize vCAT MSRs
+ *
+ * @pre vcpu != NULL && vcpu->vm != NULL */ void init_vcat_msrs(struct
+acrn_vcpu *vcpu) {
+ if (is_vcat_configured(vcpu->vm)) {
+ init_vcbms(vcpu, RDT_RESOURCE_L2, MSR_IA32_L2_MASK_BASE);
+
+ init_vcbms(vcpu, RDT_RESOURCE_L3, MSR_IA32_L3_MASK_BASE);
+
+ (void)write_vclosid(vcpu, clos2pqr_msr(0U));
Shouldn't we write 0 as power on initial vCLOSID?

+ }
+}
diff --git a/hypervisor/arch/x86/guest/vm.c
b/hypervisor/arch/x86/guest/vm.c index 66f60d289..a7848743a 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -151,6 +151,16 @@ bool is_nvmx_configured(const struct acrn_vm *vm)
return ((vm_config->guest_flags & GUEST_FLAG_NVMX_ENABLED) !=
0U); }

+/**
+ * @pre vm != NULL && vm_config != NULL && vm->vmid <
CONFIG_MAX_VM_NUM
+*/ bool is_vcat_configured(const struct acrn_vm *vm) {
+ struct acrn_vm_config *vm_config = get_vm_config(vm->vm_id);
+
+ return ((vm_config->guest_flags & GUEST_FLAG_VCAT_ENABLED) !=
0U); }
+
/**
* @brief VT-d PI posted mode can possibly be used for PTDEVs assigned
* to this VM if platform supports VT-d PI AND lapic passthru is not
configured diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index 971e0b8fb..a3a1a8705 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -22,6 +22,7 @@
#include <asm/tsc.h>
#include <trace.h>
#include <logmsg.h>
+#include <asm/guest/vcat.h>

#define INTERCEPT_DISABLE (0U)
#define INTERCEPT_READ (1U << 0U)
@@ -338,8 +339,10 @@ static void prepare_auto_msr_area(struct
acrn_vcpu *vcpu)
It seems you changed the implementation of this API from V5.
Why?


It seems we are not converging...

vcpu_clos = cfg->pclosids[vcpu->vcpu_id%cfg->num_pclosids];

- /* RDT: only load/restore MSR IA32_PQR_ASSOC when hv and
guest have different settings */
- if (vcpu_clos != hv_clos) {
+ /* RDT: only load/restore MSR_IA32_PQR_ASSOC when hv and
guest have different settings
+ * vCAT: always load/restore MSR_IA32_PQR_ASSOC
+ */
+ if (is_vcat_configured(vcpu->vm) || (vcpu_clos != hv_clos)) {

vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].msr_index =
MSR_IA32_PQR_ASSOC;

vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value =
clos2pqr_msr(vcpu_clos);

vcpu->arch.msr_area.host[MSR_AREA_IA32_PQR_ASSOC].msr_index =
MSR_IA32_PQR_ASSOC; @@ -371,6 +374,14 @@ void
init_emulated_msrs(struct acrn_vcpu *vcpu)
}

vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL, val64);
+
+#ifdef CONFIG_VCAT_ENABLED
+ /*
+ * init_vcat_msrs() will overwrite the
vcpu->arch.msr_area.guest[MSR_AREA_IA32_PQR_ASSOC].value
+ * set by prepare_auto_msr_area()
+ */
+ init_vcat_msrs(vcpu);
+#endif
}

#ifdef CONFIG_VCAT_ENABLED
diff --git a/hypervisor/arch/x86/rdt.c b/hypervisor/arch/x86/rdt.c index
f6dc960f9..b21fd5209 100644
--- a/hypervisor/arch/x86/rdt.c
+++ b/hypervisor/arch/x86/rdt.c
@@ -60,6 +60,14 @@ static struct rdt_info
res_cap_info[RDT_NUM_RESOURCES] = {
},
};

+/*
+ * @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2 || res ==
+RDT_RESOURCE_MBA */ const struct rdt_info *get_rdt_res_cap_info(int
+res) {
+ return &res_cap_info[res];
+}
+
/*
* @pre res == RDT_RESOURCE_L3 || res == RDT_RESOURCE_L2
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcat.h
b/hypervisor/include/arch/x86/asm/guest/vcat.h
new file mode 100644
index 000000000..6d8e587c4
--- /dev/null
+++ b/hypervisor/include/arch/x86/asm/guest/vcat.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause */
+
+#ifndef VCAT_H_
+#define VCAT_H_
+
+#include <asm/guest/vm.h>
+
+bool is_l2_vcat_configured(const struct acrn_vm *vm); bool
+is_l3_vcat_configured(const struct acrn_vm *vm); uint16_t
+vcat_get_vcbm_len(const struct acrn_vm *vm, int res); void
+init_vcat_msrs(struct acrn_vcpu *vcpu);
+
+#endif /* VCAT_H_ */
+
diff --git a/hypervisor/include/arch/x86/asm/guest/vm.h
b/hypervisor/include/arch/x86/asm/guest/vm.h
index 9a6171be0..2cb06c027 100644
--- a/hypervisor/include/arch/x86/asm/guest/vm.h
+++ b/hypervisor/include/arch/x86/asm/guest/vm.h
@@ -256,6 +256,7 @@ void vrtc_init(struct acrn_vm *vm); bool
is_lapic_pt_configured(const struct acrn_vm *vm); bool is_rt_vm(const
struct acrn_vm *vm); bool is_nvmx_configured(const struct acrn_vm *vm);
+bool is_vcat_configured(const struct acrn_vm *vm);
bool is_pi_capable(const struct acrn_vm *vm); bool has_rt_vm(void);
struct acrn_vm *get_highest_severity_vm(bool runtime); diff --git
a/hypervisor/include/arch/x86/asm/rdt.h
b/hypervisor/include/arch/x86/asm/rdt.h
index f6c4448e2..95e149fcb 100644
--- a/hypervisor/include/arch/x86/asm/rdt.h
+++ b/hypervisor/include/arch/x86/asm/rdt.h
@@ -47,5 +47,6 @@ void init_rdt_info(void); void setup_clos(uint16_t
pcpu_id); uint64_t clos2pqr_msr(uint16_t clos); bool
is_platform_rdt_capable(void);
+const struct rdt_info *get_rdt_res_cap_info(int res);

#endif /* RDT_H */
--
2.25.1





Re: [PATCH V6 3/8] hv: vCAT: initialize the emulated_guest_msrs array for CAT msrs during platform initialization

Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Dongsheng Zhang
Sent: Wednesday, October 20, 2021 6:41 AM
To: acrn-dev@...
Subject: [acrn-dev] [PATCH V6 3/8] hv: vCAT: initialize the
emulated_guest_msrs array for CAT msrs during platform initialization

From: dongshen <dongsheng.x.zhang@...>

Initialize the emulated_guest_msrs[] array at runtime for
MSR_IA32_type_MASK_n and MSR_IA32_PQR_ASSOC msrs, there is no good
way to do this initialization statically at build time

Tracked-On: #5917
Signed-off-by: dongshen <dongsheng.x.zhang@...>
---
hypervisor/arch/x86/cpu.c | 4 +
hypervisor/arch/x86/guest/vmsr.c | 88
+++++++++++++++++++-
hypervisor/include/arch/x86/asm/guest/vcpu.h | 19 ++++-
hypervisor/include/arch/x86/asm/msr.h | 1 +
4 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c index
e1ee4770e..18c33c7d1 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -186,6 +186,10 @@ void init_pcpu_pre(bool is_bsp)
panic("System IOAPIC info is incorrect!");
}

+#ifdef CONFIG_VCAT_ENABLED
+ init_intercepted_cat_msr_list();
+#endif
+
#ifdef CONFIG_RDT_ENABLED
init_rdt_info();
#endif
diff --git a/hypervisor/arch/x86/guest/vmsr.c
b/hypervisor/arch/x86/guest/vmsr.c
index e83c3069c..971e0b8fb 100644
--- a/hypervisor/arch/x86/guest/vmsr.c
+++ b/hypervisor/arch/x86/guest/vmsr.c
@@ -28,7 +28,7 @@
#define INTERCEPT_WRITE (1U << 1U)
#define INTERCEPT_READ_WRITE (INTERCEPT_READ |
INTERCEPT_WRITE)

-static const uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
+static uint32_t emulated_guest_msrs[NUM_GUEST_MSRS] = {
/*
* MSRs that trusty may touch and need isolation between secure and
normal world
* This may include MSR_IA32_STAR, MSR_IA32_LSTAR,
MSR_IA32_FMASK, @@ -79,6 +79,24 @@ static const uint32_t
emulated_guest_msrs[NUM_GUEST_MSRS] = { #ifdef
CONFIG_NVMX_ENABLED
LIST_OF_VMX_MSRS,
#endif
+
+ /* The following range of elements are reserved for vCAT usage and are
+ * initialized dynamically by init_intercepted_cat_msr_list() during
platform initialization:
+ * [(NUM_GUEST_MSRS - NUM_VCAT_MSRS) ... (NUM_GUEST_MSRS -
1)] = {
+ * The following layout of each CAT MSR entry is determined by
catmsr_to_index_of_emulated_msr():
+ * MSR_IA32_L3_MASK_BASE,
+ * MSR_IA32_L3_MASK_BASE + 1,
+ * ...
+ * MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS - 1,
+ *
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS,
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS + 1,
+ * ...
+ * MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS - 1,
+ *
+ * MSR_IA32_PQR_ASSOC + NUM_VCAT_L3_MSRS +
NUM_VCAT_L2_MSRS
+ * }
+ */
};

static const uint32_t mtrr_msrs[] = {
@@ -355,6 +373,74 @@ void init_emulated_msrs(struct acrn_vcpu *vcpu)
vcpu_set_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL, val64); }

+#ifdef CONFIG_VCAT_ENABLED
+/**
+ * @brief Map CAT MSR address to zero based index
+ *
+ * @pre ((msr >= MSR_IA32_L3_MASK_BASE) && msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))
+ * || ((msr >= MSR_IA32_L2_MASK_BASE) && msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))
+ * || (msr == MSR_IA32_PQR_ASSOC)
+ */
+static uint32_t cat_msr_to_index_of_emulated_msr(uint32_t msr) {
+ uint32_t index = 0U;
+
+ /* L3 MSRs indices assignment for MSR_IA32_L3_MASK_BASE ~
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS):
+ * 0
+ * 1
+ * ...
+ * (NUM_VCAT_L3_MSRS - 1)
+ *
+ * L2 MSRs indices assignment:
+ * NUM_VCAT_L3_MSRS
+ * ...
+ * NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS - 1
+
+ * PQR index assignment for MSR_IA32_PQR_ASSOC:
+ * NUM_VCAT_L3_MSRS
+ */
+
+ if ((msr >= MSR_IA32_L3_MASK_BASE) && (msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS))) {
+ index = msr - MSR_IA32_L3_MASK_BASE;
+ } else if ((msr >= MSR_IA32_L2_MASK_BASE) && (msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS))) {
+ index = msr - MSR_IA32_L2_MASK_BASE + NUM_VCAT_L3_MSRS;
+ } else if (msr == MSR_IA32_PQR_ASSOC) {
+ index = NUM_VCAT_L3_MSRS + NUM_VCAT_L2_MSRS;
+ } else {
+ ASSERT(false, "invalid CAT msr address");
+ }
+
+ return index;
+}
+
+static void init_cat_msr_entry(uint32_t msr) {
+ /* Get index into the emulated_guest_msrs[] table for a given CAT MSR
*/
+ uint32_t index = cat_msr_to_index_of_emulated_msr(msr) +
+NUM_GUEST_MSRS - NUM_VCAT_MSRS;
Do you mean NUM_WORLD_MSRS + NUM_COMMON_MSRS + NUM_VMX_MSRS + cat_msr_to_index_of_emulated_msr(msr)?

If you make a separate MACRO for " NUM_WORLD_MSRS + NUM_COMMON_MSRS + NUM_VMX_MSRS" to stand for the start offset of CAT MSRs, that is fine too.


+
+ emulated_guest_msrs[index] = msr;
+}
+
+/* Init emulated_guest_msrs[] dynamically for CAT MSRs */ void
+init_intercepted_cat_msr_list(void)
+{
+ uint32_t msr;
+
+ /* MSR_IA32_L2_MASK_n MSRs */
+ for (msr = MSR_IA32_L2_MASK_BASE; msr <
(MSR_IA32_L2_MASK_BASE + NUM_VCAT_L2_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_L3_MASK_n MSRs */
+ for (msr = MSR_IA32_L3_MASK_BASE; msr <
(MSR_IA32_L3_MASK_BASE + NUM_VCAT_L3_MSRS); msr++) {
+ init_cat_msr_entry(msr);
+ }
+
+ /* MSR_IA32_PQR_ASSOC */
+ init_cat_msr_entry(MSR_IA32_PQR_ASSOC);
+}
+#endif
+
/**
* @pre vcpu != NULL
*/
diff --git a/hypervisor/include/arch/x86/asm/guest/vcpu.h
b/hypervisor/include/arch/x86/asm/guest/vcpu.h
index 65ec52203..e251fb4cf 100644
--- a/hypervisor/include/arch/x86/asm/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/asm/guest/vcpu.h
@@ -29,6 +29,7 @@
#include <asm/guest/instr_emul.h>
#include <asm/guest/nested.h>
#include <asm/vmx.h>
+#include <asm/vm_config.h>

/**
* @brief vcpu
@@ -173,12 +174,26 @@ enum reset_mode;

#define NUM_WORLD_MSRS 2U
#define NUM_COMMON_MSRS 23U
+
+#ifdef CONFIG_VCAT_ENABLED
+#define NUM_VCAT_L2_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+#define NUM_VCAT_L3_MSRS MAX_CACHE_CLOS_NUM_ENTRIES
+/* L2/L3 mask MSRs plus MSR_IA32_PQR_ASSOC */
+#define NUM_VCAT_MSRS (NUM_VCAT_L2_MSRS +
NUM_VCAT_L3_MSRS + 1U)
Why we need +1? Number of VCAT MSR should be L2 + L3.

+#else
+#define NUM_VCAT_L2_MSRS 0U
+#define NUM_VCAT_L3_MSRS 0U
I remember we don't need these 2 MACROs. Not?

+#define NUM_VCAT_MSRS 0U
+#endif
+
+/* For detailed layout of the emulated guest MSRs, see
+emulated_guest_msrs[NUM_GUEST_MSRS] in vmsr.c */
#ifdef CONFIG_NVMX_ENABLED
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VMX_MSRS + NUM_VCAT_MSRS)
#else
-#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS)
+#define NUM_GUEST_MSRS (NUM_WORLD_MSRS +
NUM_COMMON_MSRS + NUM_VCAT_MSRS)
#endif

+
#define EOI_EXIT_BITMAP_SIZE 256U

struct guest_cpu_context {
diff --git a/hypervisor/include/arch/x86/asm/msr.h
b/hypervisor/include/arch/x86/asm/msr.h
index 9c9b56bf7..6556267f5 100644
--- a/hypervisor/include/arch/x86/asm/msr.h
+++ b/hypervisor/include/arch/x86/asm/msr.h
@@ -617,6 +617,7 @@ static inline bool is_x2apic_msr(uint32_t msr) struct
acrn_vcpu;

void init_msr_emulation(struct acrn_vcpu *vcpu);
+void init_intercepted_cat_msr_list(void);
uint32_t vmsr_get_guest_msr_index(uint32_t msr); void
update_msr_bitmap_x2apic_apicv(struct acrn_vcpu *vcpu); void
update_msr_bitmap_x2apic_passthru(struct acrn_vcpu *vcpu);
--
2.25.1





Re: [PATCH] hv: remove UUID

Yu Wang
 

On Tue, Oct 19, 2021 at 11:34:26AM +0800, Mingqiang Chi wrote:
From: Mingqiang Chi <mingqiang.chi@...>

-- use VM-name to identify the static VM configuration
instead of UUID
-- reuse uuid field in acrn_vm_creation structure,and pass
vm name to hypervisor.
Two TODOs:
-- allow post-launched VM boot even no matched static VM configuration
-- add vseed hypercall to pass the HKDF derive data

BTW, I see there still have uuid related things from following files:
../hypervisor/include/lib/util.h
../hypervisor/quirks/security_vm_fixup.c

Should we clean them up?


Tracked-On: #6685
Signed-off-by: Mingqiang Chi <mingqiang.chi@...>
---
hypervisor/arch/x86/configs/vm_config.c | 5 +-
hypervisor/arch/x86/guest/trusty.c | 5 +-
hypervisor/arch/x86/guest/vm.c | 7 +-
hypervisor/arch/x86/guest/vmcall.c | 2 +-
hypervisor/debug/profiling.c | 2 -
hypervisor/debug/shell.c | 11 +--
hypervisor/include/arch/x86/asm/guest/vm.h | 3 +-
hypervisor/include/arch/x86/asm/vm_config.h | 13 +---
hypervisor/include/common/vm_uuids.h | 58 ---------------
hypervisor/include/debug/profiling_internal.h | 1 -
hypervisor/include/public/acrn_common.h | 11 ++-
misc/hv_prebuild/vm_cfg_checks.c | 73 -------------------
12 files changed, 20 insertions(+), 171 deletions(-)
delete mode 100644 hypervisor/include/common/vm_uuids.h

diff --git a/hypervisor/arch/x86/configs/vm_config.c b/hypervisor/arch/x86/configs/vm_config.c
index ee0f4cf00..4ba0d41ef 100644
--- a/hypervisor/arch/x86/configs/vm_config.c
+++ b/hypervisor/arch/x86/configs/vm_config.c
@@ -6,6 +6,7 @@

#include <asm/vm_config.h>
#include <util.h>
+#include <rtl.h>

/*
* @pre vm_id < CONFIG_MAX_VM_NUM
@@ -29,9 +30,9 @@ uint8_t get_vm_severity(uint16_t vm_id)
*
* @pre vmid < CONFIG_MAX_VM_NUM
*/
-bool vm_has_matched_uuid(uint16_t vmid, const uint8_t *uuid)
+bool vm_has_matched_name(uint16_t vmid, const uint8_t *name)
{
struct acrn_vm_config *vm_config = get_vm_config(vmid);

- return (uuid_is_equal(vm_config->uuid, uuid));
+ return (strncmp(vm_config->name, (char *)name, MAX_VM_NAME_LEN) == 0);
}
diff --git a/hypervisor/arch/x86/guest/trusty.c b/hypervisor/arch/x86/guest/trusty.c
index 1a56f7357..b5d137db3 100644
--- a/hypervisor/arch/x86/guest/trusty.c
+++ b/hypervisor/arch/x86/guest/trusty.c
@@ -262,6 +262,8 @@ static bool setup_trusty_info(struct acrn_vcpu *vcpu, uint32_t mem_size, uint64_
struct trusty_mem *mem;
struct trusty_key_info key_info;
struct trusty_startup_param startup_param;
+ /*TODO: add hypercall to pass vseed to HV */
+ uint8_t vseed[16] = {0};

(void)memset(&key_info, 0U, sizeof(key_info));

@@ -276,8 +278,7 @@ static bool setup_trusty_info(struct acrn_vcpu *vcpu, uint32_t mem_size, uint64_

/* Derive dvseed from dseed for Trusty */
if (derive_virtual_seed(&key_info.dseed_list[0U], &key_info.num_seeds,
- NULL, 0U,
- vcpu->vm->uuid, sizeof(vcpu->vm->uuid))) {
+ NULL, 0U, vseed, 16)) {
/* Derive encryption key of attestation keybox from dseed */
if (derive_attkb_enc_key(key_info.attkb_enc_key)) {
/* Prepare trusty startup param */
diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c
index 66f60d289..f0e42f494 100644
--- a/hypervisor/arch/x86/guest/vm.c
+++ b/hypervisor/arch/x86/guest/vm.c
@@ -57,11 +57,11 @@ void *get_sworld_memory_base(void)
return post_uos_sworld_memory;
}

-uint16_t get_vmid_by_uuid(const uint8_t *uuid)
+uint16_t get_vmid_by_name(const uint8_t *name)
{
uint16_t vm_id = 0U;

- while (!vm_has_matched_uuid(vm_id, uuid)) {
+ while (!vm_has_matched_name(vm_id, name)) {
vm_id++;
if (vm_id == CONFIG_MAX_VM_NUM) {
break;
@@ -525,9 +525,6 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v
init_ept_pgtable(&vm->arch_vm.ept_pgtable, vm->vm_id);
vm->arch_vm.nworld_eptp = pgtable_create_root(&vm->arch_vm.ept_pgtable);

- (void)memcpy_s(&vm->uuid[0], sizeof(vm->uuid),
- &vm_config->uuid[0], sizeof(vm_config->uuid));
-
if (is_sos_vm(vm)) {
/* Only for SOS_VM */
create_sos_vm_e820(vm);
diff --git a/hypervisor/arch/x86/guest/vmcall.c b/hypervisor/arch/x86/guest/vmcall.c
index 56c41dc85..8bbb7a814 100644
--- a/hypervisor/arch/x86/guest/vmcall.c
+++ b/hypervisor/arch/x86/guest/vmcall.c
@@ -117,7 +117,7 @@ struct acrn_vm *parse_target_vm(struct acrn_vm *sos_vm, uint64_t hcall_id, uint6
switch (hcall_id) {
case HC_CREATE_VM:
if (copy_from_gpa(sos_vm, &cv, param1, sizeof(cv)) == 0) {
- vm_id = get_vmid_by_uuid(&cv.uuid[0]);
+ vm_id = get_vmid_by_name(cv.name);
}
break;

diff --git a/hypervisor/debug/profiling.c b/hypervisor/debug/profiling.c
index 1929aa7ad..4e75d95fb 100644
--- a/hypervisor/debug/profiling.c
+++ b/hypervisor/debug/profiling.c
@@ -875,8 +875,6 @@ int32_t profiling_vm_list_info(struct acrn_vm *vm, uint64_t addr)
vm_idx++;

vm_info_list.vm_list[vm_idx].vm_id_num = tmp_vm->vm_id;
- (void)memcpy_s((void *)vm_info_list.vm_list[vm_idx].uuid,
- 16U, tmp_vm->uuid, 16U);
snprintf(vm_info_list.vm_list[vm_idx].vm_name, 16U, "vm_%d",
tmp_vm->vm_id, 16U);
vm_info_list.vm_list[vm_idx].num_vcpus = 0;
diff --git a/hypervisor/debug/shell.c b/hypervisor/debug/shell.c
index be6dc0037..aeaeff005 100644
--- a/hypervisor/debug/shell.c
+++ b/hypervisor/debug/shell.c
@@ -590,8 +590,8 @@ static int32_t shell_list_vm(__unused int32_t argc, __unused char **argv)
uint16_t vm_id;
char state[32];

- shell_puts("\r\nVM_UUID VM_ID VM_NAME VM_STATE"
- "\r\n================================ ===== ================================ ========\r\n");
+ shell_puts("\r\nVM_ID VM_NAME VM_STATE"
+ "\r\n===== ================================ ========\r\n");

for (vm_id = 0U; vm_id < CONFIG_MAX_VM_NUM; vm_id++) {
vm = get_vm_from_vmid(vm_id);
@@ -614,12 +614,7 @@ static int32_t shell_list_vm(__unused int32_t argc, __unused char **argv)
}
vm_config = get_vm_config(vm_id);
if (!is_poweroff_vm(vm)) {
- int8_t i;
-
- for (i = 0; i < 16; i++) {
- snprintf(temp_str + 2 * i, 3U, "%02x", vm->uuid[i]);
- }
- snprintf(temp_str + 32, MAX_STR_SIZE - 32U, " %-3d %-32s %-8s\r\n",
+ snprintf(temp_str, MAX_STR_SIZE, " %-3d %-32s %-8s\r\n",
vm_id, vm_config->name, state);

/* Output information for this task */
diff --git a/hypervisor/include/arch/x86/asm/guest/vm.h b/hypervisor/include/arch/x86/asm/guest/vm.h
index 9a6171be0..9a4efc838 100644
--- a/hypervisor/include/arch/x86/asm/guest/vm.h
+++ b/hypervisor/include/arch/x86/asm/guest/vm.h
@@ -156,7 +156,6 @@ struct acrn_vm {

struct vm_io_handler_desc emul_pio[EMUL_PIO_IDX_MAX];

- uint8_t uuid[16];
struct secure_world_control sworld_control;

/* Secure World's snapshot
@@ -241,7 +240,7 @@ bool is_paused_vm(const struct acrn_vm *vm);
bool is_sos_vm(const struct acrn_vm *vm);
bool is_postlaunched_vm(const struct acrn_vm *vm);
bool is_prelaunched_vm(const struct acrn_vm *vm);
-uint16_t get_vmid_by_uuid(const uint8_t *uuid);
+uint16_t get_vmid_by_name(const uint8_t *name);
struct acrn_vm *get_vm_from_vmid(uint16_t vm_id);
struct acrn_vm *get_sos_vm(void);

diff --git a/hypervisor/include/arch/x86/asm/vm_config.h b/hypervisor/include/arch/x86/asm/vm_config.h
index 528210375..859ca50fa 100644
--- a/hypervisor/include/arch/x86/asm/vm_config.h
+++ b/hypervisor/include/arch/x86/asm/vm_config.h
@@ -12,7 +12,6 @@
#include <board_info.h>
#include <boot.h>
#include <acrn_common.h>
-#include <vm_uuids.h>
#include <vm_configurations.h>
#include <asm/sgx.h>
#include <acrn_hv_defs.h>
@@ -38,31 +37,24 @@
#define MAX_MMIO_DEV_NUM 2U

#define CONFIG_SOS_VM .load_order = SOS_VM, \
- .uuid = SOS_VM_UUID, \
.severity = SEVERITY_SOS

#define CONFIG_SAFETY_VM(idx) .load_order = PRE_LAUNCHED_VM, \
- .uuid = SAFETY_VM_UUID##idx, \
.severity = SEVERITY_SAFETY_VM

#define CONFIG_PRE_STD_VM(idx) .load_order = PRE_LAUNCHED_VM, \
- .uuid = PRE_STANDARD_VM_UUID##idx, \
.severity = SEVERITY_STANDARD_VM

#define CONFIG_PRE_RT_VM(idx) .load_order = PRE_LAUNCHED_VM, \
- .uuid = PRE_RTVM_UUID##idx, \
.severity = SEVERITY_RTVM

#define CONFIG_POST_STD_VM(idx) .load_order = POST_LAUNCHED_VM, \
- .uuid = POST_STANDARD_VM_UUID##idx, \
.severity = SEVERITY_STANDARD_VM

#define CONFIG_POST_RT_VM(idx) .load_order = POST_LAUNCHED_VM, \
- .uuid = POST_RTVM_UUID##idx, \
.severity = SEVERITY_RTVM

#define CONFIG_KATA_VM(idx) .load_order = POST_LAUNCHED_VM, \
- .uuid = KATA_VM_UUID##idx, \
.severity = SEVERITY_STANDARD_VM

/* ACRN guest severity */
@@ -154,8 +146,7 @@ struct pt_intx_config {

struct acrn_vm_config {
enum acrn_vm_load_order load_order; /* specify the load order of VM */
- char name[MAX_VM_OS_NAME_LEN]; /* VM name identifier, useful for debug. */
- const uint8_t uuid[16]; /* UUID of the VM */
+ char name[MAX_VM_NAME_LEN]; /* VM name identifier, useful for debug. */
Remove the ", useful for debug"

uint8_t reserved[2]; /* Temporarily reserve it so that don't need to update
* the users of get_platform_info frequently.
*/
@@ -199,7 +190,7 @@ struct acrn_vm_config {

struct acrn_vm_config *get_vm_config(uint16_t vm_id);
uint8_t get_vm_severity(uint16_t vm_id);
-bool vm_has_matched_uuid(uint16_t vmid, const uint8_t *uuid);
+bool vm_has_matched_name(uint16_t vmid, const uint8_t *name);

extern struct acrn_vm_config vm_configs[CONFIG_MAX_VM_NUM];

diff --git a/hypervisor/include/common/vm_uuids.h b/hypervisor/include/common/vm_uuids.h
deleted file mode 100644
index e87d071da..000000000
--- a/hypervisor/include/common/vm_uuids.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (C) 2020 Intel Corporation. All rights reserved.
- *
- * SPDX-License-Identifier: BSD-3-Clause
- */
-
-#ifndef VM_UUIDS_H
-#define VM_UUIDS_H
-
-/* dbbbd434-7a57-4216-a12c-2201f1ab0240 */
-#define SOS_VM_UUID {0xdbU, 0xbbU, 0xd4U, 0x34U, 0x7aU, 0x57U, 0x42U, 0x16U, \
- 0xa1U, 0x2cU, 0x22U, 0x01U, 0xf1U, 0xabU, 0x02U, 0x40U}
-
-/* fc836901-8685-4bc0-8b71-6e31dc36fa47 */
-#define SAFETY_VM_UUID1 {0xfcU, 0x83U, 0x69U, 0x01U, 0x86U, 0x85U, 0x4bU, 0xc0U, \
- 0x8bU, 0x71U, 0x6eU, 0x31U, 0xdcU, 0x36U, 0xfaU, 0x47U}
-
-/* 26c5e0d8-8f8a-47d8-8109-f201ebd61a5e */
-#define PRE_STANDARD_VM_UUID1 {0x26U, 0xc5U, 0xe0U, 0xd8U, 0x8fU, 0x8aU, 0x47U, 0xd8U, \
- 0x81U, 0x09U, 0xf2U, 0x01U, 0xebU, 0xd6U, 0x1aU, 0x5eU}
-
-/* dd87ce08-66f9-473d-bc58-7605837f935e */
-#define PRE_STANDARD_VM_UUID2 {0xddU, 0x87U, 0xceU, 0x08U, 0x66U, 0xf9U, 0x47U, 0x3dU, \
- 0xbcU, 0x58U, 0x76U, 0x05U, 0x83U, 0x7fU, 0x93U, 0x5eU}
-
-/* d2795438-25d6-11e8-864e-cb7a18b34643 */
-#define POST_STANDARD_VM_UUID1 {0xd2U, 0x79U, 0x54U, 0x38U, 0x25U, 0xd6U, 0x11U, 0xe8U, \
- 0x86U, 0x4eU, 0xcbU, 0x7aU, 0x18U, 0xb3U, 0x46U, 0x43U}
-
-/* 615db82a-e189-4b4f-8dbb-d321343e4ab3 */
-#define POST_STANDARD_VM_UUID2 {0x61U, 0x5dU, 0xb8U, 0x2aU, 0xe1U, 0x89U, 0x4bU, 0x4fU, \
- 0x8dU, 0xbbU, 0xd3U, 0x21U, 0x34U, 0x3eU, 0x4aU, 0xb3U}
-
-/* 38158821-5208-4005-b72a-8a609e4190d0 */
-#define POST_STANDARD_VM_UUID3 {0x38U, 0x15U, 0x88U, 0x21U, 0x52U, 0x08U, 0x40U, 0x05U, \
- 0xb7U, 0x2aU, 0x8aU, 0x60U, 0x9eU, 0x41U, 0x90U, 0xd0U}
-
-/* a6750180-f87a-48d2-91d9-4e7f62b6519e */
-#define POST_STANDARD_VM_UUID4 {0xa6U, 0x75U, 0x01U, 0x80U, 0xf8U, 0x7aU, 0x48U, 0xd2U, \
- 0x91U, 0xd9U, 0x4eU, 0x7fU, 0x62U, 0xb6U, 0x51U, 0x9eU}
-
-/* d1816e4a-a9bb-4cb4-a066-3f1a8a5ce73f */
-#define POST_STANDARD_VM_UUID5 {0xd1U, 0x81U, 0x6eU, 0x4aU, 0xa9U, 0xbbU, 0x4cU, 0xb4U, \
- 0xa0U, 0x66U, 0x3fU, 0x1aU, 0x8aU, 0x5cU, 0xe7U, 0x3fU}
-
-/* b2a92bec-ca6b-11ea-b106-3716a8ba0bb9 */
-#define PRE_RTVM_UUID1 {0xb2U, 0xa9U, 0x2bU, 0xecU, 0xcaU, 0x6bU, 0x11U, 0xeaU, \
- 0xb1U, 0x06U, 0x37U, 0x16U, 0xa8U, 0xbaU, 0x0bU, 0xb9}
-
-/* 495ae2e5-2603-4d64-af76-d4bc5a8ec0e5 */
-#define POST_RTVM_UUID1 {0x49U, 0x5aU, 0xe2U, 0xe5U, 0x26U, 0x03U, 0x4dU, 0x64U, \
- 0xafU, 0x76U, 0xd4U, 0xbcU, 0x5aU, 0x8eU, 0xc0U, 0xe5U}
-
-/* a7ada506-1ab0-4b6b-a0da-e513ca9b8c2f */
-#define KATA_VM_UUID1 {0xa7U, 0xadU, 0xa5U, 0x06U, 0x1aU, 0xb0U, 0x4bU, 0x6bU, \
- 0xa0U, 0xdaU, 0xe5U, 0x13U, 0xcaU, 0x9bU, 0x8cU, 0x2fU}
-
-#endif /* VM_UUIDS_H */
diff --git a/hypervisor/include/debug/profiling_internal.h b/hypervisor/include/debug/profiling_internal.h
index 43d8da3f8..f1267ab5e 100644
--- a/hypervisor/include/debug/profiling_internal.h
+++ b/hypervisor/include/debug/profiling_internal.h
@@ -111,7 +111,6 @@ struct profiling_vcpu_pcpu_map {

struct profiling_vm_info {
uint16_t vm_id_num;
- uint8_t uuid[16];
char vm_name[16];
uint16_t num_vcpus;
struct profiling_vcpu_pcpu_map cpu_map[MAX_VCPUS_PER_VM];
diff --git a/hypervisor/include/public/acrn_common.h b/hypervisor/include/public/acrn_common.h
index 1fbd19af7..59ba14efe 100644
--- a/hypervisor/include/public/acrn_common.h
+++ b/hypervisor/include/public/acrn_common.h
@@ -66,6 +66,8 @@
#define VIRTUAL_PM1A_SLP_EN 0x2000U
#define VIRTUAL_PM1A_ALWAYS_ZERO 0xc003

+#define MAX_VM_NAME_LEN (16U)
+
/**
* @brief Hypercall
*
@@ -350,8 +352,8 @@ struct acrn_vm_creation {
/** Reserved */
uint16_t reserved1;

- /** the UUID of this VM */
- uint8_t uuid[16];
+ /** the name of this VM */
+ uint8_t name[MAX_VM_NAME_LEN];

/* VM flag bits from Guest OS, now used
* GUEST_FLAG_SECURE_WORLD_ENABLED (1UL<<0)
@@ -590,12 +592,9 @@ enum acrn_vm_load_order {
MAX_LOAD_ORDER
};

-#define MAX_VM_OS_NAME_LEN 32U
-
struct acrn_vm_config_header {
enum acrn_vm_load_order load_order;
- char name[MAX_VM_OS_NAME_LEN];
- const uint8_t uuid[16];
+ char name[MAX_VM_NAME_LEN];
uint8_t reserved[2];
uint8_t severity;
uint64_t cpu_affinity;
diff --git a/misc/hv_prebuild/vm_cfg_checks.c b/misc/hv_prebuild/vm_cfg_checks.c
index 8620c1580..1204f31bf 100644
--- a/misc/hv_prebuild/vm_cfg_checks.c
+++ b/misc/hv_prebuild/vm_cfg_checks.c
@@ -14,11 +14,6 @@
#include <vpci.h>
#include <hv_prebuild.h>

-static uint8_t rtvm_uuids[][16] = {
- PRE_RTVM_UUID1,
- POST_RTVM_UUID1,
-};
-static uint8_t safety_vm_uuid1[16] = SAFETY_VM_UUID1;

/* sanity check for below structs is not needed, so use a empty struct instead */
const struct pci_vdev_ops vhostbridge_ops;
@@ -27,55 +22,6 @@ const struct pci_vdev_ops vmcs9900_ops;

#define PLATFORM_CPUS_MASK ((1UL << MAX_PCPU_NUM) - 1UL)

-/**
- * return true if the input uuid is for safety VM
- */
-static bool is_safety_vm_uuid(const uint8_t *uuid)
-{
- /* TODO: Extend to check more safety VM uuid if we have more than one safety VM. */
- return uuid_is_equal(uuid, safety_vm_uuid1);
-}
-
-/**
- * return true if the input uuid is for RTVM
- */
-static bool is_rtvm_uuid(const uint8_t *uuid)
-{
- bool ret = false;
- uint16_t i;
- uint8_t *rtvm_uuid;
-
- for (i = 0U; i < ARRAY_SIZE(rtvm_uuids); i++) {
- rtvm_uuid = rtvm_uuids[i];
- if (uuid_is_equal(uuid, rtvm_uuid)) {
- ret = true;
- break;
- }
- }
- return ret;
-}
-
-/**
- * return true if no UUID collision is found in vm configs array start from vm_configs[vm_id]
- *
- * @pre vm_id < CONFIG_MAX_VM_NUM
- */
-static bool check_vm_uuid_collision(uint16_t vm_id)
-{
- uint16_t i;
- bool ret = true;
- struct acrn_vm_config *start_config = get_vm_config(vm_id);
- struct acrn_vm_config *following_config;
-
- for (i = vm_id + 1U; i < CONFIG_MAX_VM_NUM; i++) {
- following_config = get_vm_config(i);
- if (uuid_is_equal(&start_config->uuid[0], &following_config->uuid[0])) {
- ret = false;
- break;
- }
- }
- return ret;
-}

#ifdef CONFIG_RDT_ENABLED
static bool check_vm_clos_config(uint16_t vm_id)
@@ -127,8 +73,6 @@ bool sanitize_vm_config(void)
ret = false;
} else if (vm_config->epc.size != 0UL) {
ret = false;
- } else if (is_safety_vm_uuid(vm_config->uuid) && (vm_config->severity != (uint8_t)SEVERITY_SAFETY_VM)) {
- ret = false;
} else {
#if (SOS_VM_NUM == 1U)
if (vm_config->severity <= SEVERITY_SOS) {
@@ -151,18 +95,6 @@ bool sanitize_vm_config(void)
break;
}

- if (ret) {
- /* VM with RTVM uuid must have RTVM severity */
- if (is_rtvm_uuid(vm_config->uuid) && (vm_config->severity != (uint8_t)SEVERITY_RTVM)) {
- ret = false;
- }
-
- /* VM WITHOUT RTVM uuid must NOT have RTVM severity */
- if (!is_rtvm_uuid(vm_config->uuid) && (vm_config->severity == (uint8_t)SEVERITY_RTVM)) {
- ret = false;
- }
- }
-
#ifdef CONFIG_RDT_ENABLED
if (ret) {
ret = check_vm_clos_config(vm_id);
@@ -174,11 +106,6 @@ bool sanitize_vm_config(void)
ret = false;
}

- if (ret) {
- /* make sure no identical UUID in following VM configurations */
- ret = check_vm_uuid_collision(vm_id);
- }
-
if (ret) {
/* vuart[1+] are used for VM communications */
for (vuart_idx = 1U; vuart_idx < MAX_VUART_NUM_PER_VM; vuart_idx++) {
--
2.17.0






Re: [PATCH v2 1/1] dm: replace UUID with vmname.

Yu Wang
 

Please make sure the patch is merged with the HV one together.

Acked-by: Wang, Yu1 <yu1.wang@...>

On Wed, Oct 20, 2021 at 05:19:44PM +0800, Zhao, Yuanyuan wrote:


-----Original Message-----
From: Wang, Yu1 <yu1.wang@...>
Sent: Wednesday, October 20, 2021 4:56 PM
To: Zhao, Yuanyuan <yuanyuan.zhao@...>
Cc: acrn-dev@...
Subject: Re: [PATCH v2 1/1] dm: replace UUID with vmname.

On Wed, Oct 20, 2021 at 03:43:13PM +0800, Yuanyuan Zhao wrote:
The UUID has several usages before:
1, For HV to identify the static VM configuration of post-launched VM.
2, Seed virtualization.
3, Slightly prevent launching malicous VM from SOS as lack of secure
boot.

The UUID is confused to user, user don't understand what it is. And
user don't know where to get/apply the UUID. The worst experience is
user can't launch any VMs w/o re-compile the hv. Everything needs to
be static decided in building phase.

Now we decide to remove UUID and split each usage. For 1st usage, use
vmname as the identifier of static VM configuration. For 2nd one, we
will use --vseed as the new parameter. For 3rd one, will pretect by
SOS's dm-verity.

This patch will remove the UUID parameter and support 1st&3rd usages
from DM part. For 2nd usage, another patch will be submitted later.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
devicemodel/core/hugetlb.c | 27 ++++++---------------------
devicemodel/core/main.c | 15 ++++++---------
devicemodel/core/vmmapi.c | 17 ++---------------
devicemodel/include/dm.h | 3 +--
devicemodel/include/vmmapi.h | 1 -
5 files changed, 15 insertions(+), 48 deletions(-)

diff --git a/devicemodel/core/hugetlb.c b/devicemodel/core/hugetlb.c
index e14faf46e..756b7b9a7 100644
--- a/devicemodel/core/hugetlb.c
+++ b/devicemodel/core/hugetlb.c
@@ -168,8 +168,6 @@ static int unlock_acrn_hugetlb(void)

static int open_hugetlbfs(struct vmctx *ctx, int level) {
- char uuid_str[48];
- uint8_t UUID[16];
char *path;
size_t len;
struct statfs fs;
@@ -181,27 +179,14 @@ static int open_hugetlbfs(struct vmctx *ctx, int
level)

path = hugetlb_priv[level].node_path;
memset(path, '\0', MAX_PATH_LEN);
- snprintf(path, MAX_PATH_LEN, "%s%s/",
hugetlb_priv[level].mount_path, ctx->name);
+ snprintf(path, MAX_PATH_LEN, "%s%s",
hugetlb_priv[level].mount_path,
+ctx->name);
I think it should be "%s/%s", right?
[Yuanyuan:] The '/' is in the path string.

len = strnlen(path, MAX_PATH_LEN);
- /* UUID will use 32 bytes */
- if (len + 32 > MAX_PATH_LEN) {
+ if (len > MAX_PATH_LEN) {
pr_err("PATH overflow");
return -ENOMEM;
}

- uuid_copy(UUID, ctx->vm_uuid);
- snprintf(uuid_str, sizeof(uuid_str),
- "%02X%02X%02X%02X%02X%02X%02X%02X"
- "%02X%02X%02X%02X%02X%02X%02X%02X",
- UUID[0], UUID[1], UUID[2], UUID[3],
- UUID[4], UUID[5], UUID[6], UUID[7],
- UUID[8], UUID[9], UUID[10], UUID[11],
- UUID[12], UUID[13], UUID[14], UUID[15]);
-
- *(path + len) = '\0';
- strncat(path, uuid_str, strnlen(uuid_str, sizeof(uuid_str)));
-
pr_info("open hugetlbfs file %s\n", path);

hugetlb_priv[level].fd = open(path, O_CREAT | O_RDWR, 0644); @@
-383,7 +368,7 @@ static int rm_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN,
"%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN,
"%s/",hugetlb_priv[level].mount_path);

if (access(path, F_OK) == 0) {
if (rmdir(path) < 0) {
@@ -405,7 +390,7 @@ static int create_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN,
"%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN,
"%s/",hugetlb_priv[level].mount_path);

len = strnlen(path, MAX_PATH_LEN);
for (i = 1; i < len; i++) {
@@ -437,7 +422,7 @@ static int mount_hugetlbfs(int level)
if (hugetlb_priv[level].mounted)
return 0;

- snprintf(path, MAX_PATH_LEN, "%s%s",
hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s",
hugetlb_priv[level].mount_path);

/* only support x86 as HUGETLB level-1 2M page, level-2 1G page*/
ret = mount("none", path, "hugetlbfs", @@ -457,7 +442,7 @@ static
void umount_hugetlbfs(int level)
return;
}

- snprintf(path, MAX_PATH_LEN, "%s%s",
hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s",
hugetlb_priv[level].mount_path);


if (hugetlb_priv[level].mounted) {
diff --git a/devicemodel/core/main.c b/devicemodel/core/main.c index
957c4d93f..0c9120ce0 100644
--- a/devicemodel/core/main.c
+++ b/devicemodel/core/main.c
@@ -81,7 +81,6 @@ typedef void (*vmexit_handler_t)(struct vmctx *,

char *vmname;

-char *guest_uuid_str;
char *vsbl_file_name;
char *ovmf_file_name;
char *ovmf_code_file_name;
@@ -145,7 +144,7 @@ usage(int code)
"Usage: %s [-hAWYv] [-B bootargs] [-E elf_image_path]\n"
" %*s [-G GVT_args] [-i ioc_mediator_parameters] [-k
kernel_image_path]\n"
" %*s [-l lpc] [-m mem] [-r ramdisk_image_path]\n"
- " %*s [-s pci] [-U uuid] [--vsbl vsbl_file_name] [--ovmf
ovmf_file_path]\n"
+ " %*s [-s pci] [--vsbl vsbl_file_name] [--ovmf
ovmf_file_path]\n"
" %*s [--part_info part_info_name] [--enable_trusty] [--
intr_monitor param_setting]\n"
" %*s [--acpidev_pt HID] [--mmiodev_pt
MMIO_Regions]\n"
" %*s [--vtpm2 sock_path] [--virtio_poll interval] [--
mac_seed seed_string]\n"
@@ -164,7 +163,6 @@ usage(int code)
" -m: memory size in MB\n"
" -r: ramdisk image path\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
- " -U: uuid\n"
" -v: version\n"
" -W: force virtio to use single-vector MSI\n"
" -Y: disable MPtable generation\n"
@@ -777,7 +775,6 @@ static struct option long_options[] = {
{"lpc", required_argument, 0, 'l' },
{"pci_slot", required_argument, 0, 's' },
{"memsize", required_argument, 0, 'm' },
- {"uuid", required_argument, 0, 'U' },
{"virtio_msix", no_argument, 0, 'W' },
{"mptgen", no_argument, 0, 'Y' },
{"kernel", required_argument, 0, 'k' },
@@ -872,9 +869,6 @@ main(int argc, char *argv[])
if (vm_parse_memsize(optarg, &memsize) != 0)
errx(EX_USAGE, "invalid memsize '%s'",
optarg);
break;
- case 'U':
- guest_uuid_str = optarg;
- break;
case 'W':
virtio_msix = 0;
break;
@@ -1006,12 +1000,15 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;

- if (argc != 1)
+ if (argc != 1) {
+ pr_err("The vmname(<vm>) is necessary!\n");
usage(1);
+ }

vmname = argv[0];
+
if (strnlen(vmname, MAX_VMNAME_LEN) >= MAX_VMNAME_LEN)
{
- pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN);
+ pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN -
1);
exit(1);
}

diff --git a/devicemodel/core/vmmapi.c b/devicemodel/core/vmmapi.c
index 9c9e54d02..6ffa2f507 100644
--- a/devicemodel/core/vmmapi.c
+++ b/devicemodel/core/vmmapi.c
@@ -167,7 +167,6 @@ vm_create(const char *name, uint64_t req_buf, int
*vcpu_num)
struct vmctx *ctx;
struct acrn_vm_creation create_vm;
int error, retry = 10;
- uuid_t vm_uuid;
struct stat tmp_st;

memset(&create_vm, 0, sizeof(struct acrn_vm_creation)); @@ -
187,19
+186,6 @@ vm_create(const char *name, uint64_t req_buf, int
*vcpu_num)
goto err;
}

- if (guest_uuid_str == NULL)
- guest_uuid_str = "d2795438-25d6-11e8-864e-cb7a18b34643";
-
- error = uuid_parse(guest_uuid_str, vm_uuid);
- if (error != 0)
- goto err;
-
- /* save vm uuid to ctx */
- uuid_copy(ctx->vm_uuid, vm_uuid);
-
- /* Pass uuid as parameter of create vm*/
- uuid_copy(create_vm.uuid, vm_uuid);
-
ctx->gvt_enabled = false;
ctx->fd = devfd;
ctx->lowmem_limit = PCI_EMUL_MEMBASE32; @@ -224,6 +210,7
@@
vm_create(const char *name, uint64_t req_buf, int *vcpu_num)

/* command line arguments specified CPU affinity could overwrite
HV's static configuration */
create_vm.cpu_affinity = cpu_affinity_bitmap;
+ strncpy((char *)create_vm.name, name, strnlen(name,
+MAX_VMNAME_LEN));

if (is_rtvm) {
create_vm.vm_flag |= GUEST_FLAG_RT; @@ -711,7 +698,7
@@
vm_get_config(struct vmctx *ctx, struct acrn_vm_config_header *vm_cfg,
struct ac

for (i = 0; i < platform_info.sw.max_vms; i++) {
pcfg = (struct acrn_vm_config_header *)(configs_buff + (i *
platform_info.sw.vm_config_size));
- if (!uuid_compare(ctx->vm_uuid, pcfg->uuid))
+ if (!strncmp(ctx->name, pcfg->name, strnlen(ctx->name,
+MAX_VMNAME_LEN)))
break;
}

diff --git a/devicemodel/include/dm.h b/devicemodel/include/dm.h index
1d28e1b90..ea3499b9b 100644
--- a/devicemodel/include/dm.h
+++ b/devicemodel/include/dm.h
@@ -33,10 +33,9 @@
#include "types.h"
#include "dm_string.h"

-#define MAX_VMNAME_LEN 128U
+#define MAX_VMNAME_LEN 16U

struct vmctx;
-extern char *guest_uuid_str;
extern uint8_t trusty_enabled;
extern char *vsbl_file_name;
extern char *ovmf_file_name;
diff --git a/devicemodel/include/vmmapi.h
b/devicemodel/include/vmmapi.h index c8dab4f52..523c2ef10 100644
--- a/devicemodel/include/vmmapi.h
+++ b/devicemodel/include/vmmapi.h
@@ -57,7 +57,6 @@ struct vmctx {
size_t highmem;
char *baseaddr;
char *name;
- uuid_t vm_uuid;

/* fields to track virtual devices */
void *atkbdc_base;
--
2.17.1


Re: [PATCH v2 1/1] dm: replace UUID with vmname.

Zhao, Yuanyuan
 

-----Original Message-----
From: Wang, Yu1 <yu1.wang@...>
Sent: Wednesday, October 20, 2021 4:56 PM
To: Zhao, Yuanyuan <yuanyuan.zhao@...>
Cc: acrn-dev@...
Subject: Re: [PATCH v2 1/1] dm: replace UUID with vmname.

On Wed, Oct 20, 2021 at 03:43:13PM +0800, Yuanyuan Zhao wrote:
The UUID has several usages before:
1, For HV to identify the static VM configuration of post-launched VM.
2, Seed virtualization.
3, Slightly prevent launching malicous VM from SOS as lack of secure
boot.

The UUID is confused to user, user don't understand what it is. And
user don't know where to get/apply the UUID. The worst experience is
user can't launch any VMs w/o re-compile the hv. Everything needs to
be static decided in building phase.

Now we decide to remove UUID and split each usage. For 1st usage, use
vmname as the identifier of static VM configuration. For 2nd one, we
will use --vseed as the new parameter. For 3rd one, will pretect by
SOS's dm-verity.

This patch will remove the UUID parameter and support 1st&3rd usages
from DM part. For 2nd usage, another patch will be submitted later.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
devicemodel/core/hugetlb.c | 27 ++++++---------------------
devicemodel/core/main.c | 15 ++++++---------
devicemodel/core/vmmapi.c | 17 ++---------------
devicemodel/include/dm.h | 3 +--
devicemodel/include/vmmapi.h | 1 -
5 files changed, 15 insertions(+), 48 deletions(-)

diff --git a/devicemodel/core/hugetlb.c b/devicemodel/core/hugetlb.c
index e14faf46e..756b7b9a7 100644
--- a/devicemodel/core/hugetlb.c
+++ b/devicemodel/core/hugetlb.c
@@ -168,8 +168,6 @@ static int unlock_acrn_hugetlb(void)

static int open_hugetlbfs(struct vmctx *ctx, int level) {
- char uuid_str[48];
- uint8_t UUID[16];
char *path;
size_t len;
struct statfs fs;
@@ -181,27 +179,14 @@ static int open_hugetlbfs(struct vmctx *ctx, int
level)

path = hugetlb_priv[level].node_path;
memset(path, '\0', MAX_PATH_LEN);
- snprintf(path, MAX_PATH_LEN, "%s%s/",
hugetlb_priv[level].mount_path, ctx->name);
+ snprintf(path, MAX_PATH_LEN, "%s%s",
hugetlb_priv[level].mount_path,
+ctx->name);
I think it should be "%s/%s", right?
[Yuanyuan:] The '/' is in the path string.

len = strnlen(path, MAX_PATH_LEN);
- /* UUID will use 32 bytes */
- if (len + 32 > MAX_PATH_LEN) {
+ if (len > MAX_PATH_LEN) {
pr_err("PATH overflow");
return -ENOMEM;
}

- uuid_copy(UUID, ctx->vm_uuid);
- snprintf(uuid_str, sizeof(uuid_str),
- "%02X%02X%02X%02X%02X%02X%02X%02X"
- "%02X%02X%02X%02X%02X%02X%02X%02X",
- UUID[0], UUID[1], UUID[2], UUID[3],
- UUID[4], UUID[5], UUID[6], UUID[7],
- UUID[8], UUID[9], UUID[10], UUID[11],
- UUID[12], UUID[13], UUID[14], UUID[15]);
-
- *(path + len) = '\0';
- strncat(path, uuid_str, strnlen(uuid_str, sizeof(uuid_str)));
-
pr_info("open hugetlbfs file %s\n", path);

hugetlb_priv[level].fd = open(path, O_CREAT | O_RDWR, 0644); @@
-383,7 +368,7 @@ static int rm_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN,
"%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN,
"%s/",hugetlb_priv[level].mount_path);

if (access(path, F_OK) == 0) {
if (rmdir(path) < 0) {
@@ -405,7 +390,7 @@ static int create_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN,
"%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN,
"%s/",hugetlb_priv[level].mount_path);

len = strnlen(path, MAX_PATH_LEN);
for (i = 1; i < len; i++) {
@@ -437,7 +422,7 @@ static int mount_hugetlbfs(int level)
if (hugetlb_priv[level].mounted)
return 0;

- snprintf(path, MAX_PATH_LEN, "%s%s",
hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s",
hugetlb_priv[level].mount_path);

/* only support x86 as HUGETLB level-1 2M page, level-2 1G page*/
ret = mount("none", path, "hugetlbfs", @@ -457,7 +442,7 @@ static
void umount_hugetlbfs(int level)
return;
}

- snprintf(path, MAX_PATH_LEN, "%s%s",
hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s",
hugetlb_priv[level].mount_path);


if (hugetlb_priv[level].mounted) {
diff --git a/devicemodel/core/main.c b/devicemodel/core/main.c index
957c4d93f..0c9120ce0 100644
--- a/devicemodel/core/main.c
+++ b/devicemodel/core/main.c
@@ -81,7 +81,6 @@ typedef void (*vmexit_handler_t)(struct vmctx *,

char *vmname;

-char *guest_uuid_str;
char *vsbl_file_name;
char *ovmf_file_name;
char *ovmf_code_file_name;
@@ -145,7 +144,7 @@ usage(int code)
"Usage: %s [-hAWYv] [-B bootargs] [-E elf_image_path]\n"
" %*s [-G GVT_args] [-i ioc_mediator_parameters] [-k
kernel_image_path]\n"
" %*s [-l lpc] [-m mem] [-r ramdisk_image_path]\n"
- " %*s [-s pci] [-U uuid] [--vsbl vsbl_file_name] [--ovmf
ovmf_file_path]\n"
+ " %*s [-s pci] [--vsbl vsbl_file_name] [--ovmf
ovmf_file_path]\n"
" %*s [--part_info part_info_name] [--enable_trusty] [--
intr_monitor param_setting]\n"
" %*s [--acpidev_pt HID] [--mmiodev_pt
MMIO_Regions]\n"
" %*s [--vtpm2 sock_path] [--virtio_poll interval] [--
mac_seed seed_string]\n"
@@ -164,7 +163,6 @@ usage(int code)
" -m: memory size in MB\n"
" -r: ramdisk image path\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
- " -U: uuid\n"
" -v: version\n"
" -W: force virtio to use single-vector MSI\n"
" -Y: disable MPtable generation\n"
@@ -777,7 +775,6 @@ static struct option long_options[] = {
{"lpc", required_argument, 0, 'l' },
{"pci_slot", required_argument, 0, 's' },
{"memsize", required_argument, 0, 'm' },
- {"uuid", required_argument, 0, 'U' },
{"virtio_msix", no_argument, 0, 'W' },
{"mptgen", no_argument, 0, 'Y' },
{"kernel", required_argument, 0, 'k' },
@@ -872,9 +869,6 @@ main(int argc, char *argv[])
if (vm_parse_memsize(optarg, &memsize) != 0)
errx(EX_USAGE, "invalid memsize '%s'",
optarg);
break;
- case 'U':
- guest_uuid_str = optarg;
- break;
case 'W':
virtio_msix = 0;
break;
@@ -1006,12 +1000,15 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;

- if (argc != 1)
+ if (argc != 1) {
+ pr_err("The vmname(<vm>) is necessary!\n");
usage(1);
+ }

vmname = argv[0];
+
if (strnlen(vmname, MAX_VMNAME_LEN) >= MAX_VMNAME_LEN)
{
- pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN);
+ pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN -
1);
exit(1);
}

diff --git a/devicemodel/core/vmmapi.c b/devicemodel/core/vmmapi.c
index 9c9e54d02..6ffa2f507 100644
--- a/devicemodel/core/vmmapi.c
+++ b/devicemodel/core/vmmapi.c
@@ -167,7 +167,6 @@ vm_create(const char *name, uint64_t req_buf, int
*vcpu_num)
struct vmctx *ctx;
struct acrn_vm_creation create_vm;
int error, retry = 10;
- uuid_t vm_uuid;
struct stat tmp_st;

memset(&create_vm, 0, sizeof(struct acrn_vm_creation)); @@ -
187,19
+186,6 @@ vm_create(const char *name, uint64_t req_buf, int
*vcpu_num)
goto err;
}

- if (guest_uuid_str == NULL)
- guest_uuid_str = "d2795438-25d6-11e8-864e-cb7a18b34643";
-
- error = uuid_parse(guest_uuid_str, vm_uuid);
- if (error != 0)
- goto err;
-
- /* save vm uuid to ctx */
- uuid_copy(ctx->vm_uuid, vm_uuid);
-
- /* Pass uuid as parameter of create vm*/
- uuid_copy(create_vm.uuid, vm_uuid);
-
ctx->gvt_enabled = false;
ctx->fd = devfd;
ctx->lowmem_limit = PCI_EMUL_MEMBASE32; @@ -224,6 +210,7
@@
vm_create(const char *name, uint64_t req_buf, int *vcpu_num)

/* command line arguments specified CPU affinity could overwrite
HV's static configuration */
create_vm.cpu_affinity = cpu_affinity_bitmap;
+ strncpy((char *)create_vm.name, name, strnlen(name,
+MAX_VMNAME_LEN));

if (is_rtvm) {
create_vm.vm_flag |= GUEST_FLAG_RT; @@ -711,7 +698,7
@@
vm_get_config(struct vmctx *ctx, struct acrn_vm_config_header *vm_cfg,
struct ac

for (i = 0; i < platform_info.sw.max_vms; i++) {
pcfg = (struct acrn_vm_config_header *)(configs_buff + (i *
platform_info.sw.vm_config_size));
- if (!uuid_compare(ctx->vm_uuid, pcfg->uuid))
+ if (!strncmp(ctx->name, pcfg->name, strnlen(ctx->name,
+MAX_VMNAME_LEN)))
break;
}

diff --git a/devicemodel/include/dm.h b/devicemodel/include/dm.h index
1d28e1b90..ea3499b9b 100644
--- a/devicemodel/include/dm.h
+++ b/devicemodel/include/dm.h
@@ -33,10 +33,9 @@
#include "types.h"
#include "dm_string.h"

-#define MAX_VMNAME_LEN 128U
+#define MAX_VMNAME_LEN 16U

struct vmctx;
-extern char *guest_uuid_str;
extern uint8_t trusty_enabled;
extern char *vsbl_file_name;
extern char *ovmf_file_name;
diff --git a/devicemodel/include/vmmapi.h
b/devicemodel/include/vmmapi.h index c8dab4f52..523c2ef10 100644
--- a/devicemodel/include/vmmapi.h
+++ b/devicemodel/include/vmmapi.h
@@ -57,7 +57,6 @@ struct vmctx {
size_t highmem;
char *baseaddr;
char *name;
- uuid_t vm_uuid;

/* fields to track virtual devices */
void *atkbdc_base;
--
2.17.1


Re: [PATCH v2 1/1] dm: replace UUID with vmname.

Yu Wang
 

On Wed, Oct 20, 2021 at 03:43:13PM +0800, Yuanyuan Zhao wrote:
The UUID has several usages before:
1, For HV to identify the static VM configuration of post-launched VM.
2, Seed virtualization.
3, Slightly prevent launching malicous VM from SOS as lack of secure
boot.

The UUID is confused to user, user don't understand what it is. And user
don't know where to get/apply the UUID. The worst experience is user
can't launch any VMs w/o re-compile the hv. Everything needs to be
static decided in building phase.

Now we decide to remove UUID and split each usage. For 1st usage, use
vmname as the identifier of static VM configuration. For 2nd one, we
will use --vseed as the new parameter. For 3rd one, will pretect by
SOS's dm-verity.

This patch will remove the UUID parameter and support 1st&3rd usages
from DM part. For 2nd usage, another patch will be submitted later.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
devicemodel/core/hugetlb.c | 27 ++++++---------------------
devicemodel/core/main.c | 15 ++++++---------
devicemodel/core/vmmapi.c | 17 ++---------------
devicemodel/include/dm.h | 3 +--
devicemodel/include/vmmapi.h | 1 -
5 files changed, 15 insertions(+), 48 deletions(-)

diff --git a/devicemodel/core/hugetlb.c b/devicemodel/core/hugetlb.c
index e14faf46e..756b7b9a7 100644
--- a/devicemodel/core/hugetlb.c
+++ b/devicemodel/core/hugetlb.c
@@ -168,8 +168,6 @@ static int unlock_acrn_hugetlb(void)

static int open_hugetlbfs(struct vmctx *ctx, int level)
{
- char uuid_str[48];
- uint8_t UUID[16];
char *path;
size_t len;
struct statfs fs;
@@ -181,27 +179,14 @@ static int open_hugetlbfs(struct vmctx *ctx, int level)

path = hugetlb_priv[level].node_path;
memset(path, '\0', MAX_PATH_LEN);
- snprintf(path, MAX_PATH_LEN, "%s%s/", hugetlb_priv[level].mount_path, ctx->name);
+ snprintf(path, MAX_PATH_LEN, "%s%s", hugetlb_priv[level].mount_path, ctx->name);
I think it should be "%s/%s", right?


len = strnlen(path, MAX_PATH_LEN);
- /* UUID will use 32 bytes */
- if (len + 32 > MAX_PATH_LEN) {
+ if (len > MAX_PATH_LEN) {
pr_err("PATH overflow");
return -ENOMEM;
}

- uuid_copy(UUID, ctx->vm_uuid);
- snprintf(uuid_str, sizeof(uuid_str),
- "%02X%02X%02X%02X%02X%02X%02X%02X"
- "%02X%02X%02X%02X%02X%02X%02X%02X",
- UUID[0], UUID[1], UUID[2], UUID[3],
- UUID[4], UUID[5], UUID[6], UUID[7],
- UUID[8], UUID[9], UUID[10], UUID[11],
- UUID[12], UUID[13], UUID[14], UUID[15]);
-
- *(path + len) = '\0';
- strncat(path, uuid_str, strnlen(uuid_str, sizeof(uuid_str)));
-
pr_info("open hugetlbfs file %s\n", path);

hugetlb_priv[level].fd = open(path, O_CREAT | O_RDWR, 0644);
@@ -383,7 +368,7 @@ static int rm_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN, "%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN, "%s/",hugetlb_priv[level].mount_path);

if (access(path, F_OK) == 0) {
if (rmdir(path) < 0) {
@@ -405,7 +390,7 @@ static int create_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN, "%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN, "%s/",hugetlb_priv[level].mount_path);

len = strnlen(path, MAX_PATH_LEN);
for (i = 1; i < len; i++) {
@@ -437,7 +422,7 @@ static int mount_hugetlbfs(int level)
if (hugetlb_priv[level].mounted)
return 0;

- snprintf(path, MAX_PATH_LEN, "%s%s", hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s", hugetlb_priv[level].mount_path);

/* only support x86 as HUGETLB level-1 2M page, level-2 1G page*/
ret = mount("none", path, "hugetlbfs",
@@ -457,7 +442,7 @@ static void umount_hugetlbfs(int level)
return;
}

- snprintf(path, MAX_PATH_LEN, "%s%s", hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s", hugetlb_priv[level].mount_path);


if (hugetlb_priv[level].mounted) {
diff --git a/devicemodel/core/main.c b/devicemodel/core/main.c
index 957c4d93f..0c9120ce0 100644
--- a/devicemodel/core/main.c
+++ b/devicemodel/core/main.c
@@ -81,7 +81,6 @@ typedef void (*vmexit_handler_t)(struct vmctx *,

char *vmname;

-char *guest_uuid_str;
char *vsbl_file_name;
char *ovmf_file_name;
char *ovmf_code_file_name;
@@ -145,7 +144,7 @@ usage(int code)
"Usage: %s [-hAWYv] [-B bootargs] [-E elf_image_path]\n"
" %*s [-G GVT_args] [-i ioc_mediator_parameters] [-k kernel_image_path]\n"
" %*s [-l lpc] [-m mem] [-r ramdisk_image_path]\n"
- " %*s [-s pci] [-U uuid] [--vsbl vsbl_file_name] [--ovmf ovmf_file_path]\n"
+ " %*s [-s pci] [--vsbl vsbl_file_name] [--ovmf ovmf_file_path]\n"
" %*s [--part_info part_info_name] [--enable_trusty] [--intr_monitor param_setting]\n"
" %*s [--acpidev_pt HID] [--mmiodev_pt MMIO_Regions]\n"
" %*s [--vtpm2 sock_path] [--virtio_poll interval] [--mac_seed seed_string]\n"
@@ -164,7 +163,6 @@ usage(int code)
" -m: memory size in MB\n"
" -r: ramdisk image path\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
- " -U: uuid\n"
" -v: version\n"
" -W: force virtio to use single-vector MSI\n"
" -Y: disable MPtable generation\n"
@@ -777,7 +775,6 @@ static struct option long_options[] = {
{"lpc", required_argument, 0, 'l' },
{"pci_slot", required_argument, 0, 's' },
{"memsize", required_argument, 0, 'm' },
- {"uuid", required_argument, 0, 'U' },
{"virtio_msix", no_argument, 0, 'W' },
{"mptgen", no_argument, 0, 'Y' },
{"kernel", required_argument, 0, 'k' },
@@ -872,9 +869,6 @@ main(int argc, char *argv[])
if (vm_parse_memsize(optarg, &memsize) != 0)
errx(EX_USAGE, "invalid memsize '%s'", optarg);
break;
- case 'U':
- guest_uuid_str = optarg;
- break;
case 'W':
virtio_msix = 0;
break;
@@ -1006,12 +1000,15 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;

- if (argc != 1)
+ if (argc != 1) {
+ pr_err("The vmname(<vm>) is necessary!\n");
usage(1);
+ }

vmname = argv[0];
+
if (strnlen(vmname, MAX_VMNAME_LEN) >= MAX_VMNAME_LEN) {
- pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN);
+ pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN - 1);
exit(1);
}

diff --git a/devicemodel/core/vmmapi.c b/devicemodel/core/vmmapi.c
index 9c9e54d02..6ffa2f507 100644
--- a/devicemodel/core/vmmapi.c
+++ b/devicemodel/core/vmmapi.c
@@ -167,7 +167,6 @@ vm_create(const char *name, uint64_t req_buf, int *vcpu_num)
struct vmctx *ctx;
struct acrn_vm_creation create_vm;
int error, retry = 10;
- uuid_t vm_uuid;
struct stat tmp_st;

memset(&create_vm, 0, sizeof(struct acrn_vm_creation));
@@ -187,19 +186,6 @@ vm_create(const char *name, uint64_t req_buf, int *vcpu_num)
goto err;
}

- if (guest_uuid_str == NULL)
- guest_uuid_str = "d2795438-25d6-11e8-864e-cb7a18b34643";
-
- error = uuid_parse(guest_uuid_str, vm_uuid);
- if (error != 0)
- goto err;
-
- /* save vm uuid to ctx */
- uuid_copy(ctx->vm_uuid, vm_uuid);
-
- /* Pass uuid as parameter of create vm*/
- uuid_copy(create_vm.uuid, vm_uuid);
-
ctx->gvt_enabled = false;
ctx->fd = devfd;
ctx->lowmem_limit = PCI_EMUL_MEMBASE32;
@@ -224,6 +210,7 @@ vm_create(const char *name, uint64_t req_buf, int *vcpu_num)

/* command line arguments specified CPU affinity could overwrite HV's static configuration */
create_vm.cpu_affinity = cpu_affinity_bitmap;
+ strncpy((char *)create_vm.name, name, strnlen(name, MAX_VMNAME_LEN));

if (is_rtvm) {
create_vm.vm_flag |= GUEST_FLAG_RT;
@@ -711,7 +698,7 @@ vm_get_config(struct vmctx *ctx, struct acrn_vm_config_header *vm_cfg, struct ac

for (i = 0; i < platform_info.sw.max_vms; i++) {
pcfg = (struct acrn_vm_config_header *)(configs_buff + (i * platform_info.sw.vm_config_size));
- if (!uuid_compare(ctx->vm_uuid, pcfg->uuid))
+ if (!strncmp(ctx->name, pcfg->name, strnlen(ctx->name, MAX_VMNAME_LEN)))
break;
}

diff --git a/devicemodel/include/dm.h b/devicemodel/include/dm.h
index 1d28e1b90..ea3499b9b 100644
--- a/devicemodel/include/dm.h
+++ b/devicemodel/include/dm.h
@@ -33,10 +33,9 @@
#include "types.h"
#include "dm_string.h"

-#define MAX_VMNAME_LEN 128U
+#define MAX_VMNAME_LEN 16U

struct vmctx;
-extern char *guest_uuid_str;
extern uint8_t trusty_enabled;
extern char *vsbl_file_name;
extern char *ovmf_file_name;
diff --git a/devicemodel/include/vmmapi.h b/devicemodel/include/vmmapi.h
index c8dab4f52..523c2ef10 100644
--- a/devicemodel/include/vmmapi.h
+++ b/devicemodel/include/vmmapi.h
@@ -57,7 +57,6 @@ struct vmctx {
size_t highmem;
char *baseaddr;
char *name;
- uuid_t vm_uuid;

/* fields to track virtual devices */
void *atkbdc_base;
--
2.17.1


[PATCH v2 1/1] dm: replace UUID with vmname.

Zhao, Yuanyuan
 

The UUID has several usages before:
1, For HV to identify the static VM configuration of post-launched VM.
2, Seed virtualization.
3, Slightly prevent launching malicous VM from SOS as lack of secure
boot.

The UUID is confused to user, user don't understand what it is. And user
don't know where to get/apply the UUID. The worst experience is user
can't launch any VMs w/o re-compile the hv. Everything needs to be
static decided in building phase.

Now we decide to remove UUID and split each usage. For 1st usage, use
vmname as the identifier of static VM configuration. For 2nd one, we
will use --vseed as the new parameter. For 3rd one, will pretect by
SOS's dm-verity.

This patch will remove the UUID parameter and support 1st&3rd usages
from DM part. For 2nd usage, another patch will be submitted later.

Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@...>
---
devicemodel/core/hugetlb.c | 27 ++++++---------------------
devicemodel/core/main.c | 15 ++++++---------
devicemodel/core/vmmapi.c | 17 ++---------------
devicemodel/include/dm.h | 3 +--
devicemodel/include/vmmapi.h | 1 -
5 files changed, 15 insertions(+), 48 deletions(-)

diff --git a/devicemodel/core/hugetlb.c b/devicemodel/core/hugetlb.c
index e14faf46e..756b7b9a7 100644
--- a/devicemodel/core/hugetlb.c
+++ b/devicemodel/core/hugetlb.c
@@ -168,8 +168,6 @@ static int unlock_acrn_hugetlb(void)

static int open_hugetlbfs(struct vmctx *ctx, int level)
{
- char uuid_str[48];
- uint8_t UUID[16];
char *path;
size_t len;
struct statfs fs;
@@ -181,27 +179,14 @@ static int open_hugetlbfs(struct vmctx *ctx, int level)

path = hugetlb_priv[level].node_path;
memset(path, '\0', MAX_PATH_LEN);
- snprintf(path, MAX_PATH_LEN, "%s%s/", hugetlb_priv[level].mount_path, ctx->name);
+ snprintf(path, MAX_PATH_LEN, "%s%s", hugetlb_priv[level].mount_path, ctx->name);

len = strnlen(path, MAX_PATH_LEN);
- /* UUID will use 32 bytes */
- if (len + 32 > MAX_PATH_LEN) {
+ if (len > MAX_PATH_LEN) {
pr_err("PATH overflow");
return -ENOMEM;
}

- uuid_copy(UUID, ctx->vm_uuid);
- snprintf(uuid_str, sizeof(uuid_str),
- "%02X%02X%02X%02X%02X%02X%02X%02X"
- "%02X%02X%02X%02X%02X%02X%02X%02X",
- UUID[0], UUID[1], UUID[2], UUID[3],
- UUID[4], UUID[5], UUID[6], UUID[7],
- UUID[8], UUID[9], UUID[10], UUID[11],
- UUID[12], UUID[13], UUID[14], UUID[15]);
-
- *(path + len) = '\0';
- strncat(path, uuid_str, strnlen(uuid_str, sizeof(uuid_str)));
-
pr_info("open hugetlbfs file %s\n", path);

hugetlb_priv[level].fd = open(path, O_CREAT | O_RDWR, 0644);
@@ -383,7 +368,7 @@ static int rm_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN, "%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN, "%s/",hugetlb_priv[level].mount_path);

if (access(path, F_OK) == 0) {
if (rmdir(path) < 0) {
@@ -405,7 +390,7 @@ static int create_hugetlb_dirs(int level)
return -EINVAL;
}

- snprintf(path,MAX_PATH_LEN, "%s%s/",hugetlb_priv[level].mount_path,vmname);
+ snprintf(path,MAX_PATH_LEN, "%s/",hugetlb_priv[level].mount_path);

len = strnlen(path, MAX_PATH_LEN);
for (i = 1; i < len; i++) {
@@ -437,7 +422,7 @@ static int mount_hugetlbfs(int level)
if (hugetlb_priv[level].mounted)
return 0;

- snprintf(path, MAX_PATH_LEN, "%s%s", hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s", hugetlb_priv[level].mount_path);

/* only support x86 as HUGETLB level-1 2M page, level-2 1G page*/
ret = mount("none", path, "hugetlbfs",
@@ -457,7 +442,7 @@ static void umount_hugetlbfs(int level)
return;
}

- snprintf(path, MAX_PATH_LEN, "%s%s", hugetlb_priv[level].mount_path,vmname);
+ snprintf(path, MAX_PATH_LEN, "%s", hugetlb_priv[level].mount_path);


if (hugetlb_priv[level].mounted) {
diff --git a/devicemodel/core/main.c b/devicemodel/core/main.c
index 957c4d93f..0c9120ce0 100644
--- a/devicemodel/core/main.c
+++ b/devicemodel/core/main.c
@@ -81,7 +81,6 @@ typedef void (*vmexit_handler_t)(struct vmctx *,

char *vmname;

-char *guest_uuid_str;
char *vsbl_file_name;
char *ovmf_file_name;
char *ovmf_code_file_name;
@@ -145,7 +144,7 @@ usage(int code)
"Usage: %s [-hAWYv] [-B bootargs] [-E elf_image_path]\n"
" %*s [-G GVT_args] [-i ioc_mediator_parameters] [-k kernel_image_path]\n"
" %*s [-l lpc] [-m mem] [-r ramdisk_image_path]\n"
- " %*s [-s pci] [-U uuid] [--vsbl vsbl_file_name] [--ovmf ovmf_file_path]\n"
+ " %*s [-s pci] [--vsbl vsbl_file_name] [--ovmf ovmf_file_path]\n"
" %*s [--part_info part_info_name] [--enable_trusty] [--intr_monitor param_setting]\n"
" %*s [--acpidev_pt HID] [--mmiodev_pt MMIO_Regions]\n"
" %*s [--vtpm2 sock_path] [--virtio_poll interval] [--mac_seed seed_string]\n"
@@ -164,7 +163,6 @@ usage(int code)
" -m: memory size in MB\n"
" -r: ramdisk image path\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
- " -U: uuid\n"
" -v: version\n"
" -W: force virtio to use single-vector MSI\n"
" -Y: disable MPtable generation\n"
@@ -777,7 +775,6 @@ static struct option long_options[] = {
{"lpc", required_argument, 0, 'l' },
{"pci_slot", required_argument, 0, 's' },
{"memsize", required_argument, 0, 'm' },
- {"uuid", required_argument, 0, 'U' },
{"virtio_msix", no_argument, 0, 'W' },
{"mptgen", no_argument, 0, 'Y' },
{"kernel", required_argument, 0, 'k' },
@@ -872,9 +869,6 @@ main(int argc, char *argv[])
if (vm_parse_memsize(optarg, &memsize) != 0)
errx(EX_USAGE, "invalid memsize '%s'", optarg);
break;
- case 'U':
- guest_uuid_str = optarg;
- break;
case 'W':
virtio_msix = 0;
break;
@@ -1006,12 +1000,15 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;

- if (argc != 1)
+ if (argc != 1) {
+ pr_err("The vmname(<vm>) is necessary!\n");
usage(1);
+ }

vmname = argv[0];
+
if (strnlen(vmname, MAX_VMNAME_LEN) >= MAX_VMNAME_LEN) {
- pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN);
+ pr_err("vmname size exceed %u\n", MAX_VMNAME_LEN - 1);
exit(1);
}

diff --git a/devicemodel/core/vmmapi.c b/devicemodel/core/vmmapi.c
index 9c9e54d02..6ffa2f507 100644
--- a/devicemodel/core/vmmapi.c
+++ b/devicemodel/core/vmmapi.c
@@ -167,7 +167,6 @@ vm_create(const char *name, uint64_t req_buf, int *vcpu_num)
struct vmctx *ctx;
struct acrn_vm_creation create_vm;
int error, retry = 10;
- uuid_t vm_uuid;
struct stat tmp_st;

memset(&create_vm, 0, sizeof(struct acrn_vm_creation));
@@ -187,19 +186,6 @@ vm_create(const char *name, uint64_t req_buf, int *vcpu_num)
goto err;
}

- if (guest_uuid_str == NULL)
- guest_uuid_str = "d2795438-25d6-11e8-864e-cb7a18b34643";
-
- error = uuid_parse(guest_uuid_str, vm_uuid);
- if (error != 0)
- goto err;
-
- /* save vm uuid to ctx */
- uuid_copy(ctx->vm_uuid, vm_uuid);
-
- /* Pass uuid as parameter of create vm*/
- uuid_copy(create_vm.uuid, vm_uuid);
-
ctx->gvt_enabled = false;
ctx->fd = devfd;
ctx->lowmem_limit = PCI_EMUL_MEMBASE32;
@@ -224,6 +210,7 @@ vm_create(const char *name, uint64_t req_buf, int *vcpu_num)

/* command line arguments specified CPU affinity could overwrite HV's static configuration */
create_vm.cpu_affinity = cpu_affinity_bitmap;
+ strncpy((char *)create_vm.name, name, strnlen(name, MAX_VMNAME_LEN));

if (is_rtvm) {
create_vm.vm_flag |= GUEST_FLAG_RT;
@@ -711,7 +698,7 @@ vm_get_config(struct vmctx *ctx, struct acrn_vm_config_header *vm_cfg, struct ac

for (i = 0; i < platform_info.sw.max_vms; i++) {
pcfg = (struct acrn_vm_config_header *)(configs_buff + (i * platform_info.sw.vm_config_size));
- if (!uuid_compare(ctx->vm_uuid, pcfg->uuid))
+ if (!strncmp(ctx->name, pcfg->name, strnlen(ctx->name, MAX_VMNAME_LEN)))
break;
}

diff --git a/devicemodel/include/dm.h b/devicemodel/include/dm.h
index 1d28e1b90..ea3499b9b 100644
--- a/devicemodel/include/dm.h
+++ b/devicemodel/include/dm.h
@@ -33,10 +33,9 @@
#include "types.h"
#include "dm_string.h"

-#define MAX_VMNAME_LEN 128U
+#define MAX_VMNAME_LEN 16U

struct vmctx;
-extern char *guest_uuid_str;
extern uint8_t trusty_enabled;
extern char *vsbl_file_name;
extern char *ovmf_file_name;
diff --git a/devicemodel/include/vmmapi.h b/devicemodel/include/vmmapi.h
index c8dab4f52..523c2ef10 100644
--- a/devicemodel/include/vmmapi.h
+++ b/devicemodel/include/vmmapi.h
@@ -57,7 +57,6 @@ struct vmctx {
size_t highmem;
char *baseaddr;
char *name;
- uuid_t vm_uuid;

/* fields to track virtual devices */
void *atkbdc_base;
--
2.17.1

3681 - 3700 of 37344