[PATCH 5/6] hv: add CPU frequency driver in hv


Zhou, Wu
 

This patch will enable ACRN as the CPU frequency controllor.

The frequency driver uses governors to decide the frequency strategy.
Currently we have two governors:
- Performance: CPU can runs at its max possible frequency(turbo boost
will be activated if enabled). If hardware performance states (HWP)
machanic is available, use its to automatically adjust frequency.
- Nominal: CPU runs at its base frequency.

The governor is chosed by user in configurator. Besides, user will
have to chose one of the two freuquency control interfaces:
- HWP
- ACPI p-state

The governors use an abstract layer called frequency policy to decide
what the CPU's highest/lowest/base frequency is.
the frequency policy is a per pCPU data. It is generated by offline tools.

The frequency driver provides 2 APIs for the hypervisor to call.
- init_cpu_freq() called by boot CPU at start up to initialize the
frequency driver.
- cpu_freq_pcpu_online() called by pCPUs when online. Setting
CPU frequency as its governor demands.

After pCPU is online, the frequency driver would no longer adjust value
of its frequency registers. It is required for the VMs to give up CPU
frequency control. This can be done by blocking VMs' eist/hwp cpuids
and freuqency control MSRs in hv.
Alternatively, ACPI p-state can be passed through to VMs if they do not
share pCPUs(or interfere with other VM's CPU frequency).

Signed-off-by: Wu Zhou <wu.zhou@...>
---
hypervisor/arch/x86/cpu.c | 5 ++
hypervisor/arch/x86/pm.c | 59 +++++++++++++++++++++++
hypervisor/include/arch/x86/asm/board.h | 2 +
hypervisor/include/arch/x86/asm/host_pm.h | 2 +
hypervisor/include/arch/x86/asm/per_cpu.h | 1 +
5 files changed, 69 insertions(+)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index 13ffb3f59..b668439c8 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -24,6 +24,7 @@
#include <version.h>
#include <asm/vmx.h>
#include <asm/msr.h>
+#include <asm/host_pm.h>
#include <ptdev.h>
#include <logmsg.h>
#include <asm/rdt.h>
@@ -156,6 +157,8 @@ void init_pcpu_pre(bool is_bsp)

load_pcpu_state_data();

+ init_cpu_freq();
+
init_e820();

/* reserve ppt buffer from e820 */
@@ -315,6 +318,8 @@ void init_pcpu_post(uint16_t pcpu_id)
panic("failed to initialize software SRAM!");
}

+ cpu_freq_pcpu_online();
+
init_sched(pcpu_id);

#ifdef CONFIG_RDT_ENABLED
diff --git a/hypervisor/arch/x86/pm.c b/hypervisor/arch/x86/pm.c
index 9af9362cc..abadb37ef 100644
--- a/hypervisor/arch/x86/pm.c
+++ b/hypervisor/arch/x86/pm.c
@@ -19,6 +19,7 @@
#include <asm/lapic.h>
#include <asm/tsc.h>
#include <delay.h>
+#include <asm/board.h>

struct cpu_context cpu_ctx;

@@ -271,3 +272,61 @@ void reset_host(void)
asm_pause();
}
}
+
+/*
+ * set the cpu's performance level range
+ * The performance level is not necessary CPU's frequency ratio.
+ * When using HWP, the levels represents the level used in HWP_REQUEST MSR, while using ACPI p-state, it represents
+ * the Px level 'x' descripted in ACPI _PSS table.
+ *
+ * For ACPI p-state, it does not have the hardware automatic frequency adjust ablility, it can only set a fixed
+ * frequency. So this function assumes that highest_lvl = lowest_lvl when using ACPI p-state.
+ */
+static void cpu_freq_set_performance(uint8_t highest_lvl, uint8_t lowest_lvl)
+{
+ uint64_t reg;
+ pr_acrnlog("3");
+ if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_HWP) {
+ reg = (0x80UL << 24) | (0x00UL << 16) | (((uint64_t)highest_lvl) << 8) | ((uint64_t)lowest_lvl);
+ msr_write(MSR_IA32_HWP_REQUEST, reg);
+
+ } else if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_ACPI) {
+ struct cpu_state_info *pm_s_state_data = get_cpu_pm_state_info();
+ if (highest_lvl < pm_s_state_data->px_cnt) {
+ reg = pm_s_state_data->px_data[highest_lvl].control;
+ msr_write(MSR_IA32_PERF_CTL, reg);
+ }
+ }
+}
+
+/* called by boot cpu at initializing phase */
+void init_cpu_freq(void)
+{
+ uint16_t pcpu_id;
+
+ for (pcpu_id = 0; pcpu_id < MAX_PCPU_NUM; pcpu_id++) {
+ per_cpu(cpufreq_policy, pcpu_id) = &(cpufreq_policy_info[pcpu_id]);
+ }
+
+ if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_HWP) {
+ msr_write(MSR_IA32_PM_ENABLE, 1U);
+ }
+}
+
+/* called by pcpu, setting its frequency at start up */
+void cpu_freq_pcpu_online(void)
+{
+ struct acrn_cpufreq_policy *policy = get_cpu_var(cpufreq_policy);
+
+ /*
+ * Currently we have 2 governors:
+ * CPUFREQ_GOVERNOR_PERFORMANCE - CPU can run at up to its max frequency
+ * CPUFREQ_GOVERNOR_NOMINAL - CPU locked at its base/guaranteed frequency
+ * You can chose one from offline tool.
+ */
+ if (cpufreq_info.governor_type == CPUFREQ_GOVERNOR_PERFORMANCE && policy->available) {
+ cpu_freq_set_performance(policy->policy_highest_lvl, policy->policy_lowest_lvl);
+ } else if (cpufreq_info.governor_type == CPUFREQ_GOVERNOR_NOMINAL && policy->available) {
+ cpu_freq_set_performance(policy->policy_guaranteed_lvl, policy->policy_guaranteed_lvl);
+ }
+}
diff --git a/hypervisor/include/arch/x86/asm/board.h b/hypervisor/include/arch/x86/asm/board.h
index 56bbeb9c8..f30457d25 100644
--- a/hypervisor/include/arch/x86/asm/board.h
+++ b/hypervisor/include/arch/x86/asm/board.h
@@ -34,6 +34,8 @@ extern struct rdt_type res_cap_info[RDT_NUM_RESOURCES];
#endif

extern const struct cpu_state_table board_cpu_state_tbl;
+extern struct acrn_cpufreq_policy cpufreq_policy_info[MAX_PCPU_NUM];
+extern const struct acrn_cpufreq_info cpufreq_info;
extern const union pci_bdf plat_hidden_pdevs[MAX_HIDDEN_PDEVS_NUM];
extern const struct vmsix_on_msi_info vmsix_on_msi_devs[MAX_VMSIX_ON_MSI_PDEVS_NUM];

diff --git a/hypervisor/include/arch/x86/asm/host_pm.h b/hypervisor/include/arch/x86/asm/host_pm.h
index b8fb8a307..e71895579 100644
--- a/hypervisor/include/arch/x86/asm/host_pm.h
+++ b/hypervisor/include/arch/x86/asm/host_pm.h
@@ -39,5 +39,7 @@ extern void restore_s3_context(void);
struct cpu_state_info *get_cpu_pm_state_info(void);
struct acpi_reset_reg *get_host_reset_reg_data(void);
void reset_host(void);
+void init_cpu_freq(void);
+void cpu_freq_pcpu_online(void);

#endif /* HOST_PM_H */
diff --git a/hypervisor/include/arch/x86/asm/per_cpu.h b/hypervisor/include/arch/x86/asm/per_cpu.h
index 1c7c83d80..2cc7aa4b8 100644
--- a/hypervisor/include/arch/x86/asm/per_cpu.h
+++ b/hypervisor/include/arch/x86/asm/per_cpu.h
@@ -62,6 +62,7 @@ struct per_cpu_region {
uint64_t shutdown_vm_bitmap;
uint64_t tsc_suspend;
struct acrn_vcpu *whose_iwkey;
+ struct acrn_cpufreq_policy *cpufreq_policy;
/*
* We maintain a per-pCPU array of vCPUs. vCPUs of a VM won't
* share same pCPU. So the maximum possible # of vCPUs that can
--
2.25.1


Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Zhou, Wu
Sent: Wednesday, August 10, 2022 2:25 AM
To: acrn-dev@...
Cc: Zhou, Wu <wu.zhou@...>
Subject: [acrn-dev] [PATCH 5/6] hv: add CPU frequency driver in hv

This patch will enable ACRN as the CPU frequency controllor.

The frequency driver uses governors to decide the frequency strategy.
I am not sure this is governor... To me, governor monitors the status of "frequency", and do adjustment per different situation.
While here, we just initialize it or configure the platform.

Currently we have two governors:
- Performance: CPU can runs at its max possible frequency(turbo boost will
be activated if enabled). If hardware performance states (HWP) machanic is
available, use its to automatically adjust frequency.
- Nominal: CPU runs at its base frequency.

The governor is chosed by user in configurator. Besides, user will have to
chose one of the two freuquency control interfaces:
- HWP
- ACPI p-state
I still don't understand why this cannot be done automatically by ACRN.
Of course, additional configuration simplifies ACRN code. Then the question is how many LOC is simplified.


The governors use an abstract layer called frequency policy to decide what the
CPU's highest/lowest/base frequency is.
the frequency policy is a per pCPU data. It is generated by offline tools.

The frequency driver provides 2 APIs for the hypervisor to call.
- init_cpu_freq() called by boot CPU at start up to initialize the frequency
driver.
Is called by BSP at start up time...

- cpu_freq_pcpu_online() called by pCPUs when online. Setting CPU
Is called by each pCPU at start up time.

What is the difference of "start up" and "online" in your original description?

frequency as its governor demands.

After pCPU is online, the frequency driver would no longer adjust value of its
frequency registers. It is required for the VMs to give up CPU frequency
Couldn't understand.

ACRN removes the frequency control capability from VMs.

control. This can be done by blocking VMs' eist/hwp cpuids and freuqency
control MSRs in hv.
This is achieved by removing xxx capability in CPUID leaf yy.


Alternatively, ACPI p-state can be passed through to VMs if they do not share
pCPUs(or interfere with other VM's CPU frequency).
Didn't understand what it means here.
Really need to make the commit message clear enough to tell what this patch does.


Signed-off-by: Wu Zhou <wu.zhou@...>
---
hypervisor/arch/x86/cpu.c | 5 ++
hypervisor/arch/x86/pm.c | 59 +++++++++++++++++++++++
hypervisor/include/arch/x86/asm/board.h | 2 +
hypervisor/include/arch/x86/asm/host_pm.h | 2 +
hypervisor/include/arch/x86/asm/per_cpu.h | 1 +
5 files changed, 69 insertions(+)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c index
13ffb3f59..b668439c8 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -24,6 +24,7 @@
#include <version.h>
#include <asm/vmx.h>
#include <asm/msr.h>
+#include <asm/host_pm.h>
#include <ptdev.h>
#include <logmsg.h>
#include <asm/rdt.h>
@@ -156,6 +157,8 @@ void init_pcpu_pre(bool is_bsp)

load_pcpu_state_data();

+ init_cpu_freq();
+
init_e820();

/* reserve ppt buffer from e820 */
@@ -315,6 +318,8 @@ void init_pcpu_post(uint16_t pcpu_id)
panic("failed to initialize software SRAM!");
}

+ cpu_freq_pcpu_online();
+
init_sched(pcpu_id);

#ifdef CONFIG_RDT_ENABLED
diff --git a/hypervisor/arch/x86/pm.c b/hypervisor/arch/x86/pm.c index
9af9362cc..abadb37ef 100644
--- a/hypervisor/arch/x86/pm.c
+++ b/hypervisor/arch/x86/pm.c
@@ -19,6 +19,7 @@
#include <asm/lapic.h>
#include <asm/tsc.h>
#include <delay.h>
+#include <asm/board.h>

struct cpu_context cpu_ctx;

@@ -271,3 +272,61 @@ void reset_host(void)
asm_pause();
}
}
+
+/*
+ * set the cpu's performance level range
+ * The performance level is not necessary CPU's frequency ratio.
+ * When using HWP, the levels represents the level used in HWP_REQUEST
+MSR, while using ACPI p-state, it represents
+ * the Px level 'x' descripted in ACPI _PSS table.
+ *
+ * For ACPI p-state, it does not have the hardware automatic frequency
+adjust ablility, it can only set a fixed
+ * frequency. So this function assumes that highest_lvl = lowest_lvl when
using ACPI p-state.
+ */
+static void cpu_freq_set_performance(uint8_t highest_lvl, uint8_t
+lowest_lvl) {
+ uint64_t reg;
+ pr_acrnlog("3");
+ if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_HWP) {
+ reg = (0x80UL << 24) | (0x00UL << 16) | (((uint64_t)highest_lvl)
<< 8) | ((uint64_t)lowest_lvl);
+ msr_write(MSR_IA32_HWP_REQUEST, reg);
+
+ } else if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_ACPI) {
+ struct cpu_state_info *pm_s_state_data =
get_cpu_pm_state_info();
+ if (highest_lvl < pm_s_state_data->px_cnt) {
+ reg = pm_s_state_data->px_data[highest_lvl].control;
+ msr_write(MSR_IA32_PERF_CTL, reg);
Base on this code, it is pretty sure we can do automatically rather than pre-configuration.

+ }
+ }
+}
+

+/* called by boot cpu at initializing phase */ void init_cpu_freq(void)
Now, you use Initializing phase...

+{
+ uint16_t pcpu_id;
+
+ for (pcpu_id = 0; pcpu_id < MAX_PCPU_NUM; pcpu_id++) {
+ per_cpu(cpufreq_policy, pcpu_id) =
&(cpufreq_policy_info[pcpu_id]);
+ }
If we do this inside cpu_freq_pcpu_online, it is much simpler.

+
+ if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_HWP) {
+ msr_write(MSR_IA32_PM_ENABLE, 1U);
+ }
+}
+
+/* called by pcpu, setting its frequency at start up */ void
Here use "start up"

+cpu_freq_pcpu_online(void) {
Here use online.

Please use consistent words.

+ struct acrn_cpufreq_policy *policy = get_cpu_var(cpufreq_policy);
+
+ /*
+ * Currently we have 2 governors:
+ * CPUFREQ_GOVERNOR_PERFORMANCE - CPU can run at up to its
max frequency
+ * CPUFREQ_GOVERNOR_NOMINAL - CPU locked at its
base/guaranteed frequency
+ * You can chose one from offline tool.
+ */
+ if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_PERFORMANCE && policy->available) {
+ cpu_freq_set_performance(policy->policy_highest_lvl, policy-
policy_lowest_lvl);
+ } else if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_NOMINAL && policy->available) {
+ cpu_freq_set_performance(policy->policy_guaranteed_lvl,
policy->policy_guaranteed_lvl);
+ }
For this governor_type configuration, I prefer to use ACRN boot parameter, to choose.
But, for the policy_highest_lvl/ policy_lowest_lvl/ policy_guaranteed_lvl, the offline tool can configure.

Junjie's comments?

+}
diff --git a/hypervisor/include/arch/x86/asm/board.h
b/hypervisor/include/arch/x86/asm/board.h
index 56bbeb9c8..f30457d25 100644
--- a/hypervisor/include/arch/x86/asm/board.h
+++ b/hypervisor/include/arch/x86/asm/board.h
@@ -34,6 +34,8 @@ extern struct rdt_type
res_cap_info[RDT_NUM_RESOURCES]; #endif

extern const struct cpu_state_table board_cpu_state_tbl;
+extern struct acrn_cpufreq_policy cpufreq_policy_info[MAX_PCPU_NUM];
+extern const struct acrn_cpufreq_info cpufreq_info;
extern const union pci_bdf plat_hidden_pdevs[MAX_HIDDEN_PDEVS_NUM];
extern const struct vmsix_on_msi_info
vmsix_on_msi_devs[MAX_VMSIX_ON_MSI_PDEVS_NUM];

diff --git a/hypervisor/include/arch/x86/asm/host_pm.h
b/hypervisor/include/arch/x86/asm/host_pm.h
index b8fb8a307..e71895579 100644
--- a/hypervisor/include/arch/x86/asm/host_pm.h
+++ b/hypervisor/include/arch/x86/asm/host_pm.h
@@ -39,5 +39,7 @@ extern void restore_s3_context(void); struct
cpu_state_info *get_cpu_pm_state_info(void); struct acpi_reset_reg
*get_host_reset_reg_data(void); void reset_host(void);
+void init_cpu_freq(void);
+void cpu_freq_pcpu_online(void);

#endif /* HOST_PM_H */
diff --git a/hypervisor/include/arch/x86/asm/per_cpu.h
b/hypervisor/include/arch/x86/asm/per_cpu.h
index 1c7c83d80..2cc7aa4b8 100644
--- a/hypervisor/include/arch/x86/asm/per_cpu.h
+++ b/hypervisor/include/arch/x86/asm/per_cpu.h
@@ -62,6 +62,7 @@ struct per_cpu_region {
uint64_t shutdown_vm_bitmap;
uint64_t tsc_suspend;
struct acrn_vcpu *whose_iwkey;
+ struct acrn_cpufreq_policy *cpufreq_policy;
/*
* We maintain a per-pCPU array of vCPUs. vCPUs of a VM won't
* share same pCPU. So the maximum possible # of vCPUs that can
--
2.25.1





Zhou, Wu
 

-----Original Message-----
From: Dong, Eddie <eddie.dong@...>
Sent: Wednesday, August 24, 2022 2:39 AM
To: acrn-dev@...
Cc: Zhou, Wu <wu.zhou@...>
Subject: RE: [acrn-dev] [PATCH 5/6] hv: add CPU frequency driver in hv



-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Zhou, Wu
Sent: Wednesday, August 10, 2022 2:25 AM
To: acrn-dev@...
Cc: Zhou, Wu <wu.zhou@...>
Subject: [acrn-dev] [PATCH 5/6] hv: add CPU frequency driver in hv

This patch will enable ACRN as the CPU frequency controllor.

The frequency driver uses governors to decide the frequency strategy.
I am not sure this is governor... To me, governor monitors the status of
"frequency", and do adjustment per different situation.
While here, we just initialize it or configure the platform.
The expression 'Governor' is borrowed from Linux cpufreq driver.
Indeed we don't govern the frequency in ACRN. We just make the policy
and then leave it to hardware.

Maybe change it to 'policymaker' ?


Currently we have two governors:
- Performance: CPU can runs at its max possible frequency(turbo
boost will be activated if enabled). If hardware performance states
(HWP) machanic is available, use its to automatically adjust frequency.
- Nominal: CPU runs at its base frequency.

The governor is chosed by user in configurator. Besides, user will
have to chose one of the two freuquency control interfaces:
- HWP
- ACPI p-state
I still don't understand why this cannot be done automatically by ACRN.
Of course, additional configuration simplifies ACRN code. Then the question
is how many LOC is simplified.
The HWP/p-state config item is for users who have preference to use
p-state over HWP.

Of course users who prefer p-state can turn off HWP in BIOS, then ACRN
will be forced to use p-state.
But some platforms don't provide BIOS options to turn off HWP.
So we added this config item to let ACRN know the user preference.


However, user preferring p-state is an assumption. I've had some offline
discussion with Junjie. There is no actual requirements yet. Then it is better
to let ACRN automatically decide HWP/p-state.




The governors use an abstract layer called frequency policy to decide
what the CPU's highest/lowest/base frequency is.
the frequency policy is a per pCPU data. It is generated by offline tools.

The frequency driver provides 2 APIs for the hypervisor to call.
- init_cpu_freq() called by boot CPU at start up to initialize the
frequency driver.
Is called by BSP at start up time...

- cpu_freq_pcpu_online() called by pCPUs when online. Setting CPU
Is called by each pCPU at start up time.

What is the difference of "start up" and "online" in your original description?
They are all called during pCPU initializing. I need to refine those words...


frequency as its governor demands.

After pCPU is online, the frequency driver would no longer adjust
value of its frequency registers. It is required for the VMs to give
up CPU frequency
Couldn't understand.

ACRN removes the frequency control capability from VMs.

control. This can be done by blocking VMs' eist/hwp cpuids and
freuqency control MSRs in hv.
This is achieved by removing xxx capability in CPUID leaf yy.


Alternatively, ACPI p-state can be passed through to VMs if they do
not share pCPUs(or interfere with other VM's CPU frequency).
Didn't understand what it means here.
Really need to make the commit message clear enough to tell what this patch
does.
It is about passing through p-state control to guests.

ACRN already has a p-state framework that can provide post-launched VMs with
p-state capability. If a post-launched VM is owning its pCPUs exclusively, we can
pass through those p-state control to it.

As ACRN's performance 'governor' only set up frequency during pCPU initializing,
we just have to:
- provide guest with EIST bit in CPUID.01H:ECX
- pass through MSR IA32_PERF_CTL
- let DM generate _PSS/_PPC in guest ACPI
The code is done in the patch 6/6. I shouldn't have mentioned it here.


However, passing through p-state would not work in WaaG(for windows don't run
frequency driver in virtualized environment). So there seems to be few value in it.
I think it is better to be removed from this patch series.



Signed-off-by: Wu Zhou <wu.zhou@...>
---
hypervisor/arch/x86/cpu.c | 5 ++
hypervisor/arch/x86/pm.c | 59 +++++++++++++++++++++++
hypervisor/include/arch/x86/asm/board.h | 2 +
hypervisor/include/arch/x86/asm/host_pm.h | 2 +
hypervisor/include/arch/x86/asm/per_cpu.h | 1 +
5 files changed, 69 insertions(+)

diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c
index
13ffb3f59..b668439c8 100644
--- a/hypervisor/arch/x86/cpu.c
+++ b/hypervisor/arch/x86/cpu.c
@@ -24,6 +24,7 @@
#include <version.h>
#include <asm/vmx.h>
#include <asm/msr.h>
+#include <asm/host_pm.h>
#include <ptdev.h>
#include <logmsg.h>
#include <asm/rdt.h>
@@ -156,6 +157,8 @@ void init_pcpu_pre(bool is_bsp)

load_pcpu_state_data();

+ init_cpu_freq();
+
init_e820();

/* reserve ppt buffer from e820 */
@@ -315,6 +318,8 @@ void init_pcpu_post(uint16_t pcpu_id)
panic("failed to initialize software SRAM!");
}

+ cpu_freq_pcpu_online();
+
init_sched(pcpu_id);

#ifdef CONFIG_RDT_ENABLED
diff --git a/hypervisor/arch/x86/pm.c b/hypervisor/arch/x86/pm.c index
9af9362cc..abadb37ef 100644
--- a/hypervisor/arch/x86/pm.c
+++ b/hypervisor/arch/x86/pm.c
@@ -19,6 +19,7 @@
#include <asm/lapic.h>
#include <asm/tsc.h>
#include <delay.h>
+#include <asm/board.h>

struct cpu_context cpu_ctx;

@@ -271,3 +272,61 @@ void reset_host(void)
asm_pause();
}
}
+
+/*
+ * set the cpu's performance level range
+ * The performance level is not necessary CPU's frequency ratio.
+ * When using HWP, the levels represents the level used in
+HWP_REQUEST MSR, while using ACPI p-state, it represents
+ * the Px level 'x' descripted in ACPI _PSS table.
+ *
+ * For ACPI p-state, it does not have the hardware automatic
+frequency adjust ablility, it can only set a fixed
+ * frequency. So this function assumes that highest_lvl = lowest_lvl
+when
using ACPI p-state.
+ */
+static void cpu_freq_set_performance(uint8_t highest_lvl, uint8_t
+lowest_lvl) {
+ uint64_t reg;
+ pr_acrnlog("3");
+ if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_HWP) {
+ reg = (0x80UL << 24) | (0x00UL << 16) |
(((uint64_t)highest_lvl)
<< 8) | ((uint64_t)lowest_lvl);
+ msr_write(MSR_IA32_HWP_REQUEST, reg);
+
+ } else if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_ACPI)
{
+ struct cpu_state_info *pm_s_state_data =
get_cpu_pm_state_info();
+ if (highest_lvl < pm_s_state_data->px_cnt) {
+ reg = pm_s_state_data-
px_data[highest_lvl].control;
+ msr_write(MSR_IA32_PERF_CTL, reg);
Base on this code, it is pretty sure we can do automatically rather than pre-
configuration.

+ }
+ }
+}
+

+/* called by boot cpu at initializing phase */ void
+init_cpu_freq(void)
Now, you use Initializing phase...

+{
+ uint16_t pcpu_id;
+
+ for (pcpu_id = 0; pcpu_id < MAX_PCPU_NUM; pcpu_id++) {
+ per_cpu(cpufreq_policy, pcpu_id) =
&(cpufreq_policy_info[pcpu_id]);
+ }
If we do this inside cpu_freq_pcpu_online, it is much simpler.

+
+ if (cpufreq_info.interface_type == CPUFREQ_INTERFACE_HWP) {
+ msr_write(MSR_IA32_PM_ENABLE, 1U);
+ }
+}
+
+/* called by pcpu, setting its frequency at start up */ void
Here use "start up"

+cpu_freq_pcpu_online(void) {
Here use online.

Please use consistent words.

+ struct acrn_cpufreq_policy *policy = get_cpu_var(cpufreq_policy);
+
+ /*
+ * Currently we have 2 governors:
+ * CPUFREQ_GOVERNOR_PERFORMANCE - CPU can run at up to its
max frequency
+ * CPUFREQ_GOVERNOR_NOMINAL - CPU locked at its
base/guaranteed frequency
+ * You can chose one from offline tool.
+ */
+ if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_PERFORMANCE && policy->available) {
+ cpu_freq_set_performance(policy->policy_highest_lvl,
policy-
policy_lowest_lvl);
+ } else if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_NOMINAL && policy->available) {
+ cpu_freq_set_performance(policy->policy_guaranteed_lvl,
policy->policy_guaranteed_lvl);
+ }
For this governor_type configuration, I prefer to use ACRN boot parameter,
to choose.
But, for the policy_highest_lvl/ policy_lowest_lvl/ policy_guaranteed_lvl,
the offline tool can configure.

Junjie's comments?

+}
diff --git a/hypervisor/include/arch/x86/asm/board.h
b/hypervisor/include/arch/x86/asm/board.h
index 56bbeb9c8..f30457d25 100644
--- a/hypervisor/include/arch/x86/asm/board.h
+++ b/hypervisor/include/arch/x86/asm/board.h
@@ -34,6 +34,8 @@ extern struct rdt_type
res_cap_info[RDT_NUM_RESOURCES]; #endif

extern const struct cpu_state_table board_cpu_state_tbl;
+extern struct acrn_cpufreq_policy
cpufreq_policy_info[MAX_PCPU_NUM];
+extern const struct acrn_cpufreq_info cpufreq_info;
extern const union pci_bdf
plat_hidden_pdevs[MAX_HIDDEN_PDEVS_NUM];
extern const struct vmsix_on_msi_info
vmsix_on_msi_devs[MAX_VMSIX_ON_MSI_PDEVS_NUM];

diff --git a/hypervisor/include/arch/x86/asm/host_pm.h
b/hypervisor/include/arch/x86/asm/host_pm.h
index b8fb8a307..e71895579 100644
--- a/hypervisor/include/arch/x86/asm/host_pm.h
+++ b/hypervisor/include/arch/x86/asm/host_pm.h
@@ -39,5 +39,7 @@ extern void restore_s3_context(void); struct
cpu_state_info *get_cpu_pm_state_info(void); struct acpi_reset_reg
*get_host_reset_reg_data(void); void reset_host(void);
+void init_cpu_freq(void);
+void cpu_freq_pcpu_online(void);

#endif /* HOST_PM_H */
diff --git a/hypervisor/include/arch/x86/asm/per_cpu.h
b/hypervisor/include/arch/x86/asm/per_cpu.h
index 1c7c83d80..2cc7aa4b8 100644
--- a/hypervisor/include/arch/x86/asm/per_cpu.h
+++ b/hypervisor/include/arch/x86/asm/per_cpu.h
@@ -62,6 +62,7 @@ struct per_cpu_region {
uint64_t shutdown_vm_bitmap;
uint64_t tsc_suspend;
struct acrn_vcpu *whose_iwkey;
+ struct acrn_cpufreq_policy *cpufreq_policy;
/*
* We maintain a per-pCPU array of vCPUs. vCPUs of a VM won't
* share same pCPU. So the maximum possible # of vCPUs that can
--
2.25.1





Junjie Mao
 

"Eddie Dong" <eddie.dong@...> writes:

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Zhou, Wu
Sent: Wednesday, August 10, 2022 2:25 AM
To: acrn-dev@...
Cc: Zhou, Wu <wu.zhou@...>
Subject: [acrn-dev] [PATCH 5/6] hv: add CPU frequency driver in hv

+ struct acrn_cpufreq_policy *policy = get_cpu_var(cpufreq_policy);
+
+ /*
+ * Currently we have 2 governors:
+ * CPUFREQ_GOVERNOR_PERFORMANCE - CPU can run at up to its
max frequency
+ * CPUFREQ_GOVERNOR_NOMINAL - CPU locked at its
base/guaranteed frequency
+ * You can chose one from offline tool.
+ */
+ if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_PERFORMANCE && policy->available) {
+ cpu_freq_set_performance(policy->policy_highest_lvl, policy-
policy_lowest_lvl);
+ } else if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_NOMINAL && policy->available) {
+ cpu_freq_set_performance(policy->policy_guaranteed_lvl,
policy->policy_guaranteed_lvl);
+ }
For this governor_type configuration, I prefer to use ACRN boot parameter, to choose.
But, for the policy_highest_lvl/ policy_lowest_lvl/ policy_guaranteed_lvl, the offline tool can configure.

Junjie's comments?
Those levels are hardware properties, with their meaning defined deep in
ACPI specs and their values reported via ACPI namespace / MSRs. For the
offline tool part, I would not recommend to expose those parameters to
users who are unlikely to know all those details. Instead it looks
natural to me to automatically extract such information from the target
board and tell the hypervisor via generated code, in order to reduce the
complexity of the hypervisor.

Having a boot parameter to specify the governor_type is definitely a
good idea. Meanwhile, I think users should still be able to choose the
governor when they configure ACRN using the GUI tool (which is the
unified interface for users to specify anything). Instead of generating
C code, the offline tools can generate the boot parameter for ACRN
according to users' choice.

In that case the hypervisor needs to define the default governor when
nothing is specified in the boot parameters (when, for example, a user
accidentially removed that in grub.cfg). It's totally up to our choice,
though.

--
Best Regards
Junjie Mao

+}
diff --git a/hypervisor/include/arch/x86/asm/board.h
b/hypervisor/include/arch/x86/asm/board.h
index 56bbeb9c8..f30457d25 100644
--- a/hypervisor/include/arch/x86/asm/board.h
+++ b/hypervisor/include/arch/x86/asm/board.h
@@ -34,6 +34,8 @@ extern struct rdt_type
res_cap_info[RDT_NUM_RESOURCES]; #endif

extern const struct cpu_state_table board_cpu_state_tbl;
+extern struct acrn_cpufreq_policy cpufreq_policy_info[MAX_PCPU_NUM];
+extern const struct acrn_cpufreq_info cpufreq_info;
extern const union pci_bdf plat_hidden_pdevs[MAX_HIDDEN_PDEVS_NUM];
extern const struct vmsix_on_msi_info
vmsix_on_msi_devs[MAX_VMSIX_ON_MSI_PDEVS_NUM];

diff --git a/hypervisor/include/arch/x86/asm/host_pm.h
b/hypervisor/include/arch/x86/asm/host_pm.h
index b8fb8a307..e71895579 100644
--- a/hypervisor/include/arch/x86/asm/host_pm.h
+++ b/hypervisor/include/arch/x86/asm/host_pm.h
@@ -39,5 +39,7 @@ extern void restore_s3_context(void); struct
cpu_state_info *get_cpu_pm_state_info(void); struct acpi_reset_reg
*get_host_reset_reg_data(void); void reset_host(void);
+void init_cpu_freq(void);
+void cpu_freq_pcpu_online(void);

#endif /* HOST_PM_H */
diff --git a/hypervisor/include/arch/x86/asm/per_cpu.h
b/hypervisor/include/arch/x86/asm/per_cpu.h
index 1c7c83d80..2cc7aa4b8 100644
--- a/hypervisor/include/arch/x86/asm/per_cpu.h
+++ b/hypervisor/include/arch/x86/asm/per_cpu.h
@@ -62,6 +62,7 @@ struct per_cpu_region {
uint64_t shutdown_vm_bitmap;
uint64_t tsc_suspend;
struct acrn_vcpu *whose_iwkey;
+ struct acrn_cpufreq_policy *cpufreq_policy;
/*
* We maintain a per-pCPU array of vCPUs. vCPUs of a VM won't
* share same pCPU. So the maximum possible # of vCPUs that can
--
2.25.1









Eddie Dong
 

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Junjie Mao
Sent: Thursday, August 25, 2022 7:42 AM
To: Dong, Eddie <eddie.dong@...>
Cc: acrn-dev@...; Zhou, Wu <wu.zhou@...>
Subject: Re: [acrn-dev] [PATCH 5/6] hv: add CPU frequency driver in hv

"Eddie Dong" <eddie.dong@...> writes:

-----Original Message-----
From: acrn-dev@... <acrn-dev@...>
On Behalf Of Zhou, Wu
Sent: Wednesday, August 10, 2022 2:25 AM
To: acrn-dev@...
Cc: Zhou, Wu <wu.zhou@...>
Subject: [acrn-dev] [PATCH 5/6] hv: add CPU frequency driver in hv

+ struct acrn_cpufreq_policy *policy = get_cpu_var(cpufreq_policy);
+
+ /*
+ * Currently we have 2 governors:
+ * CPUFREQ_GOVERNOR_PERFORMANCE - CPU can run at up to its
max frequency
+ * CPUFREQ_GOVERNOR_NOMINAL - CPU locked at its
base/guaranteed frequency
+ * You can chose one from offline tool.
+ */
+ if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_PERFORMANCE && policy->available) {
+ cpu_freq_set_performance(policy->policy_highest_lvl, policy-
policy_lowest_lvl);
+ } else if (cpufreq_info.governor_type ==
CPUFREQ_GOVERNOR_NOMINAL && policy->available) {
+ cpu_freq_set_performance(policy->policy_guaranteed_lvl,
policy->policy_guaranteed_lvl);
+ }
For this governor_type configuration, I prefer to use ACRN boot parameter,
to choose.
But, for the policy_highest_lvl/ policy_lowest_lvl/ policy_guaranteed_lvl, the
offline tool can configure.

Junjie's comments?
Those levels are hardware properties, with their meaning defined deep in ACPI
specs and their values reported via ACPI namespace / MSRs. For the offline
tool part, I would not recommend to expose those parameters to users who
are unlikely to know all those details. Instead it looks natural to me to
automatically extract such information from the target board and tell the
hypervisor via generated code, in order to reduce the complexity of the
hypervisor.

Having a boot parameter to specify the governor_type is definitely a good idea.
Meanwhile, I think users should still be able to choose the governor when they
configure ACRN using the GUI tool (which is the unified interface for users to
specify anything). Instead of generating C code, the offline tools can generate
the boot parameter for ACRN according to users' choice.

In that case the hypervisor needs to define the default governor when nothing
is specified in the boot parameters (when, for example, a user accidentially
removed that in grub.cfg). It's totally up to our choice, though.
OK, so you mean offline tool defines the default policy type, but hypervisor command line parameter can overwrite it.
That is good idea.

I expect to see a new version of patch series based on those comments.


--
Best Regards
Junjie Mao

+}
diff --git a/hypervisor/include/arch/x86/asm/board.h
b/hypervisor/include/arch/x86/asm/board.h
index 56bbeb9c8..f30457d25 100644
--- a/hypervisor/include/arch/x86/asm/board.h
+++ b/hypervisor/include/arch/x86/asm/board.h
@@ -34,6 +34,8 @@ extern struct rdt_type
res_cap_info[RDT_NUM_RESOURCES]; #endif

extern const struct cpu_state_table board_cpu_state_tbl;
+extern struct acrn_cpufreq_policy cpufreq_policy_info[MAX_PCPU_NUM];
+extern const struct acrn_cpufreq_info cpufreq_info;
extern const union pci_bdf
plat_hidden_pdevs[MAX_HIDDEN_PDEVS_NUM];
extern const struct vmsix_on_msi_info
vmsix_on_msi_devs[MAX_VMSIX_ON_MSI_PDEVS_NUM];

diff --git a/hypervisor/include/arch/x86/asm/host_pm.h
b/hypervisor/include/arch/x86/asm/host_pm.h
index b8fb8a307..e71895579 100644
--- a/hypervisor/include/arch/x86/asm/host_pm.h
+++ b/hypervisor/include/arch/x86/asm/host_pm.h
@@ -39,5 +39,7 @@ extern void restore_s3_context(void); struct
cpu_state_info *get_cpu_pm_state_info(void); struct acpi_reset_reg
*get_host_reset_reg_data(void); void reset_host(void);
+void init_cpu_freq(void);
+void cpu_freq_pcpu_online(void);

#endif /* HOST_PM_H */
diff --git a/hypervisor/include/arch/x86/asm/per_cpu.h
b/hypervisor/include/arch/x86/asm/per_cpu.h
index 1c7c83d80..2cc7aa4b8 100644
--- a/hypervisor/include/arch/x86/asm/per_cpu.h
+++ b/hypervisor/include/arch/x86/asm/per_cpu.h
@@ -62,6 +62,7 @@ struct per_cpu_region {
uint64_t shutdown_vm_bitmap;
uint64_t tsc_suspend;
struct acrn_vcpu *whose_iwkey;
+ struct acrn_cpufreq_policy *cpufreq_policy;
/*
* We maintain a per-pCPU array of vCPUs. vCPUs of a VM won't
* share same pCPU. So the maximum possible # of vCPUs that can
--
2.25.1