[PATCH V2 4/7] config-tools: generate CPU frequency limits


Zhou, Wu
 

This patch generates frequency limits for each CPU, as a set of data
structures in the hypervisor .c code.

With the frequency limits data, the hypervisor performance manager does
not have to deal with the CPU/board info. It just chooses the
highest/lowest/nominal performance level/p-state, and uses them to
construct the HWP_REQUEST/PERF_CTL register value.

How are frequency limits decided:
- For CPUs in standard VMs, frequency limits are just decided by
CPU/board info.
- For CPUs assigned to RTVMs, we want certainty in latency, so just
set its frequency to nominal/guaranteed by letting highest=lowest.
- In some cases, CPUs share their frequency at the hardware level
(e.g. ADL e-cores in groups of 4). This is described as _PSD in the ACPI
spec, or 'frequency domain' in the Linux cpufreq driver. Those CPUs'
frequencies are linked together. If one of them is running an RTVM,
all other CPUs in the domain should be set to the same frequency.

Signed-off-by: Wu Zhou <wu.zhou@...>
---
misc/config_tools/board_config/board_c.py | 28 +++
misc/config_tools/library/board_cfg_lib.py | 38 ++++
.../static_allocators/cpu_freq.py | 177 ++++++++++++++++++
3 files changed, 243 insertions(+)
create mode 100644 misc/config_tools/static_allocators/cpu_freq.py

diff --git a/misc/config_tools/board_config/board_c.py b/misc/config_tools/board_config/board_c.py
index c1ce14e29..b7c1f7fcf 100644
--- a/misc/config_tools/board_config/board_c.py
+++ b/misc/config_tools/board_config/board_c.py
@@ -455,6 +455,32 @@ def gen_known_caps_pci_devs(config):
if i == (bdf_list_len - 1):
print("};", file=config)

+def gen_cpufreq_limits(config):
+    """
+    Generate the cpufreq_limits[] table from the limits stored in allocation.xml
+    :param config: file object the generated C code is written to
+    """
+    # allocation.xml is looked up in the configs/ directory next to the scenario file.
+    # Joining path components (instead of appending "/configs/..." to a string) keeps the
+    # path relative when the scenario path has no directory part.
+    allocation_file = os.path.join(os.path.dirname(common.SCENARIO_INFO_FILE), "configs", "allocation.xml")
+    allocation_etree = lxml.etree.parse(allocation_file)
+    cpu_count = len(board_cfg_lib.get_processor_info())
+
+    # (C struct member, allocation.xml tag) pairs, in the order they are emitted.
+    limit_fields = (
+        ("guaranteed_hwp_lvl", "limit_guaranteed_lvl"),
+        ("highest_hwp_lvl", "limit_highest_lvl"),
+        ("lowest_hwp_lvl", "limit_lowest_lvl"),
+        ("nominal_pstate", "limit_nominal_pstate"),
+        ("highest_pstate", "limit_highest_pstate"),
+        ("lowest_pstate", "limit_lowest_pstate"),
+    )
+
+    print("\nstruct acrn_cpufreq_limits cpufreq_limits[MAX_PCPU_NUM] = {", file=config)
+    for cpu_id in range(cpu_count):
+        limit_node = common.get_node(f"//cpufreq/CPU[@id='{cpu_id}']/limits", allocation_etree)
+        if limit_node is None:
+            # NOTE(review): the array entries are positional, so skipping a CPU here shifts
+            # the entries of all following CPUs -- confirm every CPU always has limits.
+            continue
+
+        print("\t{", file=config)
+        for member, tag in limit_fields:
+            value = common.get_node(f"./{tag}/text()", limit_node)
+            print(f"\t\t.{member} = {value},", file=config)
+        print("\t},", file=config)
+    print("};", file=config)

def generate_file(config):
"""
@@ -485,4 +511,6 @@ def generate_file(config):
# gen known caps of pci dev info for platform
gen_known_caps_pci_devs(config)

+ gen_cpufreq_limits(config)
+
return err_dic
diff --git a/misc/config_tools/library/board_cfg_lib.py b/misc/config_tools/library/board_cfg_lib.py
index 953d2d7d0..cbe8a1247 100644
--- a/misc/config_tools/library/board_cfg_lib.py
+++ b/misc/config_tools/library/board_cfg_lib.py
@@ -349,6 +349,44 @@ def get_pci_info(board_info):

return (pci_desc, pci_bdf_vpid)

+def get_p_state_count():
+    """
+    Get cpu p-state count
+    :return: p-state count; 0 if no px_info found in board file
+    """
+    px_info = get_info(common.BOARD_INFO_FILE, "<PX_INFO>", "</PX_INFO>")
+    if px_info is None:
+        # len(None) would raise; no PX_INFO means there are no p-states.
+        return 0
+
+    # Only lines holding a "{...}" initializer are counted. Count them directly rather
+    # than removing the other lines from the list while iterating it, which skips the
+    # line following every removed one.
+    return sum(1 for line in px_info if re.search("{.*}", line) is not None)
+
+def get_p_state_index_from_ratio(ratio):
+    """
+    Get the index of the first p-state whose ratio is less than or equal to the given ratio
+    :param ratio: ratio to compare each p-state against
+    :return: p-state index; If no px_info found in board file, return 0;
+             if px_info is found but no p-state matches, return 1
+    """
+    px_info = get_info(common.BOARD_INFO_FILE, "<PX_INFO>", "</PX_INFO>")
+    if px_info is None:
+        return 0
+
+    # Keep only the lines holding a "{...}" initializer, so that the position in this
+    # list is the p-state index. Build a new list: removing items from px_info while
+    # iterating it skips the line following every removed one.
+    px_entries = [line for line in px_info if re.search("{.*}", line) is not None]
+
+    for index, line in enumerate(px_entries):
+        # The last hex field of the entry, i.e. the one closed by "UL}".
+        match = re.search(r"0x(\w*)UL}", line)
+        if match is None:
+            continue
+        # The ratio is the field value with its low 8 bits dropped.
+        state_ratio = int(match.group(1), 16) >> 8
+        if state_ratio <= ratio:
+            return index
+
+    # No entry is at or below the given ratio.
+    return 1
+
HI_MMIO_OFFSET = 0

class Bar_Mem:
diff --git a/misc/config_tools/static_allocators/cpu_freq.py b/misc/config_tools/static_allocators/cpu_freq.py
new file mode 100644
index 000000000..83be0bf0b
--- /dev/null
+++ b/misc/config_tools/static_allocators/cpu_freq.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2022 Intel Corporation.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+#
+
+import common, board_cfg_lib
+
+# CPU frequency dependency
+# Some CPU cores may share the same clock domain/group with others, which makes them always run at
+# the same frequency as the highest one in the group. This includes the following known conditions:
+# 1. CPUs in the clock domain described in ACPI _PSD.
+# Like _PSS, board_inspector extracts this data from the Linux cpufreq driver
+# (see the Linux document 'sysfs-devices-system-cpu' about freqdomain_cpus)
+# 2. CPU hyper threads sharing the same physical core.
+# The data is extracted from the apic id.
+# 3. E-cores residing in the same topological group.
+# The data is extracted from the CPU model type and the apic id.
+# CPU frequency dependency may have some impact on our frequency limits.
+def alloc_dependency(board_etree, scenario_etree, allocation_etree):
+    """
+    Create one /acrn-config/hv/cpufreq/CPU node per thread of the board, holding in
+    its freq_dependency child the space-separated ids of the CPUs it shares frequency with
+    :param board_etree: board XML tree the threads are read from
+    :param scenario_etree: scenario XML tree (not used by this allocator)
+    :param allocation_etree: allocation XML tree the CPU nodes are appended to
+    """
+    # Board-wide capability: look it up once instead of once per CPU.
+    is_hybrid = (len(board_etree.xpath("//processors//capability[@id='hybrid']")) != 0)
+
+    # Read each thread's id, apic id and core type once, rather than re-parsing them
+    # for every pair of CPUs.
+    threads = []
+    for cpu in board_etree.xpath("//processors//thread"):
+        threads.append((
+            cpu,
+            common.get_node("./cpu_id/text()", cpu),
+            # apic_id text is hexadecimal with a two-character prefix, which is dropped
+            int(common.get_node("./apic_id/text()", cpu)[2:], base=16),
+            common.get_node("./core_type/text()", cpu),
+        ))
+
+    for cpu, cpu_id, apic_id, core_type in threads:
+        freqdomain_cpus = common.get_node("./freqdomain_cpus/text()", cpu)
+        if freqdomain_cpus:
+            # split() without argument drops the empty ids a stray space would produce
+            psd_cpus = set(freqdomain_cpus.split())
+        else:
+            # No frequency domain reported for this thread: it only depends on itself
+            # and on the siblings found below.
+            psd_cpus = {cpu_id}
+
+        for _, other_cpu_id, other_apic_id, other_core_type in threads:
+            if other_cpu_id == cpu_id:
+                continue
+            # threads at same core
+            same_core = (apic_id & ~1) == (other_apic_id & ~1)
+            # e-cores in the same group
+            same_ecore_group = (is_hybrid and core_type == 'Atom' and other_core_type == 'Atom'
+                                and (apic_id & ~7) == (other_apic_id & ~7))
+            if same_core or same_ecore_group:
+                psd_cpus.add(other_cpu_id)
+
+        alloc_dep_node = common.append_node("/acrn-config/hv/cpufreq/CPU", None, allocation_etree, id=cpu_id)
+        common.append_node("./freq_dependency", " ".join(sorted(psd_cpus)), alloc_dep_node)
+
+# CPU frequency limits:
+#
+# Frequency limits are per-CPU data. The hypervisor uses this data to quickly decide what performance
+# level/p-state range it should apply.
+#
+# Those limits are decided by the hardware and the scenario config.
+#
+# When a CPU is assigned to an RTVM, we want to fix its frequency (to get more certainty
+# in latency). To do this, we just let highest_lvl = lowest_lvl.
+# Some CPU cores' frequencies may be linked to each other in a frequency domain or group (e.g. e-cores in a group).
+# In this condition, the RTVM's CPU frequency might be influenced by other VMs. So we fix all of them to the value of
+# the RTVM's CPU frequency.
+#
+# Both HWP and ACPI p-state are supported in ACRN CPU performance management. So here we generate two sets of
+# data:
+#
+# - 'limit_guaranteed_lvl', 'limit_highest_lvl' and 'limit_lowest_lvl' are for HWP. The values represent
+# the HWP performance levels used in IA32_HWP_CAPABILITIES and IA32_HWP_REQUEST.
+#
+# - 'limit_nominal_pstate', 'limit_highest_pstate' and 'limit_lowest_pstate' are for ACPI p-state.
+# Those values represent the performance state's index P(x).
+# ACPI p-state does not define a 'guaranteed p-state' or a 'base p-state'. Here the 'nominal p-state' refers
+# to the state whose frequency is closest to the max non-turbo frequency.
+def alloc_limits(board_etree, scenario_etree, allocation_etree):
+    """
+    Append a limits node (HWP levels and p-state indexes) to every //hv/cpufreq/CPU node,
+    then align the limits of the CPUs listed in each other's freq_dependency
+    :param board_etree: board XML tree holding the capabilities and per-thread HWP levels
+    :param scenario_etree: scenario XML tree the RTVM pCPU ids are read from
+    :param allocation_etree: allocation XML tree; its //hv/cpufreq/CPU nodes must already exist
+    """
+    cpu_has_eist = (len(board_etree.xpath("//processors//capability[@id='est']")) != 0)
+    cpu_has_hwp = (len(board_etree.xpath("//processors//capability[@id='hwp_supported']")) != 0)
+    cpu_has_turbo = (len(board_etree.xpath("//processors//capability[@id='turbo_boost_available']")) != 0)
+    rtvm_cpus = set(scenario_etree.xpath("//vm[vm_type = 'RTVM']//cpu_affinity//pcpu_id/text()"))
+
+    # The p-state table is board-wide, so read it once rather than once per CPU.
+    # Without EIST the count stays 0, which makes every p-state limit 0.
+    p_count = 0
+    none_turbo_p = 0
+    if cpu_has_eist:
+        mntr = int(board_etree.xpath("//processors//attribute[@id='max_none_turbo_ratio']/text()")[0], 10)
+        p_count = board_cfg_lib.get_p_state_count()
+        none_turbo_p = board_cfg_lib.get_p_state_index_from_ratio(mntr)
+
+    for cpu in board_etree.xpath("//processors//thread"):
+        cpu_id = common.get_node("./cpu_id/text()", cpu)
+        is_rtvm_cpu = cpu_id in rtvm_cpus
+
+        limit_guaranteed_lvl, limit_highest_lvl, limit_lowest_lvl = \
+            _hwp_limits(cpu, is_rtvm_cpu, cpu_has_hwp, cpu_has_turbo)
+        limit_nominal_pstate, limit_highest_pstate, limit_lowest_pstate = \
+            _pstate_limits(is_rtvm_cpu, cpu_has_turbo, p_count, none_turbo_p)
+
+        cpu_node = common.get_node(f"//hv/cpufreq/CPU[@id='{cpu_id}']", allocation_etree)
+        limit_node = common.append_node("./limits", None, cpu_node)
+        common.append_node("./limit_guaranteed_lvl", limit_guaranteed_lvl, limit_node)
+        common.append_node("./limit_highest_lvl", limit_highest_lvl, limit_node)
+        common.append_node("./limit_lowest_lvl", limit_lowest_lvl, limit_node)
+        common.append_node("./limit_nominal_pstate", str(limit_nominal_pstate), limit_node)
+        common.append_node("./limit_highest_pstate", str(limit_highest_pstate), limit_node)
+        common.append_node("./limit_lowest_pstate", str(limit_lowest_pstate), limit_node)
+
+    _propagate_dependency_limits(allocation_etree)
+
+def _hwp_limits(cpu, is_rtvm_cpu, cpu_has_hwp, cpu_has_turbo):
+    """
+    Return the (guaranteed, highest, lowest) HWP performance levels of one thread, as text
+    """
+    if not cpu_has_hwp:
+        # Defaults when HWP is not supported, converted to text like every other limit.
+        return (str(0xff), str(0xff), str(1))
+
+    guaranteed_performance_lvl = common.get_node("./guaranteed_performance_lvl/text()", cpu)
+    highest_performance_lvl = common.get_node("./highest_performance_lvl/text()", cpu)
+    lowest_performance_lvl = common.get_node("./lowest_performance_lvl/text()", cpu)
+
+    if is_rtvm_cpu:
+        # for CPUs in RTVM, fix to base performance
+        return (guaranteed_performance_lvl, guaranteed_performance_lvl, guaranteed_performance_lvl)
+    if cpu_has_turbo:
+        return (guaranteed_performance_lvl, highest_performance_lvl, lowest_performance_lvl)
+    # without turbo the guaranteed level is also the highest one
+    return (guaranteed_performance_lvl, guaranteed_performance_lvl, lowest_performance_lvl)
+
+def _pstate_limits(is_rtvm_cpu, cpu_has_turbo, p_count, none_turbo_p):
+    """
+    Return the (nominal, highest, lowest) p-state indexes of one CPU; P0 is the highest state
+    """
+    if p_count == 0:
+        return (0, 0, 0)
+
+    # nominal performance is the max none turbo frequency if turbo is on, P0 otherwise
+    nominal_pstate = none_turbo_p if cpu_has_turbo else 0
+    if is_rtvm_cpu:
+        # for CPUs in RTVM, fix to nominal performance
+        return (nominal_pstate, nominal_pstate, nominal_pstate)
+    return (nominal_pstate, 0, p_count - 1)
+
+def _propagate_dependency_limits(allocation_etree):
+    """
+    Let CPUs in the same frequency dependency group have the same limits, so that RTVM's frequency can be fixed
+    """
+    for alloc_cpu in allocation_etree.xpath("//cpufreq/CPU"):
+        if common.get_node("./limits", alloc_cpu) is None:
+            continue
+
+        dependency = common.get_node("./freq_dependency/text()", alloc_cpu)
+        # split() without argument drops the empty ids a stray space would produce
+        dependency_cpus = dependency.split() if dependency else []
+
+        highest_lvl = int(common.get_node(".//limit_highest_lvl/text()", alloc_cpu))
+        lowest_lvl = int(common.get_node(".//limit_lowest_lvl/text()", alloc_cpu))
+        highest_pstate = int(common.get_node(".//limit_highest_pstate/text()", alloc_cpu))
+        lowest_pstate = int(common.get_node(".//limit_lowest_pstate/text()", alloc_cpu))
+
+        for dep_cpu_id in dependency_cpus:
+            dep_cpu = f"//cpufreq/CPU[@id='{dep_cpu_id}']"
+            # Keep the narrowest range of the group: the smallest 'highest' and the
+            # biggest 'lowest'. P-state indexes grow as performance decreases, so the
+            # comparisons are reversed for them.
+            highest_lvl = min(highest_lvl, int(common.get_node(f"{dep_cpu}//limit_highest_lvl/text()", allocation_etree)))
+            lowest_lvl = max(lowest_lvl, int(common.get_node(f"{dep_cpu}//limit_lowest_lvl/text()", allocation_etree)))
+            highest_pstate = max(highest_pstate, int(common.get_node(f"{dep_cpu}//limit_highest_pstate/text()", allocation_etree)))
+            lowest_pstate = min(lowest_pstate, int(common.get_node(f"{dep_cpu}//limit_lowest_pstate/text()", allocation_etree)))
+
+        common.update_text("./limits/limit_highest_lvl", str(highest_lvl), alloc_cpu, True)
+        common.update_text("./limits/limit_lowest_lvl", str(lowest_lvl), alloc_cpu, True)
+        common.update_text("./limits/limit_highest_pstate", str(highest_pstate), alloc_cpu, True)
+        common.update_text("./limits/limit_lowest_pstate", str(lowest_pstate), alloc_cpu, True)
+
+def alloc_policy(board_etree, scenario_etree, allocation_etree):
+    """
+    Copy the CPU_PERFORMANCE_POLICY text of the scenario to //hv/CPU_PERFORMANCE_POLICY
+    in the allocation tree
+    """
+    common.append_node(
+        "//hv/CPU_PERFORMANCE_POLICY",
+        common.get_node("//CPU_PERFORMANCE_POLICY/text()", scenario_etree),
+        allocation_etree,
+    )
+
+def fn(board_etree, scenario_etree, allocation_etree):
+    """
+    Create the /acrn-config/hv/cpufreq node, then run the dependency, limits and
+    policy allocators on it
+    """
+    common.append_node("/acrn-config/hv/cpufreq", None, allocation_etree)
+    # Order matters: alloc_limits looks up the CPU nodes appended by alloc_dependency.
+    for allocate in (alloc_dependency, alloc_limits, alloc_policy):
+        allocate(board_etree, scenario_etree, allocation_etree)
--
2.25.1