
Re: [RFC PATCH v3 2/6] hv: sched: simple event implementation

Eddie Dong
 

Acked-by: Eddie Dong <eddie.dong@...>


Thx Eddie

-----Original Message-----
From: Liu, Shuo A <shuo.a.liu@...>
Sent: Thursday, January 2, 2020 11:49 AM
To: acrn-dev@...
Cc: Dong, Eddie <eddie.dong@...>; Liu, Shuo A <shuo.a.liu@...>
Subject: [RFC PATCH v3 2/6] hv: sched: simple event implementation

This simple event implementation supports only one exclusive waiter at a
time. It is mainly used by a thread that wants to wait for a specific event
to happen.
Thread A, which wants to wait for some event, calls
wait_event(struct sched_event *);

Thread B, which can signal the event, calls
signal_event(struct sched_event *);

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/common/schedule.c | 45 ++++++++++++++++++++++++++++++++++++
hypervisor/include/common/schedule.h | 11 +++++++++
2 files changed, 56 insertions(+)

diff --git a/hypervisor/common/schedule.c b/hypervisor/common/schedule.c
index 3ce6a41..2f72bc9 100644
--- a/hypervisor/common/schedule.c
+++ b/hypervisor/common/schedule.c
@@ -274,3 +274,48 @@ void run_thread(struct thread_object *obj)
obj->thread_entry(obj);
}
}
+
+void init_event(struct sched_event *event)
+{
+ spinlock_init(&event->lock);
+ event->done = 0UL;
+ event->waiting_thread = NULL;
+}
+
+void reset_event(struct sched_event *event)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&event->lock, &rflag);
+ event->done = 0UL;
+ event->waiting_thread = NULL;
+ spinlock_irqrestore_release(&event->lock, rflag);
+}
+
+/* support exclusive waiting only */
+void wait_event(struct sched_event *event)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&event->lock, &rflag);
+ ASSERT((event->waiting_thread == NULL), "only support exclusive waiting");
+ if (event->done == 0UL) {
+ event->waiting_thread = sched_get_current(get_pcpu_id());
+ sleep_thread(event->waiting_thread);
+ }
+ spinlock_irqrestore_release(&event->lock, rflag);
+}
+
+void signal_event(struct sched_event *event)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&event->lock, &rflag);
+ event->done++;
+ if (event->waiting_thread != NULL) {
+ wake_thread(event->waiting_thread);
+ event->done = 0UL;
+ event->waiting_thread = NULL;
+ }
+ spinlock_irqrestore_release(&event->lock, rflag);
+}
diff --git a/hypervisor/include/common/schedule.h b/hypervisor/include/common/schedule.h
index 1526865..630f130 100644
--- a/hypervisor/include/common/schedule.h
+++ b/hypervisor/include/common/schedule.h
@@ -90,6 +90,17 @@ struct sched_iorr_control {
struct hv_timer tick_timer;
};

+struct sched_event {
+ spinlock_t lock;
+ uint32_t done;
+ struct thread_object* waiting_thread;
+};
+
+void init_event(struct sched_event *event);
+void reset_event(struct sched_event *event);
+void wait_event(struct sched_event *event);
+void signal_event(struct sched_event *event);
+
bool is_idle_thread(const struct thread_object *obj);
uint16_t sched_get_pcpuid(const struct thread_object *obj);
struct thread_object *sched_get_current(uint16_t pcpu_id);
--
2.8.3


[PATCH 4/4] HV: Remove unused function send_dest_ipi_mask

Kaige Fu
 

There are no callers of send_dest_ipi_mask. This patch
removes it.

Signed-off-by: Kaige Fu <kaige.fu@...>
---
hypervisor/arch/x86/lapic.c | 22 ----------------------
hypervisor/include/arch/x86/lapic.h | 8 --------
2 files changed, 30 deletions(-)

diff --git a/hypervisor/arch/x86/lapic.c b/hypervisor/arch/x86/lapic.c
index cb8eb3f4..55c0d7ee 100644
--- a/hypervisor/arch/x86/lapic.c
+++ b/hypervisor/arch/x86/lapic.c
@@ -230,28 +230,6 @@ send_startup_ipi(enum intr_cpu_startup_shorthand cpu_startup_shorthand,
msr_write(MSR_IA32_EXT_APIC_ICR, icr.value);
}

-void send_dest_ipi_mask(uint32_t dest_mask, uint32_t vector)
-{
- union apic_icr icr;
- uint16_t pcpu_id;
- uint32_t mask = dest_mask;
-
- icr.value_32.lo_32 = vector | (INTR_LAPIC_ICR_PHYSICAL << 11U);
-
- pcpu_id = ffs64(mask);
-
- while (pcpu_id < MAX_PCPU_NUM) {
- bitmap32_clear_nolock(pcpu_id, &mask);
- if (is_pcpu_active(pcpu_id)) {
- icr.value_32.hi_32 = per_cpu(lapic_id, pcpu_id);
- msr_write(MSR_IA32_EXT_APIC_ICR, icr.value);
- } else {
- pr_err("pcpu_id %d not in active!", pcpu_id);
- }
- pcpu_id = ffs64(mask);
- }
-}
-
void send_single_ipi(uint16_t pcpu_id, uint32_t vector)
{
union apic_icr icr;
diff --git a/hypervisor/include/arch/x86/lapic.h b/hypervisor/include/arch/x86/lapic.h
index ca721d89..a7e65c8c 100644
--- a/hypervisor/include/arch/x86/lapic.h
+++ b/hypervisor/include/arch/x86/lapic.h
@@ -141,14 +141,6 @@ void send_startup_ipi(enum intr_cpu_startup_shorthand cpu_startup_shorthand,
uint16_t dest_pcpu_id,
uint64_t cpu_startup_start_address);

-/**
- * @brief Send an IPI to multiple pCPUs
- *
- * @param[in] dest_mask The mask of destination physical cpus
- * @param[in] vector The vector of interrupt
- */
-void send_dest_ipi_mask(uint32_t dest_mask, uint32_t vector);
-
/**
* @brief Send an IPI to a single pCPU
*
--
2.20.0


[PATCH 3/4] HV: Implement smp call for lapic_pt VMs using NMI

Kaige Fu
 

There are some functions which rely on smp call, such as vcpu_dumpreg.
This function is very useful when debugging guest hang issues, as it provides
the vCPU context at the point where the guest hangs.

So, this patch implements smp call for lapic_pt VMs using NMI to make these
functions work.
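
For illustration, a condensed sketch of the per-pCPU notification choice this
series ends up with. The helper below is hypothetical; it only restates the
branch that notify_cpus() gains in the diff, using the calls from this patch:

/* Illustration only: pick the kick mechanism for one target pCPU. */
static void notify_one_pcpu(uint16_t pcpu_id)
{
	struct acrn_vcpu *vcpu = get_running_vcpu(pcpu_id);

	if ((vcpu != NULL) && is_lapic_pt_enabled(vcpu)) {
		/* LAPIC is passed through to the guest, so kick the pCPU
		 * with an NMI; the NMI handlers then call exec_smp_call()
		 * on the target side. */
		send_single_nmi(pcpu_id);
	} else {
		/* normal guest: use the notification vector IPI */
		send_single_ipi(pcpu_id, VECTOR_NOTIFY_VCPU);
	}
}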

Signed-off-by: Kaige Fu <kaige.fu@...>
---
hypervisor/arch/x86/guest/virq.c | 6 ++++++
hypervisor/arch/x86/guest/vmexit.c | 6 ++++++
hypervisor/arch/x86/irq.c | 6 ++++++
hypervisor/arch/x86/notify.c | 10 +++++++++-
4 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/hypervisor/arch/x86/guest/virq.c b/hypervisor/arch/x86/guest/virq.c
index 1099d8da..7433af2d 100644
--- a/hypervisor/arch/x86/guest/virq.c
+++ b/hypervisor/arch/x86/guest/virq.c
@@ -213,6 +213,12 @@ int32_t vcpu_queue_exception(struct acrn_vcpu *vcpu, uint32_t vector_arg, uint32
* or not in order to support vNMI.
*/
pr_dbg("This NMI is used as notification signal. So ignore it.");
+
+ /*
+ * NMI is used to kick the target cpu out of VMX non-root mode when lapic
+ * is passthroughed to the guest. And it also serves for smp call.
+ */
+ exec_smp_call();
} else {
vcpu_make_request(vcpu, ACRN_REQUEST_EXCP);
}
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index ac73f4ca..8594e742 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -203,6 +203,12 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
* or not in order to support vNMI.
*/
pr_dbg("This NMI is used as notification signal. So ignore it.");
+
+ /*
+ * NMI is used to kick the target cpu out of VMX non-root mode when lapic
+ * is passthroughed to the guest. And it also serves for smp call.
+ */
+ exec_smp_call();
} else {
vcpu_make_request(vcpu, ACRN_REQUEST_NMI);
vcpu->arch.idt_vectoring_info = 0U;
diff --git a/hypervisor/arch/x86/irq.c b/hypervisor/arch/x86/irq.c
index b0ba0351..927d856c 100644
--- a/hypervisor/arch/x86/irq.c
+++ b/hypervisor/arch/x86/irq.c
@@ -415,6 +415,12 @@ void handle_nmi(__unused struct intr_excp_ctx *ctx)
value32 = exec_vmread32(VMX_PROC_VM_EXEC_CONTROLS);
value32 |= VMX_PROCBASED_CTLS_NMI_WINEXIT;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
+
+ /*
+ * NMI is used to kick the target cpu out of VMX non-root mode when lapic
+ * is passthroughed to the guest. And it also serves for smp call.
+ */
+ exec_smp_call();
}

static void init_irq_descs(void)
diff --git a/hypervisor/arch/x86/notify.c b/hypervisor/arch/x86/notify.c
index d67fdabd..3585667b 100644
--- a/hypervisor/arch/x86/notify.c
+++ b/hypervisor/arch/x86/notify.c
@@ -46,11 +46,19 @@ static void kick_notification(__unused uint32_t irq, __unused void *data)

static void notify_cpus(uint64_t mask)
{
+ struct acrn_vcpu *vcpu;
uint16_t pcpu_id = ffs64(mask);

while (pcpu_id < MAX_PCPU_NUM) {
bitmap_clear_nolock(pcpu_id, &mask);
- send_single_ipi(pcpu_id, VECTOR_NOTIFY_VCPU);
+
+ vcpu = get_running_vcpu(pcpu_id);
+ if ((vcpu != NULL) && is_lapic_pt_enabled(vcpu)) {
+ send_single_nmi(pcpu_id);
+ } else {
+ send_single_ipi(pcpu_id, VECTOR_NOTIFY_VCPU);
+ }
+
pcpu_id = ffs64(mask);
}
}
--
2.20.0


[PATCH 2/4] HV: Introduce new API exec_smp_call

Kaige Fu
 

This patch introduces a new API, exec_smp_call, to execute the smp call
function on the target CPU. Currently, it is only called by kick_notification;
a coming patch will also call it from the NMI notification path.
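
For illustration, a minimal caller-side sketch. The two example functions are
hypothetical; smp_call_function() and its callback are assumed to take a
void * payload, as the existing header declares:

/* Hypothetical callback: runs on each target pCPU from exec_smp_call(). */
static void dump_something(void *data)
{
	(void)data;
	pr_dbg("smp call ran on pcpu %u", (uint32_t)get_pcpu_id());
}

/* Hypothetical caller: queues the callback for each pCPU in pcpu_mask,
 * kicks the targets, and waits for them to finish. */
void example_caller(uint64_t pcpu_mask)
{
	smp_call_function(pcpu_mask, dump_something, NULL);
}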

Signed-off-by: Kaige Fu <kaige.fu@...>
---
hypervisor/arch/x86/notify.c | 25 ++++++++++++++++---------
hypervisor/include/arch/x86/irq.h | 1 +
2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/hypervisor/arch/x86/notify.c b/hypervisor/arch/x86/notify.c
index e5cb2000..d67fdabd 100644
--- a/hypervisor/arch/x86/notify.c
+++ b/hypervisor/arch/x86/notify.c
@@ -18,25 +18,32 @@ static uint32_t notification_irq = IRQ_INVALID;

static uint64_t smp_call_mask = 0UL;

-/* run in interrupt context */
-static void kick_notification(__unused uint32_t irq, __unused void *data)
+/**
+ * @brief Execute smp call function on this running physical cpu.
+ */
+void exec_smp_call(void)
{
- /* Notification vector is used to kick taget cpu out of non-root mode.
- * And it also serves for smp call.
- */
+
uint16_t pcpu_id = get_pcpu_id();

- if (bitmap_test(pcpu_id, &smp_call_mask)) {
- struct smp_call_info_data *smp_call =
- &per_cpu(smp_call_info, pcpu_id);
+ if (bitmap_test_and_clear_lock(pcpu_id, &smp_call_mask)) {
+ struct smp_call_info_data *smp_call = &per_cpu(smp_call_info, pcpu_id);

if (smp_call->func != NULL) {
smp_call->func(smp_call->data);
}
- bitmap_clear_lock(pcpu_id, &smp_call_mask);
}
}

+/* run in interrupt context */
+static void kick_notification(__unused uint32_t irq, __unused void *data)
+{
+ /* Notification vector is used to kick taget cpu out of non-root mode.
+ * And it also serves for smp call.
+ */
+ exec_smp_call();
+}
+
static void notify_cpus(uint64_t mask)
{
uint16_t pcpu_id = ffs64(mask);
diff --git a/hypervisor/include/arch/x86/irq.h b/hypervisor/include/arch/x86/irq.h
index 2c96e11e..7ecbe171 100644
--- a/hypervisor/include/arch/x86/irq.h
+++ b/hypervisor/include/arch/x86/irq.h
@@ -88,6 +88,7 @@ struct smp_call_info_data {
};

void smp_call_function(uint64_t mask, smp_call_func_t func, void *data);
+void exec_smp_call(void);
bool is_notification_nmi(const struct acrn_vm *vm);

void init_default_irqs(uint16_t cpu_id);
--
2.20.0


[PATCH 1/4] HV: Introduce a new local function notify_cpus

Kaige Fu
 

This patch introduces a new local function, notify_cpus, to replace
send_dest_ipi_mask. In a following patch, notify_cpus will check whether the
target cpu is running a lapic_pt enabled vCPU. If so, it will use an NMI to
notify the target vCPU; otherwise, it will use an IPI.

Signed-off-by: Kaige Fu <kaige.fu@...>
---
hypervisor/arch/x86/notify.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/hypervisor/arch/x86/notify.c b/hypervisor/arch/x86/notify.c
index c56bb3cf..e5cb2000 100644
--- a/hypervisor/arch/x86/notify.c
+++ b/hypervisor/arch/x86/notify.c
@@ -37,6 +37,17 @@ static void kick_notification(__unused uint32_t irq, __unused void *data)
}
}

+static void notify_cpus(uint64_t mask)
+{
+ uint16_t pcpu_id = ffs64(mask);
+
+ while (pcpu_id < MAX_PCPU_NUM) {
+ bitmap_clear_nolock(pcpu_id, &mask);
+ send_single_ipi(pcpu_id, VECTOR_NOTIFY_VCPU);
+ pcpu_id = ffs64(mask);
+ }
+}
+
void smp_call_function(uint64_t mask, smp_call_func_t func, void *data)
{
uint16_t pcpu_id;
@@ -58,7 +69,8 @@ void smp_call_function(uint64_t mask, smp_call_func_t func, void *data)
}
pcpu_id = ffs64(mask);
}
- send_dest_ipi_mask((uint32_t)smp_call_mask, VECTOR_NOTIFY_VCPU);
+
+ notify_cpus(smp_call_mask);
/* wait for current smp call complete */
wait_sync_change(&smp_call_mask, 0UL);
}
--
2.20.0


[PATCH 0/4] Implementation of smpcall for lapic_pt VMs using NMI

Kaige Fu
 

There are some functions which rely on smp call, such as vcpu_dumpreg.
This function is very useful when debugging guest hang issues, as it provides
the vCPU context at the point where the guest hangs.

So, this patchset implements smp call for lapic_pt VMs using NMI to make these
functions work.

Kaige Fu (4):
HV: Introduce a new local function notify_cpus
HV: Introduce new API exec_smp_call
HV: Implement smp call for lapic_pt VMs using NMI
HV: Remove unused function send_dest_ipi_mask

hypervisor/arch/x86/guest/virq.c | 6 ++++
hypervisor/arch/x86/guest/vmexit.c | 6 ++++
hypervisor/arch/x86/irq.c | 6 ++++
hypervisor/arch/x86/lapic.c | 22 ---------------
hypervisor/arch/x86/notify.c | 43 +++++++++++++++++++++++------
hypervisor/include/arch/x86/irq.h | 1 +
hypervisor/include/arch/x86/lapic.h | 8 ------
7 files changed, 54 insertions(+), 38 deletions(-)

--
2.20.0


[PATCH 2/2] DM: Reset the INTx pinstate when configure lintr route

Kaige Fu
 

DM ACPI provides the guest with 'active low' for INTx polarity. So, the initial
pin state should be 'HIGH' to indicate that no interrupt is asserted.

Signed-off-by: Kaige Fu <kaige.fu@...>
---
devicemodel/hw/pci/core.c | 1 +
devicemodel/hw/pci/irq.c | 9 +++++++++
devicemodel/include/irq.h | 1 +
3 files changed, 11 insertions(+)

diff --git a/devicemodel/hw/pci/core.c b/devicemodel/hw/pci/core.c
index e2545163..35c9e00d 100644
--- a/devicemodel/hw/pci/core.c
+++ b/devicemodel/hw/pci/core.c
@@ -2000,6 +2000,7 @@ pci_lintr_route(struct pci_vdev *dev)
dev->lintr.ioapic_irq = ii->ii_ioapic_irq;
dev->lintr.pirq_pin = ii->ii_pirq_pin;
pci_set_cfgdata8(dev, PCIR_INTLINE, pirq_irq(ii->ii_pirq_pin));
+ pci_reset_pinstate(dev);
}

/**
diff --git a/devicemodel/hw/pci/irq.c b/devicemodel/hw/pci/irq.c
index 0ebcbe6d..ae558c6e 100644
--- a/devicemodel/hw/pci/irq.c
+++ b/devicemodel/hw/pci/irq.c
@@ -144,6 +144,15 @@ void pci_irq_deinit(struct vmctx *ctx)
pirq_cold = 1;
}

+void pci_reset_pinstate(struct pci_vdev *dev)
+{
+ /*
+ * DM ACPI provides the guest with 'active low' for INTx polarity. So, the initial
+ * pin state should be 'HIGH' to indicate that there is no interrupts asserted.
+ */
+ vm_set_gsi_irq(dev->vmctx, dev->lintr.ioapic_irq, GSI_SET_HIGH);
+}
+
void
pci_irq_assert(struct pci_vdev *dev)
{
diff --git a/devicemodel/include/irq.h b/devicemodel/include/irq.h
index aa644868..8a128b38 100644
--- a/devicemodel/include/irq.h
+++ b/devicemodel/include/irq.h
@@ -36,6 +36,7 @@ void pci_irq_assert(struct pci_vdev *pi);
void pci_irq_deassert(struct pci_vdev *pi);
void pci_irq_init(struct vmctx *ctx);
void pci_irq_deinit(struct vmctx *ctx);
+void pci_reset_pinstate(struct pci_vdev *pi);
void pci_irq_reserve(int irq);
void pci_irq_use(int irq);
int pirq_alloc_pin(struct pci_vdev *pi);
--
2.20.0


[PATCH 1/2] DM: Fix pci_irq_assert/deassert

Kaige Fu
 

DM ACPI provides the guest with 'active low' for INTx polarity.
So, we should set 'LOW' to assert an interrupt and 'HIGH' to
deassert it.
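
A minimal usage sketch of the convention (the surrounding function is
hypothetical; the GSI levels are the ones set by this patch):

/* With active-low INTx polarity, asserting drives the GSI low and
 * deasserting returns it high. */
void example_intx_pulse(struct pci_vdev *dev)
{
	pci_irq_assert(dev);    /* -> vm_set_gsi_irq(..., GSI_SET_LOW)  */
	/* ... guest services the interrupt ... */
	pci_irq_deassert(dev);  /* -> vm_set_gsi_irq(..., GSI_SET_HIGH) */
}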

Signed-off-by: Shuo Liu <shuo.a.liu@...>
Signed-off-by: Kaige Fu <kaige.fu@...>
---
devicemodel/hw/pci/irq.c | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/devicemodel/hw/pci/irq.c b/devicemodel/hw/pci/irq.c
index 9d4ea323..0ebcbe6d 100644
--- a/devicemodel/hw/pci/irq.c
+++ b/devicemodel/hw/pci/irq.c
@@ -147,13 +147,21 @@ void pci_irq_deinit(struct vmctx *ctx)
void
pci_irq_assert(struct pci_vdev *dev)
{
- vm_set_gsi_irq(dev->vmctx, dev->lintr.ioapic_irq, GSI_SET_HIGH);
+ /*
+ * DM ACPI provides the guest with 'active low' for INTx polarity.
+ * Set 'LOW' to assert the interrupt.
+ */
+ vm_set_gsi_irq(dev->vmctx, dev->lintr.ioapic_irq, GSI_SET_LOW);
}

void
pci_irq_deassert(struct pci_vdev *dev)
{
- vm_set_gsi_irq(dev->vmctx, dev->lintr.ioapic_irq, GSI_SET_LOW);
+ /*
+ * DM ACPI provides the guest with 'active low' for INTx polarity.
+ * Set 'HIGH' to deassert the interrupt.
+ */
+ vm_set_gsi_irq(dev->vmctx, dev->lintr.ioapic_irq, GSI_SET_HIGH);
}

int
--
2.20.0


[PATCH 0/2] vIOAPIC INTx fix

Kaige Fu
 

The vIOAPIC pin state should be reset to its initial state, according to the
pin polarity, when the lintr route is configured. Currently, the ACRN device
model provides the guest with 'active low' polarity. This patchset resets the
INTx pin state to 'HIGH' when configuring the lintr route and fixes
pci_irq_assert/deassert.

Kaige Fu (2):
DM: Fix pci_irq_assert/deassert
DM: Reset the INTx pinstate when configure lintr route

devicemodel/hw/pci/core.c | 1 +
devicemodel/hw/pci/irq.c | 21 +++++++++++++++++++--
devicemodel/include/irq.h | 1 +
3 files changed, 21 insertions(+), 2 deletions(-)

--
2.20.0


[RFC PATCH v3 6/6] hv: Use HLT as the default idle action of service OS

Shuo A Liu
 

This patch overrides the idle driver of the service OS for the industry, sdc,
and sdc2 scenarios. HLT will be used as the default idle action.

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/scenarios/industry/vm_configurations.h | 2 +-
hypervisor/scenarios/sdc/vm_configurations.h | 2 +-
hypervisor/scenarios/sdc2/vm_configurations.h | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hypervisor/scenarios/industry/vm_configurations.h b/hypervisor/scenarios/industry/vm_configurations.h
index 482801a..633dab4 100644
--- a/hypervisor/scenarios/industry/vm_configurations.h
+++ b/hypervisor/scenarios/industry/vm_configurations.h
@@ -25,7 +25,7 @@
"i915.nuclear_pageflip=1 " \
"i915.avail_planes_per_pipe=0x01010F " \
"i915.domain_plane_owners=0x011111110000 " \
- "i915.enable_gvt=1 " \
+ "i915.enable_gvt=1 idle=halt " \
SOS_BOOTARGS_DIFF

#define VM1_CONFIG_VCPU_AFFINITY {AFFINITY_CPU(1U)}
diff --git a/hypervisor/scenarios/sdc/vm_configurations.h b/hypervisor/scenarios/sdc/vm_configurations.h
index 436e931..2814259 100644
--- a/hypervisor/scenarios/sdc/vm_configurations.h
+++ b/hypervisor/scenarios/sdc/vm_configurations.h
@@ -25,7 +25,7 @@
"i915.nuclear_pageflip=1 " \
"i915.avail_planes_per_pipe=0x01010F " \
"i915.domain_plane_owners=0x011111110000 " \
- "i915.enable_gvt=1 " \
+ "i915.enable_gvt=1 idle=halt " \
SOS_BOOTARGS_DIFF

#if CONFIG_MAX_KATA_VM_NUM > 0
diff --git a/hypervisor/scenarios/sdc2/vm_configurations.h b/hypervisor/scenarios/sdc2/vm_configurations.h
index 96e069d..5701a41 100644
--- a/hypervisor/scenarios/sdc2/vm_configurations.h
+++ b/hypervisor/scenarios/sdc2/vm_configurations.h
@@ -25,7 +25,7 @@
"i915.nuclear_pageflip=1 " \
"i915.avail_planes_per_pipe=0x01010F " \
"i915.domain_plane_owners=0x011111110000 " \
- "i915.enable_gvt=1 " \
+ "i915.enable_gvt=1 idle=halt " \
SOS_BOOTARGS_DIFF

#define VM1_CONFIG_VCPU_AFFINITY {AFFINITY_CPU(1U)}
--
2.8.3


[RFC PATCH v3 5/6] hv: HLT emulation in hypervisor

Shuo A Liu
 

HLT emulation is important for maximizing CPU resource utilization. A vcpu
executing HLT is idle and can give up its pCPU proactively. Thus, we put the
vcpu thread to sleep during HLT emulation and resume it when an event happens.

When a vcpu enters HLT, its vcpu thread sleeps, but the vcpu state is
still 'Running'.

VM ID    PCPU ID    VCPU ID    VCPU ROLE    VCPU STATE
=====    =======    =======    =========    ==========
    0          0          0    PRIMARY      Running
    0          1          1    SECONDARY    Running
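
Condensed, the wait/signal pairing added by this patch is (taken from the
diff below, gathered in one place only for readability):

/* hlt_vmexit_handler(): nothing pending, so block on the vcpu's
 * virtual-interrupt event instead of spinning */
if ((vcpu->arch.pending_req == 0UL) && (!vlapic_has_pending_intr(vcpu))) {
	wait_event(&vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);
}

/* vlapic_accept_intr(): an interrupt arrives, wake the halted vcpu */
signal_event(&vlapic->vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);

/* vcpu_thread(): re-arm the event before each VM entry */
reset_event(&vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);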

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/arch/x86/guest/vlapic.c | 2 ++
hypervisor/arch/x86/guest/vmcs.c | 2 +-
hypervisor/arch/x86/guest/vmexit.c | 11 ++++++++++-
hypervisor/common/hv_main.c | 1 +
4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vlapic.c b/hypervisor/arch/x86/guest/vlapic.c
index 1c7bdfb..a1c5b58 100644
--- a/hypervisor/arch/x86/guest/vlapic.c
+++ b/hypervisor/arch/x86/guest/vlapic.c
@@ -556,6 +556,7 @@ static void apicv_basic_accept_intr(struct acrn_vlapic *vlapic, uint32_t vector,

static void apicv_advanced_accept_intr(struct acrn_vlapic *vlapic, uint32_t vector, bool level)
{
+
/* update TMR if interrupt trigger mode has changed */
vlapic_set_tmr(vlapic, vector, level);

@@ -590,6 +591,7 @@ static void vlapic_accept_intr(struct acrn_vlapic *vlapic, uint32_t vector, bool
if ((lapic->svr.v & APIC_SVR_ENABLE) == 0U) {
dev_dbg(ACRN_DBG_LAPIC, "vlapic is software disabled, ignoring interrupt %u", vector);
} else {
+ signal_event(&vlapic->vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);
vlapic->ops->accept_intr(vlapic, vector, level);
}
}
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index 33be74f..fb0ba76 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -272,7 +272,7 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 = check_vmx_ctrl(MSR_IA32_VMX_PROCBASED_CTLS,
VMX_PROCBASED_CTLS_TSC_OFF | VMX_PROCBASED_CTLS_TPR_SHADOW |
VMX_PROCBASED_CTLS_IO_BITMAP | VMX_PROCBASED_CTLS_MSR_BITMAP |
- VMX_PROCBASED_CTLS_SECONDARY);
+ VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_SECONDARY);

/*Disable VM_EXIT for CR3 access*/
value32 &= ~(VMX_PROCBASED_CTLS_CR3_LOAD | VMX_PROCBASED_CTLS_CR3_STORE);
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 4fb0478..f7bdb44 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -31,6 +31,7 @@ static int32_t xsetbv_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t wbinvd_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t undefined_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t pause_vmexit_handler(struct acrn_vcpu *vcpu);
+static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
@@ -59,7 +60,7 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_GETSEC] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_HLT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = hlt_vmexit_handler},
[VMX_EXIT_REASON_INVD] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_INVLPG] = {
@@ -284,6 +285,14 @@ static int32_t pause_vmexit_handler(__unused struct acrn_vcpu *vcpu)
return 0;
}

+static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu)
+{
+ if ((vcpu->arch.pending_req == 0UL) && (!vlapic_has_pending_intr(vcpu))) {
+ wait_event(&vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);
+ }
+ return 0;
+}
+
int32_t cpuid_vmexit_handler(struct acrn_vcpu *vcpu)
{
uint64_t rax, rbx, rcx, rdx;
diff --git a/hypervisor/common/hv_main.c b/hypervisor/common/hv_main.c
index e949149..41ecc9c 100644
--- a/hypervisor/common/hv_main.c
+++ b/hypervisor/common/hv_main.c
@@ -40,6 +40,7 @@ void vcpu_thread(struct thread_object *obj)
continue;
}

+ reset_event(&vcpu->events[VCPU_EVENT_VIRTUAL_INTERRUPT]);
profiling_vmenter_handler(vcpu);

TRACE_2L(TRACE_VM_ENTER, 0UL, 0UL);
--
2.8.3


[RFC PATCH v3 4/6] hv: Add vlapic_has_pending_intr of apicv to check pending interrupts

Shuo A Liu
 

Sometimes the HV wants to know whether a vcpu has pending interrupts.
Add a .has_pending_intr interface to acrn_apicv_ops and return the pending
interrupt status by checking the IRRs of apicv.

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/arch/x86/guest/vlapic.c | 29 +++++++++++++++++++++++++++++
hypervisor/include/arch/x86/guest/vlapic.h | 2 ++
2 files changed, 31 insertions(+)

diff --git a/hypervisor/arch/x86/guest/vlapic.c b/hypervisor/arch/x86/guest/vlapic.c
index 8c79a4d..1c7bdfb 100644
--- a/hypervisor/arch/x86/guest/vlapic.c
+++ b/hypervisor/arch/x86/guest/vlapic.c
@@ -1745,6 +1745,11 @@ static bool ptapic_has_pending_delivery_intr(__unused struct acrn_vcpu *vcpu)
return false;
}

+static bool ptapic_has_pending_intr(__unused struct acrn_vcpu *vcpu)
+{
+ return false;
+}
+
static bool ptapic_invalid(__unused uint32_t offset)
{
return false;
@@ -1754,6 +1759,7 @@ static const struct acrn_apicv_ops ptapic_ops = {
.accept_intr = ptapic_accept_intr,
.inject_intr = ptapic_inject_intr,
.has_pending_delivery_intr = ptapic_has_pending_delivery_intr,
+ .has_pending_intr = ptapic_has_pending_intr,
.apic_read_access_may_valid = ptapic_invalid,
.apic_write_access_may_valid = ptapic_invalid,
.x2apic_read_msr_may_valid = ptapic_invalid,
@@ -2379,6 +2385,27 @@ bool vlapic_has_pending_delivery_intr(struct acrn_vcpu *vcpu)
return vlapic->ops->has_pending_delivery_intr(vcpu);
}

+static bool apicv_basic_has_pending_intr(struct acrn_vcpu *vcpu)
+{
+ struct acrn_vlapic *vlapic = vcpu_vlapic(vcpu);
+ uint32_t vector;
+
+ vector = vlapic_find_highest_irr(vlapic);
+
+ return vector != 0UL;
+}
+
+static bool apicv_advanced_has_pending_intr(struct acrn_vcpu *vcpu)
+{
+ return apicv_basic_has_pending_intr(vcpu);
+}
+
+bool vlapic_has_pending_intr(struct acrn_vcpu *vcpu)
+{
+ struct acrn_vlapic *vlapic = vcpu_vlapic(vcpu);
+ return vlapic->ops->has_pending_intr(vcpu);
+}
+
static bool apicv_basic_apic_read_access_may_valid(__unused uint32_t offset)
{
return true;
@@ -2592,6 +2619,7 @@ static const struct acrn_apicv_ops apicv_basic_ops = {
.accept_intr = apicv_basic_accept_intr,
.inject_intr = apicv_basic_inject_intr,
.has_pending_delivery_intr = apicv_basic_has_pending_delivery_intr,
+ .has_pending_intr = apicv_basic_has_pending_intr,
.apic_read_access_may_valid = apicv_basic_apic_read_access_may_valid,
.apic_write_access_may_valid = apicv_basic_apic_write_access_may_valid,
.x2apic_read_msr_may_valid = apicv_basic_x2apic_read_msr_may_valid,
@@ -2602,6 +2630,7 @@ static const struct acrn_apicv_ops apicv_advanced_ops = {
.accept_intr = apicv_advanced_accept_intr,
.inject_intr = apicv_advanced_inject_intr,
.has_pending_delivery_intr = apicv_advanced_has_pending_delivery_intr,
+ .has_pending_intr = apicv_advanced_has_pending_intr,
.apic_read_access_may_valid = apicv_advanced_apic_read_access_may_valid,
.apic_write_access_may_valid = apicv_advanced_apic_write_access_may_valid,
.x2apic_read_msr_may_valid = apicv_advanced_x2apic_read_msr_may_valid,
diff --git a/hypervisor/include/arch/x86/guest/vlapic.h b/hypervisor/include/arch/x86/guest/vlapic.h
index f271a4e..8feea92 100644
--- a/hypervisor/include/arch/x86/guest/vlapic.h
+++ b/hypervisor/include/arch/x86/guest/vlapic.h
@@ -100,6 +100,7 @@ struct acrn_apicv_ops {
void (*accept_intr)(struct acrn_vlapic *vlapic, uint32_t vector, bool level);
bool (*inject_intr)(struct acrn_vlapic *vlapic, bool guest_irq_enabled, bool injected);
bool (*has_pending_delivery_intr)(struct acrn_vcpu *vcpu);
+ bool (*has_pending_intr)(struct acrn_vcpu *vcpu);
bool (*apic_read_access_may_valid)(uint32_t offset);
bool (*apic_write_access_may_valid)(uint32_t offset);
bool (*x2apic_read_msr_may_valid)(uint32_t offset);
@@ -118,6 +119,7 @@ void vlapic_set_apicv_ops(void);

bool vlapic_inject_intr(struct acrn_vlapic *vlapic, bool guest_irq_enabled, bool injected);
bool vlapic_has_pending_delivery_intr(struct acrn_vcpu *vcpu);
+bool vlapic_has_pending_intr(struct acrn_vcpu *vcpu);

/**
* @brief Get physical address to PIR description.
--
2.8.3


[RFC PATCH v3 3/6] hv: vcpu: wait and signal vcpu event support

Shuo A Liu
 

Introduce two kinds of events for each vcpu:
VCPU_EVENT_IOREQ: for a vcpu waiting for IO request completion
VCPU_EVENT_VIRTUAL_INTERRUPT: for a vcpu waiting for virtual interrupt events
A vcpu can wait for such events and resume running when the event gets
signalled.

This patch also changes IO request waiting/notifying to this mechanism.
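
Condensed, the new IO-request flow is (both calls are taken from the diff
below; error handling omitted):

/* acrn_insert_request(): after posting the request to VHM, block the vcpu
 * thread on its IOREQ event instead of pausing the vcpu */
wait_event(&vcpu->events[VCPU_EVENT_IOREQ]);

/* hcall_notify_ioreq_finish(): the service OS signals the same event once
 * the request is handled, which wakes the vcpu thread */
signal_event(&vcpu->events[VCPU_EVENT_IOREQ]);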

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/arch/x86/guest/vcpu.c | 5 ++++-
hypervisor/common/hypercall.c | 8 +++-----
hypervisor/dm/io_req.c | 15 ++++-----------
hypervisor/include/arch/x86/guest/vcpu.h | 7 +++++++
4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vcpu.c b/hypervisor/arch/x86/guest/vcpu.c
index ea6d0f4..6d0ecc5 100644
--- a/hypervisor/arch/x86/guest/vcpu.c
+++ b/hypervisor/arch/x86/guest/vcpu.c
@@ -795,7 +795,7 @@ void launch_vcpu(struct acrn_vcpu *vcpu)
/* help function for vcpu create */
int32_t prepare_vcpu(struct acrn_vm *vm, uint16_t pcpu_id)
{
- int32_t ret;
+ int32_t ret, i;
struct acrn_vcpu *vcpu = NULL;
char thread_name[16];

@@ -811,6 +811,9 @@ int32_t prepare_vcpu(struct acrn_vm *vm, uint16_t pcpu_id)
vcpu->thread_obj.switch_out = context_switch_out;
vcpu->thread_obj.switch_in = context_switch_in;
init_thread_data(&vcpu->thread_obj);
+ for (i = 0; i < VCPU_EVENT_NUM; i++) {
+ init_event(&vcpu->events[i]);
+ }
}

return ret;
diff --git a/hypervisor/common/hypercall.c b/hypervisor/common/hypercall.c
index c114040..394f405 100644
--- a/hypervisor/common/hypercall.c
+++ b/hypervisor/common/hypercall.c
@@ -553,12 +553,10 @@ int32_t hcall_notify_ioreq_finish(uint16_t vmid, uint16_t vcpu_id)
__func__, vcpu_id, target_vm->vm_id);
} else {
vcpu = vcpu_from_vid(target_vm, vcpu_id);
- if (vcpu->state == VCPU_PAUSED) {
- if (!vcpu->vm->sw.is_completion_polling) {
- resume_vcpu(vcpu);
- }
- ret = 0;
+ if (!vcpu->vm->sw.is_completion_polling) {
+ signal_event(&vcpu->events[VCPU_EVENT_IOREQ]);
}
+ ret = 0;
}
}

diff --git a/hypervisor/dm/io_req.c b/hypervisor/dm/io_req.c
index d0ce332..daee03d 100644
--- a/hypervisor/dm/io_req.c
+++ b/hypervisor/dm/io_req.c
@@ -103,14 +103,6 @@ int32_t acrn_insert_request(struct acrn_vcpu *vcpu, const struct io_request *io_
}
clac();

- /* pause vcpu in notification mode , wait for VHM to handle the MMIO request.
- * TODO: when pause_vcpu changed to switch vcpu out directlly, we
- * should fix the race issue between req.processed update and vcpu pause
- */
- if (!is_polling) {
- pause_vcpu(vcpu, VCPU_PAUSED);
- }
-
/* Before updating the vhm_req state, enforce all fill vhm_req operations done */
cpu_write_memory_barrier();

@@ -136,10 +128,11 @@ int32_t acrn_insert_request(struct acrn_vcpu *vcpu, const struct io_request *io_
schedule();
}
}
- } else if (need_reschedule(pcpuid_from_vcpu(vcpu))) {
- schedule();
} else {
- ret = -EINVAL;
+ wait_event(&vcpu->events[VCPU_EVENT_IOREQ]);
+ if (need_reschedule(pcpuid_from_vcpu(vcpu))) {
+ schedule();
+ }
}
} else {
ret = -EINVAL;
diff --git a/hypervisor/include/arch/x86/guest/vcpu.h b/hypervisor/include/arch/x86/guest/vcpu.h
index bfee6e4..827ee6f 100644
--- a/hypervisor/include/arch/x86/guest/vcpu.h
+++ b/hypervisor/include/arch/x86/guest/vcpu.h
@@ -146,6 +146,11 @@ enum vm_cpu_mode {
CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
};

+enum vcpu_event_type {
+ VCPU_EVENT_IOREQ,
+ VCPU_EVENT_VIRTUAL_INTERRUPT,
+ VCPU_EVENT_NUM
+};

/* 2 worlds: 0 for Normal World, 1 for Secure World */
#define NR_WORLD 2
@@ -260,6 +265,8 @@ struct acrn_vcpu {

uint64_t reg_cached;
uint64_t reg_updated;
+
+ struct sched_event events[VCPU_EVENT_NUM];
} __aligned(PAGE_SIZE);

struct vcpu_dump {
--
2.8.3


[RFC PATCH v3 2/6] hv: sched: simple event implementation

Shuo A Liu
 

This simple event implementation supports only one exclusive waiter
at a time. It is mainly used by a thread that wants to wait for a specific
event to happen.
Thread A, which wants to wait for some event, calls
wait_event(struct sched_event *);

Thread B, which can signal the event, calls
signal_event(struct sched_event *);
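
For illustration, a minimal usage sketch (the event variable and thread roles
are hypothetical; wait_event() sleeps the calling thread, so it must run in a
schedulable thread context):

static struct sched_event example_evt;   /* hypothetical shared event */

init_event(&example_evt);        /* one-time setup */

/* Thread A (the single allowed waiter) */
wait_event(&example_evt);        /* sleeps until signalled, unless the event
                                  * is already marked done */

/* Thread B (the signaller) */
signal_event(&example_evt);      /* wakes the waiter, or records 'done' for a
                                  * waiter that has not arrived yet */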

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/common/schedule.c | 45 ++++++++++++++++++++++++++++++++++++
hypervisor/include/common/schedule.h | 11 +++++++++
2 files changed, 56 insertions(+)

diff --git a/hypervisor/common/schedule.c b/hypervisor/common/schedule.c
index 3ce6a41..2f72bc9 100644
--- a/hypervisor/common/schedule.c
+++ b/hypervisor/common/schedule.c
@@ -274,3 +274,48 @@ void run_thread(struct thread_object *obj)
obj->thread_entry(obj);
}
}
+
+void init_event(struct sched_event *event)
+{
+ spinlock_init(&event->lock);
+ event->done = 0UL;
+ event->waiting_thread = NULL;
+}
+
+void reset_event(struct sched_event *event)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&event->lock, &rflag);
+ event->done = 0UL;
+ event->waiting_thread = NULL;
+ spinlock_irqrestore_release(&event->lock, rflag);
+}
+
+/* support exclusive waiting only */
+void wait_event(struct sched_event *event)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&event->lock, &rflag);
+ ASSERT((event->waiting_thread == NULL), "only support exclusive waiting");
+ if (event->done == 0UL) {
+ event->waiting_thread = sched_get_current(get_pcpu_id());
+ sleep_thread(event->waiting_thread);
+ }
+ spinlock_irqrestore_release(&event->lock, rflag);
+}
+
+void signal_event(struct sched_event *event)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&event->lock, &rflag);
+ event->done++;
+ if (event->waiting_thread != NULL) {
+ wake_thread(event->waiting_thread);
+ event->done = 0UL;
+ event->waiting_thread = NULL;
+ }
+ spinlock_irqrestore_release(&event->lock, rflag);
+}
diff --git a/hypervisor/include/common/schedule.h b/hypervisor/include/common/schedule.h
index 1526865..630f130 100644
--- a/hypervisor/include/common/schedule.h
+++ b/hypervisor/include/common/schedule.h
@@ -90,6 +90,17 @@ struct sched_iorr_control {
struct hv_timer tick_timer;
};

+struct sched_event {
+ spinlock_t lock;
+ uint32_t done;
+ struct thread_object* waiting_thread;
+};
+
+void init_event(struct sched_event *event);
+void reset_event(struct sched_event *event);
+void wait_event(struct sched_event *event);
+void signal_event(struct sched_event *event);
+
bool is_idle_thread(const struct thread_object *obj);
uint16_t sched_get_pcpuid(const struct thread_object *obj);
struct thread_object *sched_get_current(uint16_t pcpu_id);
--
2.8.3


[RFC PATCH v3 1/6] hv: PAUSE-loop exiting support in hypervisor

Shuo A Liu
 

Since we have enabled cpu sharing, PAUSE-loop exiting helps a vcpu release
its pcpu proactively, which is good for performance.

VMX_PLE_GAP: upper bound on the amount of time between two successive
executions of PAUSE in a loop.
VMX_PLE_WINDOW: upper bound on the amount of time a guest is allowed to
execute in a PAUSE loop.
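
A short sketch of what this means at VMCS setup time (the control bit, gap
and window values, and the handler body are the ones used in this patch,
shown out of context for illustration):

/* enable PAUSE-loop exiting in the secondary processor-based controls */
value32 |= VMX_PROCBASED_CTLS2_PAUSE_LOOP;

/* SDM 24.6.13: program the PLE gap and window for this vcpu */
exec_vmwrite(VMX_PLE_GAP, 128U);
exec_vmwrite(VMX_PLE_WINDOW, 4096U);

/* on a PAUSE VM exit, the vcpu simply yields its pcpu */
static int32_t pause_vmexit_handler(__unused struct acrn_vcpu *vcpu)
{
	yield_current();
	return 0;
}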

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
Acked-by: Eddie Dong <eddie.dong@...>
---
hypervisor/arch/x86/guest/vmcs.c | 7 ++++++-
hypervisor/arch/x86/guest/vmexit.c | 9 ++++++++-
2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index a125e2d..33be74f 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -297,7 +297,8 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
*/
value32 = check_vmx_ctrl(MSR_IA32_VMX_PROCBASED_CTLS2,
VMX_PROCBASED_CTLS2_VAPIC | VMX_PROCBASED_CTLS2_EPT |
- VMX_PROCBASED_CTLS2_RDTSCP | VMX_PROCBASED_CTLS2_UNRESTRICT);
+ VMX_PROCBASED_CTLS2_RDTSCP | VMX_PROCBASED_CTLS2_UNRESTRICT |
+ VMX_PROCBASED_CTLS2_PAUSE_LOOP);

if (vcpu->arch.vpid != 0U) {
value32 |= VMX_PROCBASED_CTLS2_VPID;
@@ -422,6 +423,10 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
exec_vmwrite(VMX_CR3_TARGET_1, 0UL);
exec_vmwrite(VMX_CR3_TARGET_2, 0UL);
exec_vmwrite(VMX_CR3_TARGET_3, 0UL);
+
+ /* Setup PAUSE-loop exiting - 24.6.13 */
+ exec_vmwrite(VMX_PLE_GAP, 128U);
+ exec_vmwrite(VMX_PLE_WINDOW, 4096U);
}

static void init_entry_ctrl(const struct acrn_vcpu *vcpu)
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index ac73f4c..4fb0478 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -30,6 +30,7 @@ static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t xsetbv_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t wbinvd_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t undefined_vmexit_handler(struct acrn_vcpu *vcpu);
+static int32_t pause_vmexit_handler(struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
@@ -113,7 +114,7 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_MONITOR] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_PAUSE] = {
- .handler = unhandled_vmexit_handler},
+ .handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_TPR_BELOW_THRESHOLD] = {
@@ -277,6 +278,12 @@ static int32_t triple_fault_vmexit_handler(struct acrn_vcpu *vcpu)
return 0;
}

+static int32_t pause_vmexit_handler(__unused struct acrn_vcpu *vcpu)
+{
+ yield_current();
+ return 0;
+}
+
int32_t cpuid_vmexit_handler(struct acrn_vcpu *vcpu)
{
uint64_t rax, rbx, rcx, rdx;
--
2.8.3


[RFC PATCH v3 0/6] Enable PAUSE-Loop exiting and HLT emulation in hypervisor

Shuo A Liu
 

When multiple vcpus run on the same pcpu, it is good for performance if a
vcpu can yield the pcpu proactively. When vcpus are idle, they should release
the pcpu and let the scheduler pick other vcpus to run.

To avoid the sleep-after-wakeup race issue, HLT emulation introduces a
block_flags field and a per-vcpu lock (vcpu_lock).

v3:
1) concept: completion -> event
2) Drop vcpu layer abstraction of event. Just use wait_event/signal_event provided
by scheduling.

v2:
Drop the block_flags & vcpu_lock approach, and introduce an event completion
mechanism in the scheduling and vcpu layers.


Shuo A Liu (6):
hv: PAUSE-loop exiting support in hypervisor
hv: sched: simple event implementation
hv: vcpu: wait and signal vcpu event support
hv: Add vlapic_has_pending_intr of apicv to check pending interrupts
hv: HLT emulation in hypervisor
hv: Use HLT as the default idle action of service OS

hypervisor/arch/x86/guest/vcpu.c | 5 ++-
hypervisor/arch/x86/guest/vlapic.c | 31 ++++++++++++++++
hypervisor/arch/x86/guest/vmcs.c | 9 ++++-
hypervisor/arch/x86/guest/vmexit.c | 20 +++++++++-
hypervisor/common/hv_main.c | 1 +
hypervisor/common/hypercall.c | 8 ++--
hypervisor/common/schedule.c | 45 +++++++++++++++++++++++
hypervisor/dm/io_req.c | 15 ++------
hypervisor/include/arch/x86/guest/vcpu.h | 7 ++++
hypervisor/include/arch/x86/guest/vlapic.h | 2 +
hypervisor/include/common/schedule.h | 11 ++++++
hypervisor/scenarios/industry/vm_configurations.h | 2 +-
hypervisor/scenarios/sdc/vm_configurations.h | 2 +-
hypervisor/scenarios/sdc2/vm_configurations.h | 2 +-
14 files changed, 136 insertions(+), 24 deletions(-)

--
2.8.3


Re: [RFC PATCH v2 2/7] hv: sched: simple completion implementation

Eddie Dong
 

Hi Shuo:
It seems to be simple :)
From an abstraction p.o.v., the APIs are still coupled with the specific event we are using now. If we can have neutral event wait/signal APIs, plus 2 specific events, that will be great!

Here we may 1) use a neutral name, i.e. wait_event, not wait_vcpu_event; these APIs are for threads, not VCPU specific. 2) couple the event mechanism with the scheduler, but not with the specific usage/EVENT we use here.

Thx, Eddie

-----Original Message-----
From: acrn-dev@... <acrn-dev@...> On
Behalf Of Shuo A Liu
Sent: Tuesday, December 31, 2019 2:52 PM
To: acrn-dev@...
Cc: Liu, Shuo A <shuo.a.liu@...>
Subject: [acrn-dev] [RFC PATCH v2 2/7] hv: sched: simple completion implementation

This simple completion implementation supports only one exclusive waiter at
a time. It is mainly used by a thread that wants to wait for some event to
happen.
Thread A, which wants to wait for some event, calls
wait_for_completion(struct sched_completion *,
                    void *action(void *), void *data);
where 'action' is the callback invoked when waiting.

Thread B, which can signal the completion, calls
complete(struct sched_completion *,
         void *action(void *), void *data);
where 'action' is the callback invoked when completing.
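
For illustration, a minimal usage sketch. The completion variable, the two
action callbacks, and waiter_thread_obj are hypothetical; in this series the
waiter's action would typically sleep the current thread and the completer's
action would wake it:

static struct sched_completion cmpl;        /* hypothetical shared completion */

static void sleep_me(void *data)            /* hypothetical waiter action */
{
	sleep_thread((struct thread_object *)data);
}

static void wake_it(void *data)             /* hypothetical completer action */
{
	wake_thread((struct thread_object *)data);
}

init_completion(&cmpl);

/* Thread A (the single allowed waiter) passes itself as the action data */
wait_for_completion(&cmpl, sleep_me, sched_get_current(get_pcpu_id()));

/* Thread B completes it, passing the thread object it wants to wake */
complete(&cmpl, wake_it, waiter_thread_obj);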

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/common/schedule.c | 45 ++++++++++++++++++++++++++++++++++++
hypervisor/include/common/schedule.h | 13 +++++++++++
2 files changed, 58 insertions(+)

diff --git a/hypervisor/common/schedule.c b/hypervisor/common/schedule.c
index 3ce6a41..e74f739 100644
--- a/hypervisor/common/schedule.c
+++ b/hypervisor/common/schedule.c
@@ -274,3 +274,48 @@ void run_thread(struct thread_object *obj)
obj->thread_entry(obj);
}
}
+
+void init_completion(struct sched_completion *completion)
+{
+ spinlock_init(&completion->lock);
+ completion->done = 0UL;
+ completion->waiting_thread = NULL;
+}
+
+void reset_completion(struct sched_completion *completion)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&completion->lock, &rflag);
+ completion->done = 0UL;
+ completion->waiting_thread = NULL;
+ spinlock_irqrestore_release(&completion->lock, rflag);
+}
+
+/* support exclusive waiting only */
+void wait_for_completion(struct sched_completion *completion, wait_fn action, void *data)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&completion->lock, &rflag);
+ ASSERT((completion->waiting_thread == NULL), "only support exclusive waiting");
+ if (completion->done == 0UL) {
+ completion->waiting_thread = sched_get_current(get_pcpu_id());
+ action(data);
+ }
+ spinlock_irqrestore_release(&completion->lock, rflag);
+}
+
+void complete(struct sched_completion *completion, complete_fn action, void *data)
+{
+ uint64_t rflag;
+
+ spinlock_irqsave_obtain(&completion->lock, &rflag);
+ completion->done++;
+ if (completion->waiting_thread != NULL) {
+ action(data);
+ completion->done = 0UL;
+ completion->waiting_thread = NULL;
+ }
+ spinlock_irqrestore_release(&completion->lock, rflag);
+}
diff --git a/hypervisor/include/common/schedule.h b/hypervisor/include/common/schedule.h
index 1526865..224ad48 100644
--- a/hypervisor/include/common/schedule.h
+++ b/hypervisor/include/common/schedule.h
@@ -90,6 +90,19 @@ struct sched_iorr_control {
struct hv_timer tick_timer;
};

+struct sched_completion {
+ spinlock_t lock;
+ uint32_t done;
+ struct thread_object* waiting_thread;
+};
+
+typedef void (*wait_fn)(void *data);
+typedef void (*complete_fn)(void *data);
+void init_completion(struct sched_completion *completion);
+void reset_completion(struct sched_completion *completion);
+void wait_for_completion(struct sched_completion *completion, wait_fn action, void *data);
+void complete(struct sched_completion *completion, complete_fn action, void *data);
+
bool is_idle_thread(const struct thread_object *obj);
uint16_t sched_get_pcpuid(const struct thread_object *obj);
struct thread_object *sched_get_current(uint16_t pcpu_id);
--
2.8.3



[RFC PATCH v2 7/7] hv: Use HLT as the default idle action of service OS

Shuo A Liu
 

This patch overrides the idle driver of the service OS for the industry, sdc,
and sdc2 scenarios. HLT will be used as the default idle action.

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/scenarios/industry/vm_configurations.h | 2 +-
hypervisor/scenarios/sdc/vm_configurations.h | 2 +-
hypervisor/scenarios/sdc2/vm_configurations.h | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/hypervisor/scenarios/industry/vm_configurations.h b/hypervisor/scenarios/industry/vm_configurations.h
index 482801a..633dab4 100644
--- a/hypervisor/scenarios/industry/vm_configurations.h
+++ b/hypervisor/scenarios/industry/vm_configurations.h
@@ -25,7 +25,7 @@
"i915.nuclear_pageflip=1 " \
"i915.avail_planes_per_pipe=0x01010F " \
"i915.domain_plane_owners=0x011111110000 " \
- "i915.enable_gvt=1 " \
+ "i915.enable_gvt=1 idle=halt " \
SOS_BOOTARGS_DIFF

#define VM1_CONFIG_VCPU_AFFINITY {AFFINITY_CPU(1U)}
diff --git a/hypervisor/scenarios/sdc/vm_configurations.h b/hypervisor/scenarios/sdc/vm_configurations.h
index 436e931..2814259 100644
--- a/hypervisor/scenarios/sdc/vm_configurations.h
+++ b/hypervisor/scenarios/sdc/vm_configurations.h
@@ -25,7 +25,7 @@
"i915.nuclear_pageflip=1 " \
"i915.avail_planes_per_pipe=0x01010F " \
"i915.domain_plane_owners=0x011111110000 " \
- "i915.enable_gvt=1 " \
+ "i915.enable_gvt=1 idle=halt " \
SOS_BOOTARGS_DIFF

#if CONFIG_MAX_KATA_VM_NUM > 0
diff --git a/hypervisor/scenarios/sdc2/vm_configurations.h b/hypervisor/scenarios/sdc2/vm_configurations.h
index 96e069d..5701a41 100644
--- a/hypervisor/scenarios/sdc2/vm_configurations.h
+++ b/hypervisor/scenarios/sdc2/vm_configurations.h
@@ -25,7 +25,7 @@
"i915.nuclear_pageflip=1 " \
"i915.avail_planes_per_pipe=0x01010F " \
"i915.domain_plane_owners=0x011111110000 " \
- "i915.enable_gvt=1 " \
+ "i915.enable_gvt=1 idle=halt " \
SOS_BOOTARGS_DIFF

#define VM1_CONFIG_VCPU_AFFINITY {AFFINITY_CPU(1U)}
--
2.8.3


[RFC PATCH v2 6/7] hv: HLT emulation in hypervisor

Shuo A Liu
 

HLT emulation is important for maximizing CPU resource utilization. A vcpu
executing HLT is idle and can give up its CPU proactively. Thus, we pause the
vcpu in HLT emulation and resume it when an event happens.

When a vcpu enters HLT, it will be paused.

VM ID    PCPU ID    VCPU ID    VCPU ROLE    VCPU STATE
=====    =======    =======    =========    ==========
    0          0          0    PRIMARY      Paused
    0          1          1    SECONDARY    Paused

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/arch/x86/guest/vlapic.c | 2 ++
hypervisor/arch/x86/guest/vmcs.c | 2 +-
hypervisor/arch/x86/guest/vmexit.c | 11 ++++++++++-
hypervisor/common/hv_main.c | 1 +
4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/hypervisor/arch/x86/guest/vlapic.c b/hypervisor/arch/x86/guest/vlapic.c
index 1c7bdfb..63f6b01 100644
--- a/hypervisor/arch/x86/guest/vlapic.c
+++ b/hypervisor/arch/x86/guest/vlapic.c
@@ -556,6 +556,7 @@ static void apicv_basic_accept_intr(struct acrn_vlapic *vlapic, uint32_t vector,

static void apicv_advanced_accept_intr(struct acrn_vlapic *vlapic, uint32_t vector, bool level)
{
+
/* update TMR if interrupt trigger mode has changed */
vlapic_set_tmr(vlapic, vector, level);

@@ -590,6 +591,7 @@ static void vlapic_accept_intr(struct acrn_vlapic *vlapic, uint32_t vector, bool
if ((lapic->svr.v & APIC_SVR_ENABLE) == 0U) {
dev_dbg(ACRN_DBG_LAPIC, "vlapic is software disabled, ignoring interrupt %u", vector);
} else {
+ notify_vcpu_event(vlapic->vcpu, VCPU_EVENT_INTERRUPT);
vlapic->ops->accept_intr(vlapic, vector, level);
}
}
diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c
index 33be74f..fb0ba76 100644
--- a/hypervisor/arch/x86/guest/vmcs.c
+++ b/hypervisor/arch/x86/guest/vmcs.c
@@ -272,7 +272,7 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 = check_vmx_ctrl(MSR_IA32_VMX_PROCBASED_CTLS,
VMX_PROCBASED_CTLS_TSC_OFF | VMX_PROCBASED_CTLS_TPR_SHADOW |
VMX_PROCBASED_CTLS_IO_BITMAP | VMX_PROCBASED_CTLS_MSR_BITMAP |
- VMX_PROCBASED_CTLS_SECONDARY);
+ VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_SECONDARY);

/*Disable VM_EXIT for CR3 access*/
value32 &= ~(VMX_PROCBASED_CTLS_CR3_LOAD | VMX_PROCBASED_CTLS_CR3_STORE);
diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c
index 4fb0478..ea576be 100644
--- a/hypervisor/arch/x86/guest/vmexit.c
+++ b/hypervisor/arch/x86/guest/vmexit.c
@@ -31,6 +31,7 @@ static int32_t xsetbv_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t wbinvd_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t undefined_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t pause_vmexit_handler(struct acrn_vcpu *vcpu);
+static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu);

/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
@@ -59,7 +60,7 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_GETSEC] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_HLT] = {
- .handler = unhandled_vmexit_handler},
+ .handler = hlt_vmexit_handler},
[VMX_EXIT_REASON_INVD] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_INVLPG] = {
@@ -284,6 +285,14 @@ static int32_t pause_vmexit_handler(__unused struct acrn_vcpu *vcpu)
return 0;
}

+static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu)
+{
+ if ((vcpu->arch.pending_req == 0UL) && (!vlapic_has_pending_intr(vcpu))) {
+ wait_vcpu_event(vcpu, VCPU_EVENT_INTERRUPT);
+ }
+ return 0;
+}
+
int32_t cpuid_vmexit_handler(struct acrn_vcpu *vcpu)
{
uint64_t rax, rbx, rcx, rdx;
diff --git a/hypervisor/common/hv_main.c b/hypervisor/common/hv_main.c
index e949149..a6920f6 100644
--- a/hypervisor/common/hv_main.c
+++ b/hypervisor/common/hv_main.c
@@ -40,6 +40,7 @@ void vcpu_thread(struct thread_object *obj)
continue;
}

+ reset_vcpu_event(vcpu, VCPU_EVENT_INTERRUPT);
profiling_vmenter_handler(vcpu);

TRACE_2L(TRACE_VM_ENTER, 0UL, 0UL);
--
2.8.3


[RFC PATCH v2 5/7] hv: Add vlapic_has_pending_intr of apicv to check pending interrupts

Shuo A Liu
 

Sometimes the HV wants to know whether a vcpu has pending interrupts.
Add a .has_pending_intr interface to acrn_apicv_ops and return the pending
interrupt status by checking the IRRs of apicv.

Signed-off-by: Shuo A Liu <shuo.a.liu@...>
---
hypervisor/arch/x86/guest/vlapic.c | 29 +++++++++++++++++++++++++++++
hypervisor/include/arch/x86/guest/vlapic.h | 2 ++
2 files changed, 31 insertions(+)

diff --git a/hypervisor/arch/x86/guest/vlapic.c b/hypervisor/arch/x86/guest/vlapic.c
index 8c79a4d..1c7bdfb 100644
--- a/hypervisor/arch/x86/guest/vlapic.c
+++ b/hypervisor/arch/x86/guest/vlapic.c
@@ -1745,6 +1745,11 @@ static bool ptapic_has_pending_delivery_intr(__unused struct acrn_vcpu *vcpu)
return false;
}

+static bool ptapic_has_pending_intr(__unused struct acrn_vcpu *vcpu)
+{
+ return false;
+}
+
static bool ptapic_invalid(__unused uint32_t offset)
{
return false;
@@ -1754,6 +1759,7 @@ static const struct acrn_apicv_ops ptapic_ops = {
.accept_intr = ptapic_accept_intr,
.inject_intr = ptapic_inject_intr,
.has_pending_delivery_intr = ptapic_has_pending_delivery_intr,
+ .has_pending_intr = ptapic_has_pending_intr,
.apic_read_access_may_valid = ptapic_invalid,
.apic_write_access_may_valid = ptapic_invalid,
.x2apic_read_msr_may_valid = ptapic_invalid,
@@ -2379,6 +2385,27 @@ bool vlapic_has_pending_delivery_intr(struct acrn_vcpu *vcpu)
return vlapic->ops->has_pending_delivery_intr(vcpu);
}

+static bool apicv_basic_has_pending_intr(struct acrn_vcpu *vcpu)
+{
+ struct acrn_vlapic *vlapic = vcpu_vlapic(vcpu);
+ uint32_t vector;
+
+ vector = vlapic_find_highest_irr(vlapic);
+
+ return vector != 0UL;
+}
+
+static bool apicv_advanced_has_pending_intr(struct acrn_vcpu *vcpu)
+{
+ return apicv_basic_has_pending_intr(vcpu);
+}
+
+bool vlapic_has_pending_intr(struct acrn_vcpu *vcpu)
+{
+ struct acrn_vlapic *vlapic = vcpu_vlapic(vcpu);
+ return vlapic->ops->has_pending_intr(vcpu);
+}
+
static bool apicv_basic_apic_read_access_may_valid(__unused uint32_t offset)
{
return true;
@@ -2592,6 +2619,7 @@ static const struct acrn_apicv_ops apicv_basic_ops = {
.accept_intr = apicv_basic_accept_intr,
.inject_intr = apicv_basic_inject_intr,
.has_pending_delivery_intr = apicv_basic_has_pending_delivery_intr,
+ .has_pending_intr = apicv_basic_has_pending_intr,
.apic_read_access_may_valid = apicv_basic_apic_read_access_may_valid,
.apic_write_access_may_valid = apicv_basic_apic_write_access_may_valid,
.x2apic_read_msr_may_valid = apicv_basic_x2apic_read_msr_may_valid,
@@ -2602,6 +2630,7 @@ static const struct acrn_apicv_ops apicv_advanced_ops = {
.accept_intr = apicv_advanced_accept_intr,
.inject_intr = apicv_advanced_inject_intr,
.has_pending_delivery_intr = apicv_advanced_has_pending_delivery_intr,
+ .has_pending_intr = apicv_advanced_has_pending_intr,
.apic_read_access_may_valid = apicv_advanced_apic_read_access_may_valid,
.apic_write_access_may_valid = apicv_advanced_apic_write_access_may_valid,
.x2apic_read_msr_may_valid = apicv_advanced_x2apic_read_msr_may_valid,
diff --git a/hypervisor/include/arch/x86/guest/vlapic.h b/hypervisor/include/arch/x86/guest/vlapic.h
index f271a4e..8feea92 100644
--- a/hypervisor/include/arch/x86/guest/vlapic.h
+++ b/hypervisor/include/arch/x86/guest/vlapic.h
@@ -100,6 +100,7 @@ struct acrn_apicv_ops {
void (*accept_intr)(struct acrn_vlapic *vlapic, uint32_t vector, bool level);
bool (*inject_intr)(struct acrn_vlapic *vlapic, bool guest_irq_enabled, bool injected);
bool (*has_pending_delivery_intr)(struct acrn_vcpu *vcpu);
+ bool (*has_pending_intr)(struct acrn_vcpu *vcpu);
bool (*apic_read_access_may_valid)(uint32_t offset);
bool (*apic_write_access_may_valid)(uint32_t offset);
bool (*x2apic_read_msr_may_valid)(uint32_t offset);
@@ -118,6 +119,7 @@ void vlapic_set_apicv_ops(void);

bool vlapic_inject_intr(struct acrn_vlapic *vlapic, bool guest_irq_enabled, bool injected);
bool vlapic_has_pending_delivery_intr(struct acrn_vcpu *vcpu);
+bool vlapic_has_pending_intr(struct acrn_vcpu *vcpu);

/**
* @brief Get physical address to PIR description.
--
2.8.3
