[PATCH V2.1 1/5] HV: Fix decode_instruction() trigger #UD for emulating UC-lock #ud


Tao, Yuhong
 

From: Tao Yuhong <yuhong.tao@intel.com>

When ACRN uses decode_instruction to emulate split-lock/uc-lock
instruction, It is actually a try-decode to see if it is XCHG.
If the instruction is XCHG instruction, ACRN must emulate it
(inject #PF if it is triggered) with peer VCPUs paused, and advance
the guest IP. If the instruction is a LOCK prefixed instruction
with accessing the UC memory, ACRN Halted the peer VCPUs, and
advance the IP to skip the LOCK prefix, and then let the VCPU
Executes one instruction by enabling IRQ Windows vm-exit. For
other cases, ACRN injects the exception back to VCPU without
emulating it.
So change the API to decode_instruction(vcpu, bool try_decode),
when try_decode is true, try decode this instruction: if
encounter #UD, do not inject #UD, just inject back #AC/#GP

Signed-off-by: Tao Yuhong <yuhong.tao@intel.com>
---
hypervisor/arch/x86/guest/instr_emul.c | 10 ++++++----
hypervisor/arch/x86/guest/splitlock.c | 6 ++++--
hypervisor/arch/x86/guest/vlapic.c | 2 +-
hypervisor/arch/x86/guest/vmx_io.c | 2 +-
hypervisor/include/arch/x86/asm/guest/instr_emul.h | 2 +-
5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/hypervisor/arch/x86/guest/instr_emul.c b/hypervisor/arch/x86/guest/instr_emul.c
index ab55f69b6..884aa7731 100644
--- a/hypervisor/arch/x86/guest/instr_emul.c
+++ b/hypervisor/arch/x86/guest/instr_emul.c
@@ -2340,7 +2340,7 @@ static int32_t instr_check_gva(struct acrn_vcpu *vcpu, enum vm_cpu_mode cpu_mode
return ret;
}

-int32_t decode_instruction(struct acrn_vcpu *vcpu)
+int32_t decode_instruction(struct acrn_vcpu *vcpu, bool try_decode)
{
struct instr_emul_ctxt *emul_ctxt;
uint32_t csar;
@@ -2361,9 +2361,11 @@ int32_t decode_instruction(struct acrn_vcpu *vcpu)
retval = local_decode_instruction(cpu_mode, seg_desc_def32(csar), &emul_ctxt->vie);

if (retval != 0) {
- pr_err("decode instruction failed @ 0x%016lx:", vcpu_get_rip(vcpu));
- vcpu_inject_ud(vcpu);
- retval = -EFAULT;
+ if (!try_decode) {
+ pr_err("decode instruction failed @ 0x%016lx:", vcpu_get_rip(vcpu));
+ vcpu_inject_ud(vcpu);
+ retval = -EFAULT;
+ }
} else {
/*
* We do operand check in instruction decode phase and
diff --git a/hypervisor/arch/x86/guest/splitlock.c b/hypervisor/arch/x86/guest/splitlock.c
index 09bcace47..bd3b67d1b 100644
--- a/hypervisor/arch/x86/guest/splitlock.c
+++ b/hypervisor/arch/x86/guest/splitlock.c
@@ -111,7 +111,7 @@ int32_t emulate_splitlock(struct acrn_vcpu *vcpu, uint32_t exception_vector, boo
/* Skip the #AC, we have emulated it. */
*queue_exception = false;
} else {
- status = decode_instruction(vcpu);
+ status = decode_instruction(vcpu, true);
if (status >= 0) {
/*
* If this is the xchg, then emulate it, otherwise,
@@ -149,10 +149,12 @@ int32_t emulate_splitlock(struct acrn_vcpu *vcpu, uint32_t exception_vector, boo
} else {
if (status == -EFAULT) {
pr_info("page fault happen during decode_instruction");
- status = 0;
/* For this case, Inject #PF, not to queue #AC */
*queue_exception = false;
}
+
+ /* status == -1, undefined instruction, inject #AC back */
+ status = 0;
}
}
}
diff --git a/hypervisor/arch/x86/guest/vlapic.c b/hypervisor/arch/x86/guest/vlapic.c
index f856a2acd..4d0e6aa51 100644
--- a/hypervisor/arch/x86/guest/vlapic.c
+++ b/hypervisor/arch/x86/guest/vlapic.c
@@ -2404,7 +2404,7 @@ int32_t apic_access_vmexit_handler(struct acrn_vcpu *vcpu)
* 3 = linear access (read or write) during event delivery
*/
if (((access_type == TYPE_LINEAR_APIC_INST_READ) || (access_type == TYPE_LINEAR_APIC_INST_WRITE)) &&
- (decode_instruction(vcpu) >= 0)) {
+ (decode_instruction(vcpu, false) >= 0)) {
vlapic = vcpu_vlapic(vcpu);
offset = (uint32_t)apic_access_offset(qual);
mmio = &vcpu->req.reqs.mmio;
diff --git a/hypervisor/arch/x86/guest/vmx_io.c b/hypervisor/arch/x86/guest/vmx_io.c
index abaa88927..50aaf7eac 100644
--- a/hypervisor/arch/x86/guest/vmx_io.c
+++ b/hypervisor/arch/x86/guest/vmx_io.c
@@ -150,7 +150,7 @@ int32_t ept_violation_vmexit_handler(struct acrn_vcpu *vcpu)
*/
mmio_req->address = gpa;

- ret = decode_instruction(vcpu);
+ ret = decode_instruction(vcpu, false);
if (ret > 0) {
mmio_req->size = (uint64_t)ret;
/*
diff --git a/hypervisor/include/arch/x86/asm/guest/instr_emul.h b/hypervisor/include/arch/x86/asm/guest/instr_emul.h
index 6969c0412..7a7e32ec0 100644
--- a/hypervisor/include/arch/x86/asm/guest/instr_emul.h
+++ b/hypervisor/include/arch/x86/asm/guest/instr_emul.h
@@ -92,7 +92,7 @@ struct instr_emul_ctxt {
};

int32_t emulate_instruction(struct acrn_vcpu *vcpu);
-int32_t decode_instruction(struct acrn_vcpu *vcpu);
+int32_t decode_instruction(struct acrn_vcpu *vcpu, bool try_decode);
bool is_current_opcode_xchg(struct acrn_vcpu *vcpu);

#endif
--
2.17.1


Eddie Dong
 

-----Original Message-----
From: Tao, Yuhong <yuhong.tao@intel.com>
Sent: Wednesday, July 14, 2021 7:48 PM
To: acrn-dev@lists.projectacrn.org
Cc: Dong, Eddie <eddie.dong@intel.com>
Subject: [PATCH V2.1 1/5] HV: Fix decode_instruction() trigger #UD for
emulating UC-lock

From: Tao Yuhong <yuhong.tao@intel.com>

When ACRN uses decode_instruction to emulate split-lock/uc-lock instruction,
It is actually a try-decode to see if it is XCHG.
If the instruction is XCHG instruction, ACRN must emulate it (inject #PF if it is
triggered) with peer VCPUs paused, and advance the guest IP. If the
instruction is a LOCK prefixed instruction with accessing the UC memory,
ACRN Halted the peer VCPUs, and advance the IP to skip the LOCK prefix, and
then let the VCPU Executes one instruction by enabling IRQ Windows vm-exit.
For other cases, ACRN injects the exception back to VCPU without emulating
it.
So change the API to decode_instruction(vcpu, bool try_decode), when
try_decode is true, try decode this instruction: if encounter #UD, do not inject
#UD, just inject back #AC/#GP

Signed-off-by: Tao Yuhong <yuhong.tao@intel.com>
---
hypervisor/arch/x86/guest/instr_emul.c | 10 ++++++----
hypervisor/arch/x86/guest/splitlock.c | 6 ++++--
hypervisor/arch/x86/guest/vlapic.c | 2 +-
hypervisor/arch/x86/guest/vmx_io.c | 2 +-
hypervisor/include/arch/x86/asm/guest/instr_emul.h | 2 +-
5 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/hypervisor/arch/x86/guest/instr_emul.c
b/hypervisor/arch/x86/guest/instr_emul.c
index ab55f69b6..884aa7731 100644
--- a/hypervisor/arch/x86/guest/instr_emul.c
+++ b/hypervisor/arch/x86/guest/instr_emul.c
@@ -2340,7 +2340,7 @@ static int32_t instr_check_gva(struct acrn_vcpu
*vcpu, enum vm_cpu_mode cpu_mode
return ret;
}

-int32_t decode_instruction(struct acrn_vcpu *vcpu)
+int32_t decode_instruction(struct acrn_vcpu *vcpu, bool try_decode)
The API name is decode_instruction, but all the callers needs to use "FALSE" to do the real decode. This is bad
We can: 1) remain the 2nd parameter as try_decode, and rename API name to try_decode_instruction, or 2) remain the API name, but change try_decode to full_decode or real_decode + comments of the API.




{
struct instr_emul_ctxt *emul_ctxt;
uint32_t csar;
@@ -2361,9 +2361,11 @@ int32_t decode_instruction(struct acrn_vcpu
*vcpu)
retval = local_decode_instruction(cpu_mode, seg_desc_def32(csar),
&emul_ctxt->vie);

if (retval != 0) {
- pr_err("decode instruction failed @ 0x%016lx:",
vcpu_get_rip(vcpu));
- vcpu_inject_ud(vcpu);
- retval = -EFAULT;
+ if (!try_decode) {
+ pr_err("decode instruction failed @ 0x%016lx:",
vcpu_get_rip(vcpu));
+ vcpu_inject_ud(vcpu);
+ retval = -EFAULT;
+ }
} else {
/*
* We do operand check in instruction decode phase and diff
--git a/hypervisor/arch/x86/guest/splitlock.c
b/hypervisor/arch/x86/guest/splitlock.c
index 09bcace47..bd3b67d1b 100644
--- a/hypervisor/arch/x86/guest/splitlock.c
+++ b/hypervisor/arch/x86/guest/splitlock.c
@@ -111,7 +111,7 @@ int32_t emulate_splitlock(struct acrn_vcpu *vcpu,
uint32_t exception_vector, boo
/* Skip the #AC, we have emulated it. */
*queue_exception = false;
} else {
- status = decode_instruction(vcpu);
+ status = decode_instruction(vcpu, true);
if (status >= 0) {
/*
* If this is the xchg, then emulate it, otherwise,
@@ -149,10 +149,12 @@ int32_t emulate_splitlock(struct acrn_vcpu *vcpu,
uint32_t exception_vector, boo
} else {
if (status == -EFAULT) {
pr_info("page fault happen during
decode_instruction");
- status = 0;
/* For this case, Inject #PF, not to queue
#AC */
*queue_exception = false;
}
+
+ /* status == -1, undefined instruction, inject #AC
back */
You implicitly assume the "status" got from decode_instruction can be only -1 or -EFAULT. Is this the case?
Change the comments. Otherwise it implicitly means we came here only when status = -1.

+ status = 0;
}
}
}
diff --git a/hypervisor/arch/x86/guest/vlapic.c
b/hypervisor/arch/x86/guest/vlapic.c
index f856a2acd..4d0e6aa51 100644
--- a/hypervisor/arch/x86/guest/vlapic.c
+++ b/hypervisor/arch/x86/guest/vlapic.c
@@ -2404,7 +2404,7 @@ int32_t apic_access_vmexit_handler(struct
acrn_vcpu *vcpu)
* 3 = linear access (read or write) during event delivery
*/
if (((access_type == TYPE_LINEAR_APIC_INST_READ) || (access_type ==
TYPE_LINEAR_APIC_INST_WRITE)) &&
- (decode_instruction(vcpu) >= 0)) {
+ (decode_instruction(vcpu, false) >= 0)) {
vlapic = vcpu_vlapic(vcpu);
offset = (uint32_t)apic_access_offset(qual);
mmio = &vcpu->req.reqs.mmio;
diff --git a/hypervisor/arch/x86/guest/vmx_io.c
b/hypervisor/arch/x86/guest/vmx_io.c
index abaa88927..50aaf7eac 100644
--- a/hypervisor/arch/x86/guest/vmx_io.c
+++ b/hypervisor/arch/x86/guest/vmx_io.c
@@ -150,7 +150,7 @@ int32_t ept_violation_vmexit_handler(struct
acrn_vcpu *vcpu)
*/
mmio_req->address = gpa;

- ret = decode_instruction(vcpu);
+ ret = decode_instruction(vcpu, false);
if (ret > 0) {
mmio_req->size = (uint64_t)ret;
/*
diff --git a/hypervisor/include/arch/x86/asm/guest/instr_emul.h
b/hypervisor/include/arch/x86/asm/guest/instr_emul.h
index 6969c0412..7a7e32ec0 100644
--- a/hypervisor/include/arch/x86/asm/guest/instr_emul.h
+++ b/hypervisor/include/arch/x86/asm/guest/instr_emul.h
@@ -92,7 +92,7 @@ struct instr_emul_ctxt { };

int32_t emulate_instruction(struct acrn_vcpu *vcpu); -int32_t
decode_instruction(struct acrn_vcpu *vcpu);
+int32_t decode_instruction(struct acrn_vcpu *vcpu, bool try_decode);
bool is_current_opcode_xchg(struct acrn_vcpu *vcpu);

#endif
--
2.17.1


Tao, Yuhong
 

-----Original Message-----
From: Dong, Eddie <eddie.dong@intel.com>
Sent: Thursday, July 15, 2021 10:20 AM
To: Tao, Yuhong <yuhong.tao@intel.com>; acrn-dev@lists.projectacrn.org
Subject: RE: [PATCH V2.1 1/5] HV: Fix decode_instruction() trigger #UD for
emulating UC-lock



-int32_t decode_instruction(struct acrn_vcpu *vcpu)
+int32_t decode_instruction(struct acrn_vcpu *vcpu, bool try_decode)
The API name is decode_instruction, but all the callers needs to use "FALSE" to
do the real decode. This is bad We can: 1) remain the 2nd parameter as
try_decode, and rename API name to try_decode_instruction, or 2) remain the
API name, but change try_decode to full_decode or real_decode + comments of
the API.
I think 2) is better, we can keep the API name, and add full_decode/realdecode. Because the callers are more likely to do full_decode/real_decode.
Only in splitlock emulation use "FALSE" to do try_decode.




{
struct instr_emul_ctxt *emul_ctxt;
uint32_t csar;
@@ -2361,9 +2361,11 @@ int32_t decode_instruction(struct acrn_vcpu
*vcpu)
retval = local_decode_instruction(cpu_mode,
seg_desc_def32(csar),
&emul_ctxt->vie);

if (retval != 0) {
- pr_err("decode instruction failed @ 0x%016lx:",
vcpu_get_rip(vcpu));
- vcpu_inject_ud(vcpu);
- retval = -EFAULT;
+ if (!try_decode) {
+ pr_err("decode instruction failed @ 0x%016lx:",
vcpu_get_rip(vcpu));
+ vcpu_inject_ud(vcpu);
+ retval = -EFAULT;
+ }
} else {
/*
* We do operand check in instruction decode phase
and diff --git
a/hypervisor/arch/x86/guest/splitlock.c
b/hypervisor/arch/x86/guest/splitlock.c
index 09bcace47..bd3b67d1b 100644
--- a/hypervisor/arch/x86/guest/splitlock.c
+++ b/hypervisor/arch/x86/guest/splitlock.c
@@ -111,7 +111,7 @@ int32_t emulate_splitlock(struct acrn_vcpu *vcpu,
uint32_t exception_vector, boo
/* Skip the #AC, we have emulated it.
*/
*queue_exception = false;
} else {
- status = decode_instruction(vcpu);
+ status = decode_instruction(vcpu, true);
if (status >= 0) {
/*
* If this is the xchg, then
emulate it, otherwise, @@ -149,10
+149,12 @@ int32_t emulate_splitlock(struct acrn_vcpu *vcpu, uint32_t
exception_vector, boo
} else {
if (status == -EFAULT) {
pr_info("page fault
happen during decode_instruction");
- status = 0;
/* For this case, Inject
#PF, not to queue #AC */
*queue_exception =
false;
}
+
+ /* status == -1, undefined
instruction, inject #AC
back */
You implicitly assume the "status" got from decode_instruction can be only -1 or
-EFAULT. Is this the case?
Yes, decode_instruction(full_decode) return -EFALT for fail. We hope when decode_instruction(full_decode == false), do not inject #UD for
unrecognized instructions, and we can also keep return = -1 if skip inject #UD.
decode_instruction(full_decode == false) can still inject exception for other failure, and return -EFAULT.

So status = -1 is useful to be a condition case: decode fail, and nothing is injected. Will add detail comments.

Change the comments. Otherwise it implicitly means we came here only when
status = -1.

+ status = 0;
}
}
}
diff --git a/hypervisor/arch/x86/guest/vlapic.c
b/hypervisor/arch/x86/guest/vlapic.c
index f856a2acd..4d0e6aa51 100644
--- a/hypervisor/arch/x86/guest/vlapic.c
+++ b/hypervisor/arch/x86/guest/vlapic.c
@@ -2404,7 +2404,7 @@ int32_t apic_access_vmexit_handler(struct
acrn_vcpu *vcpu)
* 3 = linear access (read or write) during event delivery
*/
if (((access_type == TYPE_LINEAR_APIC_INST_READ) || (access_type ==
TYPE_LINEAR_APIC_INST_WRITE)) &&
- (decode_instruction(vcpu) >= 0)) {
+ (decode_instruction(vcpu, false) >= 0)) {
vlapic = vcpu_vlapic(vcpu);
offset = (uint32_t)apic_access_offset(qual);
mmio = &vcpu->req.reqs.mmio;
diff --git a/hypervisor/arch/x86/guest/vmx_io.c
b/hypervisor/arch/x86/guest/vmx_io.c
index abaa88927..50aaf7eac 100644
--- a/hypervisor/arch/x86/guest/vmx_io.c
+++ b/hypervisor/arch/x86/guest/vmx_io.c
@@ -150,7 +150,7 @@ int32_t ept_violation_vmexit_handler(struct
acrn_vcpu *vcpu)
*/
mmio_req->address = gpa;

- ret = decode_instruction(vcpu);
+ ret = decode_instruction(vcpu, false);
if (ret > 0) {
mmio_req->size = (uint64_t)ret;
/*
diff --git a/hypervisor/include/arch/x86/asm/guest/instr_emul.h
b/hypervisor/include/arch/x86/asm/guest/instr_emul.h
index 6969c0412..7a7e32ec0 100644
--- a/hypervisor/include/arch/x86/asm/guest/instr_emul.h
+++ b/hypervisor/include/arch/x86/asm/guest/instr_emul.h
@@ -92,7 +92,7 @@ struct instr_emul_ctxt { };

int32_t emulate_instruction(struct acrn_vcpu *vcpu); -int32_t
decode_instruction(struct acrn_vcpu *vcpu);
+int32_t decode_instruction(struct acrn_vcpu *vcpu, bool try_decode);
bool is_current_opcode_xchg(struct acrn_vcpu *vcpu);

#endif
--
2.17.1