[PATCH v3 3/9] arm64: KVM: add trap handlers for AArch64 debug registers
Marc Zyngier
marc.zyngier at arm.com
Wed Jul 9 04:09:29 PDT 2014
On Wed, Jul 09 2014 at 10:38:13 am BST, Christoffer Dall <christoffer.dall at linaro.org> wrote:
> On Fri, Jun 20, 2014 at 02:00:01PM +0100, Marc Zyngier wrote:
>> Add handlers for all the AArch64 debug registers that are accessible
>> from EL0 or EL1. The trapping code keeps track of the state of the
>> debug registers, allowing for the switch code to implement a lazy
>> switching strategy.
>>
>> Reviewed-by: Anup Patel <anup.patel at linaro.org>
>> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
>> ---
>> arch/arm64/include/asm/kvm_asm.h | 28 ++++++--
>> arch/arm64/include/asm/kvm_host.h | 3 +
>> arch/arm64/kvm/sys_regs.c | 137 +++++++++++++++++++++++++++++++++++++-
>> 3 files changed, 159 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
>> index 9fcd54b..e6b159a 100644
>> --- a/arch/arm64/include/asm/kvm_asm.h
>> +++ b/arch/arm64/include/asm/kvm_asm.h
>> @@ -43,14 +43,25 @@
>> #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
>> #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
>> #define PAR_EL1 21 /* Physical Address Register */
>> +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
>> +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
>> +#define DBGBCR15_EL1 38
>> +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
>> +#define DBGBVR15_EL1 54
>> +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
>> +#define DBGWCR15_EL1 70
>> +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
>> +#define DBGWVR15_EL1 86
>> +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
>> +
>> /* 32bit specific registers. Keep them at the end of the range */
>> -#define DACR32_EL2 22 /* Domain Access Control Register */
>> -#define IFSR32_EL2 23 /* Instruction Fault Status Register */
>> -#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */
>> -#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */
>> -#define TEECR32_EL1 26 /* ThumbEE Configuration Register */
>> -#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */
>> -#define NR_SYS_REGS 28
>> +#define DACR32_EL2 88 /* Domain Access Control Register */
>> +#define IFSR32_EL2 89 /* Instruction Fault Status Register */
>> +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */
>> +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */
>> +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */
>> +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */
>> +#define NR_SYS_REGS 94
>>
>> /* 32bit mapping */
>> #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
>> @@ -87,6 +98,9 @@
>> #define ARM_EXCEPTION_IRQ 0
>> #define ARM_EXCEPTION_TRAP 1
>>
>> +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
>> +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
>> +
>> #ifndef __ASSEMBLY__
>> struct kvm;
>> struct kvm_vcpu;
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index 92242ce..79573c86 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -101,6 +101,9 @@ struct kvm_vcpu_arch {
>> /* Exception Information */
>> struct kvm_vcpu_fault_info fault;
>>
>> + /* Debug state */
>> + u64 debug_flags;
>> +
>> /* Pointer to host CPU context */
>> kvm_cpu_context_t *host_cpu_context;
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 4abd84e..808e3b2 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -30,6 +30,7 @@
>> #include <asm/kvm_mmu.h>
>> #include <asm/cacheflush.h>
>> #include <asm/cputype.h>
>> +#include <asm/debug-monitors.h>
>> #include <trace/events/kvm.h>
>>
>> #include "sys_regs.h"
>> @@ -173,6 +174,60 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>> return read_zero(vcpu, p);
>> }
>>
>> +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
>> + const struct sys_reg_params *p,
>> + const struct sys_reg_desc *r)
>> +{
>> + if (p->is_write) {
>> + return ignore_write(vcpu, p);
>> + } else {
>> + *vcpu_reg(vcpu, p->Rt) = (1 << 3);
>> + return true;
>> + }
>> +}
>> +
>> +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
>> + const struct sys_reg_params *p,
>> + const struct sys_reg_desc *r)
>> +{
>> + if (p->is_write) {
>> + return ignore_write(vcpu, p);
>> + } else {
>> + u32 val;
>> + asm volatile("mrs %0, dbgauthstatus_el1" : "=r" (val));
>> + *vcpu_reg(vcpu, p->Rt) = val;
>> + return true;
>> + }
>> +}
>> +
>> +/*
>> + * We want to avoid world-switching all the DBG registers all the
>> + * time. For this, we use a DIRTY but, indicating the guest has
>
> a DIRTY but? (at least there's only one t in there).
The whole debug architecture makes me feel very dirty.
>> + * modified the debug registers, and only restore the registers once,
>> + * disabling traps.
>
> I don't think I understand the "only restore the registers once" bit
> here. I know I'm being incredibly stupid, but I forgot since the last
> review round how this actually works; when we return from the guest and
> the guest has somehow enabled certain DBG functionality, then we set the dirty
> flag, which means we should stop trapping and context switch all the
> registers on world-switches, but if we see when returning from the guest
> that the guest doesn't appear to be using the registers we enable
> trapping and stop world-switching, right?
Almost. We always decide on the trapping when entering the guest:
- If the dirty bit is set (because we're coming back from trapping),
disable the traps, restore the registers
- If debug is actively in use (DBG_MDSCR_KDE or DBG_MDSCR_MDE set),
disable the traps, restore the registers
- Otherwise, enable the traps
When exiting the guest: If the dirty bit is set, save the registers and
clear the dirty bit.
> Do we clearly define which state triggers the world-switching and why
> that's a good rationale? (sorry, the debug architecture is not my
> favorite part of the ARM ARM).
I thing the above comment describes the state precisely. My rational is:
- If we've touched any debug register, it is likely that we're going to
touch more of them. It then makes sense to disable the traps and start
doing the save/restore dance
- If debug is active (DBG_MDSCR_KDE or DBG_MDSCR_MDE set), it is then
mandatory to save/restore the registers, as the guest depends on them.
Does this make the process clearer? If so, I can add it to the comment.
>> + *
>> + * The best thing to do would be to trap MDSCR_EL1 independently, test
>> + * if DBG_MDSCR_KDE or DBG_MDSCR_MDE is getting set, and only set the
>> + * DIRTY bit in that case.
>> + *
>> + * Unfortunately, "old" Linux kernels tend to hit MDSCR_EL1 like a
>> + * woodpecker on a tree, and it is better to disable trapping as soon
>> + * as possible in this case. Some day, make this a tuneable...
>> + */
>> +static bool trap_debug_regs(struct kvm_vcpu *vcpu,
>> + const struct sys_reg_params *p,
>> + const struct sys_reg_desc *r)
>> +{
>> + if (p->is_write) {
>> + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
>> + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
>> + } else {
>> + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
>> + }
>> +
>> + return true;
>> +}
>> +
>> static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>> {
>> u64 amair;
>> @@ -189,6 +244,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>> vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
>> }
>>
>> +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go */
>> +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
>> + /* DBGBVRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
>> + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \
>> + /* DBGBCRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
>> + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \
>> + /* DBGWVRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
>> + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }, \
>> + /* DBGWCRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
>> + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }
>> +
>> /*
>> * Architected system registers.
>> * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
>> @@ -201,8 +271,12 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>> * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
>> * all PM registers, which doesn't crash the guest kernel at least.
>> *
>> - * Same goes for the whole debug infrastructure, which probably breaks
>> - * some guest functionnality. This should be fixed.
>> + * Debug handling: We do trap most, if not all debug related system
>> + * registers. The implementation is good enough to ensure that a guest
>> + * can use these with minimal performance degradation. The drawback is
>> + * that we don't implement any of the external debug, none of the
>> + * OSlock protocol. This should be revisited if we ever encounter a
>> + * more demanding guest...
>> */
>> static const struct sys_reg_desc sys_reg_descs[] = {
>> /* DC ISW */
>> @@ -215,12 +289,71 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>> { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
>> access_dcsw },
>>
>> + DBG_BCR_BVR_WCR_WVR_EL1(0),
>> + DBG_BCR_BVR_WCR_WVR_EL1(1),
>> + /* MDCCINT_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
>> + trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
>> + /* MDSCR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
>> + trap_debug_regs, reset_val, MDSCR_EL1, 0 },
>> + DBG_BCR_BVR_WCR_WVR_EL1(2),
>> + DBG_BCR_BVR_WCR_WVR_EL1(3),
>> + DBG_BCR_BVR_WCR_WVR_EL1(4),
>> + DBG_BCR_BVR_WCR_WVR_EL1(5),
>> + DBG_BCR_BVR_WCR_WVR_EL1(6),
>> + DBG_BCR_BVR_WCR_WVR_EL1(7),
>> + DBG_BCR_BVR_WCR_WVR_EL1(8),
>> + DBG_BCR_BVR_WCR_WVR_EL1(9),
>> + DBG_BCR_BVR_WCR_WVR_EL1(10),
>> + DBG_BCR_BVR_WCR_WVR_EL1(11),
>> + DBG_BCR_BVR_WCR_WVR_EL1(12),
>> + DBG_BCR_BVR_WCR_WVR_EL1(13),
>> + DBG_BCR_BVR_WCR_WVR_EL1(14),
>> + DBG_BCR_BVR_WCR_WVR_EL1(15),
>> +
>> + /* MDRAR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
>> + trap_raz_wi },
>> + /* OSLAR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
>> + trap_raz_wi },
>> + /* OSLSR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
>> + trap_oslsr_el1 },
>> + /* OSDLR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
>> + trap_raz_wi },
>> + /* DBGPRCR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
>> + trap_raz_wi },
>> + /* DBGCLAIMSET_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
>> + trap_raz_wi },
>> + /* DBGCLAIMCLR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
>> + trap_raz_wi },
>> + /* DBGAUTHSTATUS_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
>> + trap_dbgauthstatus_el1 },
>> +
>> /* TEECR32_EL1 */
>> { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
>> NULL, reset_val, TEECR32_EL1, 0 },
>> /* TEEHBR32_EL1 */
>> { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
>> NULL, reset_val, TEEHBR32_EL1, 0 },
>> +
>> + /* MDCCSR_EL1 */
>> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
>> + trap_raz_wi },
>> + /* DBGDTR_EL0 */
>> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
>> + trap_raz_wi },
>> + /* DBGDTR[TR]X_EL0 */
>> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
>> + trap_raz_wi },
>> +
>> /* DBGVCR32_EL2 */
>> { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
>> NULL, reset_val, DBGVCR32_EL2, 0 },
>> --
>> 1.8.3.4
>>
>
> Besides the commenting stuff above:
>
> Reviewed-by: Christoffer Dall <christoffer.dall at linaro.org>
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
More information about the linux-arm-kernel
mailing list