[PATCH v2 3/9] arm64: KVM: add trap handlers for AArch64 debug registers
Marc Zyngier
marc.zyngier at arm.com
Wed May 28 03:27:23 PDT 2014
On 25/05/14 16:34, Christoffer Dall wrote:
> On Tue, May 20, 2014 at 05:55:39PM +0100, Marc Zyngier wrote:
>> Add handlers for all the AArch64 debug registers that are accessible
>> from EL0 or EL1. The trapping code keeps track of the state of the
>> debug registers, allowing for the switch code to implement a lazy
>> switching strategy.
>>
>> Reviewed-by: Anup Patel <anup.patel at linaro.org>
>> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
>> ---
>> arch/arm64/include/asm/kvm_asm.h | 28 ++++++--
>> arch/arm64/include/asm/kvm_host.h | 3 +
>> arch/arm64/kvm/sys_regs.c | 130 +++++++++++++++++++++++++++++++++++++-
>> 3 files changed, 151 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
>> index 9fcd54b..e6b159a 100644
>> --- a/arch/arm64/include/asm/kvm_asm.h
>> +++ b/arch/arm64/include/asm/kvm_asm.h
>> @@ -43,14 +43,25 @@
>> #define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */
>> #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */
>> #define PAR_EL1 21 /* Physical Address Register */
>> +#define MDSCR_EL1 22 /* Monitor Debug System Control Register */
>> +#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */
>> +#define DBGBCR15_EL1 38
>> +#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */
>> +#define DBGBVR15_EL1 54
>> +#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */
>> +#define DBGWCR15_EL1 70
>> +#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */
>> +#define DBGWVR15_EL1 86
>> +#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */
>> +
>> /* 32bit specific registers. Keep them at the end of the range */
>> -#define DACR32_EL2 22 /* Domain Access Control Register */
>> -#define IFSR32_EL2 23 /* Instruction Fault Status Register */
>> -#define FPEXC32_EL2 24 /* Floating-Point Exception Control Register */
>> -#define DBGVCR32_EL2 25 /* Debug Vector Catch Register */
>> -#define TEECR32_EL1 26 /* ThumbEE Configuration Register */
>> -#define TEEHBR32_EL1 27 /* ThumbEE Handler Base Register */
>> -#define NR_SYS_REGS 28
>> +#define DACR32_EL2 88 /* Domain Access Control Register */
>> +#define IFSR32_EL2 89 /* Instruction Fault Status Register */
>> +#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */
>> +#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */
>> +#define TEECR32_EL1 92 /* ThumbEE Configuration Register */
>> +#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */
>> +#define NR_SYS_REGS 94
>>
>> /* 32bit mapping */
>> #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */
>> @@ -87,6 +98,9 @@
>> #define ARM_EXCEPTION_IRQ 0
>> #define ARM_EXCEPTION_TRAP 1
>>
>> +#define KVM_ARM64_DEBUG_DIRTY_SHIFT 0
>> +#define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
>> +
>> #ifndef __ASSEMBLY__
>> struct kvm;
>> struct kvm_vcpu;
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index 0a1d697..4737961 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -101,6 +101,9 @@ struct kvm_vcpu_arch {
>> /* Exception Information */
>> struct kvm_vcpu_fault_info fault;
>>
>> + /* Debug state */
>> + u64 debug_flags;
>> +
>> /* Pointer to host CPU context */
>> kvm_cpu_context_t *host_cpu_context;
>>
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index c3d28f1..d46a965 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -30,6 +30,7 @@
>> #include <asm/kvm_mmu.h>
>> #include <asm/cacheflush.h>
>> #include <asm/cputype.h>
>> +#include <asm/debug-monitors.h>
>> #include <trace/events/kvm.h>
>>
>> #include "sys_regs.h"
>> @@ -173,6 +174,58 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>> return read_zero(vcpu, p);
>> }
>>
>> +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
>> + const struct sys_reg_params *p,
>> + const struct sys_reg_desc *r)
>> +{
>> + if (p->is_write) {
>> + return ignore_write(vcpu, p);
>> + } else {
>> + *vcpu_reg(vcpu, p->Rt) = (1 << 3);
>> + return true;
>> + }
>> +}
>> +
>> +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
>> + const struct sys_reg_params *p,
>> + const struct sys_reg_desc *r)
>> +{
>> + if (p->is_write) {
>> + return ignore_write(vcpu, p);
>> + } else {
>> + *vcpu_reg(vcpu, p->Rt) = 0x2222; /* Implemented and disabled */
>
> is this always safe? What happens when you stop trapping accesses to
> this register and the hardware tells you something different?
>
> Are we assuming that this is always the case since otherwise none of
> this works, or?
No, that's probably a leftover from a previous implementation that
didn't disable traps. I can make it return the actual value on the host,
but we may see it change with migration anyway.
>> + return true;
>> + }
>> +}
>> +
>> +/*
>> + * Trap handler for DBG[BW][CV]Rn_EL1 and MDSCR_EL1. We track the
>> + * "dirtiness" of the registers.
>> + */
>> +static bool trap_debug_regs(struct kvm_vcpu *vcpu,
>> + const struct sys_reg_params *p,
>> + const struct sys_reg_desc *r)
>> +{
>> + /*
>> + * The best thing to do would be to trap MDSCR_EL1
>> + * independently, test if DBG_MDSCR_KDE or DBG_MDSCR_MDE is
>> + * getting set, and only set the DIRTY bit in that case.
>
> this comment is really hard to understand in this patch without any
> explanation of what the dirty flag does. Readers new to this code may
> be in the same situation. Perhaps add a comment on the dirty bit (what
> does this imply?) or explain the rationale here; IOW, we want to avoid
> world-switching all the DBG registers all the time, blah blah blah...
Sure.
>> + *
>> + * Unfortunately, "old" Linux kernels tend to hit MDSCR_EL1
>> + * like a woodpecker on a tree, and it is better to disable
>> + * trapping as soon as possible in this case. Some day, make
>> + * this a tuneable...
>> + */
>> + if (p->is_write) {
>> + vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
>> + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
>> + } else {
>> + *vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
>> + }
>> +
>> + return true;
>> +}
>> +
>> static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>> {
>> u64 amair;
>> @@ -189,6 +242,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>> vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
>> }
>>
>> +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go*/
>> +#define DBG_BCR_BVR_WCR_WVR_EL1(n) \
>> + /* DBGBVRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100), \
>> + trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 }, \
>
> Shouldn't the reg field here be DBGBVR0_EL1?
>
>
>> + /* DBGBCRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101), \
>> + trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 }, \
>
> Shouldn't the reg field here be DBGBCR0_EL1?
>
>> + /* DBGWVRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110), \
>> + trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 }, \
>
> and DBGWVR0_EL1 here?
>
>> + /* DBGWCRn_EL1 */ \
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111), \
>> + trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }
>
> and DBGWCR0_EL1 here?
Blah. Nice catch!
>> +
>> /*
>> * Architected system registers.
>> * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
>> @@ -200,9 +268,6 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>> * Therefore we tell the guest we have 0 counters. Unfortunately, we
>> * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
>> * all PM registers, which doesn't crash the guest kernel at least.
>> - *
>> - * Same goes for the whole debug infrastructure, which probably breaks
>> - * some guest functionnality. This should be fixed.
>> */
>> static const struct sys_reg_desc sys_reg_descs[] = {
>> /* DC ISW */
>> @@ -215,12 +280,71 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>> { Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
>> access_dcsw },
>>
>> + DBG_BCR_BVR_WCR_WVR_EL1(0),
>> + DBG_BCR_BVR_WCR_WVR_EL1(1),
>> + /* MDCCINT_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
>> + trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
>> + /* MDSCR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
>> + trap_debug_regs, reset_val, MDSCR_EL1, 0 },
>> + DBG_BCR_BVR_WCR_WVR_EL1(2),
>> + DBG_BCR_BVR_WCR_WVR_EL1(3),
>> + DBG_BCR_BVR_WCR_WVR_EL1(4),
>> + DBG_BCR_BVR_WCR_WVR_EL1(5),
>> + DBG_BCR_BVR_WCR_WVR_EL1(6),
>> + DBG_BCR_BVR_WCR_WVR_EL1(7),
>> + DBG_BCR_BVR_WCR_WVR_EL1(8),
>> + DBG_BCR_BVR_WCR_WVR_EL1(9),
>> + DBG_BCR_BVR_WCR_WVR_EL1(10),
>> + DBG_BCR_BVR_WCR_WVR_EL1(11),
>> + DBG_BCR_BVR_WCR_WVR_EL1(12),
>> + DBG_BCR_BVR_WCR_WVR_EL1(13),
>> + DBG_BCR_BVR_WCR_WVR_EL1(14),
>> + DBG_BCR_BVR_WCR_WVR_EL1(15),
>> +
>> + /* MDRAR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
>> + trap_raz_wi },
>> + /* OSLAR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
>> + trap_raz_wi },
>
> so as long as you're trapping, if the guest writes to OSLK[1] and sets
> the OS lock then it won't actually lock it, because when you read it
> back from OSLSR_EL1 it will read as unlocked? Is that in line with the
> architecture?
The OS lock can be cleared by external debug at any time, so I
imagine that would be a valid implementation.
>> + /* OSLSR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
>> + trap_oslsr_el1 },
>> + /* OSDLR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
>> + trap_raz_wi },
>> + /* DBGPRCR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
>> + trap_raz_wi },
>> + /* DBGCLAIMSET_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
>> + trap_raz_wi },
>> + /* DBGCLAIMCLR_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
>> + trap_raz_wi },
>> + /* DBGAUTHSTATUS_EL1 */
>> + { Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
>> + trap_dbgauthstatus_el1 },
>> +
>> /* TEECR32_EL1 */
>> { Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
>> NULL, reset_val, TEECR32_EL1, 0 },
>> /* TEEHBR32_EL1 */
>> { Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
>> NULL, reset_val, TEEHBR32_EL1, 0 },
>> +
>> + /* MDCCSR_EL1 */
>> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
>> + trap_raz_wi },
>> + /* DBGDTR_EL0 */
>> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
>> + trap_raz_wi },
>> + /* DBGDTR[TR]X_EL0 */
>> + { Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
>> + trap_raz_wi },
>> +
>> /* DBGVCR32_EL2 */
>> { Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
>> NULL, reset_val, DBGVCR32_EL2, 0 },
>> --
>> 1.8.3.4
>>
>
Thanks,
M.
--
Jazz is not dead. It just smells funny...
More information about the linux-arm-kernel mailing list