[PATCH v2 3/9] arm64: KVM: add trap handlers for AArch64 debug registers

Sun May 25 08:34:50 PDT 2014

On Tue, May 20, 2014 at 05:55:39PM +0100, Marc Zyngier wrote:
> Add handlers for all the AArch64 debug registers that are accessible
> from EL0 or EL1. The trapping code keeps track of the state of the
> debug registers, allowing for the switch code to implement a lazy
> switching strategy.
> 
> Reviewed-by: Anup Patel <anup.patel at linaro.org>
> Signed-off-by: Marc Zyngier <marc.zyngier at arm.com>
> ---
>  arch/arm64/include/asm/kvm_asm.h  |  28 ++++++--
>  arch/arm64/include/asm/kvm_host.h |   3 +
>  arch/arm64/kvm/sys_regs.c         | 130 +++++++++++++++++++++++++++++++++++++-
>  3 files changed, 151 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
> index 9fcd54b..e6b159a 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -43,14 +43,25 @@
>  #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
>  #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
>  #define	PAR_EL1		21	/* Physical Address Register */
> +#define MDSCR_EL1	22	/* Monitor Debug System Control Register */
> +#define DBGBCR0_EL1	23	/* Debug Breakpoint Control Registers (0-15) */
> +#define DBGBCR15_EL1	38
> +#define DBGBVR0_EL1	39	/* Debug Breakpoint Value Registers (0-15) */
> +#define DBGBVR15_EL1	54
> +#define DBGWCR0_EL1	55	/* Debug Watchpoint Control Registers (0-15) */
> +#define DBGWCR15_EL1	70
> +#define DBGWVR0_EL1	71	/* Debug Watchpoint Value Registers (0-15) */
> +#define DBGWVR15_EL1	86
> +#define MDCCINT_EL1	87	/* Monitor Debug Comms Channel Interrupt Enable Reg */
> +
>  /* 32bit specific registers. Keep them at the end of the range */
> -#define	DACR32_EL2	22	/* Domain Access Control Register */
> -#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
> -#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
> -#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
> -#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
> -#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
> -#define	NR_SYS_REGS	28
> +#define	DACR32_EL2	88	/* Domain Access Control Register */
> +#define	IFSR32_EL2	89	/* Instruction Fault Status Register */
> +#define	FPEXC32_EL2	90	/* Floating-Point Exception Control Register */
> +#define	DBGVCR32_EL2	91	/* Debug Vector Catch Register */
> +#define	TEECR32_EL1	92	/* ThumbEE Configuration Register */
> +#define	TEEHBR32_EL1	93	/* ThumbEE Handler Base Register */
> +#define	NR_SYS_REGS	94
>  
>  /* 32bit mapping */
>  #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
> @@ -87,6 +98,9 @@
>  #define ARM_EXCEPTION_IRQ	  0
>  #define ARM_EXCEPTION_TRAP	  1
>  
> +#define KVM_ARM64_DEBUG_DIRTY_SHIFT	0
> +#define KVM_ARM64_DEBUG_DIRTY		(1 << KVM_ARM64_DEBUG_DIRTY_SHIFT)
> +
>  #ifndef __ASSEMBLY__
>  struct kvm;
>  struct kvm_vcpu;
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 0a1d697..4737961 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -101,6 +101,9 @@ struct kvm_vcpu_arch {
>  	/* Exception Information */
>  	struct kvm_vcpu_fault_info fault;
>  
> +	/* Debug state */
> +	u64 debug_flags;
> +
>  	/* Pointer to host CPU context */
>  	kvm_cpu_context_t *host_cpu_context;
>  
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index c3d28f1..d46a965 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -30,6 +30,7 @@
>  #include <asm/kvm_mmu.h>
>  #include <asm/cacheflush.h>
>  #include <asm/cputype.h>
> +#include <asm/debug-monitors.h>
>  #include <trace/events/kvm.h>
>  
>  #include "sys_regs.h"
> @@ -173,6 +174,58 @@ static bool trap_raz_wi(struct kvm_vcpu *vcpu,
>  		return read_zero(vcpu, p);
>  }
>  
> +static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
> +			   const struct sys_reg_params *p,
> +			   const struct sys_reg_desc *r)
> +{
> +	if (p->is_write) {
> +		return ignore_write(vcpu, p);
> +	} else {
> +		*vcpu_reg(vcpu, p->Rt) = (1 << 3);
> +		return true;
> +	}
> +}
> +
> +static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
> +				   const struct sys_reg_params *p,
> +				   const struct sys_reg_desc *r)
> +{
> +	if (p->is_write) {
> +		return ignore_write(vcpu, p);
> +	} else {
> +		*vcpu_reg(vcpu, p->Rt) = 0x2222; /* Implemented and disabled */

Is this always safe?  What happens when you stop trapping accesses to
this register and the hardware tells you something different?

Or are we assuming that this is always the case, since otherwise none
of this would work?

> +		return true;
> +	}
> +}
> +
> +/*
> + * Trap handler for DBG[BW][CV]Rn_EL1 and MDSCR_EL1. We track the
> + * "dirtiness" of the registers.
> + */
> +static bool trap_debug_regs(struct kvm_vcpu *vcpu,
> +			    const struct sys_reg_params *p,
> +			    const struct sys_reg_desc *r)
> +{
> +	/*
> +	 * The best thing to do would be to trap MDSCR_EL1
> +	 * independently, test if DBG_MDSCR_KDE or DBG_MDSCR_MDE is
> +	 * getting set, and only set the DIRTY bit in that case.

This comment is really hard to understand in this patch without any
explanation of what the dirty flag does.  Readers new to this code may
be in the same situation.  Perhaps add a comment on the dirty bit (what
does it imply?) or explain the rationale here, i.e. that we want to
avoid world-switching all the DBG registers all the time, and so on.

> +	 *
> +	 * Unfortunately, "old" Linux kernels tend to hit MDSCR_EL1
> +	 * like a woodpecker on a tree, and it is better to disable
> +	 * trapping as soon as possible in this case. Some day, make
> +	 * this a tuneable...
> +	 */
> +	if (p->is_write) {
> +		vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
> +		vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
> +	} else {
> +		*vcpu_reg(vcpu, p->Rt) = vcpu_sys_reg(vcpu, r->reg);
> +	}
> +
> +	return true;
> +}
> +
>  static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>  {
>  	u64 amair;
> @@ -189,6 +242,21 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>  	vcpu_sys_reg(vcpu, MPIDR_EL1) = (1UL << 31) | (vcpu->vcpu_id & 0xff);
>  }
>  
> +/* Silly macro to expand the DBG{BCR,BVR,WVR,WCR}n_EL1 registers in one go*/
> +#define DBG_BCR_BVR_WCR_WVR_EL1(n)					\
> +	/* DBGBVRn_EL1 */						\
> +	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b100),	\
> +	  trap_debug_regs, reset_val, (DBGBCR0_EL1 + (n)), 0 },		\

Shouldn't the reg field here be DBGBVR0_EL1?

> +	/* DBGBCRn_EL1 */						\
> +	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b101),	\
> +	  trap_debug_regs, reset_val, (DBGBVR0_EL1 + (n)), 0 },		\

Shouldn't the reg field here be DBGBCR0_EL1?

> +	/* DBGWVRn_EL1 */						\
> +	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b110),	\
> +	  trap_debug_regs, reset_val, (DBGWCR0_EL1 + (n)), 0 },		\

and DBGWVR0_EL1 here?

> +	/* DBGWCRn_EL1 */						\
> +	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm((n)), Op2(0b111),	\
> +	  trap_debug_regs, reset_val, (DBGWVR0_EL1 + (n)), 0 }

and DBGWCR0_EL1 here?

> +
>  /*
>   * Architected system registers.
>   * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2
> @@ -200,9 +268,6 @@ static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
>   * Therefore we tell the guest we have 0 counters.  Unfortunately, we
>   * must always support PMCCNTR (the cycle counter): we just RAZ/WI for
>   * all PM registers, which doesn't crash the guest kernel at least.
> - *
> - * Same goes for the whole debug infrastructure, which probably breaks
> - * some guest functionnality. This should be fixed.
>   */
>  static const struct sys_reg_desc sys_reg_descs[] = {
>  	/* DC ISW */
> @@ -215,12 +280,71 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  	{ Op0(0b01), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b010),
>  	  access_dcsw },
>  
> +	DBG_BCR_BVR_WCR_WVR_EL1(0),
> +	DBG_BCR_BVR_WCR_WVR_EL1(1),
> +	/* MDCCINT_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b000),
> +	  trap_debug_regs, reset_val, MDCCINT_EL1, 0 },
> +	/* MDSCR_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0000), CRm(0b0010), Op2(0b010),
> +	  trap_debug_regs, reset_val, MDSCR_EL1, 0 },
> +	DBG_BCR_BVR_WCR_WVR_EL1(2),
> +	DBG_BCR_BVR_WCR_WVR_EL1(3),
> +	DBG_BCR_BVR_WCR_WVR_EL1(4),
> +	DBG_BCR_BVR_WCR_WVR_EL1(5),
> +	DBG_BCR_BVR_WCR_WVR_EL1(6),
> +	DBG_BCR_BVR_WCR_WVR_EL1(7),
> +	DBG_BCR_BVR_WCR_WVR_EL1(8),
> +	DBG_BCR_BVR_WCR_WVR_EL1(9),
> +	DBG_BCR_BVR_WCR_WVR_EL1(10),
> +	DBG_BCR_BVR_WCR_WVR_EL1(11),
> +	DBG_BCR_BVR_WCR_WVR_EL1(12),
> +	DBG_BCR_BVR_WCR_WVR_EL1(13),
> +	DBG_BCR_BVR_WCR_WVR_EL1(14),
> +	DBG_BCR_BVR_WCR_WVR_EL1(15),
> +
> +	/* MDRAR_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b000),
> +	  trap_raz_wi },
> +	/* OSLAR_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0000), Op2(0b100),
> +	  trap_raz_wi },

So as long as you're trapping, if the guest writes to OSLK[1] and sets
the OS lock, then the lock won't actually take effect, because when the
guest reads it back from OSLSR_EL1 it will read as unlocked?  Is that in
line with the architecture?

> +	/* OSLSR_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0001), Op2(0b100),
> +	  trap_oslsr_el1 },
> +	/* OSDLR_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0011), Op2(0b100),
> +	  trap_raz_wi },
> +	/* DBGPRCR_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0001), CRm(0b0100), Op2(0b100),
> +	  trap_raz_wi },
> +	/* DBGCLAIMSET_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1000), Op2(0b110),
> +	  trap_raz_wi },
> +	/* DBGCLAIMCLR_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1001), Op2(0b110),
> +	  trap_raz_wi },
> +	/* DBGAUTHSTATUS_EL1 */
> +	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
> +	  trap_dbgauthstatus_el1 },
> +
>  	/* TEECR32_EL1 */
>  	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
>  	  NULL, reset_val, TEECR32_EL1, 0 },
>  	/* TEEHBR32_EL1 */
>  	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
>  	  NULL, reset_val, TEEHBR32_EL1, 0 },
> +
> +	/* MDCCSR_EL1 */
> +	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
> +	  trap_raz_wi },
> +	/* DBGDTR_EL0 */
> +	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0100), Op2(0b000),
> +	  trap_raz_wi },
> +	/* DBGDTR[TR]X_EL0 */
> +	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0101), Op2(0b000),
> +	  trap_raz_wi },
> +
>  	/* DBGVCR32_EL2 */
>  	{ Op0(0b10), Op1(0b100), CRn(0b0000), CRm(0b0111), Op2(0b000),
>  	  NULL, reset_val, DBGVCR32_EL2, 0 },
> -- 
> 1.8.3.4
>