[PATCH 0/3] Add basic support for arm64

Raphaël Poggi poggi.raph at gmail.com
Mon May 23 01:33:29 PDT 2016


Hi Sascha,

2016-05-23 9:32 GMT+02:00 Sascha Hauer <s.hauer at pengutronix.de>:
> Hi Raphael,
>
> On Fri, May 20, 2016 at 03:11:06AM +0200, Raphael Poggi wrote:
>> Hello,
>>
>> This patch series introduces basic support for arm64.
>>
>> [PATCH 1/3] arch: add minimal aarch64 support :
>>         Introduce the new architecture by creating a new root directory.
>>         I chose this approach because it is simpler for now; maybe later we will merge this into arch/arm.
>
> I just applied this series to a temporary branch, did a cp -r
> arch/arm64/* arch/arm and committed everything, see the result below.
> This of course breaks arm32 support, but it nicely reveals all places
> that need fixup for arm64. How about we proceed like this: We continue
> to work on the patch below. We continuously find proper solutions for the
> different places that need fixup. For every item that is fixed properly
> we apply the resulting patch to mainline and rebase the big
> work-in-progress patch on top of it. This way the patch should
> continuously get smaller until we finally have working arm32 and arm64
> support in a single architecture.
> One of the first things we'll need are ARM32/ARM64 Kconfig options which we
> can use for the different lib/ functions to depend on (maybe create a
> lib32/ and a lib64/ directory?). This should already make the
> work-in-progress patch much smaller. What do you think?

Perfect, I am fine with this approach.


Thanks,
Raphaël


>
> Sascha
>
> ----------------------------8<------------------------------
>
> From 79e852820d19e3620bfe63b87161317e616546d5 Mon Sep 17 00:00:00 2001
> From: Sascha Hauer <s.hauer at pengutronix.de>
> Date: Mon, 23 May 2016 08:47:36 +0200
> Subject: [PATCH] wip
>
> Signed-off-by: Sascha Hauer <s.hauer at pengutronix.de>
> ---
>  arch/arm/Kconfig                           | 276 ++------------
>  arch/arm/Makefile                          | 190 +---------
>  arch/arm/boards/Makefile                   |   1 +
>  arch/arm/boards/virt/Kconfig               |   8 +
>  arch/arm/boards/virt/Makefile              |   1 +
>  arch/arm/boards/virt/env/bin/_update       |  36 ++
>  arch/arm/boards/virt/env/bin/boot          |  38 ++
>  arch/arm/boards/virt/env/bin/init          |  20 +
>  arch/arm/boards/virt/env/bin/update_kernel |   8 +
>  arch/arm/boards/virt/env/bin/update_root   |   8 +
>  arch/arm/boards/virt/env/config            |  38 ++
>  arch/arm/boards/virt/env/init/mtdparts-nor |  11 +
>  arch/arm/boards/virt/init.c                |  67 ++++
>  arch/arm/configs/virt_defconfig            |  53 +++
>  arch/arm/cpu/Kconfig                       | 102 +----
>  arch/arm/cpu/Makefile                      |  29 +-
>  arch/arm/cpu/cache-armv8.S                 | 168 +++++++++
>  arch/arm/cpu/cache.c                       |  69 +---
>  arch/arm/cpu/cpu.c                         |  44 +--
>  arch/arm/cpu/cpuinfo.c                     |  86 ++---
>  arch/arm/cpu/entry.c                       |   5 +-
>  arch/arm/cpu/exceptions.S                  | 313 ++++++----------
>  arch/arm/cpu/interrupts.c                  |  91 ++---
>  arch/arm/cpu/lowlevel.S                    |  89 ++---
>  arch/arm/cpu/mmu.c                         | 578 ++++++++---------------------
>  arch/arm/cpu/mmu.h                         | 155 ++++++++
>  arch/arm/cpu/start.c                       |  12 +-
>  arch/arm/cpu/uncompress.c                  |   2 +-
>  arch/arm/include/asm/barebox-arm.h         |   2 +-
>  arch/arm/include/asm/bitops.h              | 192 ++--------
>  arch/arm/include/asm/boarddata.h           |   5 +
>  arch/arm/include/asm/cache-l2x0.h          |   8 -
>  arch/arm/include/asm/cache.h               |   4 +-
>  arch/arm/include/asm/errata.h              |   9 -
>  arch/arm/include/asm/gic.h                 | 128 +++++++
>  arch/arm/include/asm/mmu.h                 |   6 +-
>  arch/arm/include/asm/pgtable.h             |   5 +-
>  arch/arm/include/asm/ptrace.h              | 111 +-----
>  arch/arm/include/asm/system.h              | 173 +++++----
>  arch/arm/include/asm/system_info.h         |  73 ++--
>  arch/arm/lib/Makefile                      |  13 -
>  arch/arm/lib/armlinux.c                    |   6 -
>  arch/arm/lib/barebox.lds.S                 |   5 +-
>  arch/arm/lib/bootm.c                       | 109 +++---
>  arch/arm/lib/copy_template.S               | 438 +++++++++-------------
>  arch/arm/lib/memcpy.S                      |  84 +++--
>  arch/arm/lib/memset.S                      | 305 +++++++++------
>  arch/arm/lib/runtime-offset.S              |  18 +-
>  arch/arm/mach-virt/Kconfig                 |  15 +
>  arch/arm/mach-virt/Makefile                |   3 +
>  arch/arm/mach-virt/devices.c               |  30 ++
>  arch/arm/mach-virt/include/mach/debug_ll.h |  24 ++
>  arch/arm/mach-virt/include/mach/devices.h  |  13 +
>  arch/arm/mach-virt/lowlevel.c              |  19 +
>  arch/arm/mach-virt/reset.c                 |  24 ++
>  55 files changed, 2008 insertions(+), 2312 deletions(-)
>  create mode 100644 arch/arm/boards/virt/Kconfig
>  create mode 100644 arch/arm/boards/virt/Makefile
>  create mode 100644 arch/arm/boards/virt/env/bin/_update
>  create mode 100644 arch/arm/boards/virt/env/bin/boot
>  create mode 100644 arch/arm/boards/virt/env/bin/init
>  create mode 100644 arch/arm/boards/virt/env/bin/update_kernel
>  create mode 100644 arch/arm/boards/virt/env/bin/update_root
>  create mode 100644 arch/arm/boards/virt/env/config
>  create mode 100644 arch/arm/boards/virt/env/init/mtdparts-nor
>  create mode 100644 arch/arm/boards/virt/init.c
>  create mode 100644 arch/arm/configs/virt_defconfig
>  create mode 100644 arch/arm/cpu/cache-armv8.S
>  create mode 100644 arch/arm/include/asm/boarddata.h
>  create mode 100644 arch/arm/include/asm/gic.h
>  create mode 100644 arch/arm/mach-virt/Kconfig
>  create mode 100644 arch/arm/mach-virt/Makefile
>  create mode 100644 arch/arm/mach-virt/devices.c
>  create mode 100644 arch/arm/mach-virt/include/mach/debug_ll.h
>  create mode 100644 arch/arm/mach-virt/include/mach/devices.h
>  create mode 100644 arch/arm/mach-virt/lowlevel.c
>  create mode 100644 arch/arm/mach-virt/reset.c
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 1fc887b..34085f6 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -22,15 +22,6 @@ config ARM_USE_COMPRESSED_DTB
>         select UNCOMPRESS
>         select LZO_DECOMPRESS
>
> -config ARCH_BCM283X
> -       bool
> -       select GPIOLIB
> -       select CLKDEV_LOOKUP
> -       select COMMON_CLK
> -       select CLOCKSOURCE_BCM283X
> -       select ARM_AMBA
> -       select HAS_DEBUG_LL
> -
>  menu "System Type"
>
>  config BUILTIN_DTB
> @@ -50,236 +41,18 @@ config BUILTIN_DTB_NAME
>  choice
>         prompt "ARM system type"
>
> -config ARCH_AT91
> -       bool "Atmel AT91"
> -       select GPIOLIB
> -       select CLKDEV_LOOKUP
> -       select HAS_DEBUG_LL
> -       select HAVE_MACH_ARM_HEAD
> -       select HAVE_CLK
> -       select PINCTRL_AT91
> -
> -config ARCH_BCM2835
> -       bool "Broadcom BCM2835 boards"
> -       select ARCH_BCM283X
> -       select CPU_ARM1176
> -
> -config ARCH_BCM2836
> -       bool "Broadcom BCM2836 boards"
> -       select ARCH_BCM283X
> -       select CPU_V7
> -
> -config ARCH_CLPS711X
> -       bool "Cirrus Logic EP711x/EP721x/EP731x"
> -       select CLKDEV_LOOKUP
> -       select CLOCKSOURCE_CLPS711X
> -       select COMMON_CLK
> -       select CPU_32v4T
> -       select GPIOLIB
> -       select MFD_SYSCON
> -
> -config ARCH_DAVINCI
> -       bool "TI Davinci"
> -       select CPU_ARM926T
> -       select HAS_DEBUG_LL
> -       select GPIOLIB
> -
> -config ARCH_DIGIC
> -       bool "Canon DIGIC-based cameras"
> -       select CPU_ARM946E
> -       select HAS_DEBUG_LL
> -       select CLOCKSOURCE_DIGIC
> -       select GPIOLIB
> -       help
> -         Support for Canon's digital cameras that use the DIGIC4 chip.
> -
> -config ARCH_EP93XX
> -       bool "Cirrus Logic EP93xx"
> -       select CPU_ARM920T
> -       select GENERIC_GPIO
> -
> -config ARCH_HIGHBANK
> -       bool "Calxeda Highbank"
> +config ARCH_VIRT
> +       bool "ARM QEMU virt boards"
>         select HAS_DEBUG_LL
> -       select HAS_POWEROFF
> -       select ARCH_HAS_L2X0
> -       select CPU_V7
> +       select CPU_V8
> +       select SYS_SUPPORTS_64BIT_KERNEL
>         select ARM_AMBA
> -       select AMBA_SP804
> -       select CLKDEV_LOOKUP
> -       select COMMON_CLK
> -       select GPIOLIB
> -
> -config ARCH_IMX
> -       bool "Freescale iMX-based"
> -       select GPIOLIB
> -       select COMMON_CLK
> -       select CLKDEV_LOOKUP
> -       select WATCHDOG_IMX_RESET_SOURCE
> -       select HAS_DEBUG_LL
> -
> -config ARCH_MVEBU
> -       bool "Marvell EBU platforms"
> -       select COMMON_CLK
> -       select COMMON_CLK_OF_PROVIDER
> -       select CLKDEV_LOOKUP
> -       select GPIOLIB
> -       select HAS_DEBUG_LL
> -       select HAVE_PBL_MULTI_IMAGES
> -       select HW_HAS_PCI
> -       select MVEBU_MBUS
> -       select OFTREE
> -       select OF_ADDRESS_PCI
> -       select PINCTRL
> -
> -config ARCH_MXS
> -       bool "Freescale i.MX23/28 (mxs) based"
> -       select GPIOLIB
> -       select GENERIC_GPIO
> -       select COMMON_CLK
> -       select CLKDEV_LOOKUP
> -       select HAS_DEBUG_LL
> -
> -config ARCH_NETX
> -       bool "Hilscher NetX based"
> -       select CPU_ARM926T
> -
> -config ARCH_NOMADIK
> -       bool "STMicroelectronics Nomadik"
> -       select CPU_ARM926T
> -       select CLOCKSOURCE_NOMADIK
> -       select HAVE_CLK
> -       help
> -         Support for the Nomadik platform by ST-Ericsson
> -
> -config ARCH_OMAP
> -       bool "TI OMAP"
> -       select HAS_DEBUG_LL
> -       select GPIOLIB
> -
> -config ARCH_PXA
> -       bool "Intel/Marvell PXA based"
> -       select GENERIC_GPIO
> -       select HAS_POWEROFF
> -
> -config ARCH_ROCKCHIP
> -       bool "Rockchip RX3xxx"
> -       select CPU_V7
> -       select ARM_SMP_TWD
> -       select COMMON_CLK
> -       select CLKDEV_LOOKUP
> -       select COMMON_CLK_OF_PROVIDER
> -       select GPIOLIB
> -       select PINCTRL
> -       select PINCTRL_ROCKCHIP
> -       select OFTREE
> -       select HAVE_PBL_MULTI_IMAGES
> -       select HAS_DEBUG_LL
> -       select ARCH_HAS_L2X0
> -
> -config ARCH_SOCFPGA
> -       bool "Altera SOCFPGA cyclone5"
> -       select HAS_DEBUG_LL
> -       select ARM_SMP_TWD
> -       select CPU_V7
> -       select COMMON_CLK
> -       select CLKDEV_LOOKUP
> -       select GPIOLIB
> -       select HAVE_PBL_MULTI_IMAGES
> -       select OFDEVICE if !ARCH_SOCFPGA_XLOAD
> -       select OFTREE if !ARCH_SOCFPGA_XLOAD
> -
> -config ARCH_S3C24xx
> -       bool "Samsung S3C2410, S3C2440"
> -       select ARCH_SAMSUNG
> -       select CPU_ARM920T
> -       select GENERIC_GPIO
> -
> -config ARCH_S5PCxx
> -       bool "Samsung S5PC110, S5PV210"
> -       select ARCH_SAMSUNG
> -       select CPU_V7
> -       select GENERIC_GPIO
> -
> -config ARCH_S3C64xx
> -       bool "Samsung S3C64xx"
> -       select ARCH_SAMSUNG
> -       select CPU_V6
> -       select GENERIC_GPIO
> -
> -config ARCH_VERSATILE
> -       bool "ARM Versatile boards (ARM926EJ-S)"
> -       select GPIOLIB
> -       select HAVE_CLK
> -       select HAS_DEBUG_LL
> -
> -config ARCH_VEXPRESS
> -       bool "ARM Vexpres boards"
> -       select HAS_DEBUG_LL
> -       select CPU_V7
> -       select ARM_AMBA
> -       select AMBA_SP804
> -       select CLKDEV_LOOKUP
> -       select COMMON_CLK
> -
> -config ARCH_TEGRA
> -       bool "NVIDIA Tegra"
> -       select CPU_V7
> -       select HAS_DEBUG_LL
> -       select HW_HAS_PCI
> -       select COMMON_CLK
> -       select COMMON_CLK_OF_PROVIDER
> -       select CLKDEV_LOOKUP
> -       select GPIOLIB
> -       select GPIO_TEGRA
> -       select HAVE_DEFAULT_ENVIRONMENT_NEW
> -       select HAVE_PBL_MULTI_IMAGES
> -       select OFDEVICE
> -       select OFTREE
> -       select RELOCATABLE
> -       select RESET_CONTROLLER
> -       select PINCTRL
> -
> -config ARCH_UEMD
> -       bool "RC Module UEMD Platform"
> -       select CPU_ARM1176
> -       select COMMON_CLK
> -       select COMMON_CLK_OF_PROVIDER
> -       select CLKDEV_LOOKUP
> -       select OFDEVICE
> -       select OFTREE
> -       select CLOCKSOURCE_UEMD
> -       select HAS_DEBUG_LL
> -
> -config ARCH_ZYNQ
> -       bool "Xilinx Zynq-based boards"
> -       select HAS_DEBUG_LL
> +       select HAVE_CONFIGURABLE_MEMORY_LAYOUT
>
>  endchoice
>
>  source arch/arm/cpu/Kconfig
> -source arch/arm/mach-at91/Kconfig
> -source arch/arm/mach-bcm283x/Kconfig
> -source arch/arm/mach-clps711x/Kconfig
> -source arch/arm/mach-davinci/Kconfig
> -source arch/arm/mach-digic/Kconfig
> -source arch/arm/mach-ep93xx/Kconfig
> -source arch/arm/mach-highbank/Kconfig
> -source arch/arm/mach-imx/Kconfig
> -source arch/arm/mach-mxs/Kconfig
> -source arch/arm/mach-mvebu/Kconfig
> -source arch/arm/mach-netx/Kconfig
> -source arch/arm/mach-nomadik/Kconfig
> -source arch/arm/mach-omap/Kconfig
> -source arch/arm/mach-pxa/Kconfig
> -source arch/arm/mach-rockchip/Kconfig
> -source arch/arm/mach-samsung/Kconfig
> -source arch/arm/mach-socfpga/Kconfig
> -source arch/arm/mach-versatile/Kconfig
> -source arch/arm/mach-vexpress/Kconfig
> -source arch/arm/mach-tegra/Kconfig
> -source arch/arm/mach-uemd/Kconfig
> -source arch/arm/mach-zynq/Kconfig
> +source arch/arm/mach-virt/Kconfig
>
>  config ARM_ASM_UNIFIED
>         bool
> @@ -292,20 +65,6 @@ config AEABI
>
>           To use this you need GCC version 4.0.0 or later.
>
> -config THUMB2_BAREBOX
> -       select ARM_ASM_UNIFIED
> -       select AEABI
> -       depends on !ARCH_TEGRA && !ARCH_AT91
> -       depends on CPU_V7 && !CPU_32v4T && !CPU_32v5 && !CPU_32v6
> -       bool "Compile barebox in thumb-2 mode (read help)"
> -       help
> -         This enables compilation of barebox in thumb-2 mode which generates
> -         ~25% smaller binaries. ARM assembly code needs some fixups to be able
> -         to work correctly in thumb-2 mode. the barebox core should have these
> -         fixups since most assembly code is derived from the Kernel. However,
> -         your board lowlevel init code may break in thumb-2 mode. You have been
> -         warned.
> -
>  config ARM_BOARD_APPEND_ATAG
>         bool "Let board specific code to add ATAGs to be passed to the kernel"
>         depends on ARM_LINUX
> @@ -315,6 +74,29 @@ config ARM_BOARD_APPEND_ATAG
>
>  endmenu
>
> +choice
> +       prompt "Barebox code model"
> +       help
> +         You should only select this option if you have a workload that
> +         actually benefits from 64-bit processing or if your machine has
> +         large memory. You will only be presented a single option in this
> +         menu if your system does not support both 32-bit and 64-bit modes.
> +
> +config 32BIT
> +       bool "32-bit barebox"
> +       depends on CPU_SUPPORTS_32BIT_KERNEL && SYS_SUPPORTS_32BIT_KERNEL
> +       help
> +         Select this option if you want to build a 32-bit barebox.
> +
> +config 64BIT
> +       bool "64-bit barebox"
> +       depends on CPU_SUPPORTS_64BIT_KERNEL && SYS_SUPPORTS_64BIT_KERNEL
> +       select ARCH_DMA_ADDR_T_64BIT
> +       help
> +         Select this option if you want to build a 64-bit barebox.
> +
> +endchoice
> +
>  menu "ARM specific settings"
>
>  config ARM_OPTIMZED_STRING_FUNCTIONS
> diff --git a/arch/arm/Makefile b/arch/arm/Makefile
> index 5ccdb83..ad250c4 100644
> --- a/arch/arm/Makefile
> +++ b/arch/arm/Makefile
> @@ -1,7 +1,6 @@
>
>  CPPFLAGS       += -D__ARM__ -fno-strict-aliasing
> -# Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb:
> -CPPFLAGS       +=$(call cc-option,-marm,)
> +CPPFLAGS       +=$(call cc-option,-maarch64,)
>
>  ifeq ($(CONFIG_CPU_BIG_ENDIAN),y)
>  CPPFLAGS       += -mbig-endian
> @@ -13,91 +12,27 @@ AS          += -EL
>  LD             += -EL
>  endif
>
> -# Unaligned access is not supported when MMU is disabled, so given how
> -# at least some of the code would be executed with MMU off, lets be
> -# conservative and instruct the compiler not to generate any unaligned
> -# accesses
> -CFLAGS += -mno-unaligned-access
> -
> -
>  # This selects which instruction set is used.
>  # Note that GCC does not numerically define an architecture version
>  # macro, but instead defines a whole series of macros which makes
>  # testing for a specific architecture or later rather impossible.
> -arch-$(CONFIG_CPU_32v7)                :=-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a)
> -arch-$(CONFIG_CPU_32v6)            :=-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6)
> -arch-$(CONFIG_CPU_32v5)                :=-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t)
> -arch-$(CONFIG_CPU_32v4T)       :=-D__LINUX_ARM_ARCH__=4 -march=armv4t
> -
> -# This selects how we optimise for the processor.
> -tune-$(CONFIG_CPU_ARM920T)     :=-mtune=arm9tdmi
> -tune-$(CONFIG_CPU_ARM926T)     :=-mtune=arm9tdmi
> -tune-$(CONFIG_CPU_XSCALE)      :=$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale
> +arch-$(CONFIG_CPU_64v8)                := -D__LINUX_ARM_ARCH__=8 $(call cc-option,-march=armv8-a)
>
> -ifeq ($(CONFIG_AEABI),y)
> -CFLAGS_ABI     :=-mabi=aapcs-linux -mno-thumb-interwork
> -else
> -CFLAGS_ABI     :=$(call cc-option,-mapcs-32,-mabi=apcs-gnu) $(call cc-option,-mno-thumb-interwork,)
> -endif
> +CFLAGS_ABI     :=-mabi=lp64
>
>  ifeq ($(CONFIG_ARM_UNWIND),y)
>  CFLAGS_ABI     +=-funwind-tables
>  endif
>
> -ifeq ($(CONFIG_THUMB2_BAREBOX),y)
> -AFLAGS_AUTOIT  :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it)
> -AFLAGS_NOWARN  :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W)
> -CFLAGS_THUMB2  :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN)
> -AFLAGS_THUMB2  :=$(CFLAGS_THUMB2) -Wa$(comma)-mthumb
> -endif
> +CPPFLAGS += $(CFLAGS_ABI) $(arch-y) $(tune-y)
>
> -CPPFLAGS += $(CFLAGS_ABI) $(arch-y) $(tune-y) -msoft-float $(CFLAGS_THUMB2)
> -AFLAGS   += -include asm/unified.h -msoft-float $(AFLAGS_THUMB2)
> +AFLAGS   += -include asm/unified.h
>
>  # Machine directory name.  This list is sorted alphanumerically
>  # by CONFIG_* macro name.
> -machine-$(CONFIG_ARCH_AT91)            := at91
> -machine-$(CONFIG_ARCH_BCM283X)         := bcm283x
> -machine-$(CONFIG_ARCH_CLPS711X)                := clps711x
> -machine-$(CONFIG_ARCH_DAVINCI)         := davinci
> -machine-$(CONFIG_ARCH_DIGIC)           := digic
> -machine-$(CONFIG_ARCH_EP93XX)          := ep93xx
> -machine-$(CONFIG_ARCH_HIGHBANK)                := highbank
> -machine-$(CONFIG_ARCH_IMX)             := imx
> -machine-$(CONFIG_ARCH_MXS)             := mxs
> -machine-$(CONFIG_ARCH_MVEBU)           := mvebu
> -machine-$(CONFIG_ARCH_NOMADIK)         := nomadik
> -machine-$(CONFIG_ARCH_NETX)            := netx
> -machine-$(CONFIG_ARCH_OMAP)            := omap
> -machine-$(CONFIG_ARCH_PXA)             := pxa
> -machine-$(CONFIG_ARCH_ROCKCHIP)                := rockchip
> -machine-$(CONFIG_ARCH_SAMSUNG)         := samsung
> -machine-$(CONFIG_ARCH_SOCFPGA)         := socfpga
> -machine-$(CONFIG_ARCH_VERSATILE)       := versatile
> -machine-$(CONFIG_ARCH_VEXPRESS)                := vexpress
> -machine-$(CONFIG_ARCH_TEGRA)           := tegra
> -machine-$(CONFIG_ARCH_UEMD)            := uemd
> -machine-$(CONFIG_ARCH_ZYNQ)            := zynq
> +machine-$(CONFIG_ARCH_VIRT)            := virt
>
>
> -# Board directory name.  This list is sorted alphanumerically
> -# by CONFIG_* macro name.
> -#
> -# DO NOT ADD NEW ENTRIES TO THIS LIST!
> -# Add to arch/arm/boards/Makefile instead.
> -#
> -# These are here only because they have a board specific config.h.
> -# TODO: Get rid of board specific config.h and move these to
> -# arch/arm/boards/Makefile aswell.
> -board-$(CONFIG_MACH_A9M2410)                   += a9m2410
> -board-$(CONFIG_MACH_A9M2440)                   += a9m2440
> -board-$(CONFIG_MACH_AT91RM9200EK)              += at91rm9200ek
> -board-$(CONFIG_MACH_MINI2440)                  += friendlyarm-mini2440
> -board-$(CONFIG_MACH_MINI6410)                  += friendlyarm-mini6410
> -board-$(CONFIG_MACH_PCM027)                    += phytec-phycore-pxa270
> -board-$(CONFIG_MACH_TINY210)                   += friendlyarm-tiny210
> -board-$(CONFIG_MACH_TINY6410)                  += friendlyarm-tiny6410
> -
>  machdirs := $(patsubst %,arch/arm/mach-%/,$(machine-y))
>
>  ifeq ($(KBUILD_SRC),)
> @@ -141,106 +76,7 @@ endif
>  barebox.s5p: $(KBUILD_BINARY)
>         $(Q)scripts/s5p_cksum $< barebox.s5p
>
> -ifeq ($(CONFIG_ARCH_S5PCxx),y)
> -KBUILD_IMAGE := barebox.s5p
> -endif
> -
> -quiet_cmd_mlo ?= IFT     $@
> -       cmd_mlo ?= scripts/omap_signGP -o MLO -l $(TEXT_BASE) -c $<
> -
> -MLO: $(KBUILD_BINARY)
> -       $(call if_changed,mlo)
> -
> -ifeq ($(CONFIG_OMAP_BUILD_IFT),y)
> -KBUILD_IMAGE := MLO
> -endif
> -
> -quiet_cmd_davinci_ubl_image = UBL-IMG $@
> -      cmd_davinci_ubl_image = set -e; \
> -        scripts/mkublheader $< > $@; \
> -        cat $< >> $@
> -
> -barebox.ubl: $(KBUILD_BINARY) FORCE
> -       $(call if_changed,davinci_ubl_image)
> -
> -ifeq ($(CONFIG_ARCH_DAVINCI),y)
> -KBUILD_IMAGE := barebox.ubl
> -endif
> -
> -quiet_cmd_am35xx_spi_image = SPI-IMG $@
> -      cmd_am35xx_spi_image = scripts/mk-omap-image -s -a $(TEXT_BASE) $< > $@
> -
> -barebox.spi: $(KBUILD_BINARY) FORCE
> -       $(call if_changed,am35xx_spi_image)
> -
> -MLO.spi: MLO FORCE
> -       $(call if_changed,am35xx_spi_image)
> -
> -ifeq ($(CONFIG_OMAP_BUILD_SPI),y)
> -KBUILD_IMAGE := MLO.spi
> -endif
> -
> -quiet_cmd_zynq_image = ZYNQ-IMG $@
> -      cmd_zynq_image = scripts/zynq_mkimage $< $@
> -
> -barebox.zynq: $(KBUILD_BINARY) FORCE
> -       $(call if_changed,zynq_image)
> -
> -ifeq ($(machine-y),zynq)
> -KBUILD_IMAGE := barebox.zynq
> -endif
> -
> -quiet_cmd_canon_a1100_image = DD      $@
> -      cmd_canon_a1100_image = scripts/canon-a1100-image $< $@ || \
> -       echo "WARNING: Couldn't create Canon A1100 image due to previous errors."
> -barebox.canon-a1100.bin: $(KBUILD_BINARY) FORCE
> -       $(call if_changed,canon_a1100_image)
> -
> -ifeq ($(CONFIG_MACH_CANON_A1100),y)
> -KBUILD_IMAGE := barebox.canon-a1100.bin
> -endif
> -
> -KWBIMAGE_OPTS = \
> -       -c -i $(srctree)/$(BOARD)/kwbimage.cfg -d $(TEXT_BASE) -e $(TEXT_BASE)
> -
> -quiet_cmd_kwbimage = KWB     $@
> -      cmd_kwbimage = scripts/kwbimage -p $< $(KWBIMAGE_OPTS) -o $@ || \
> -       echo "WARNING: Couldn't create KWB image due to previous errors."
> -
> -quiet_cmd_kwbimage_uart = KWBUART $@
> -      cmd_kwbimage_uart = scripts/kwbimage -m uart -p $< $(KWBIMAGE_OPTS) -o $@ || \
> -       echo "WARNING Couldn't create KWB image due to previous errors."
> -
> -barebox.kwb: $(KBUILD_BINARY) FORCE
> -       $(call if_changed,kwbimage)
> -
> -barebox.kwbuart: $(KBUILD_BINARY) FORCE
> -       $(call if_changed,kwbimage_uart)
> -
> -ifeq ($(CONFIG_ARCH_MVEBU),y)
> -KBUILD_IMAGE  := barebox.kwb barebox.kwbuart
> -endif
> -
> -barebox.imximg: $(KBUILD_BINARY) FORCE
> -       $(call if_changed,imx_image,$(CFG_$(@F)),)
> -
>  boarddir = $(srctree)/arch/arm/boards
> -imxcfg-$(CONFIG_MACH_FREESCALE_MX53_SMD) += $(boarddir)/freescale-mx53-smd/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_CCMX51) += $(boarddir)/ccxmx51/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_TX51) += $(boarddir)/karo-tx51/flash-header-karo-tx51.imxcfg
> -imxcfg-$(CONFIG_MACH_GUF_VINCELL) += $(boarddir)/guf-vincell/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_EUKREA_CPUIMX51SD) += $(boarddir)/eukrea_cpuimx51/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_FREESCALE_MX25_3STACK) += $(boarddir)/freescale-mx25-3ds/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_FREESCALE_MX35_3STACK) += $(boarddir)/freescale-mx35-3ds/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_TQMA53) += $(boarddir)/tqma53/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_EUKREA_CPUIMX25) += $(boarddir)/eukrea_cpuimx25/flash-header.imxcfg
> -imxcfg-$(CONFIG_MACH_EUKREA_CPUIMX35) += $(boarddir)/eukrea_cpuimx35/flash-header.imxcfg
> -imxcfg-$(CONFIG_TX53_REV_1011) += $(boarddir)/karo-tx53/flash-header-tx53-rev1011.imxcfg
> -imxcfg-$(CONFIG_TX53_REV_XX30) += $(boarddir)/karo-tx53/flash-header-tx53-revxx30.imxcfg
> -ifneq ($(imxcfg-y),)
> -CFG_barebox.imximg := $(imxcfg-y)
> -KBUILD_IMAGE  := barebox.imximg
> -endif
>
>  pbl := arch/arm/pbl
>  $(pbl)/zbarebox.S $(pbl)/zbarebox.bin $(pbl)/zbarebox: barebox.bin FORCE
> @@ -249,19 +85,8 @@ $(pbl)/zbarebox.S $(pbl)/zbarebox.bin $(pbl)/zbarebox: barebox.bin FORCE
>  archclean:
>         $(MAKE) $(clean)=$(pbl)
>
> -dts := arch/arm/dts
> -
> -%.dtb: scripts
> -       $(Q)$(MAKE) $(build)=$(dts) $(dts)/$@
> -
>  KBUILD_IMAGE ?= $(KBUILD_BINARY)
>
> -archprepare: maketools
> -maketools:
> -       $(Q)$(MAKE) $(build)=arch/arm/tools include/generated/mach-types.h
> -
> -PHONY += maketools
> -
>  ifneq ($(board-y),)
>  BOARD := arch/arm/boards/$(board-y)/
>  else
> @@ -276,9 +101,6 @@ endif
>
>  common-y += $(BOARD) arch/arm/boards/ $(MACH)
>  common-y += arch/arm/lib/ arch/arm/cpu/
> -common-y += arch/arm/crypto/
> -
> -common-$(CONFIG_OFTREE) += arch/arm/dts/
>
>  lds-y  := arch/arm/lib/barebox.lds
>
> diff --git a/arch/arm/boards/Makefile b/arch/arm/boards/Makefile
> index 9241b66..f9cb059 100644
> --- a/arch/arm/boards/Makefile
> +++ b/arch/arm/boards/Makefile
> @@ -135,3 +135,4 @@ obj-$(CONFIG_MACH_VIRT2REAL)                        += virt2real/
>  obj-$(CONFIG_MACH_ZEDBOARD)                    += avnet-zedboard/
>  obj-$(CONFIG_MACH_ZYLONITE)                    += zylonite/
>  obj-$(CONFIG_MACH_VARISCITE_MX6)               += variscite-mx6/
> +obj-$(CONFIG_MACH_VIRT)                                += virt/
> diff --git a/arch/arm/boards/virt/Kconfig b/arch/arm/boards/virt/Kconfig
> new file mode 100644
> index 0000000..b239127
> --- /dev/null
> +++ b/arch/arm/boards/virt/Kconfig
> @@ -0,0 +1,8 @@
> +
> +if MACH_VIRT
> +
> +config ARCH_TEXT_BASE
> +       hex
> +       default 0x40000000
> +
> +endif
> diff --git a/arch/arm/boards/virt/Makefile b/arch/arm/boards/virt/Makefile
> new file mode 100644
> index 0000000..eb072c0
> --- /dev/null
> +++ b/arch/arm/boards/virt/Makefile
> @@ -0,0 +1 @@
> +obj-y += init.o
> diff --git a/arch/arm/boards/virt/env/bin/_update b/arch/arm/boards/virt/env/bin/_update
> new file mode 100644
> index 0000000..014bce3
> --- /dev/null
> +++ b/arch/arm/boards/virt/env/bin/_update
> @@ -0,0 +1,36 @@
> +#!/bin/sh
> +
> +if [ -z "$part" -o -z "$image" ]; then
> +       echo "define \$part and \$image"
> +       exit 1
> +fi
> +
> +if [ ! -e "$part" ]; then
> +       echo "Partition $part does not exist"
> +       exit 1
> +fi
> +
> +if [ $# = 1 ]; then
> +       image=$1
> +fi
> +
> +if [ x$ip = xdhcp ]; then
> +       dhcp
> +fi
> +
> +ping $eth0.serverip
> +if [ $? -ne 0 ] ; then
> +       echo "update aborted"
> +       exit 1
> +fi
> +
> +unprotect $part
> +
> +echo
> +echo "erasing partition $part"
> +erase $part
> +
> +echo
> +echo "flashing $image to $part"
> +echo
> +tftp $image $part
> diff --git a/arch/arm/boards/virt/env/bin/boot b/arch/arm/boards/virt/env/bin/boot
> new file mode 100644
> index 0000000..3859dc1
> --- /dev/null
> +++ b/arch/arm/boards/virt/env/bin/boot
> @@ -0,0 +1,38 @@
> +#!/bin/sh
> +
> +. /env/config
> +
> +if [ x$1 = xflash ]; then
> +       root=flash
> +       kernel=flash
> +fi
> +
> +if [ x$1 = xnet ]; then
> +       root=net
> +       kernel=net
> +fi
> +
> +if [ x$ip = xdhcp ]; then
> +       bootargs="$bootargs ip=dhcp"
> +else
> +       bootargs="$bootargs ip=$eth0.ipaddr:$eth0.serverip:$eth0.gateway:$eth0.netmask:::"
> +fi
> +
> +if [ x$root = xflash ]; then
> +       bootargs="$bootargs root=$rootpart rootfstype=jffs2"
> +else
> +       bootargs="$bootargs root=/dev/nfs nfsroot=$eth0.serverip:$nfsroot,v3,tcp"
> +fi
> +
> +bootargs="$bootargs mtdparts=physmap-flash.0:$mtdparts"
> +
> +if [ $kernel = net ]; then
> +       if [ x$ip = xdhcp ]; then
> +               dhcp
> +       fi
> +       tftp $uimage uImage || exit 1
> +       bootm uImage
> +else
> +       bootm /dev/nor0.kernel
> +fi
> +
> diff --git a/arch/arm/boards/virt/env/bin/init b/arch/arm/boards/virt/env/bin/init
> new file mode 100644
> index 0000000..48e2139
> --- /dev/null
> +++ b/arch/arm/boards/virt/env/bin/init
> @@ -0,0 +1,20 @@
> +#!/bin/sh
> +
> +PATH=/env/bin
> +export PATH
> +
> +. /env/config
> +addpart /dev/nor0 $mtdparts
> +
> +echo
> +echo -n "Hit any key to stop autoboot: "
> +timeout -a $autoboot_timeout
> +if [ $? != 0 ]; then
> +       echo
> +       echo "type update_kernel [<imagename>] to update kernel into flash"
> +       echo "type udate_root [<imagename>] to update rootfs into flash"
> +       echo
> +       exit
> +fi
> +
> +boot
> \ No newline at end of file
> diff --git a/arch/arm/boards/virt/env/bin/update_kernel b/arch/arm/boards/virt/env/bin/update_kernel
> new file mode 100644
> index 0000000..1ad95fc
> --- /dev/null
> +++ b/arch/arm/boards/virt/env/bin/update_kernel
> @@ -0,0 +1,8 @@
> +#!/bin/sh
> +
> +. /env/config
> +
> +image=$uimage
> +part=/dev/nor0.kernel
> +
> +. /env/bin/_update $1
> diff --git a/arch/arm/boards/virt/env/bin/update_root b/arch/arm/boards/virt/env/bin/update_root
> new file mode 100644
> index 0000000..b757a5b
> --- /dev/null
> +++ b/arch/arm/boards/virt/env/bin/update_root
> @@ -0,0 +1,8 @@
> +#!/bin/sh
> +
> +. /env/config
> +
> +image=$jffs2
> +part=/dev/nor0.root
> +
> +. /env/bin/_update $1
> diff --git a/arch/arm/boards/virt/env/config b/arch/arm/boards/virt/env/config
> new file mode 100644
> index 0000000..6c0abda
> --- /dev/null
> +++ b/arch/arm/boards/virt/env/config
> @@ -0,0 +1,38 @@
> +#!/bin/sh
> +
> +# use 'dhcp' to do dhcp in barebox and in kernel
> +# use 'none' if you want to skip kernel ip autoconfiguration
> +ip=dhcp
> +global.dhcp.vendor_id=barebox-${global.hostname}
> +
> +# or set your networking parameters here
> +#eth0.ipaddr=a.b.c.d
> +#eth0.netmask=a.b.c.d
> +#eth0.gateway=a.b.c.d
> +#eth0.serverip=a.b.c.d
> +
> +# can be either 'nfs', 'tftp' or 'nor'
> +kernel_loc=tftp
> +# can be either 'net', 'nor' or 'initrd'
> +rootfs_loc=initrd
> +
> +# can be either 'jffs2' or 'ubifs'
> +rootfs_type=ubifs
> +rootfsimage=root.$rootfs_type
> +
> +kernelimage=zImage
> +#kernelimage=uImage
> +#kernelimage=Image
> +#kernelimage=Image.lzo
> +
> +nfsroot="$eth0.serverip:/opt/work/busybox/arm9/rootfs_arm"
> +
> +nor_parts="256k(barebox)ro,64k(bareboxenv),1536k(kernel),-(root)"
> +rootfs_mtdblock_nor=3
> +
> +autoboot_timeout=3
> +
> +bootargs="console=ttyAMA0,115200n8 CONSOLE=/dev/ttyAMA0"
> +
> +# set a fancy prompt (if support is compiled in)
> +PS1="\e[1;31m[barebox@\h]:\w\e[0m\n# "
> diff --git a/arch/arm/boards/virt/env/init/mtdparts-nor b/arch/arm/boards/virt/env/init/mtdparts-nor
> new file mode 100644
> index 0000000..3307596
> --- /dev/null
> +++ b/arch/arm/boards/virt/env/init/mtdparts-nor
> @@ -0,0 +1,11 @@
> +#!/bin/sh
> +
> +if [ "$1" = menu ]; then
> +        init-menu-add-entry "$0" "NOR partitions"
> +        exit
> +fi
> +
> +mtdparts="2048k@0(nor0.barebox)ro,256k(nor0.barebox-env),256k(nor0.barebox-logo),256k(nor0.barebox-logo2),5120k(nor0.kernel),-(nor0.root)"
> +kernelname="application-flash"
> +
> +mtdparts-add -d nor0 -k ${kernelname} -p ${mtdparts}
> diff --git a/arch/arm/boards/virt/init.c b/arch/arm/boards/virt/init.c
> new file mode 100644
> index 0000000..9626067
> --- /dev/null
> +++ b/arch/arm/boards/virt/init.c
> @@ -0,0 +1,67 @@
> +/*
> + * Copyright (C) 2016 Raphaël Poggi <poggi.raph at gmail.com>
> + *
> + * GPLv2 only
> + */
> +
> +#include <common.h>
> +#include <init.h>
> +#include <asm/armlinux.h>
> +#include <asm/system_info.h>
> +#include <mach/devices.h>
> +#include <environment.h>
> +#include <linux/sizes.h>
> +#include <io.h>
> +#include <globalvar.h>
> +#include <asm/mmu.h>
> +
> +static int virt_mem_init(void)
> +{
> +       virt_add_ddram(SZ_512M);
> +
> +       add_cfi_flash_device(0, 0x00000000, SZ_4M, 0);
> +
> +       devfs_add_partition("nor0", 0x00000, 0x40000, DEVFS_PARTITION_FIXED, "self0");
> +       devfs_add_partition("nor0", 0x40000, 0x20000, DEVFS_PARTITION_FIXED, "env0");
> +
> +       return 0;
> +}
> +mem_initcall(virt_mem_init);
> +
> +static int virt_console_init(void)
> +{
> +       virt_register_uart(0);
> +
> +       return 0;
> +}
> +console_initcall(virt_console_init);
> +
> +static int virt_core_init(void)
> +{
> +       char *hostname = "virt";
> +
> +       if (cpu_is_cortex_a53())
> +               hostname = "virt-a53";
> +       else if (cpu_is_cortex_a57())
> +               hostname = "virt-a57";
> +
> +       barebox_set_model("ARM QEMU virt");
> +       barebox_set_hostname(hostname);
> +
> +       return 0;
> +}
> +postcore_initcall(virt_core_init);
> +
> +static int virt_mmu_enable(void)
> +{
> +       /* Mapping all periph range */
> +       arch_remap_range(0x09000000, 0x01000000, PMD_SECT_DEF_CACHED);
> +
> +       /* Mapping all flash range */
> +       arch_remap_range(0x00000000, 0x08000000, PMD_SECT_DEF_CACHED);
> +
> +       mmu_enable();
> +
> +       return 0;
> +}
> +postmmu_initcall(virt_mmu_enable);
> diff --git a/arch/arm/configs/virt_defconfig b/arch/arm/configs/virt_defconfig
> new file mode 100644
> index 0000000..ae928a2
> --- /dev/null
> +++ b/arch/arm/configs/virt_defconfig
> @@ -0,0 +1,53 @@
> +CONFIG_AEABI=y
> +CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS=y
> +CONFIG_BAREBOX_MAX_IMAGE_SIZE=0x05000000
> +CONFIG_BAREBOX_MAX_BARE_INIT_SIZE=0x01000000
> +CONFIG_MEMORY_LAYOUT_FIXED=y
> +CONFIG_STACK_BASE=0x60000000
> +CONFIG_MALLOC_BASE=0x50000000
> +CONFIG_PROMPT="virt: "
> +CONFIG_HUSH_FANCY_PROMPT=y
> +CONFIG_CMDLINE_EDITING=y
> +CONFIG_AUTO_COMPLETE=y
> +CONFIG_MENU=y
> +CONFIG_PASSWORD=y
> +CONFIG_PARTITION=y
> +CONFIG_DEFAULT_ENVIRONMENT_GENERIC_NEW=y
> +CONFIG_DEFAULT_ENVIRONMENT_PATH="arch/arm/boards/virt/env"
> +CONFIG_DEBUG_INFO=y
> +# CONFIG_CMD_ARM_CPUINFO is not set
> +CONFIG_LONGHELP=y
> +# CONFIG_CMD_BOOTM is not set
> +# CONFIG_CMD_BOOTU is not set
> +# CONFIG_CMD_MOUNT is not set
> +# CONFIG_CMD_UMOUNT is not set
> +# CONFIG_CMD_CAT is not set
> +# CONFIG_CMD_CD is not set
> +# CONFIG_CMD_CP is not set
> +# CONFIG_CMD_LS is not set
> +# CONFIG_CMD_MKDIR is not set
> +# CONFIG_CMD_PWD is not set
> +# CONFIG_CMD_RM is not set
> +# CONFIG_CMD_RMDIR is not set
> +# CONFIG_CMD_FALSE is not set
> +# CONFIG_CMD_TEST is not set
> +# CONFIG_CMD_TRUE is not set
> +# CONFIG_CMD_CLEAR is not set
> +# CONFIG_CMD_ECHO is not set
> +CONFIG_CMD_CRC=y
> +CONFIG_CMD_CRC_CMP=y
> +# CONFIG_CMD_MD is not set
> +# CONFIG_CMD_MEMCMP is not set
> +# CONFIG_CMD_MEMCPY is not set
> +# CONFIG_CMD_MEMSET is not set
> +# CONFIG_CMD_MW is not set
> +CONFIG_SERIAL_AMBA_PL011=y
> +# CONFIG_SPI is not set
> +CONFIG_MTD=y
> +CONFIG_DRIVER_CFI=y
> +CONFIG_DRIVER_CFI_BANK_WIDTH_8=y
> +CONFIG_CFI_BUFFER_WRITE=y
> +CONFIG_NAND=y
> +# CONFIG_FS_RAMFS is not set
> +CONFIG_DIGEST_SHA1_GENERIC=y
> +CONFIG_DIGEST_SHA256_GENERIC=y
> diff --git a/arch/arm/cpu/Kconfig b/arch/arm/cpu/Kconfig
> index 4f5d9b6..86d64a4 100644
> --- a/arch/arm/cpu/Kconfig
> +++ b/arch/arm/cpu/Kconfig
> @@ -1,6 +1,6 @@
>  comment "Processor Type"
>
> -config CPU_32
> +config CPU_64
>         bool
>         default y
>
> @@ -8,90 +8,13 @@ config CPU_32
>  # which CPUs we support in the kernel image, and the compiler instruction
>  # optimiser behaviour.
>
> -# ARM1176
> -config CPU_ARM1176
> +# ARMv8
> +config CPU_V8
>         bool
> -       select CPU_V6
> +       select CPU_64v8
> +       select CPU_SUPPORTS_64BIT_KERNEL
>
> -# ARM920T
> -config CPU_ARM920T
> -       bool
> -       select CPU_32v4T
> -       help
> -         The ARM920T is licensed to be produced by numerous vendors,
> -         and is used in the Maverick EP9312 and the Samsung S3C2410.
> -
> -         More information on the Maverick EP9312 at
> -         <http://www.cirrus.com/en/products/ep9312.html>.
> -
> -         Say Y if you want support for the ARM920T processor.
> -         Otherwise, say N.
> -
> -# ARM926T
> -config CPU_ARM926T
> -       bool
> -       select CPU_32v5
> -       help
> -         This is a variant of the ARM920. It has slightly different
> -         instruction sequences for cache and TLB operations. Curiously,
> -         there is no documentation on it at the ARM corporate website.
> -
> -         Say Y if you want support for the ARM926T processor.
> -         Otherwise, say N.
> -
> -# ARM946E-S
> -config CPU_ARM946E
> -       bool
> -       select CPU_32v4T
> -       help
> -         ARM946E-S is a member of the ARM9E-S family of high-
> -         performance, 32-bit system-on-chip processor solutions.
> -         The TCM and ARMv5TE 32-bit instruction set is supported.
> -
> -         Say Y if you want support for the ARM946E-S processor.
> -         Otherwise, say N.
> -
> -# Feroceon
> -config CPU_FEROCEON
> -       bool
> -       select CPU_32v5
> -       help
> -         This is a Marvell implementation of an ARMv5TE compatible
> -         ARM core, used in the Marvell Kirkwood SoC family.
> -
> -# ARMv6
> -config CPU_V6
> -       bool
> -       select CPU_32v6
> -
> -# ARMv7
> -config CPU_V7
> -       bool
> -       select CPU_32v7
> -
> -config CPU_XSC3
> -        bool
> -        select CPU_32v4T
> -        help
> -          Select code specific to PXA3xx variants
> -
> -# Xscale PXA25x, PXA27x
> -config CPU_XSCALE
> -       bool
> -       select CPU_32v4T
> -
> -# Figure out what processor architecture version we should be using.
> -# This defines the compiler instruction set which depends on the machine type.
> -config CPU_32v4T
> -       bool
> -
> -config CPU_32v5
> -       bool
> -
> -config CPU_32v6
> -       bool
> -
> -config CPU_32v7
> +config CPU_64v8
>         bool
>
>  comment "processor features"
> @@ -117,10 +40,15 @@ config BOOT_ENDIANNESS_SWITCH
>
>           Currently implemented only by "bootz" command.
>
> -config ARCH_HAS_L2X0
> +config SYS_SUPPORTS_32BIT_KERNEL
> +       bool
> +
> +config SYS_SUPPORTS_64BIT_KERNEL
> +       bool
> +
> +config CPU_SUPPORTS_32BIT_KERNEL
>         bool
>
> -config CACHE_L2X0
> -       bool "Enable L2x0 PrimeCell"
> -       depends on MMU && ARCH_HAS_L2X0
> +config CPU_SUPPORTS_64BIT_KERNEL
> +       bool
>
> diff --git a/arch/arm/cpu/Makefile b/arch/arm/cpu/Makefile
> index 854df60e..fe6e7af 100644
> --- a/arch/arm/cpu/Makefile
> +++ b/arch/arm/cpu/Makefile
> @@ -1,38 +1,25 @@
>  obj-y += cpu.o
>  obj-$(CONFIG_ARM_EXCEPTIONS) += exceptions.o
>  obj-$(CONFIG_ARM_EXCEPTIONS) += interrupts.o
> -obj-y += start.o setupc.o entry.o
> +obj-y += start.o entry.o
>
>  #
>  # Any variants can be called as start-armxyz.S
>  #
>  obj-$(CONFIG_CMD_ARM_CPUINFO) += cpuinfo.o
>  obj-$(CONFIG_CMD_ARM_MMUINFO) += mmuinfo.o
> -obj-$(CONFIG_OFDEVICE) += dtb.o
> -obj-$(CONFIG_MMU) += mmu.o cache.o mmu-early.o
> -pbl-$(CONFIG_MMU) += mmu-early.o
> +obj-$(CONFIG_MMU) += mmu.o cache.o
>
> -ifeq ($(CONFIG_MMU),)
> -obj-y += no-mmu.o
> -endif
> -
> -obj-$(CONFIG_CPU_32v4T) += cache-armv4.o
> -pbl-$(CONFIG_CPU_32v4T) += cache-armv4.o
> -obj-$(CONFIG_CPU_32v5) += cache-armv5.o
> -pbl-$(CONFIG_CPU_32v5) += cache-armv5.o
> -obj-$(CONFIG_CPU_32v6) += cache-armv6.o
> -pbl-$(CONFIG_CPU_32v6) += cache-armv6.o
> -AFLAGS_cache-armv7.o       :=-Wa,-march=armv7-a
> -obj-$(CONFIG_CPU_32v7) += cache-armv7.o
> -AFLAGS_pbl-cache-armv7.o       :=-Wa,-march=armv7-a
> -pbl-$(CONFIG_CPU_32v7) += cache-armv7.o
> -obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o
> +AFLAGS_cache-armv8.o       :=-Wa,-march=armv8-a
> +obj-$(CONFIG_CPU_64v8) += cache-armv8.o
> +AFLAGS_pbl-cache-armv8.o       :=-Wa,-march=armv8-a
> +pbl-$(CONFIG_CPU_64v8) += cache-armv8.o
>
>  pbl-y += setupc.o entry.o
>  pbl-$(CONFIG_PBL_SINGLE_IMAGE) += start-pbl.o
>  pbl-$(CONFIG_PBL_MULTI_IMAGES) += uncompress.o
>
> -obj-y += common.o cache.o
> -pbl-y += common.o cache.o
> +obj-y += cache.o
> +pbl-y += cache.o
>
>  lwl-y += lowlevel.o
> diff --git a/arch/arm/cpu/cache-armv8.S b/arch/arm/cpu/cache-armv8.S
> new file mode 100644
> index 0000000..82b2f81
> --- /dev/null
> +++ b/arch/arm/cpu/cache-armv8.S
> @@ -0,0 +1,168 @@
> +/*
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
> + *
> + * This file is based on sample code from ARMv8 ARM.
> + *
> + * SPDX-License-Identifier:    GPL-2.0+
> + */
> +
> +#include <config.h>
> +#include <linux/linkage.h>
> +#include <init.h>
> +
> +/*
> + * void v8_flush_dcache_level(level)
> + *
> + * clean and invalidate one level cache.
> + *
> + * x0: cache level
> + * x1: 0 flush & invalidate, 1 invalidate only
> + * x2~x9, x12: clobbered
> + */
> +.section .text.v8_flush_dcache_level
> +ENTRY(v8_flush_dcache_level)
> +       lsl     x12, x0, #1
> +       msr     csselr_el1, x12         /* select cache level */
> +       isb                             /* sync change of ccsidr_el1 */
> +       mrs     x6, ccsidr_el1          /* read the new ccsidr_el1 */
> +       and     x2, x6, #7              /* x2 <- log2(cache line size)-4 */
> +       add     x2, x2, #4              /* x2 <- log2(cache line size) */
> +       mov     x3, #0x3ff
> +       and     x3, x3, x6, lsr #3      /* x3 <- max number of #ways */
> +       clz     w5, w3                  /* bit position of #ways */
> +       mov     x4, #0x7fff
> +       and     x4, x4, x6, lsr #13     /* x4 <- max number of #sets */
> +       /* x12 <- cache level << 1 */
> +       /* x2 <- line length offset */
> +       /* x3 <- number of cache ways - 1 */
> +       /* x4 <- number of cache sets - 1 */
> +       /* x5 <- bit position of #ways */
> +
> +loop_set:
> +       mov     x6, x3                  /* x6 <- working copy of #ways */
> +loop_way:
> +       lsl     x7, x6, x5
> +       orr     x9, x12, x7             /* map way and level to cisw value */
> +       lsl     x7, x4, x2
> +       orr     x9, x9, x7              /* map set number to cisw value */
> +       tbz     w1, #0, 1f
> +       dc      isw, x9
> +       b       2f
> +1:     dc      cisw, x9                /* clean & invalidate by set/way */
> +2:     subs    x6, x6, #1              /* decrement the way */
> +       b.ge    loop_way
> +       subs    x4, x4, #1              /* decrement the set */
> +       b.ge    loop_set
> +
> +       ret
> +ENDPROC(v8_flush_dcache_level)
> +
> +/*
> + * void v8_dcache_all(int invalidate_only)
> + *
> + * x0: 0 flush & invalidate, 1 invalidate only
> + *
> + * clean and invalidate (or, if x0 is 1, only invalidate) all data cache by SET/WAY.
> + */
> +.section .text.v8_dcache_all
> +ENTRY(v8_dcache_all)
> +       mov     x1, x0
> +       dsb     sy
> +       mrs     x10, clidr_el1          /* read clidr_el1 */
> +       lsr     x11, x10, #24
> +       and     x11, x11, #0x7          /* x11 <- loc */
> +       cbz     x11, finished           /* if loc is 0, exit */
> +       mov     x15, x30
> +       mov     x0, #0                  /* start flush at cache level 0 */
> +       /* x0  <- cache level */
> +       /* x10 <- clidr_el1 */
> +       /* x11 <- loc */
> +       /* x15 <- return address */
> +
> +loop_level:
> +       lsl     x12, x0, #1
> +       add     x12, x12, x0            /* x12 <- tripled cache level */
> +       lsr     x12, x10, x12
> +       and     x12, x12, #7            /* x12 <- cache type */
> +       cmp     x12, #2
> +       b.lt    skip                    /* skip if no cache or icache */
> +       bl      v8_flush_dcache_level   /* x1 = 0 flush, 1 invalidate */
> +skip:
> +       add     x0, x0, #1              /* increment cache level */
> +       cmp     x11, x0
> +       b.gt    loop_level
> +
> +       mov     x0, #0
> +       msr     csselr_el1, x0          /* restore csselr_el1 */
> +       dsb     sy
> +       isb
> +       mov     x30, x15
> +
> +finished:
> +       ret
> +ENDPROC(v8_dcache_all)
> +
> +.section .text.v8_flush_dcache_all
> +ENTRY(v8_flush_dcache_all)
> +       mov     x16, x30
> +       mov     x0, #0
> +       bl      v8_dcache_all
> +       mov     x30, x16
> +       ret
> +ENDPROC(v8_flush_dcache_all)
> +
> +.section .text.v8_invalidate_dcache_all
> +ENTRY(v8_invalidate_dcache_all)
> +       mov     x16, x30
> +       mov     x0, #0x1
> +       bl      v8_dcache_all
> +       mov     x30, x16
> +       ret
> +ENDPROC(v8_invalidate_dcache_all)
> +
> +/*
> + * void v8_flush_dcache_range(start, end)
> + *
> + * clean & invalidate data cache in the range
> + *
> + * x0: start address
> + * x1: end address
> + */
> +.section .text.v8_flush_dcache_range
> +ENTRY(v8_flush_dcache_range)
> +       mrs     x3, ctr_el0
> +       lsr     x3, x3, #16
> +       and     x3, x3, #0xf
> +       mov     x2, #4
> +       lsl     x2, x2, x3              /* cache line size */
> +
> +       /* x2 <- minimal cache line size in cache system */
> +       sub     x3, x2, #1
> +       bic     x0, x0, x3
> +1:     dc      civac, x0       /* clean & invalidate data or unified cache */
> +       add     x0, x0, x2
> +       cmp     x0, x1
> +       b.lo    1b
> +       dsb     sy
> +       ret
> +ENDPROC(v8_flush_dcache_range)
> +
> +/*
> + * void v8_invalidate_icache_all(void)
> + *
> + * invalidate all instruction cache entries.
> + */
> +.section .text.v8_invalidate_icache_all
> +ENTRY(v8_invalidate_icache_all)
> +       ic      ialluis
> +       isb     sy
> +       ret
> +ENDPROC(v8_invalidate_icache_all)
> +
> +.section .text.v8_flush_l3_cache
> +ENTRY(v8_flush_l3_cache)
> +       mov     x0, #0                  /* return status as success */
> +       ret
> +ENDPROC(v8_flush_l3_cache)
> +       .weak   v8_flush_l3_cache
> diff --git a/arch/arm/cpu/cache.c b/arch/arm/cpu/cache.c
> index 27ead1c..8465cf9 100644
> --- a/arch/arm/cpu/cache.c
> +++ b/arch/arm/cpu/cache.c
> @@ -32,10 +32,7 @@ struct cache_fns *cache_fns;
>                 .mmu_cache_flush = arch##_mmu_cache_flush,                      \
>         };
>
> -DEFINE_CPU_FNS(v4)
> -DEFINE_CPU_FNS(v5)
> -DEFINE_CPU_FNS(v6)
> -DEFINE_CPU_FNS(v7)
> +DEFINE_CPU_FNS(v8)
>
>  void __dma_clean_range(unsigned long start, unsigned long end)
>  {
> @@ -78,29 +75,9 @@ void __mmu_cache_flush(void)
>  int arm_set_cache_functions(void)
>  {
>         switch (cpu_architecture()) {
> -#ifdef CONFIG_CPU_32v4T
> -       case CPU_ARCH_ARMv4T:
> -               cache_fns = &cache_fns_armv4;
> +       case CPU_ARCH_ARMv8:
> +               cache_fns = &cache_fns_armv8;
>                 break;
> -#endif
> -#ifdef CONFIG_CPU_32v5
> -       case CPU_ARCH_ARMv5:
> -       case CPU_ARCH_ARMv5T:
> -       case CPU_ARCH_ARMv5TE:
> -       case CPU_ARCH_ARMv5TEJ:
> -               cache_fns = &cache_fns_armv5;
> -               break;
> -#endif
> -#ifdef CONFIG_CPU_32v6
> -       case CPU_ARCH_ARMv6:
> -               cache_fns = &cache_fns_armv6;
> -               break;
> -#endif
> -#ifdef CONFIG_CPU_32v7
> -       case CPU_ARCH_ARMv7:
> -               cache_fns = &cache_fns_armv7;
> -               break;
> -#endif
>         default:
>                 while(1);
>         }
> @@ -115,49 +92,19 @@ int arm_set_cache_functions(void)
>  void arm_early_mmu_cache_flush(void)
>  {
>         switch (arm_early_get_cpu_architecture()) {
> -#ifdef CONFIG_CPU_32v4T
> -       case CPU_ARCH_ARMv4T:
> -               v4_mmu_cache_flush();
> -               return;
> -#endif
> -#ifdef CONFIG_CPU_32v5
> -       case CPU_ARCH_ARMv5:
> -       case CPU_ARCH_ARMv5T:
> -       case CPU_ARCH_ARMv5TE:
> -       case CPU_ARCH_ARMv5TEJ:
> -               v5_mmu_cache_flush();
> +       case CPU_ARCH_ARMv8:
> +//             v7_mmu_cache_flush();
>                 return;
> -#endif
> -#ifdef CONFIG_CPU_32v6
> -       case CPU_ARCH_ARMv6:
> -               v6_mmu_cache_flush();
> -               return;
> -#endif
> -#ifdef CONFIG_CPU_32v7
> -       case CPU_ARCH_ARMv7:
> -               v7_mmu_cache_flush();
> -               return;
> -#endif
>         }
>  }
>
> -void v7_mmu_cache_invalidate(void);
> +//void v7_mmu_cache_invalidate(void);
>
>  void arm_early_mmu_cache_invalidate(void)
>  {
>         switch (arm_early_get_cpu_architecture()) {
> -       case CPU_ARCH_ARMv4T:
> -       case CPU_ARCH_ARMv5:
> -       case CPU_ARCH_ARMv5T:
> -       case CPU_ARCH_ARMv5TE:
> -       case CPU_ARCH_ARMv5TEJ:
> -       case CPU_ARCH_ARMv6:
> -               asm volatile("mcr p15, 0, %0, c7, c6, 0\n" : : "r"(0));
> -               return;
> -#ifdef CONFIG_CPU_32v7
> -       case CPU_ARCH_ARMv7:
> -               v7_mmu_cache_invalidate();
> +       case CPU_ARCH_ARMv8:
> +//             v7_mmu_cache_invalidate();
>                 return;
> -#endif
>         }
>  }
> diff --git a/arch/arm/cpu/cpu.c b/arch/arm/cpu/cpu.c
> index eb12166..19cd944 100644
> --- a/arch/arm/cpu/cpu.c
> +++ b/arch/arm/cpu/cpu.c
> @@ -40,11 +40,8 @@
>   */
>  void icache_enable(void)
>  {
> -       u32 r;
> -
> -       r = get_cr();
> -       r |= CR_I;
> -       set_cr(r);
> +       v8_invalidate_icache_all();
> +       set_sctlr(get_sctlr() | CR_I);
>  }
>
>  /**
> @@ -52,11 +49,7 @@ void icache_enable(void)
>   */
>  void icache_disable(void)
>  {
> -       u32 r;
> -
> -       r = get_cr();
> -       r &= ~CR_I;
> -       set_cr(r);
> +       set_sctlr(get_sctlr() & ~CR_I);
>  }
>
>  /**
> @@ -65,26 +58,7 @@ void icache_disable(void)
>   */
>  int icache_status(void)
>  {
> -       return (get_cr () & CR_I) != 0;
> -}
> -
> -/*
> - * SoC like the ux500 have the l2x0 always enable
> - * with or without MMU enable
> - */
> -struct outer_cache_fns outer_cache;
> -
> -/*
> - * Clean and invalide caches, disable MMU
> - */
> -void mmu_disable(void)
> -{
> -       __mmu_cache_flush();
> -       if (outer_cache.disable) {
> -               outer_cache.flush_all();
> -               outer_cache.disable();
> -       }
> -       __mmu_cache_off();
> +       return (get_sctlr() & CR_I) != 0;
>  }
>
>  /**
> @@ -96,18 +70,8 @@ void mmu_disable(void)
>   */
>  static void arch_shutdown(void)
>  {
> -       uint32_t r;
> -
>         mmu_disable();
>         flush_icache();
> -       /*
> -        * barebox normally does not use interrupts, but some functionalities
> -        * (eg. OMAP4_USBBOOT) require them enabled. So be sure interrupts are
> -        * disabled before exiting.
> -        */
> -       __asm__ __volatile__("mrs %0, cpsr" : "=r"(r));
> -       r |= PSR_I_BIT;
> -       __asm__ __volatile__("msr cpsr, %0" : : "r"(r));
>  }
>  archshutdown_exitcall(arch_shutdown);
>
> diff --git a/arch/arm/cpu/cpuinfo.c b/arch/arm/cpu/cpuinfo.c
> index 8b22e9b..2306101 100644
> --- a/arch/arm/cpu/cpuinfo.c
> +++ b/arch/arm/cpu/cpuinfo.c
> @@ -21,21 +21,10 @@
>  #include <complete.h>
>
>  #define CPU_ARCH_UNKNOWN       0
> -#define CPU_ARCH_ARMv3         1
> -#define CPU_ARCH_ARMv4         2
> -#define CPU_ARCH_ARMv4T                3
> -#define CPU_ARCH_ARMv5         4
> -#define CPU_ARCH_ARMv5T                5
> -#define CPU_ARCH_ARMv5TE       6
> -#define CPU_ARCH_ARMv5TEJ      7
> -#define CPU_ARCH_ARMv6         8
> -#define CPU_ARCH_ARMv7         9
> -
> -#define ARM_CPU_PART_CORTEX_A5      0xC050
> -#define ARM_CPU_PART_CORTEX_A7      0xC070
> -#define ARM_CPU_PART_CORTEX_A8      0xC080
> -#define ARM_CPU_PART_CORTEX_A9      0xC090
> -#define ARM_CPU_PART_CORTEX_A15     0xC0F0
> +#define CPU_ARCH_ARMv8         10
> +
> +#define ARM_CPU_PART_CORTEX_A53            0xD034
> +#define ARM_CPU_PART_CORTEX_A57            0xD070
>
>  static void decode_cache(unsigned long size)
>  {
> @@ -61,22 +50,22 @@ static int do_cpuinfo(int argc, char *argv[])
>         int cpu_arch;
>
>         __asm__ __volatile__(
> -               "mrc    p15, 0, %0, c0, c0, 0   @ read control reg\n"
> +               "mrs    %0, midr_el1\n"
>                 : "=r" (mainid)
>                 :
>                 : "memory");
>
> -       __asm__ __volatile__(
> -               "mrc    p15, 0, %0, c0, c0, 1   @ read control reg\n"
> -               : "=r" (cache)
> -               :
> -               : "memory");
> -
> -       __asm__ __volatile__(
> -               "mrc    p15, 0, %0, c1, c0, 0   @ read control reg\n"
> -                       : "=r" (cr)
> -                       :
> -                       : "memory");
> +//     __asm__ __volatile__(
> +//             "mrc    p15, 0, %0, c0, c0, 1   @ read control reg\n"
> +//             : "=r" (cache)
> +//             :
> +//             : "memory");
> +//
> +//     __asm__ __volatile__(
> +//             "mrc    p15, 0, %0, c1, c0, 0   @ read control reg\n"
> +//                     : "=r" (cr)
> +//                     :
> +//                     : "memory");
>
>         switch (mainid >> 24) {
>         case 0x41:
> @@ -111,8 +100,8 @@ static int do_cpuinfo(int argc, char *argv[])
>
>                 /* Revised CPUID format. Read the Memory Model Feature
>                  * Register 0 and check for VMSAv7 or PMSAv7 */
> -               asm("mrc        p15, 0, %0, c0, c1, 4"
> -                   : "=r" (mmfr0));
> +//             asm("mrc        p15, 0, %0, c0, c1, 4"
> +//                 : "=r" (mmfr0));
>                 if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
>                     (mmfr0 & 0x000000f0) >= 0x00000030)
>                         cpu_arch = CPU_ARCH_ARMv7;
> @@ -152,6 +141,9 @@ static int do_cpuinfo(int argc, char *argv[])
>         case CPU_ARCH_ARMv7:
>                 architecture = "v7";
>                 break;
> +       case CPU_ARCH_ARMv8:
> +               architecture = "v8";
> +               break;
>         case CPU_ARCH_UNKNOWN:
>         default:
>                 architecture = "Unknown";
> @@ -181,29 +173,31 @@ static int do_cpuinfo(int argc, char *argv[])
>                 case ARM_CPU_PART_CORTEX_A15:
>                         part = "Cortex-A15";
>                         break;
> +               case ARM_CPU_PART_CORTEX_A53:
> +                       part = "Cortex-A53";
>                 default:
>                         part = "unknown";
>                 }
>                 printf("core: %s r%up%u\n", part, major, minor);
>         }
>
> -       if (cache & (1 << 24)) {
> -               /* separate I/D cache */
> -               printf("I-cache: ");
> -               decode_cache(cache & 0xfff);
> -               printf("D-cache: ");
> -               decode_cache((cache >> 12) & 0xfff);
> -       } else {
> -               /* unified I/D cache */
> -               printf("cache: ");
> -               decode_cache(cache & 0xfff);
> -       }
> -
> -       printf("Control register: ");
> -       for (i = 0; i < ARRAY_SIZE(crbits); i++)
> -               if (cr & (1 << i))
> -                       printf("%s ", crbits[i]);
> -       printf("\n");
> +//     if (cache & (1 << 24)) {
> +//             /* separate I/D cache */
> +//             printf("I-cache: ");
> +//             decode_cache(cache & 0xfff);
> +//             printf("D-cache: ");
> +//             decode_cache((cache >> 12) & 0xfff);
> +//     } else {
> +//             /* unified I/D cache */
> +//             printf("cache: ");
> +//             decode_cache(cache & 0xfff);
> +//     }
> +
> +//     printf("Control register: ");
> +//     for (i = 0; i < ARRAY_SIZE(crbits); i++)
> +//             if (cr & (1 << i))
> +//                     printf("%s ", crbits[i]);
> +//     printf("\n");
>
>         return 0;
>  }
> diff --git a/arch/arm/cpu/entry.c b/arch/arm/cpu/entry.c
> index 0cdcfec..a029f09 100644
> --- a/arch/arm/cpu/entry.c
> +++ b/arch/arm/cpu/entry.c
> @@ -1,7 +1,6 @@
>  #include <types.h>
>
>  #include <asm/cache.h>
> -#include <asm/barebox-arm.h>
>
>  #include "entry.h"
>
> @@ -24,10 +23,10 @@
>   * be fine.
>   */
>
> -void __naked __noreturn barebox_arm_entry(unsigned long membase,
> +void __noreturn barebox_arm_entry(unsigned long membase,
>                                           unsigned long memsize, void *boarddata)
>  {
> -       arm_setup_stack(arm_mem_stack(membase, membase + memsize) + STACK_SIZE - 16);
> +       arm_setup_stack(membase + memsize - 16);
>         arm_early_mmu_cache_invalidate();
>
>         if (IS_ENABLED(CONFIG_PBL_MULTI_IMAGES))
> diff --git a/arch/arm/cpu/exceptions.S b/arch/arm/cpu/exceptions.S
> index eda0d6a..5812025 100644
> --- a/arch/arm/cpu/exceptions.S
> +++ b/arch/arm/cpu/exceptions.S
> @@ -1,220 +1,119 @@
> -#include <config.h>
> -#include <linux/linkage.h>
> -#include <asm-generic/memory_layout.h>
> -
>  /*
> - *************************************************************************
> - *
> - * Interrupt handling
> + * (C) Copyright 2013
> + * David Feng <fenghua at phytium.com.cn>
>   *
> - *************************************************************************
> + * SPDX-License-Identifier:    GPL-2.0+
>   */
>
> -@
> -@ IRQ stack frame.
> -@
> -#define S_FRAME_SIZE   72
> -
> -#define S_OLD_R0       68
> -#define S_PSR          64
> -#define S_PC           60
> -#define S_LR           56
> -#define S_SP           52
> -
> -#define S_IP           48
> -#define S_FP           44
> -#define S_R10          40
> -#define S_R9           36
> -#define S_R8           32
> -#define S_R7           28
> -#define S_R6           24
> -#define S_R5           20
> -#define S_R4           16
> -#define S_R3           12
> -#define S_R2           8
> -#define S_R1           4
> -#define S_R0           0
> -
> -#define MODE_SVC       0x13
> +#include <config.h>
> +#include <asm/ptrace.h>
> +#include <linux/linkage.h>
>
>  /*
> - * use bad_save_user_regs for abort/prefetch/undef/swi ...
> - * use irq_save_user_regs / irq_restore_user_regs for IRQ/FIQ handling
> + * Enter Exception.
> + * This will save the processor state that is ELR/X0~X30
> + * to the stack frame.
>   */
> -
> -       .macro  bad_save_user_regs
> -       sub     sp, sp, #S_FRAME_SIZE
> -       stmia   sp, {r0 - r12}                  @ Calling r0-r12
> -       ldr     r2, =abort_stack
> -       ldmia   r2, {r2 - r3}                   @ get pc, cpsr
> -       add     r0, sp, #S_FRAME_SIZE           @ restore sp_SVC
> -
> -       add     r5, sp, #S_SP
> -       mov     r1, lr
> -       stmia   r5, {r0 - r3}                   @ save sp_SVC, lr_SVC, pc, cpsr
> -       mov     r0, sp
> -       .endm
> -
> -       .macro  irq_save_user_regs
> -       sub     sp, sp, #S_FRAME_SIZE
> -       stmia   sp, {r0 - r12}                  @ Calling r0-r12
> -       add     r8, sp, #S_PC
> -       stmdb   r8, {sp, lr}^                   @ Calling SP, LR
> -       str     lr, [r8, #0]                    @ Save calling PC
> -       mrs     r6, spsr
> -       str     r6, [r8, #4]                    @ Save CPSR
> -       str     r0, [r8, #8]                    @ Save OLD_R0
> -       mov     r0, sp
> -       .endm
> -
> -       .macro  irq_restore_user_regs
> -       ldmia   sp, {r0 - lr}^                  @ Calling r0 - lr
> -       mov     r0, r0
> -       ldr     lr, [sp, #S_PC]                 @ Get PC
> -       add     sp, sp, #S_FRAME_SIZE
> -       subs    pc, lr, #4                      @ return & move spsr_svc into cpsr
> -       .endm
> -
> -       .macro get_bad_stack
> -       ldr     r13, =abort_stack
> -       str     lr, [r13]                       @ save caller lr / spsr
> -       mrs     lr, spsr
> -       str     lr, [r13, #4]
> -
> -       mov     r13, #MODE_SVC                  @ prepare SVC-Mode
> -       @ msr   spsr_c, r13
> -       msr     spsr, r13
> -       mov     lr, pc
> -       movs    pc, lr
> -       .endm
> -
> -       .macro try_data_abort
> -       ldr     r13, =arm_ignore_data_abort     @ check try mode
> -       ldr     r13, [r13]
> -       cmp     r13, #0
> -       beq     do_abort_\@
> -       ldr     r13, =arm_data_abort_occurred
> -       str     r13, [r13]
> -       mrs     r13, spsr                       @ read saved CPSR
> -       tst     r13, #1<<5                      @ check Thumb mode
> -       subeq   lr, #4                          @ next ARM instr
> -       subne   lr, #6                          @ next Thumb instr
> -       movs    pc, lr
> -do_abort_\@:
> -       .endm
> -
> -       .macro get_irq_stack                    @ setup IRQ stack
> -       ldr     sp, IRQ_STACK_START
> -       .endm
> -
> -       .macro get_fiq_stack                    @ setup FIQ stack
> -       ldr     sp, FIQ_STACK_START
> -       .endm
> +.macro exception_entry
> +       stp     x29, x30, [sp, #-16]!
> +       stp     x27, x28, [sp, #-16]!
> +       stp     x25, x26, [sp, #-16]!
> +       stp     x23, x24, [sp, #-16]!
> +       stp     x21, x22, [sp, #-16]!
> +       stp     x19, x20, [sp, #-16]!
> +       stp     x17, x18, [sp, #-16]!
> +       stp     x15, x16, [sp, #-16]!
> +       stp     x13, x14, [sp, #-16]!
> +       stp     x11, x12, [sp, #-16]!
> +       stp     x9, x10, [sp, #-16]!
> +       stp     x7, x8, [sp, #-16]!
> +       stp     x5, x6, [sp, #-16]!
> +       stp     x3, x4, [sp, #-16]!
> +       stp     x1, x2, [sp, #-16]!
> +
> +       /* Could be running at EL3/EL2/EL1 */
> +       mrs     x11, CurrentEL
> +       cmp     x11, #0xC               /* Check EL3 state */
> +       b.eq    1f
> +       cmp     x11, #0x8               /* Check EL2 state */
> +       b.eq    2f
> +       cmp     x11, #0x4               /* Check EL1 state */
> +       b.eq    3f
> +3:     mrs     x1, esr_el3
> +       mrs     x2, elr_el3
> +       b       0f
> +2:     mrs     x1, esr_el2
> +       mrs     x2, elr_el2
> +       b       0f
> +1:     mrs     x1, esr_el1
> +       mrs     x2, elr_el1
> +0:
> +       stp     x2, x0, [sp, #-16]!
> +       mov     x0, sp
> +.endm
>
>  /*
> - * exception handlers
> + * Exception vectors.
>   */
> -       .section ".text","ax"
> -       .arm
> -
> -       .align  5
> -undefined_instruction:
> -       get_bad_stack
> -       bad_save_user_regs
> -       bl      do_undefined_instruction
> -
> -       .align  5
> -software_interrupt:
> -       get_bad_stack
> -       bad_save_user_regs
> -       bl      do_software_interrupt
> -
> -       .align  5
> -prefetch_abort:
> -       get_bad_stack
> -       bad_save_user_regs
> -       bl      do_prefetch_abort
> -
> -       .align  5
> -data_abort:
> -       try_data_abort
> -       get_bad_stack
> -       bad_save_user_regs
> -       bl      do_data_abort
> -
> -       .align  5
> -irq:
> -       get_bad_stack
> -       bad_save_user_regs
> -       bl      do_irq
> -
> -       .align  5
> -fiq:
> -       get_bad_stack
> -       bad_save_user_regs
> -       bl      do_fiq
> -
> -#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_ARM_EXCEPTIONS)
> -/*
> - * With relocatable binary support the runtime exception vectors do not match
> - * the addresses in the binary. We have to fix them up during runtime
> - */
> -ENTRY(arm_fixup_vectors)
> -       ldr     r0, =undefined_instruction
> -       ldr     r1, =_undefined_instruction
> -       str     r0, [r1]
> -       ldr     r0, =software_interrupt
> -       ldr     r1, =_software_interrupt
> -       str     r0, [r1]
> -       ldr     r0, =prefetch_abort
> -       ldr     r1, =_prefetch_abort
> -       str     r0, [r1]
> -       ldr     r0, =data_abort
> -       ldr     r1, =_data_abort
> -       str     r0, [r1]
> -       ldr     r0, =irq
> -       ldr     r1, =_irq
> -       str     r0, [r1]
> -       ldr     r0, =fiq
> -       ldr     r1, =_fiq
> -       str     r0, [r1]
> -       bx      lr
> -ENDPROC(arm_fixup_vectors)
> -#endif
> -
> -.section .text_exceptions
> -.globl extable
> -extable:
> -1:     b 1b                            /* barebox_arm_reset_vector */
> -#ifdef CONFIG_ARM_EXCEPTIONS
> -       ldr pc, _undefined_instruction  /* undefined instruction */
> -       ldr pc, _software_interrupt     /* software interrupt (SWI) */
> -       ldr pc, _prefetch_abort         /* prefetch abort */
> -       ldr pc, _data_abort             /* data abort */
> -1:     b 1b                            /* (reserved) */
> -       ldr pc, _irq                    /* irq (interrupt) */
> -       ldr pc, _fiq                    /* fiq (fast interrupt) */
> -.globl _undefined_instruction
> -_undefined_instruction: .word undefined_instruction
> -.globl _software_interrupt
> -_software_interrupt: .word software_interrupt
> -.globl _prefetch_abort
> -_prefetch_abort: .word prefetch_abort
> -.globl _data_abort
> -_data_abort: .word data_abort
> -.globl _irq
> -_irq: .word irq
> -.globl _fiq
> -_fiq: .word fiq
> -#else
> -1:     b 1b                            /* undefined instruction */
> -1:     b 1b                            /* software interrupt (SWI) */
> -1:     b 1b                            /* prefetch abort */
> -1:     b 1b                            /* data abort */
> -1:     b 1b                            /* (reserved) */
> -1:     b 1b                            /* irq (interrupt) */
> -1:     b 1b                            /* fiq (fast interrupt) */
> -#endif
> +       .align  11
> +       .globl  vectors
> +vectors:
> +       .align  7
> +       b       _do_bad_sync    /* Current EL Synchronous Thread */
> +
> +       .align  7
> +       b       _do_bad_irq     /* Current EL IRQ Thread */
> +
> +       .align  7
> +       b       _do_bad_fiq     /* Current EL FIQ Thread */
> +
> +       .align  7
> +       b       _do_bad_error   /* Current EL Error Thread */
> +
> +       .align  7
> +       b       _do_sync        /* Current EL Synchronous Handler */
> +
> +       .align  7
> +       b       _do_irq         /* Current EL IRQ Handler */
> +
> +       .align  7
> +       b       _do_fiq         /* Current EL FIQ Handler */
> +
> +       .align  7
> +       b       _do_error       /* Current EL Error Handler */
> +
> +
> +_do_bad_sync:
> +       exception_entry
> +       bl      do_bad_sync
> +
> +_do_bad_irq:
> +       exception_entry
> +       bl      do_bad_irq
> +
> +_do_bad_fiq:
> +       exception_entry
> +       bl      do_bad_fiq
> +
> +_do_bad_error:
> +       exception_entry
> +       bl      do_bad_error
> +
> +_do_sync:
> +       exception_entry
> +       bl      do_sync
> +
> +_do_irq:
> +       exception_entry
> +       bl      do_irq
> +
> +_do_fiq:
> +       exception_entry
> +       bl      do_fiq
> +
> +_do_error:
> +       exception_entry
> +       bl      do_error
>
>  .section .data
>  .align 4
> diff --git a/arch/arm/cpu/interrupts.c b/arch/arm/cpu/interrupts.c
> index fb4bb78..d42a5b1 100644
> --- a/arch/arm/cpu/interrupts.c
> +++ b/arch/arm/cpu/interrupts.c
> @@ -27,54 +27,8 @@
>  #include <asm/ptrace.h>
>  #include <asm/unwind.h>
>
> -/**
> - * Display current register set content
> - * @param[in] regs Guess what
> - */
> -void show_regs (struct pt_regs *regs)
> -{
> -       unsigned long flags;
> -       const char *processor_modes[] = {
> -       "USER_26",      "FIQ_26",       "IRQ_26",       "SVC_26",
> -       "UK4_26",       "UK5_26",       "UK6_26",       "UK7_26",
> -       "UK8_26",       "UK9_26",       "UK10_26",      "UK11_26",
> -       "UK12_26",      "UK13_26",      "UK14_26",      "UK15_26",
> -       "USER_32",      "FIQ_32",       "IRQ_32",       "SVC_32",
> -       "UK4_32",       "UK5_32",       "UK6_32",       "ABT_32",
> -       "UK8_32",       "UK9_32",       "UK10_32",      "UND_32",
> -       "UK12_32",      "UK13_32",      "UK14_32",      "SYS_32",
> -       };
> -
> -       flags = condition_codes (regs);
> -
> -       printf ("pc : [<%08lx>]    lr : [<%08lx>]\n"
> -               "sp : %08lx  ip : %08lx  fp : %08lx\n",
> -               instruction_pointer (regs),
> -               regs->ARM_lr, regs->ARM_sp, regs->ARM_ip, regs->ARM_fp);
> -       printf ("r10: %08lx  r9 : %08lx  r8 : %08lx\n",
> -               regs->ARM_r10, regs->ARM_r9, regs->ARM_r8);
> -       printf ("r7 : %08lx  r6 : %08lx  r5 : %08lx  r4 : %08lx\n",
> -               regs->ARM_r7, regs->ARM_r6, regs->ARM_r5, regs->ARM_r4);
> -       printf ("r3 : %08lx  r2 : %08lx  r1 : %08lx  r0 : %08lx\n",
> -               regs->ARM_r3, regs->ARM_r2, regs->ARM_r1, regs->ARM_r0);
> -       printf ("Flags: %c%c%c%c",
> -               flags & PSR_N_BIT ? 'N' : 'n',
> -               flags & PSR_Z_BIT ? 'Z' : 'z',
> -               flags & PSR_C_BIT ? 'C' : 'c', flags & PSR_V_BIT ? 'V' : 'v');
> -       printf ("  IRQs %s  FIQs %s  Mode %s%s\n",
> -               interrupts_enabled (regs) ? "on" : "off",
> -               fast_interrupts_enabled (regs) ? "on" : "off",
> -               processor_modes[processor_mode (regs)],
> -               thumb_mode (regs) ? " (T)" : "");
> -#ifdef CONFIG_ARM_UNWIND
> -       unwind_backtrace(regs);
> -#endif
> -}
> -
>  static void __noreturn do_exception(struct pt_regs *pt_regs)
>  {
> -       show_regs(pt_regs);
> -
>         panic("");
>  }
>
> @@ -121,14 +75,6 @@ void do_prefetch_abort (struct pt_regs *pt_regs)
>   */
>  void do_data_abort (struct pt_regs *pt_regs)
>  {
> -       u32 far;
> -
> -       asm volatile ("mrc     p15, 0, %0, c6, c0, 0" : "=r" (far) : : "cc");
> -
> -       printf("unable to handle %s at address 0x%08x\n",
> -                       far < PAGE_SIZE ? "NULL pointer dereference" :
> -                       "paging request", far);
> -
>         do_exception(pt_regs);
>  }
>
> @@ -156,6 +102,43 @@ void do_irq (struct pt_regs *pt_regs)
>         do_exception(pt_regs);
>  }
>
> +void do_bad_sync(struct pt_regs *pt_regs)
> +{
> +       printf("bad sync\n");
> +       do_exception(pt_regs);
> +}
> +
> +void do_bad_irq(struct pt_regs *pt_regs)
> +{
> +       printf("bad irq\n");
> +       do_exception(pt_regs);
> +}
> +
> +void do_bad_fiq(struct pt_regs *pt_regs)
> +{
> +       printf("bad fiq\n");
> +       do_exception(pt_regs);
> +}
> +
> +void do_bad_error(struct pt_regs *pt_regs)
> +{
> +       printf("bad error\n");
> +       do_exception(pt_regs);
> +}
> +
> +void do_sync(struct pt_regs *pt_regs)
> +{
> +       printf("sync exception\n");
> +       do_exception(pt_regs);
> +}
> +
> +
> +void do_error(struct pt_regs *pt_regs)
> +{
> +       printf("error exception\n");
> +       do_exception(pt_regs);
> +}
> +
>  extern volatile int arm_ignore_data_abort;
>  extern volatile int arm_data_abort_occurred;
>
> diff --git a/arch/arm/cpu/lowlevel.S b/arch/arm/cpu/lowlevel.S
> index e5baa12..0691b2b 100644
> --- a/arch/arm/cpu/lowlevel.S
> +++ b/arch/arm/cpu/lowlevel.S
> @@ -1,60 +1,43 @@
>  #include <linux/linkage.h>
>  #include <init.h>
>  #include <asm/system.h>
> +#include <asm/gic.h>
> +#include <asm-generic/memory_layout.h>
>
>  .section ".text_bare_init_","ax"
> +
>  ENTRY(arm_cpu_lowlevel_init)
> -       /* save lr, since it may be banked away with a processor mode change */
> -       mov     r2, lr
> -       /* set the cpu to SVC32 mode, mask irq and fiq */
> -       mrs     r12, cpsr
> -       bic     r12, r12, #0x1f
> -       orr     r12, r12, #0xd3
> -       msr     cpsr, r12
> -
> -#if __LINUX_ARM_ARCH__ >= 6
> -       /*
> -        * ICIALLU: Invalidate all instruction caches to PoU,
> -        * includes flushing of branch predictors.
> -        * Even if the i-cache is off it might contain stale entries
> -        * that are better discarded before enabling the cache.
> -        * Architectually this is even possible after a cold reset.
> -        */
> -       mcr     p15, 0, r12, c7, c5, 0
> -       /* DSB, ensure completion of the invalidation */
> -       mcr     p15, 0, r12, c7, c10, 4
> -       /*
> -        * ISB, ensure instruction fetch path is in sync.
> -        * Note that the ARM Architecture Reference Manual, ARMv7-A and ARMv7-R
> -        * edition (ARM DDI 0406C.c) doesn't define this instruction in the
> -        * ARMv6 part (D12.7.10). It only has: "Support of additional
> -        * operations is IMPLEMENTATION DEFINED".
> -        * But an earlier version of the ARMARM (ARM DDI 0100I) does define it
> -        * as "Flush prefetch buffer (PrefetchFlush)".
> -        */
> -       mcr     p15, 0, r12, c7, c5, 4
> -#endif
> -
> -       /* disable MMU stuff and data/unified caches */
> -       mrc     p15, 0, r12, c1, c0, 0          /* SCTLR */
> -       bic     r12, r12, #(CR_M | CR_C | CR_B)
> -       bic     r12, r12, #(CR_S | CR_R | CR_V)
> -
> -       /* enable instruction cache */
> -       orr     r12, r12, #CR_I
> -
> -#if __LINUX_ARM_ARCH__ >= 6
> -       orr     r12, r12, #CR_U
> -       bic     r12, r12, #CR_A
> -#else
> -       orr     r12, r12, #CR_A
> -#endif
> -
> -#ifdef __ARMEB__
> -       orr     r12, r12, #CR_B
> -#endif
> -
> -       mcr     p15, 0, r12, c1, c0, 0          /* SCTLR */
> -
> -       mov     pc, r2
> +       adr     x0, vectors
> +       mrs     x1, CurrentEL
> +       cmp     x1, #0xC                /* Check EL3 state */
> +       b.eq    1f
> +       cmp     x1, #0x8                /* Check EL2 state */
> +       b.eq    2f
> +       cmp     x1, #0x4                /* Check EL1 state */
> +       b.eq    3f
> +
> +1:
> +       msr     vbar_el3, x0
> +       mov     x0, #1                  /* Non-Secure EL0/1 */
> +       orr     x0, x0, #(1 << 10)      /* 64-bit EL2 */
> +       msr     scr_el3, x0
> +       msr     cptr_el3, xzr
> +       b       done
> +
> +2:
> +       msr     vbar_el2, x0
> +       mov     x0, #0x33ff             /* Enable FP/SIMD */
> +       msr     cptr_el2, x0
> +       b       done
> +
> +
> +3:
> +       msr     vbar_el1, x0
> +       mov     x0, #(3 << 20)          /* Enable FP/SIMD */
> +       msr     cpacr_el1, x0
> +       b       done
> +
> +done:
> +       ret
> +
>  ENDPROC(arm_cpu_lowlevel_init)
> diff --git a/arch/arm/cpu/mmu.c b/arch/arm/cpu/mmu.c
> index a31bce4..b171f80 100644
> --- a/arch/arm/cpu/mmu.c
> +++ b/arch/arm/cpu/mmu.c
> @@ -32,54 +32,8 @@
>
>  #include "mmu.h"
>
> -static unsigned long *ttb;
> -
> -static void create_sections(unsigned long virt, unsigned long phys, int size_m,
> -               unsigned int flags)
> -{
> -       int i;
> -
> -       phys >>= 20;
> -       virt >>= 20;
> -
> -       for (i = size_m; i > 0; i--, virt++, phys++)
> -               ttb[virt] = (phys << 20) | flags;
> -
> -       __mmu_cache_flush();
> -}
> -
> -/*
> - * Do it the simple way for now and invalidate the entire
> - * tlb
> - */
> -static inline void tlb_invalidate(void)
> -{
> -       asm volatile (
> -               "mov    r0, #0\n"
> -               "mcr    p15, 0, r0, c7, c10, 4; @ drain write buffer\n"
> -               "mcr    p15, 0, r0, c8, c6, 0;  @ invalidate D TLBs\n"
> -               "mcr    p15, 0, r0, c8, c5, 0;  @ invalidate I TLBs\n"
> -               :
> -               :
> -               : "r0"
> -       );
> -}
> -
> -#define PTE_FLAGS_CACHED_V7 (PTE_EXT_TEX(1) | PTE_BUFFERABLE | PTE_CACHEABLE)
> -#define PTE_FLAGS_WC_V7 PTE_EXT_TEX(1)
> -#define PTE_FLAGS_UNCACHED_V7 (0)
> -#define PTE_FLAGS_CACHED_V4 (PTE_SMALL_AP_UNO_SRW | PTE_BUFFERABLE | PTE_CACHEABLE)
> -#define PTE_FLAGS_UNCACHED_V4 PTE_SMALL_AP_UNO_SRW
> -
> -/*
> - * PTE flags to set cached and uncached areas.
> - * This will be determined at runtime.
> - */
> -static uint32_t pte_flags_cached;
> -static uint32_t pte_flags_wc;
> -static uint32_t pte_flags_uncached;
> -
> -#define PTE_MASK ((1 << 12) - 1)
> +static uint64_t *ttb;
> +static int free_idx;
>
>  static void arm_mmu_not_initialized_error(void)
>  {
> @@ -92,329 +46,175 @@ static void arm_mmu_not_initialized_error(void)
>         panic("MMU not initialized\n");
>  }
>
> +
>  /*
> - * Create a second level translation table for the given virtual address.
> - * We initially create a flat uncached mapping on it.
> - * Not yet exported, but may be later if someone finds use for it.
> + * Do it the simple way for now and invalidate the entire
> + * tlb
>   */
> -static u32 *arm_create_pte(unsigned long virt)
> +static inline void tlb_invalidate(void)
>  {
> -       u32 *table;
> -       int i;
> +       unsigned int el = current_el();
>
> -       table = memalign(0x400, 0x400);
> +       dsb();
>
> -       if (!ttb)
> -               arm_mmu_not_initialized_error();
> -
> -       ttb[virt >> 20] = (unsigned long)table | PMD_TYPE_TABLE;
> -
> -       for (i = 0; i < 256; i++) {
> -               table[i] = virt | PTE_TYPE_SMALL | pte_flags_uncached;
> -               virt += PAGE_SIZE;
> -       }
> +       if (el == 1)
> +               __asm__ __volatile__("tlbi alle1\n\t" : : : "memory");
> +       else if (el == 2)
> +               __asm__ __volatile__("tlbi alle2\n\t" : : : "memory");
> +       else if (el == 3)
> +               __asm__ __volatile__("tlbi alle3\n\t" : : : "memory");
>
> -       return table;
> +       dsb();
> +       isb();
>  }
>
> -static u32 *find_pte(unsigned long adr)
> +static int level2shift(int level)
>  {
> -       u32 *table;
> -
> -       if (!ttb)
> -               arm_mmu_not_initialized_error();
> -
> -       if ((ttb[adr >> 20] & PMD_TYPE_MASK) != PMD_TYPE_TABLE) {
> -               struct memory_bank *bank;
> -               int i = 0;
> +       /* Page is 12 bits wide, every level translates 9 bits */
> +       return (12 + 9 * (3 - level));
> +}
>
> -               /*
> -                * This should only be called for page mapped memory inside our
> -                * memory banks. It's a bug to call it with section mapped memory
> -                * locations.
> -                */
> -               pr_crit("%s: TTB for address 0x%08lx is not of type table\n",
> -                               __func__, adr);
> -               pr_crit("Memory banks:\n");
> -               for_each_memory_bank(bank)
> -                       pr_crit("#%d 0x%08lx - 0x%08lx\n", i, bank->start,
> -                                       bank->start + bank->size - 1);
> -               BUG();
> -       }
> +static uint64_t level2mask(int level)
> +{
> +       uint64_t mask = -EINVAL;
>
> -       /* find the coarse page table base address */
> -       table = (u32 *)(ttb[adr >> 20] & ~0x3ff);
> +       if (level == 1)
> +               mask = L1_ADDR_MASK;
> +       else if (level == 2)
> +               mask = L2_ADDR_MASK;
> +       else if (level == 3)
> +               mask = L3_ADDR_MASK;
>
> -       /* find second level descriptor */
> -       return &table[(adr >> PAGE_SHIFT) & 0xff];
> +       return mask;
>  }
>
> -static void dma_flush_range(unsigned long start, unsigned long end)
> +static int pte_type(uint64_t *pte)
>  {
> -       __dma_flush_range(start, end);
> -       if (outer_cache.flush_range)
> -               outer_cache.flush_range(start, end);
> +       return *pte & PMD_TYPE_MASK;
>  }
>
> -static void dma_inv_range(unsigned long start, unsigned long end)
> +static void set_table(uint64_t *pt, uint64_t *table_addr)
>  {
> -       if (outer_cache.inv_range)
> -               outer_cache.inv_range(start, end);
> -       __dma_inv_range(start, end);
> +       uint64_t val;
> +
> +       val = PMD_TYPE_TABLE | (uint64_t)table_addr;
> +       *pt = val;
>  }
>
> -static int __remap_range(void *_start, size_t size, u32 pte_flags)
> +static uint64_t *create_table(void)
>  {
> -       unsigned long start = (unsigned long)_start;
> -       u32 *p;
> -       int numentries, i;
> -
> -       numentries = size >> PAGE_SHIFT;
> -       p = find_pte(start);
> -
> -       for (i = 0; i < numentries; i++) {
> -               p[i] &= ~PTE_MASK;
> -               p[i] |= pte_flags | PTE_TYPE_SMALL;
> -       }
> +       uint64_t *new_table = ttb + free_idx * GRANULE_SIZE;
>
> -       dma_flush_range((unsigned long)p,
> -                       (unsigned long)p + numentries * sizeof(u32));
> +       /* Mark all entries as invalid */
> +       memset(new_table, 0, GRANULE_SIZE);
>
> -       tlb_invalidate();
> +       free_idx++;
>
> -       return 0;
> +       return new_table;
>  }
>
> -int arch_remap_range(void *start, size_t size, unsigned flags)
> +static uint64_t *get_level_table(uint64_t *pte)
>  {
> -       u32 pte_flags;
> -
> -       switch (flags) {
> -       case MAP_CACHED:
> -               pte_flags = pte_flags_cached;
> -               break;
> -       case MAP_UNCACHED:
> -               pte_flags = pte_flags_uncached;
> -               break;
> -       default:
> -               return -EINVAL;
> +       uint64_t *table = (uint64_t *)(*pte & XLAT_ADDR_MASK);
> +
> +       if (pte_type(pte) != PMD_TYPE_TABLE) {
> +               table = create_table();
> +               set_table(pte, table);
>         }
>
> -       return __remap_range(start, size, pte_flags);
> +       return table;
>  }
>
> -void *map_io_sections(unsigned long phys, void *_start, size_t size)
> +static uint64_t *find_pte(uint64_t addr)
>  {
> -       unsigned long start = (unsigned long)_start, sec;
> +       uint64_t *pte;
> +       uint64_t block_shift;
> +       uint64_t idx;
> +       int i;
>
> -       phys >>= 20;
> -       for (sec = start; sec < start + size; sec += (1 << 20))
> -               ttb[sec >> 20] = (phys++ << 20) | PMD_SECT_DEF_UNCACHED;
> +       pte = ttb;
>
> -       dma_flush_range((unsigned long)ttb, (unsigned long)ttb + 0x4000);
> -       tlb_invalidate();
> -       return _start;
> +       for (i = 1; i < 4; i++) {
> +               block_shift = level2shift(i);
> +               idx = (addr & level2mask(i)) >> block_shift;
> +               pte += idx;
> +
> +               if ((pte_type(pte) != PMD_TYPE_TABLE) || (block_shift <= GRANULE_SIZE_SHIFT))
> +                       break;
> +               else
> +                       pte = (uint64_t *)(*pte & XLAT_ADDR_MASK);
> +       }
> +
> +       return pte;
>  }
>
> -/*
> - * remap the memory bank described by mem cachable and
> - * bufferable
> - */
> -static int arm_mmu_remap_sdram(struct memory_bank *bank)
> +static void map_region(uint64_t virt, uint64_t phys, uint64_t size, uint64_t attr)
>  {
> -       unsigned long phys = (unsigned long)bank->start;
> -       unsigned long ttb_start = phys >> 20;
> -       unsigned long ttb_end = (phys >> 20) + (bank->size >> 20);
> -       unsigned long num_ptes = bank->size >> 12;
> -       int i, pte;
> -       u32 *ptes;
> +       uint64_t block_size;
> +       uint64_t block_shift;
> +       uint64_t *pte;
> +       uint64_t idx;
> +       uint64_t addr;
> +       uint64_t *table;
> +       int level;
>
> -       pr_debug("remapping SDRAM from 0x%08lx (size 0x%08lx)\n",
> -                       phys, bank->size);
> +       if (!ttb)
> +               arm_mmu_not_initialized_error();
>
> -       /*
> -        * We replace each 1MiB section in this range with second level page
> -        * tables, therefore we must have 1Mib aligment here.
> -        */
> -       if ((phys & (SZ_1M - 1)) || (bank->size & (SZ_1M - 1)))
> -               return -EINVAL;
> +       addr = virt;
>
> -       ptes = xmemalign(PAGE_SIZE, num_ptes * sizeof(u32));
> +       attr &= ~(PMD_TYPE_SECT);
>
> -       pr_debug("ptes: 0x%p ttb_start: 0x%08lx ttb_end: 0x%08lx\n",
> -                       ptes, ttb_start, ttb_end);
> +       while (size) {
> +               table = ttb;
> +               for (level = 1; level < 4; level++) {
> +                       block_shift = level2shift(level);
> +                       idx = (addr & level2mask(level)) >> block_shift;
> +                       block_size = (1 << block_shift);
>
> -       for (i = 0; i < num_ptes; i++) {
> -               ptes[i] = (phys + i * PAGE_SIZE) | PTE_TYPE_SMALL |
> -                       pte_flags_cached;
> -       }
> +                       pte = table + idx;
>
> -       pte = 0;
> +                       if (level == 3)
> +                               attr |= PMD_TYPE_PAGE;
> +                       else
> +                               attr |= PMD_TYPE_SECT;
>
> -       for (i = ttb_start; i < ttb_end; i++) {
> -               ttb[i] = (unsigned long)(&ptes[pte]) | PMD_TYPE_TABLE |
> -                       (0 << 4);
> -               pte += 256;
> -       }
> +                       if (size >= block_size && IS_ALIGNED(addr, block_size)) {
> +                               *pte = phys | attr;
> +                               addr += block_size;
> +                               phys += block_size;
> +                               size -= block_size;
> +                               break;
>
> -       dma_flush_range((unsigned long)ttb, (unsigned long)ttb + 0x4000);
> -       dma_flush_range((unsigned long)ptes,
> -                       (unsigned long)ptes + num_ptes * sizeof(u32));
> +                       }
>
> -       tlb_invalidate();
> +                       table = get_level_table(pte);
> +               }
>
> -       return 0;
> +       }
>  }
> -/*
> - * We have 8 exception vectors and the table consists of absolute
> - * jumps, so we need 8 * 4 bytes for the instructions and another
> - * 8 * 4 bytes for the addresses.
> - */
> -#define ARM_VECTORS_SIZE       (sizeof(u32) * 8 * 2)
> -
> -#define ARM_HIGH_VECTORS       0xffff0000
> -#define ARM_LOW_VECTORS                0x0
>
> -/**
> - * create_vector_table - create a vector table at given address
> - * @adr - The address where the vector table should be created
> - *
> - * After executing this function the vector table is found at the
> - * virtual address @adr.
> - */
> -static void create_vector_table(unsigned long adr)
> +static void create_sections(uint64_t virt, uint64_t phys, uint64_t size_m, uint64_t flags)
>  {
> -       struct resource *vectors_sdram;
> -       void *vectors;
> -       u32 *exc;
> -       int idx;
> -
> -       vectors_sdram = request_sdram_region("vector table", adr, SZ_4K);
> -       if (vectors_sdram) {
> -               /*
> -                * The vector table address is inside the SDRAM physical
> -                * address space. Use the existing identity mapping for
> -                * the vector table.
> -                */
> -               pr_debug("Creating vector table, virt = phys = 0x%08lx\n", adr);
> -               vectors = (void *)vectors_sdram->start;
> -       } else {
> -               /*
> -                * The vector table address is outside of SDRAM. Create
> -                * a secondary page table for the section and map
> -                * allocated memory to the vector address.
> -                */
> -               vectors = xmemalign(PAGE_SIZE, PAGE_SIZE);
> -               pr_debug("Creating vector table, virt = 0x%p, phys = 0x%08lx\n",
> -                        vectors, adr);
> -               exc = arm_create_pte(adr);
> -               idx = (adr & (SZ_1M - 1)) >> PAGE_SHIFT;
> -               exc[idx] = (u32)vectors | PTE_TYPE_SMALL | pte_flags_cached;
> -       }
>
> -       arm_fixup_vectors();
> -
> -       memset(vectors, 0, PAGE_SIZE);
> -       memcpy(vectors, __exceptions_start, __exceptions_stop - __exceptions_start);
> +       map_region(virt, phys, size_m, flags);
>  }
>
> -/**
> - * set_vector_table - let CPU use the vector table at given address
> - * @adr - The address of the vector table
> - *
> - * Depending on the CPU the possibilities differ. ARMv7 and later allow
> - * to map the vector table to arbitrary addresses. Other CPUs only allow
> - * vectors at 0xffff0000 or at 0x0.
> - */
> -static int set_vector_table(unsigned long adr)
> +void *map_io_sections(uint64_t phys, void *_start, size_t size)
>  {
> -       u32 cr;
> -
> -       if (cpu_architecture() >= CPU_ARCH_ARMv7) {
> -               pr_debug("Vectors are at 0x%08lx\n", adr);
> -               set_vbar(adr);
> -               return 0;
> -       }
>
> -       if (adr == ARM_HIGH_VECTORS) {
> -               cr = get_cr();
> -               cr |= CR_V;
> -               set_cr(cr);
> -               cr = get_cr();
> -               if (cr & CR_V) {
> -                       pr_debug("Vectors are at 0x%08lx\n", adr);
> -                       return 0;
> -               } else {
> -                       return -EINVAL;
> -               }
> -       }
> -
> -       if (adr == ARM_LOW_VECTORS) {
> -               cr = get_cr();
> -               cr &= ~CR_V;
> -               set_cr(cr);
> -               cr = get_cr();
> -               if (cr & CR_V) {
> -                       return -EINVAL;
> -               } else {
> -                       pr_debug("Vectors are at 0x%08lx\n", adr);
> -                       return 0;
> -               }
> -       }
> +       map_region((uint64_t)_start, phys, (uint64_t)size, PMD_SECT_DEF_UNCACHED);
>
> -       return -EINVAL;
> +       tlb_invalidate();
> +       return _start;
>  }
>
> -static void create_zero_page(void)
> -{
> -       struct resource *zero_sdram;
> -       u32 *zero;
> -
> -       zero_sdram = request_sdram_region("zero page", 0x0, SZ_4K);
> -       if (zero_sdram) {
> -               /*
> -                * Here we would need to set the second level page table
> -                * entry to faulting. This is not yet implemented.
> -                */
> -               pr_debug("zero page is in SDRAM area, currently not supported\n");
> -       } else {
> -               zero = arm_create_pte(0x0);
> -               zero[0] = 0;
> -               pr_debug("Created zero page\n");
> -       }
> -}
>
> -/*
> - * Map vectors and zero page
> - */
> -static void vectors_init(void)
> +int arch_remap_range(void *_start, size_t size, unsigned flags)
>  {
> -       /*
> -        * First try to use the vectors where they actually are, works
> -        * on ARMv7 and later.
> -        */
> -       if (!set_vector_table((unsigned long)__exceptions_start)) {
> -               arm_fixup_vectors();
> -               create_zero_page();
> -               return;
> -       }
> -
> -       /*
> -        * Next try high vectors at 0xffff0000.
> -        */
> -       if (!set_vector_table(ARM_HIGH_VECTORS)) {
> -               create_zero_page();
> -               create_vector_table(ARM_HIGH_VECTORS);
> -               return;
> -       }
> +       map_region((uint64_t)_start, (uint64_t)_start, (uint64_t)size, flags);
>
> -       /*
> -        * As a last resort use low vectors at 0x0. With this we can't
> -        * set the zero page to faulting and can't catch NULL pointer
> -        * exceptions.
> -        */
> -       set_vector_table(ARM_LOW_VECTORS);
> -       create_vector_table(ARM_LOW_VECTORS);
> +       return 0;
>  }
>
>  /*
> @@ -423,7 +223,6 @@ static void vectors_init(void)
>  static int mmu_init(void)
>  {
>         struct memory_bank *bank;
> -       int i;
>
>         if (list_empty(&memory_banks))
>                 /*
> @@ -434,56 +233,31 @@ static int mmu_init(void)
>                  */
>                 panic("MMU: No memory bank found! Cannot continue\n");
>
> -       arm_set_cache_functions();
> -
> -       if (cpu_architecture() >= CPU_ARCH_ARMv7) {
> -               pte_flags_cached = PTE_FLAGS_CACHED_V7;
> -               pte_flags_wc = PTE_FLAGS_WC_V7;
> -               pte_flags_uncached = PTE_FLAGS_UNCACHED_V7;
> -       } else {
> -               pte_flags_cached = PTE_FLAGS_CACHED_V4;
> -               pte_flags_wc = PTE_FLAGS_UNCACHED_V4;
> -               pte_flags_uncached = PTE_FLAGS_UNCACHED_V4;
> -       }
> -
> -       if (get_cr() & CR_M) {
> -               /*
> -                * Early MMU code has already enabled the MMU. We assume a
> -                * flat 1:1 section mapping in this case.
> -                */
> -               asm volatile ("mrc  p15,0,%0,c2,c0,0" : "=r"(ttb));
> -
> -               /* Clear unpredictable bits [13:0] */
> -               ttb = (unsigned long *)((unsigned long)ttb & ~0x3fff);
> -
> +       if (get_sctlr() & CR_M) {
> +               ttb = (uint64_t *)get_ttbr(1);
>                 if (!request_sdram_region("ttb", (unsigned long)ttb, SZ_16K))
>                         /*
> -                        * This can mean that:
> -                        * - the early MMU code has put the ttb into a place
> -                        *   which we don't have inside our available memory
> -                        * - Somebody else has occupied the ttb region which means
> -                        *   the ttb will get corrupted.
> -                        */
> +                       * This can mean that:
> +                       * - the early MMU code has put the ttb into a place
> +                       *   which we don't have inside our available memory
> +                       * - Somebody else has occupied the ttb region which means
> +                       *   the ttb will get corrupted.
> +                       */
>                         pr_crit("Critical Error: Can't request SDRAM region for ttb at %p\n",
> -                                       ttb);
> +                               ttb);
>         } else {
> -               ttb = memalign(0x10000, 0x4000);
> -       }
> +               ttb = memalign(0x1000, SZ_16K);
> +               free_idx = 1;
>
> -       pr_debug("ttb: 0x%p\n", ttb);
> +               memset(ttb, 0, GRANULE_SIZE);
>
> -       /* Set the ttb register */
> -       asm volatile ("mcr  p15,0,%0,c2,c0,0" : : "r"(ttb) /*:*/);
> +               set_ttbr_tcr_mair(current_el(), (uint64_t)ttb, TCR_FLAGS, MEMORY_ATTR);
> +       }
>
> -       /* Set the Domain Access Control Register */
> -       i = 0x3;
> -       asm volatile ("mcr  p15,0,%0,c3,c0,0" : : "r"(i) /*:*/);
> +       pr_debug("ttb: 0x%p\n", ttb);
>
>         /* create a flat mapping using 1MiB sections */
> -       create_sections(0, 0, PAGE_SIZE, PMD_SECT_AP_WRITE | PMD_SECT_AP_READ |
> -                       PMD_TYPE_SECT);
> -
> -       vectors_init();
> +       create_sections(0, 0, GRANULE_SIZE, PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT);
>
>         /*
>          * First remap sdram cached using sections.
> @@ -491,92 +265,70 @@ static int mmu_init(void)
>          * below
>          */
>         for_each_memory_bank(bank)
> -               create_sections(bank->start, bank->start, bank->size >> 20,
> -                               PMD_SECT_DEF_CACHED);
> -
> -       __mmu_cache_on();
> -
> -       /*
> -        * Now that we have the MMU and caches on remap sdram again using
> -        * page tables
> -        */
> -       for_each_memory_bank(bank)
> -               arm_mmu_remap_sdram(bank);
> +               create_sections(bank->start, bank->start, bank->size, PMD_SECT_DEF_CACHED);
>
>         return 0;
>  }
>  mmu_initcall(mmu_init);
>
> -void *dma_alloc_coherent(size_t size, dma_addr_t *dma_handle)
> +void mmu_enable(void)
>  {
> -       void *ret;
> -
> -       size = PAGE_ALIGN(size);
> -       ret = xmemalign(PAGE_SIZE, size);
> -       if (dma_handle)
> -               *dma_handle = (dma_addr_t)ret;
> -
> -       dma_inv_range((unsigned long)ret, (unsigned long)ret + size);
> +       if (!ttb)
> +               arm_mmu_not_initialized_error();
>
> -       __remap_range(ret, size, pte_flags_uncached);
> +       if (!(get_sctlr() & CR_M)) {
>
> -       return ret;
> +               isb();
> +               set_sctlr(get_sctlr() | CR_M | CR_C | CR_I);
> +       }
>  }
>
> -void *dma_alloc_writecombine(size_t size, dma_addr_t *dma_handle)
> +void mmu_disable(void)
>  {
> -       void *ret;
> +       unsigned int sctlr;
> +
> +       if (!ttb)
> +               arm_mmu_not_initialized_error();
>
> -       size = PAGE_ALIGN(size);
> -       ret = xmemalign(PAGE_SIZE, size);
> -       if (dma_handle)
> -               *dma_handle = (dma_addr_t)ret;
> +       sctlr = get_sctlr();
> +       sctlr &= ~(CR_M | CR_C | CR_I);
>
> -       dma_inv_range((unsigned long)ret, (unsigned long)ret + size);
> +       tlb_invalidate();
>
> -       __remap_range(ret, size, pte_flags_wc);
> +       dsb();
> +       isb();
>
> -       return ret;
> -}
> +       set_sctlr(sctlr);
>
> -unsigned long virt_to_phys(volatile void *virt)
> -{
> -       return (unsigned long)virt;
> +       dsb();
> +       isb();
>  }
>
> -void *phys_to_virt(unsigned long phys)
> +void mmu_early_enable(uint64_t membase, uint64_t memsize, uint64_t _ttb)
>  {
> -       return (void *)phys;
> -}
> +       ttb = (uint64_t *)_ttb;
>
> -void dma_free_coherent(void *mem, dma_addr_t dma_handle, size_t size)
> -{
> -       size = PAGE_ALIGN(size);
> -       __remap_range(mem, size, pte_flags_cached);
> +       memset(ttb, 0, GRANULE_SIZE);
> +       free_idx = 1;
> +
> +       set_ttbr_tcr_mair(current_el(), (uint64_t)ttb, TCR_FLAGS, MEMORY_ATTR);
>
> -       free(mem);
> +       create_sections(0, 0, 4096, PMD_SECT_AP_WRITE |
> +                       PMD_SECT_AP_READ | PMD_TYPE_SECT);
> +
> +       create_sections(membase, membase, memsize, PMD_SECT_AP_WRITE |
> +               PMD_SECT_AP_READ | PMD_TYPE_SECT | PMD_SECT_WB);
> +
> +       isb();
> +       set_sctlr(get_sctlr() | CR_M);
>  }
>
> -void dma_sync_single_for_cpu(unsigned long address, size_t size,
> -                            enum dma_data_direction dir)
> +unsigned long virt_to_phys(volatile void *virt)
>  {
> -       if (dir != DMA_TO_DEVICE) {
> -               if (outer_cache.inv_range)
> -                       outer_cache.inv_range(address, address + size);
> -               __dma_inv_range(address, address + size);
> -       }
> +       return (unsigned long)virt;
>  }
>
> -void dma_sync_single_for_device(unsigned long address, size_t size,
> -                               enum dma_data_direction dir)
> +void *phys_to_virt(unsigned long phys)
>  {
> -       if (dir == DMA_FROM_DEVICE) {
> -               __dma_inv_range(address, address + size);
> -               if (outer_cache.inv_range)
> -                       outer_cache.inv_range(address, address + size);
> -       } else {
> -               __dma_clean_range(address, address + size);
> -               if (outer_cache.clean_range)
> -                       outer_cache.clean_range(address, address + size);
> -       }
> +       return (void *)phys;
>  }
> diff --git a/arch/arm/cpu/mmu.h b/arch/arm/cpu/mmu.h
> index 79ebc80..a20adec 100644
> --- a/arch/arm/cpu/mmu.h
> +++ b/arch/arm/cpu/mmu.h
> @@ -1,6 +1,159 @@
>  #ifndef __ARM_MMU_H
>  #define __ARM_MMU_H
>
> +#define UL(x)          _AC(x, UL)
> +
> +#define UNUSED_DESC                0x6EbAAD0BBADbA6E0
> +
> +#define VA_START                   0x0
> +#define BITS_PER_VA                33
> +
> +/* Granule size of 4KB is being used */
> +#define GRANULE_SIZE_SHIFT         12
> +#define GRANULE_SIZE               (1 << GRANULE_SIZE_SHIFT)
> +#define XLAT_ADDR_MASK             ((1UL << BITS_PER_VA) - GRANULE_SIZE)
> +#define GRANULE_SIZE_MASK          ((1 << GRANULE_SIZE_SHIFT) - 1)
> +
> +#define BITS_RESOLVED_PER_LVL   (GRANULE_SIZE_SHIFT - 3)
> +#define L1_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 2)
> +#define L2_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 1)
> +#define L3_ADDR_SHIFT           (GRANULE_SIZE_SHIFT + BITS_RESOLVED_PER_LVL * 0)
> +
> +
> +#define L1_ADDR_MASK     (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L1_ADDR_SHIFT)
> +#define L2_ADDR_MASK     (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L2_ADDR_SHIFT)
> +#define L3_ADDR_MASK     (((1UL << BITS_RESOLVED_PER_LVL) - 1) << L3_ADDR_SHIFT)
> +
> +/* These macros give the size of the region addressed by each entry of a xlat
> +   table at any given level */
> +#define L3_XLAT_SIZE               (1UL << L3_ADDR_SHIFT)
> +#define L2_XLAT_SIZE               (1UL << L2_ADDR_SHIFT)
> +#define L1_XLAT_SIZE               (1UL << L1_ADDR_SHIFT)
> +
> +#define GRANULE_MASK   GRANULE_SIZE
> +
> +/*
> + * Memory types
> + */
> +#define MT_DEVICE_NGNRNE       0
> +#define MT_DEVICE_NGNRE                1
> +#define MT_DEVICE_GRE          2
> +#define MT_NORMAL_NC           3
> +#define MT_NORMAL              4
> +
> +#define MEMORY_ATTRIBUTES      ((0x00 << (MT_DEVICE_NGNRNE*8)) |       \
> +               (0x04 << (MT_DEVICE_NGNRE*8)) |         \
> +               (0x0c << (MT_DEVICE_GRE*8)) |           \
> +               (0x44 << (MT_NORMAL_NC*8)) |            \
> +               (UL(0xff) << (MT_NORMAL*8)))
> +
> +/*
> + * Hardware page table definitions.
> + *
> + * Level 2 descriptor (PMD).
> + */
> +#define PMD_TYPE_MASK          (3 << 0)
> +#define PMD_TYPE_FAULT         (0 << 0)
> +#define PMD_TYPE_TABLE         (3 << 0)
> +#define PMD_TYPE_PAGE          (3 << 0)
> +#define PMD_TYPE_SECT          (1 << 0)
> +
> +/*
> + * Section
> + */
> +#define PMD_SECT_NON_SHARE     (0 << 8)
> +#define PMD_SECT_OUTER_SHARE   (2 << 8)
> +#define PMD_SECT_INNER_SHARE   (3 << 8)
> +#define PMD_SECT_AF            (1 << 10)
> +#define PMD_SECT_NG            (1 << 11)
> +#define PMD_SECT_PXN           (UL(1) << 53)
> +#define PMD_SECT_UXN           (UL(1) << 54)
> +
> +/*
> + * AttrIndx[2:0]
> + */
> +#define PMD_ATTRINDX(t)                ((t) << 2)
> +#define PMD_ATTRINDX_MASK      (7 << 2)
> +
> +/*
> + * TCR flags.
> + */
> +#define TCR_T0SZ(x)            ((64 - (x)) << 0)
> +#define TCR_IRGN_NC            (0 << 8)
> +#define TCR_IRGN_WBWA          (1 << 8)
> +#define TCR_IRGN_WT            (2 << 8)
> +#define TCR_IRGN_WBNWA         (3 << 8)
> +#define TCR_IRGN_MASK          (3 << 8)
> +#define TCR_ORGN_NC            (0 << 10)
> +#define TCR_ORGN_WBWA          (1 << 10)
> +#define TCR_ORGN_WT            (2 << 10)
> +#define TCR_ORGN_WBNWA         (3 << 10)
> +#define TCR_ORGN_MASK          (3 << 10)
> +#define TCR_SHARED_NON         (0 << 12)
> +#define TCR_SHARED_OUTER       (2 << 12)
> +#define TCR_SHARED_INNER       (3 << 12)
> +#define TCR_TG0_4K             (0 << 14)
> +#define TCR_TG0_64K            (1 << 14)
> +#define TCR_TG0_16K            (2 << 14)
> +#define TCR_EL1_IPS_BITS       (UL(3) << 32)   /* 42 bits physical address */
> +#define TCR_EL2_IPS_BITS       (3 << 16)       /* 42 bits physical address */
> +#define TCR_EL3_IPS_BITS       (3 << 16)       /* 42 bits physical address */
> +
> +#define TCR_EL1_RSVD           (1 << 31)
> +#define TCR_EL2_RSVD           (1 << 31 | 1 << 23)
> +#define TCR_EL3_RSVD           (1 << 31 | 1 << 23)
> +
> +#define TCR_FLAGS              (TCR_TG0_4K | \
> +               TCR_SHARED_OUTER | \
> +               TCR_SHARED_INNER | \
> +               TCR_IRGN_WBWA | \
> +               TCR_ORGN_WBWA | \
> +               TCR_T0SZ(BITS_PER_VA))
> +
> +#define MEMORY_ATTR     (PMD_SECT_AF | PMD_SECT_INNER_SHARE |    \
> +               PMD_ATTRINDX(MT_NORMAL) |       \
> +               PMD_TYPE_SECT)
> +
> +#ifndef __ASSEMBLY__
> +
> +static inline void set_ttbr_tcr_mair(int el, uint64_t table, uint64_t tcr, uint64_t attr)
> +{
> +       asm volatile("dsb sy");
> +       if (el == 1) {
> +               asm volatile("msr ttbr0_el1, %0" : : "r" (table) : "memory");
> +               asm volatile("msr tcr_el1, %0" : : "r" (tcr) : "memory");
> +               asm volatile("msr mair_el1, %0" : : "r" (attr) : "memory");
> +       } else if (el == 2) {
> +               asm volatile("msr ttbr0_el2, %0" : : "r" (table) : "memory");
> +               asm volatile("msr tcr_el2, %0" : : "r" (tcr) : "memory");
> +               asm volatile("msr mair_el2, %0" : : "r" (attr) : "memory");
> +       } else if (el == 3) {
> +               asm volatile("msr ttbr0_el3, %0" : : "r" (table) : "memory");
> +               asm volatile("msr tcr_el3, %0" : : "r" (tcr) : "memory");
> +               asm volatile("msr mair_el3, %0" : : "r" (attr) : "memory");
> +       } else {
> +               hang();
> +       }
> +       asm volatile("isb");
> +}
> +
> +static inline uint64_t get_ttbr(int el)
> +{
> +       uint64_t val;
> +       if (el == 1) {
> +               asm volatile("mrs %0, ttbr0_el1" : "=r" (val));
> +       } else if (el == 2) {
> +               asm volatile("mrs %0, ttbr0_el2" : "=r" (val));
> +       } else if (el == 3) {
> +               asm volatile("mrs %0, ttbr0_el3" : "=r" (val));
> +       } else {
> +               hang();
> +       }
> +
> +       return val;
> +}
> +#endif
> +
>  #ifdef CONFIG_MMU
>  void __mmu_cache_on(void);
>  void __mmu_cache_off(void);
> @@ -11,4 +164,6 @@ static inline void __mmu_cache_off(void) {}
>  static inline void __mmu_cache_flush(void) {}
>  #endif
>
> +void mmu_early_enable(uint64_t membase, uint64_t memsize, uint64_t _ttb);
> +
>  #endif /* __ARM_MMU_H */
> diff --git a/arch/arm/cpu/start.c b/arch/arm/cpu/start.c
> index e037d91..1d017bc 100644
> --- a/arch/arm/cpu/start.c
> +++ b/arch/arm/cpu/start.c
> @@ -31,7 +31,7 @@
>  #include <malloc.h>
>
>  #include <debug_ll.h>
> -#include "mmu-early.h"
> +#include "mmu.h"
>
>  unsigned long arm_stack_top;
>  static unsigned long arm_head_bottom;
> @@ -151,7 +151,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase,
>                 relocate_to_adr(barebox_base);
>         }
>
> -       setup_c();
> +//     setup_c();
>
>         barrier();
>
> @@ -170,7 +170,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase,
>                 } else {
>                         pr_debug("enabling MMU, ttb @ 0x%08lx\n", ttb);
>                         arm_early_mmu_cache_invalidate();
> -                       mmu_early_enable(membase, memsize, ttb);
> +                       mmu_early_enable((uint64_t)membase, (uint64_t)memsize, (uint64_t)ttb);
>                 }
>         }
>
> @@ -193,7 +193,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase,
>                 if (totalsize) {
>                         unsigned long mem = arm_mem_boarddata(membase, endmem,
>                                                               totalsize);
> -                       pr_debug("found %s in boarddata, copying to 0x%08lx\n",
> +                       pr_debug("found %s in boarddata, copying to 0x%lu\n",
>                                  name, mem);
>                         barebox_boarddata = memcpy((void *)mem, boarddata,
>                                                    totalsize);
> @@ -229,7 +229,7 @@ __noreturn void barebox_non_pbl_start(unsigned long membase,
>
>  #ifndef CONFIG_PBL_IMAGE
>
> -void __naked __section(.text_entry) start(void)
> +void __section(.text_entry) start(void)
>  {
>         barebox_arm_head();
>  }
> @@ -239,7 +239,7 @@ void __naked __section(.text_entry) start(void)
>   * First function in the uncompressed image. We get here from
>   * the pbl. The stack already has been set up by the pbl.
>   */
> -void __naked __section(.text_entry) start(unsigned long membase,
> +void __section(.text_entry) start(unsigned long membase,
>                 unsigned long memsize, void *boarddata)
>  {
>         barebox_non_pbl_start(membase, memsize, boarddata);
> diff --git a/arch/arm/cpu/uncompress.c b/arch/arm/cpu/uncompress.c
> index b8e2e9f..5bcce6b 100644
> --- a/arch/arm/cpu/uncompress.c
> +++ b/arch/arm/cpu/uncompress.c
> @@ -60,7 +60,7 @@ void __noreturn barebox_multi_pbl_start(unsigned long membase,
>                  * to the current address. Otherwise it may be a readonly location.
>                  * Copy and relocate to the start of the memory in this case.
>                  */
> -               if (pc > membase && pc - membase < memsize)
> +               if (pc > membase && pc < membase + memsize)
>                         relocate_to_current_adr();
>                 else
>                         relocate_to_adr(membase);
> diff --git a/arch/arm/include/asm/barebox-arm.h b/arch/arm/include/asm/barebox-arm.h
> index 8e7b45c..6713326 100644
> --- a/arch/arm/include/asm/barebox-arm.h
> +++ b/arch/arm/include/asm/barebox-arm.h
> @@ -97,7 +97,7 @@ void *barebox_arm_boot_dtb(void);
>  static inline unsigned long arm_mem_stack(unsigned long membase,
>                                           unsigned long endmem)
>  {
> -       return endmem - SZ_64K - STACK_SIZE;
> +       return endmem - STACK_SIZE;
>  }
>
>  static inline unsigned long arm_mem_ttb(unsigned long membase,
> diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
> index 138ebe2..ac85a0a 100644
> --- a/arch/arm/include/asm/bitops.h
> +++ b/arch/arm/include/asm/bitops.h
> @@ -1,184 +1,48 @@
>  /*
> - * Copyright 1995, Russell King.
> - * Various bits and pieces copyrights include:
> - *  Linus Torvalds (test_bit).
> + * Copyright (C) 2012 ARM Ltd.
>   *
> - * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
>   *
> - * Please note that the code in this file should never be included
> - * from user space.  Many of these are not implemented in assembler
> - * since they would be too costly.  Also, they require priviledged
> - * instructions (which are not available from user mode) to ensure
> - * that they are atomic.
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
> +#ifndef __ASM_BITOPS_H
> +#define __ASM_BITOPS_H
>
> -#ifndef __ASM_ARM_BITOPS_H
> -#define __ASM_ARM_BITOPS_H
> +#include <linux/compiler.h>
>
>  #ifndef _LINUX_BITOPS_H
>  #error only <linux/bitops.h> can be included directly
>  #endif
>
>  /*
> - * Functions equivalent of ops.h
> - */
> -static inline void __set_bit(int nr, volatile void *addr)
> -{
> -       ((unsigned char *) addr)[nr >> 3] |= (1U << (nr & 7));
> -}
> -
> -static inline void __clear_bit(int nr, volatile void *addr)
> -{
> -       ((unsigned char *) addr)[nr >> 3] &= ~(1U << (nr & 7));
> -}
> -
> -static inline void __change_bit(int nr, volatile void *addr)
> -{
> -       ((unsigned char *) addr)[nr >> 3] ^= (1U << (nr & 7));
> -}
> -
> -static inline int __test_and_set_bit(int nr, volatile void *addr)
> -{
> -       unsigned int mask = 1 << (nr & 7);
> -       unsigned int oldval;
> -
> -       oldval = ((unsigned char *) addr)[nr >> 3];
> -       ((unsigned char *) addr)[nr >> 3] = oldval | mask;
> -       return oldval & mask;
> -}
> -
> -static inline int __test_and_clear_bit(int nr, volatile void *addr)
> -{
> -       unsigned int mask = 1 << (nr & 7);
> -       unsigned int oldval;
> -
> -       oldval = ((unsigned char *) addr)[nr >> 3];
> -       ((unsigned char *) addr)[nr >> 3] = oldval & ~mask;
> -       return oldval & mask;
> -}
> -
> -static inline int __test_and_change_bit(int nr, volatile void *addr)
> -{
> -       unsigned int mask = 1 << (nr & 7);
> -       unsigned int oldval;
> -
> -       oldval = ((unsigned char *) addr)[nr >> 3];
> -       ((unsigned char *) addr)[nr >> 3] = oldval ^ mask;
> -       return oldval & mask;
> -}
> -
> -/*
> - * This routine doesn't need to be atomic.
> - */
> -static inline int test_bit(int nr, const void * addr)
> -{
> -    return ((unsigned char *) addr)[nr >> 3] & (1U << (nr & 7));
> -}
> -
> -#define set_bit(x, y)                  __set_bit(x, y)
> -#define clear_bit(x, y)                        __clear_bit(x, y)
> -#define change_bit(x, y)               __change_bit(x, y)
> -#define test_and_set_bit(x, y)         __test_and_set_bit(x, y)
> -#define test_and_clear_bit(x, y)       __test_and_clear_bit(x, y)
> -#define test_and_change_bit(x, y)      __test_and_change_bit(x, y)
> -
> -#ifndef __ARMEB__
> -/*
> - * These are the little endian definitions.
> + * Little endian assembly atomic bitops.
>   */
> -extern int _find_first_zero_bit_le(const void *p, unsigned size);
> -extern int _find_next_zero_bit_le(const void *p, int size, int offset);
> -extern int _find_first_bit_le(const unsigned long *p, unsigned size);
> -extern int _find_next_bit_le(const unsigned long *p, int size, int offset);
> -#define find_first_zero_bit(p, sz)     _find_first_zero_bit_le(p, sz)
> -#define find_next_zero_bit(p, sz, off) _find_next_zero_bit_le(p, sz, off)
> -#define find_first_bit(p, sz)          _find_first_bit_le(p, sz)
> -#define find_next_bit(p, sz, off)      _find_next_bit_le(p, sz, off)
> +extern void set_bit(int nr, volatile unsigned long *p);
> +extern void clear_bit(int nr, volatile unsigned long *p);
> +extern void change_bit(int nr, volatile unsigned long *p);
> +extern int test_and_set_bit(int nr, volatile unsigned long *p);
> +extern int test_and_clear_bit(int nr, volatile unsigned long *p);
> +extern int test_and_change_bit(int nr, volatile unsigned long *p);
>
> -#define WORD_BITOFF_TO_LE(x)           ((x))
> -
> -#else          /* ! __ARMEB__ */
> -
> -/*
> - * These are the big endian definitions.
> - */
> -extern int _find_first_zero_bit_be(const void *p, unsigned size);
> -extern int _find_next_zero_bit_be(const void *p, int size, int offset);
> -extern int _find_first_bit_be(const unsigned long *p, unsigned size);
> -extern int _find_next_bit_be(const unsigned long *p, int size, int offset);
> -#define find_first_zero_bit(p, sz)     _find_first_zero_bit_be(p, sz)
> -#define find_next_zero_bit(p, sz, off) _find_next_zero_bit_be(p, sz, off)
> -#define find_first_bit(p, sz)          _find_first_bit_be(p, sz)
> -#define find_next_bit(p, sz, off)      _find_next_bit_be(p, sz, off)
> -
> -#define WORD_BITOFF_TO_LE(x)           ((x) ^ 0x18)
> -
> -#endif         /* __ARMEB__ */
> -
> -#if defined(__LINUX_ARM_ARCH__) && (__LINUX_ARM_ARCH__ >= 5)
> -static inline int constant_fls(int x)
> -{
> -       int r = 32;
> -
> -       if (!x)
> -               return 0;
> -       if (!(x & 0xffff0000u)) {
> -               x <<= 16;
> -               r -= 16;
> -       }
> -       if (!(x & 0xff000000u)) {
> -               x <<= 8;
> -               r -= 8;
> -       }
> -       if (!(x & 0xf0000000u)) {
> -               x <<= 4;
> -               r -= 4;
> -       }
> -       if (!(x & 0xc0000000u)) {
> -               x <<= 2;
> -               r -= 2;
> -       }
> -       if (!(x & 0x80000000u)) {
> -               x <<= 1;
> -               r -= 1;
> -       }
> -       return r;
> -}
> -
> -/*
> - * On ARMv5 and above those functions can be implemented around
> - * the clz instruction for much better code efficiency.
> - */
> -#define fls(x) \
> -       (__builtin_constant_p(x) ? constant_fls(x) : \
> -       ({ int __r; asm("clz\t%0, %1" : "=r"(__r) : "r"(x) : "cc"); 32-__r; }))
> -#define ffs(x) ({ unsigned long __t = (x); fls(__t &-__t); })
> -#define __ffs(x) (ffs(x) - 1)
> -#define ffz(x) __ffs(~(x))
> -#else          /* ! __ARM__USE_GENERIC_FF */
> -/*
> - * ffz = Find First Zero in word. Undefined if no zero exists,
> - * so code should check against ~0UL first..
> - */
> -static inline unsigned long ffz(unsigned long word)
> -{
> -       int k;
> -
> -       word = ~word;
> -       k = 31;
> -       if (word & 0x0000ffff) { k -= 16; word <<= 16; }
> -       if (word & 0x00ff0000) { k -= 8;  word <<= 8;  }
> -       if (word & 0x0f000000) { k -= 4;  word <<= 4;  }
> -       if (word & 0x30000000) { k -= 2;  word <<= 2;  }
> -       if (word & 0x40000000) { k -= 1; }
> -       return k;
> -}
>  #include <asm-generic/bitops/__ffs.h>
>  #include <asm-generic/bitops/ffs.h>
> +#include <asm-generic/bitops/__fls.h>
>  #include <asm-generic/bitops/fls.h>
> -#endif         /* __ARM__USE_GENERIC_FF */
> +
> +#include <asm-generic/bitops/ffz.h>
>  #include <asm-generic/bitops/fls64.h>
> +#include <asm-generic/bitops/find.h>
>
>  #include <asm-generic/bitops/hweight.h>
>
> -#endif /* _ARM_BITOPS_H */
> +#include <asm-generic/bitops/ops.h>
> +
> +#endif /* __ASM_BITOPS_H */
> diff --git a/arch/arm/include/asm/boarddata.h b/arch/arm/include/asm/boarddata.h
> new file mode 100644
> index 0000000..8c3c5f0
> --- /dev/null
> +++ b/arch/arm/include/asm/boarddata.h
> @@ -0,0 +1,5 @@
> +#ifndef __ASM_BOARDDATA_H
> +#define __ASM_BOARDDATA_H
> +
> +
> +#endif /* __ASM_BOARDDATA_H */
> diff --git a/arch/arm/include/asm/cache-l2x0.h b/arch/arm/include/asm/cache-l2x0.h
> index 9bb245b..963dd99 100644
> --- a/arch/arm/include/asm/cache-l2x0.h
> +++ b/arch/arm/include/asm/cache-l2x0.h
> @@ -56,14 +56,6 @@
>  #define L2X0_LINE_TAG                  0xF30
>  #define L2X0_DEBUG_CTRL                        0xF40
>  #define L2X0_PREFETCH_CTRL             0xF60
> -#define   L2X0_DOUBLE_LINEFILL_EN                      (1 << 30)
> -#define   L2X0_INSTRUCTION_PREFETCH_EN                 (1 << 29)
> -#define   L2X0_DATA_PREFETCH_EN                                (1 << 28)
> -#define   L2X0_DOUBLE_LINEFILL_ON_WRAP_READ_DIS                (1 << 27)
> -#define   L2X0_PREFETCH_DROP_EN                                (1 << 24)
> -#define   L2X0_INCR_DOUBLE_LINEFILL_EN                 (1 << 23)
> -#define   L2X0_ESCLUSIVE_SEQUENCE_EN                   (1 << 21)
> -
>  #define L2X0_POWER_CTRL                        0xF80
>  #define   L2X0_DYNAMIC_CLK_GATING_EN   (1 << 1)
>  #define   L2X0_STNDBY_MODE_EN          (1 << 0)
> diff --git a/arch/arm/include/asm/cache.h b/arch/arm/include/asm/cache.h
> index 2f6eab0..5a524f3 100644
> --- a/arch/arm/include/asm/cache.h
> +++ b/arch/arm/include/asm/cache.h
> @@ -1,9 +1,11 @@
>  #ifndef __ASM_CACHE_H
>  #define __ASM_CACHE_H
>
> +extern void v8_invalidate_icache_all(void);
> +
>  static inline void flush_icache(void)
>  {
> -       asm volatile("mcr p15, 0, %0, c7, c5, 0" : : "r" (0));
> +       v8_invalidate_icache_all();
>  }
>
>  int arm_set_cache_functions(void);
> diff --git a/arch/arm/include/asm/errata.h b/arch/arm/include/asm/errata.h
> index 98137b5..9525823 100644
> --- a/arch/arm/include/asm/errata.h
> +++ b/arch/arm/include/asm/errata.h
> @@ -77,12 +77,3 @@ static inline void enable_arm_errata_794072_war(void)
>                 "mcr    p15, 0, r0, c15, c0, 1\n"
>         );
>  }
> -
> -static inline void enable_arm_errata_845369_war(void)
> -{
> -       __asm__ __volatile__ (
> -               "mrc    p15, 0, r0, c15, c0, 1\n"
> -               "orr    r0, r0, #1 << 22\n"
> -               "mcr    p15, 0, r0, c15, c0, 1\n"
> -       );
> -}
> diff --git a/arch/arm/include/asm/gic.h b/arch/arm/include/asm/gic.h
> new file mode 100644
> index 0000000..c7c17e3
> --- /dev/null
> +++ b/arch/arm/include/asm/gic.h
> @@ -0,0 +1,128 @@
> +#ifndef __GIC_H__
> +#define __GIC_H__
> +
> +/* Generic Interrupt Controller Definitions */
> +//#ifdef CONFIG_GICV3
> +//#define GICD_BASE                       (0x2f000000)
> +//#define GICR_BASE                       (0x2f100000)
> +//#else
> +
> +//#if defined(CONFIG_TARGET_VEXPRESS64_BASE_FVP) || \
> +        defined(CONFIG_TARGET_VEXPRESS64_BASE_FVP_DRAM)
> +//#define GICD_BASE                       (0x2f000000)
> +//#define GICC_BASE                       (0x2c000000)
> +//#elif CONFIG_TARGET_VEXPRESS64_JUNO
> +#define GIC_DIST_BASE                       (0x2C010000)
> +#define GIC_CPU_BASE                       (0x2C02f000)
> +//#else
> +//#error "Unknown board variant"
> +//#endif
> +//#endif /* !CONFIG_GICV3 */
> +
> +/* Register offsets for the ARM generic interrupt controller (GIC) */
> +
> +#define GIC_DIST_OFFSET                0x1000
> +#define GIC_CPU_OFFSET_A9      0x0100
> +#define GIC_CPU_OFFSET_A15     0x2000
> +
> +/* Distributor Registers */
> +#define GICD_CTLR              0x0000
> +#define GICD_TYPER             0x0004
> +#define GICD_IIDR              0x0008
> +#define GICD_STATUSR           0x0010
> +#define GICD_SETSPI_NSR                0x0040
> +#define GICD_CLRSPI_NSR                0x0048
> +#define GICD_SETSPI_SR         0x0050
> +#define GICD_CLRSPI_SR         0x0058
> +#define GICD_SEIR              0x0068
> +#define GICD_IGROUPRn          0x0080
> +#define GICD_ISENABLERn                0x0100
> +#define GICD_ICENABLERn                0x0180
> +#define GICD_ISPENDRn          0x0200
> +#define GICD_ICPENDRn          0x0280
> +#define GICD_ISACTIVERn                0x0300
> +#define GICD_ICACTIVERn                0x0380
> +#define GICD_IPRIORITYRn       0x0400
> +#define GICD_ITARGETSRn                0x0800
> +#define GICD_ICFGR             0x0c00
> +#define GICD_IGROUPMODRn       0x0d00
> +#define GICD_NSACRn            0x0e00
> +#define GICD_SGIR              0x0f00
> +#define GICD_CPENDSGIRn                0x0f10
> +#define GICD_SPENDSGIRn                0x0f20
> +#define GICD_IROUTERn          0x6000
> +
> +/* Cpu Interface Memory Mapped Registers */
> +#define GICC_CTLR              0x0000
> +#define GICC_PMR               0x0004
> +#define GICC_BPR               0x0008
> +#define GICC_IAR               0x000C
> +#define GICC_EOIR              0x0010
> +#define GICC_RPR               0x0014
> +#define GICC_HPPIR             0x0018
> +#define GICC_ABPR              0x001c
> +#define GICC_AIAR              0x0020
> +#define GICC_AEOIR             0x0024
> +#define GICC_AHPPIR            0x0028
> +#define GICC_APRn              0x00d0
> +#define GICC_NSAPRn            0x00e0
> +#define GICC_IIDR              0x00fc
> +#define GICC_DIR               0x1000
> +
> +/* ReDistributor Registers for Control and Physical LPIs */
> +#define GICR_CTLR              0x0000
> +#define GICR_IIDR              0x0004
> +#define GICR_TYPER             0x0008
> +#define GICR_STATUSR           0x0010
> +#define GICR_WAKER             0x0014
> +#define GICR_SETLPIR           0x0040
> +#define GICR_CLRLPIR           0x0048
> +#define GICR_SEIR              0x0068
> +#define GICR_PROPBASER         0x0070
> +#define GICR_PENDBASER         0x0078
> +#define GICR_INVLPIR           0x00a0
> +#define GICR_INVALLR           0x00b0
> +#define GICR_SYNCR             0x00c0
> +#define GICR_MOVLPIR           0x0100
> +#define GICR_MOVALLR           0x0110
> +
> +/* ReDistributor Registers for SGIs and PPIs */
> +#define GICR_IGROUPRn          0x0080
> +#define GICR_ISENABLERn                0x0100
> +#define GICR_ICENABLERn                0x0180
> +#define GICR_ISPENDRn          0x0200
> +#define GICR_ICPENDRn          0x0280
> +#define GICR_ISACTIVERn                0x0300
> +#define GICR_ICACTIVERn                0x0380
> +#define GICR_IPRIORITYRn       0x0400
> +#define GICR_ICFGR0            0x0c00
> +#define GICR_ICFGR1            0x0c04
> +#define GICR_IGROUPMODRn       0x0d00
> +#define GICR_NSACRn            0x0e00
> +
> +/* Cpu Interface System Registers */
> +#define ICC_IAR0_EL1           S3_0_C12_C8_0
> +#define ICC_IAR1_EL1           S3_0_C12_C12_0
> +#define ICC_EOIR0_EL1          S3_0_C12_C8_1
> +#define ICC_EOIR1_EL1          S3_0_C12_C12_1
> +#define ICC_HPPIR0_EL1         S3_0_C12_C8_2
> +#define ICC_HPPIR1_EL1         S3_0_C12_C12_2
> +#define ICC_BPR0_EL1           S3_0_C12_C8_3
> +#define ICC_BPR1_EL1           S3_0_C12_C12_3
> +#define ICC_DIR_EL1            S3_0_C12_C11_1
> +#define ICC_PMR_EL1            S3_0_C4_C6_0
> +#define ICC_RPR_EL1            S3_0_C12_C11_3
> +#define ICC_CTLR_EL1           S3_0_C12_C12_4
> +#define ICC_CTLR_EL3           S3_6_C12_C12_4
> +#define ICC_SRE_EL1            S3_0_C12_C12_5
> +#define ICC_SRE_EL2            S3_4_C12_C9_5
> +#define ICC_SRE_EL3            S3_6_C12_C12_5
> +#define ICC_IGRPEN0_EL1                S3_0_C12_C12_6
> +#define ICC_IGRPEN1_EL1                S3_0_C12_C12_7
> +#define ICC_IGRPEN1_EL3                S3_6_C12_C12_7
> +#define ICC_SEIEN_EL1          S3_0_C12_C13_0
> +#define ICC_SGI0R_EL1          S3_0_C12_C11_7
> +#define ICC_SGI1R_EL1          S3_0_C12_C11_5
> +#define ICC_ASGI1R_EL1         S3_0_C12_C11_6
> +
> +#endif /* __GIC_H__ */
> diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
> index 8de6544..8a1d80a 100644
> --- a/arch/arm/include/asm/mmu.h
> +++ b/arch/arm/include/asm/mmu.h
> @@ -13,9 +13,7 @@
>
>  struct arm_memory;
>
> -static inline void mmu_enable(void)
> -{
> -}
> +void mmu_enable(void);
>  void mmu_disable(void);
>  static inline void arm_create_section(unsigned long virt, unsigned long phys, int size_m,
>                 unsigned int flags)
> @@ -30,7 +28,7 @@ static inline void setup_dma_coherent(unsigned long offset)
>  #define ARCH_HAS_REMAP
>  #define MAP_ARCH_DEFAULT MAP_CACHED
>  int arch_remap_range(void *_start, size_t size, unsigned flags);
> -void *map_io_sections(unsigned long physaddr, void *start, size_t size);
> +void *map_io_sections(uint64_t phys, void *_start, size_t size);
>  #else
>  #define MAP_ARCH_DEFAULT MAP_UNCACHED
>  static inline void *map_io_sections(unsigned long phys, void *start, size_t size)
> diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
> index fd1521d..e4a3c53 100644
> --- a/arch/arm/include/asm/pgtable.h
> +++ b/arch/arm/include/asm/pgtable.h
> @@ -18,8 +18,9 @@
>   */
>  #define PMD_TYPE_MASK          (3 << 0)
>  #define PMD_TYPE_FAULT         (0 << 0)
> -#define PMD_TYPE_TABLE         (1 << 0)
> -#define PMD_TYPE_SECT          (2 << 0)
> +#define PMD_TYPE_TABLE         (3 << 0)
> +#define PMD_TYPE_PAGE          (3 << 0)
> +#define PMD_TYPE_SECT          (1 << 0)
>  #define PMD_BIT4               (1 << 4)
>  #define PMD_DOMAIN(x)          ((x) << 5)
>  #define PMD_PROTECTION         (1 << 9)        /* v5 */
> diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
> index 022d365..450b63a 100644
> --- a/arch/arm/include/asm/ptrace.h
> +++ b/arch/arm/include/asm/ptrace.h
> @@ -20,124 +20,15 @@
>  /* options set using PTRACE_SETOPTIONS */
>  #define PTRACE_O_TRACESYSGOOD  0x00000001
>
> -/*
> - * PSR bits
> - */
> -#define USR26_MODE     0x00000000
> -#define FIQ26_MODE     0x00000001
> -#define IRQ26_MODE     0x00000002
> -#define SVC26_MODE     0x00000003
> -#define USR_MODE       0x00000010
> -#define FIQ_MODE       0x00000011
> -#define IRQ_MODE       0x00000012
> -#define SVC_MODE       0x00000013
> -#define ABT_MODE       0x00000017
> -#define UND_MODE       0x0000001b
> -#define SYSTEM_MODE    0x0000001f
> -#define MODE32_BIT     0x00000010
> -#define MODE_MASK      0x0000001f
> -#define PSR_T_BIT      0x00000020
> -#define PSR_F_BIT      0x00000040
> -#define PSR_I_BIT      0x00000080
> -#define PSR_A_BIT      0x00000100
> -#define PSR_E_BIT      0x00000200
> -#define PSR_J_BIT      0x01000000
> -#define PSR_Q_BIT      0x08000000
> -#define PSR_V_BIT      0x10000000
> -#define PSR_C_BIT      0x20000000
> -#define PSR_Z_BIT      0x40000000
> -#define PSR_N_BIT      0x80000000
> -#define PCMASK         0
> -
>  #ifndef __ASSEMBLY__
>
>  /* this struct defines the way the registers are stored on the
>     stack during a system call. */
>
>  struct pt_regs {
> -       long uregs[18];
> +       long uregs[31];
>  };
>
> -#define ARM_cpsr       uregs[16]
> -#define ARM_pc         uregs[15]
> -#define ARM_lr         uregs[14]
> -#define ARM_sp         uregs[13]
> -#define ARM_ip         uregs[12]
> -#define ARM_fp         uregs[11]
> -#define ARM_r10                uregs[10]
> -#define ARM_r9         uregs[9]
> -#define ARM_r8         uregs[8]
> -#define ARM_r7         uregs[7]
> -#define ARM_r6         uregs[6]
> -#define ARM_r5         uregs[5]
> -#define ARM_r4         uregs[4]
> -#define ARM_r3         uregs[3]
> -#define ARM_r2         uregs[2]
> -#define ARM_r1         uregs[1]
> -#define ARM_r0         uregs[0]
> -#define ARM_ORIG_r0    uregs[17]
> -
> -#ifdef __KERNEL__
> -
> -#define user_mode(regs)        \
> -       (((regs)->ARM_cpsr & 0xf) == 0)
> -
> -#ifdef CONFIG_ARM_THUMB
> -#define thumb_mode(regs) \
> -       (((regs)->ARM_cpsr & PSR_T_BIT))
> -#else
> -#define thumb_mode(regs) (0)
> -#endif
> -
> -#define processor_mode(regs) \
> -       ((regs)->ARM_cpsr & MODE_MASK)
> -
> -#define interrupts_enabled(regs) \
> -       (!((regs)->ARM_cpsr & PSR_I_BIT))
> -
> -#define fast_interrupts_enabled(regs) \
> -       (!((regs)->ARM_cpsr & PSR_F_BIT))
> -
> -#define condition_codes(regs) \
> -       ((regs)->ARM_cpsr & (PSR_V_BIT | PSR_C_BIT | PSR_Z_BIT | PSR_N_BIT))
> -
> -/* Are the current registers suitable for user mode?
> - * (used to maintain security in signal handlers)
> - */
> -static inline int valid_user_regs(struct pt_regs *regs)
> -{
> -       if ((regs->ARM_cpsr & 0xf) == 0 &&
> -           (regs->ARM_cpsr & (PSR_F_BIT | PSR_I_BIT)) == 0)
> -               return 1;
> -
> -       /*
> -        * Force CPSR to something logical...
> -        */
> -       regs->ARM_cpsr &= (PSR_V_BIT | PSR_C_BIT | PSR_Z_BIT | PSR_N_BIT |
> -                               0x10);
> -
> -       return 0;
> -}
> -
> -#endif /* __KERNEL__ */
> -
>  #endif /* __ASSEMBLY__ */
>
> -#ifndef __ASSEMBLY__
> -#define pc_pointer(v) \
> -       ((v) & ~PCMASK)
> -
> -#define instruction_pointer(regs) \
> -       (pc_pointer((regs)->ARM_pc))
> -
> -#ifdef __KERNEL__
> -extern void show_regs(struct pt_regs *);
> -
> -#define predicate(x)   (x & 0xf0000000)
> -#define PREDICATE_ALWAYS       0xe0000000
> -
> -#endif
> -
> -#endif /* __ASSEMBLY__ */
> -
>  #endif
> diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
> index b118a42..04a79c4 100644
> --- a/arch/arm/include/asm/system.h
> +++ b/arch/arm/include/asm/system.h
> @@ -1,96 +1,125 @@
>  #ifndef __ASM_ARM_SYSTEM_H
>  #define __ASM_ARM_SYSTEM_H
>
> -#if __LINUX_ARM_ARCH__ >= 7
>  #define isb() __asm__ __volatile__ ("isb" : : : "memory")
> -#define dsb() __asm__ __volatile__ ("dsb" : : : "memory")
> +#define dsb() __asm__ __volatile__ ("dsb sy" : : : "memory")
>  #define dmb() __asm__ __volatile__ ("dmb" : : : "memory")
> -#elif defined(CONFIG_CPU_XSC3) || __LINUX_ARM_ARCH__ == 6
> -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
> -                                    : : "r" (0) : "memory")
> -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
> -                                    : : "r" (0) : "memory")
> -#define dmb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
> -                                    : : "r" (0) : "memory")
> -#elif defined(CONFIG_CPU_FA526)
> -#define isb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c5, 4" \
> -                                    : : "r" (0) : "memory")
> -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
> -                                    : : "r" (0) : "memory")
> -#define dmb() __asm__ __volatile__ ("" : : : "memory")
> -#else
> -#define isb() __asm__ __volatile__ ("" : : : "memory")
> -#define dsb() __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 4" \
> -                                    : : "r" (0) : "memory")
> -#define dmb() __asm__ __volatile__ ("" : : : "memory")
> -#endif
>
>  /*
> - * CR1 bits (CP#15 CR1)
> + * SCTLR_EL1/SCTLR_EL2/SCTLR_EL3 bits definitions
>   */
> -#define CR_M    (1 << 0)       /* MMU enable                           */
> -#define CR_A    (1 << 1)       /* Alignment abort enable               */
> -#define CR_C    (1 << 2)       /* Dcache enable                        */
> -#define CR_W    (1 << 3)       /* Write buffer enable                  */
> -#define CR_P    (1 << 4)       /* 32-bit exception handler             */
> -#define CR_D    (1 << 5)       /* 32-bit data address range            */
> -#define CR_L    (1 << 6)       /* Implementation defined               */
> -#define CR_B    (1 << 7)       /* Big endian                           */
> -#define CR_S    (1 << 8)       /* System MMU protection                */
> -#define CR_R    (1 << 9)       /* ROM MMU protection                   */
> -#define CR_F    (1 << 10)      /* Implementation defined               */
> -#define CR_Z    (1 << 11)      /* Implementation defined               */
> -#define CR_I    (1 << 12)      /* Icache enable                        */
> -#define CR_V    (1 << 13)      /* Vectors relocated to 0xffff0000      */
> -#define CR_RR   (1 << 14)      /* Round Robin cache replacement        */
> -#define CR_L4   (1 << 15)      /* LDR pc can set T bit                 */
> -#define CR_DT   (1 << 16)
> -#define CR_IT   (1 << 18)
> -#define CR_ST   (1 << 19)
> -#define CR_FI   (1 << 21)      /* Fast interrupt (lower latency mode)  */
> -#define CR_U    (1 << 22)      /* Unaligned access operation           */
> -#define CR_XP   (1 << 23)      /* Extended page tables                 */
> -#define CR_VE   (1 << 24)      /* Vectored interrupts                  */
> -#define CR_EE   (1 << 25)      /* Exception (Big) Endian               */
> -#define CR_TRE  (1 << 28)      /* TEX remap enable                     */
> -#define CR_AFE  (1 << 29)      /* Access flag enable                   */
> -#define CR_TE   (1 << 30)      /* Thumb exception enable               */
> +#define CR_M           (1 << 0)        /* MMU enable                   */
> +#define CR_A           (1 << 1)        /* Alignment abort enable       */
> +#define CR_C           (1 << 2)        /* Dcache enable                */
> +#define CR_SA          (1 << 3)        /* Stack Alignment Check Enable */
> +#define CR_I           (1 << 12)       /* Icache enable                */
> +#define CR_WXN         (1 << 19)       /* Write Permission Imply XN    */
> +#define CR_EE          (1 << 25)       /* Exception (Big) Endian       */
> +
> +#ifndef CONFIG_SYS_FULL_VA
> +#define PGTABLE_SIZE   (0x10000)
> +#else
> +#define PGTABLE_SIZE   CONFIG_SYS_PGTABLE_SIZE
> +#endif
> +
> +/* 2MB granularity */
> +#define MMU_SECTION_SHIFT      21
> +#define MMU_SECTION_SIZE       (1 << MMU_SECTION_SHIFT)
>
>  #ifndef __ASSEMBLY__
> -static inline unsigned int get_cr(void)
> +
> +enum dcache_option {
> +       DCACHE_OFF = 0x3,
> +};
> +
> +#define wfi()                          \
> +       ({asm volatile(                 \
> +       "wfi" : : : "memory");          \
> +       })
> +
> +static inline unsigned int current_el(void)
>  {
> -       unsigned int val;
> -       asm volatile ("mrc p15, 0, %0, c1, c0, 0  @ get CR" : "=r" (val) : : "cc");
> -       return val;
> +       unsigned int el;
> +       asm volatile("mrs %0, CurrentEL" : "=r" (el) : : "cc");
> +       return el >> 2;
>  }
>
> -static inline void set_cr(unsigned int val)
> +static inline unsigned int get_sctlr(void)
>  {
> -       asm volatile("mcr p15, 0, %0, c1, c0, 0 @ set CR"
> -         : : "r" (val) : "cc");
> -       isb();
> +       unsigned int el, val;
> +
> +       el = current_el();
> +       if (el == 1)
> +               asm volatile("mrs %0, sctlr_el1" : "=r" (val) : : "cc");
> +       else if (el == 2)
> +               asm volatile("mrs %0, sctlr_el2" : "=r" (val) : : "cc");
> +       else
> +               asm volatile("mrs %0, sctlr_el3" : "=r" (val) : : "cc");
> +
> +       return val;
>  }
>
> -#ifdef CONFIG_CPU_32v7
> -static inline unsigned int get_vbar(void)
> +static inline void set_sctlr(unsigned int val)
>  {
> -       unsigned int vbar;
> -       asm volatile("mrc p15, 0, %0, c12, c0, 0 @ get VBAR"
> -                    : "=r" (vbar) : : "cc");
> -       return vbar;
> +       unsigned int el;
> +
> +       el = current_el();
> +       if (el == 1)
> +               asm volatile("msr sctlr_el1, %0" : : "r" (val) : "cc");
> +       else if (el == 2)
> +               asm volatile("msr sctlr_el2, %0" : : "r" (val) : "cc");
> +       else
> +               asm volatile("msr sctlr_el3, %0" : : "r" (val) : "cc");
> +
> +       asm volatile("isb");
>  }
>
> -static inline void set_vbar(unsigned int vbar)
> +static inline unsigned long read_mpidr(void)
>  {
> -       asm volatile("mcr p15, 0, %0, c12, c0, 0 @ set VBAR"
> -                    : : "r" (vbar) : "cc");
> -       isb();
> +       unsigned long val;
> +
> +       asm volatile("mrs %0, mpidr_el1" : "=r" (val));
> +
> +       return val;
>  }
> -#else
> -static inline unsigned int get_vbar(void) { return 0; }
> -static inline void set_vbar(unsigned int vbar) {}
> -#endif
>
> -#endif
> +#define BSP_COREID     0
> +
> +void __asm_flush_dcache_all(void);
> +void __asm_invalidate_dcache_all(void);
> +void __asm_flush_dcache_range(u64 start, u64 end);
> +void __asm_invalidate_tlb_all(void);
> +void __asm_invalidate_icache_all(void);
> +int __asm_flush_l3_cache(void);
> +
> +void armv8_switch_to_el2(void);
> +void armv8_switch_to_el1(void);
> +void gic_init(void);
> +void gic_send_sgi(unsigned long sgino);
> +void wait_for_wakeup(void);
> +void protect_secure_region(void);
> +void smp_kick_all_cpus(void);
> +
> +void flush_l3_cache(void);
> +
> +/*
> + * Issue a hypervisor call in accordance with ARM "SMC Calling convention",
> + * DEN0028A
> + *
> + * @args: input and output arguments
> + *
> + */
> +void hvc_call(struct pt_regs *args);
> +
> +/*
> + * Issue a secure monitor call in accordance with ARM "SMC Calling convention",
> + * DEN0028A
> + *
> + * @args: input and output arguments
> + *
> + */
> +void smc_call(struct pt_regs *args);
> +
> +#endif /* __ASSEMBLY__ */
>
>  #endif /* __ASM_ARM_SYSTEM_H */
> diff --git a/arch/arm/include/asm/system_info.h b/arch/arm/include/asm/system_info.h
> index 0761848..f595aae 100644
> --- a/arch/arm/include/asm/system_info.h
> +++ b/arch/arm/include/asm/system_info.h
> @@ -13,6 +13,7 @@
>  #define CPU_ARCH_ARMv5TEJ      7
>  #define CPU_ARCH_ARMv6         8
>  #define CPU_ARCH_ARMv7         9
> +#define CPU_ARCH_ARMv8         10
>
>  #define CPU_IS_ARM720          0x41007200
>  #define CPU_IS_ARM720_MASK     0xff00fff0
> @@ -41,6 +42,12 @@
>  #define CPU_IS_CORTEX_A15      0x410fc0f0
>  #define CPU_IS_CORTEX_A15_MASK 0xff0ffff0
>
> +#define CPU_IS_CORTEX_A53      0x410fd034
> +#define CPU_IS_CORTEX_A53_MASK 0xff0ffff0
> +
> +#define CPU_IS_CORTEX_A57      0x411fd070
> +#define CPU_IS_CORTEX_A57_MASK 0xff0ffff0
> +
>  #define CPU_IS_PXA250          0x69052100
>  #define CPU_IS_PXA250_MASK     0xfffff7f0
>
> @@ -112,6 +119,20 @@
>  #define cpu_is_cortex_a15() (0)
>  #endif
>
> +
> +#ifdef CONFIG_CPU_64v8
> +#ifdef ARM_ARCH
> +#define ARM_MULTIARCH
> +#else
> +#define ARM_ARCH CPU_ARCH_ARMv8
> +#endif
> +#define cpu_is_cortex_a53() cpu_is_arm(CORTEX_A53)
> +#define cpu_is_cortex_a57() cpu_is_arm(CORTEX_A57)
> +#else
> +#define cpu_is_cortex_a53() (0)
> +#define cpu_is_cortex_a57() (0)
> +#endif
> +
>  #ifndef __ASSEMBLY__
>
>  #ifdef ARM_MULTIARCH
> @@ -124,31 +145,33 @@ static inline int arm_early_get_cpu_architecture(void)
>  {
>         int cpu_arch;
>
> -       if ((read_cpuid_id() & 0x0008f000) == 0) {
> -               cpu_arch = CPU_ARCH_UNKNOWN;
> -       } else if ((read_cpuid_id() & 0x0008f000) == 0x00007000) {
> -               cpu_arch = (read_cpuid_id() & (1 << 23)) ? CPU_ARCH_ARMv4T : CPU_ARCH_ARMv3;
> -       } else if ((read_cpuid_id() & 0x00080000) == 0x00000000) {
> -               cpu_arch = (read_cpuid_id() >> 16) & 7;
> -               if (cpu_arch)
> -                       cpu_arch += CPU_ARCH_ARMv3;
> -       } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
> -               unsigned int mmfr0;
> -
> -               /* Revised CPUID format. Read the Memory Model Feature
> -                * Register 0 and check for VMSAv7 or PMSAv7 */
> -               asm("mrc        p15, 0, %0, c0, c1, 4"
> -                   : "=r" (mmfr0));
> -               if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
> -                   (mmfr0 & 0x000000f0) >= 0x00000030)
> -                       cpu_arch = CPU_ARCH_ARMv7;
> -               else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
> -                        (mmfr0 & 0x000000f0) == 0x00000020)
> -                       cpu_arch = CPU_ARCH_ARMv6;
> -               else
> -                       cpu_arch = CPU_ARCH_UNKNOWN;
> -       } else
> -               cpu_arch = CPU_ARCH_UNKNOWN;
> +//     if ((read_cpuid_id() & 0x0008f000) == 0) {
> +//             cpu_arch = CPU_ARCH_UNKNOWN;
> +//     } else if ((read_cpuid_id() & 0x0008f000) == 0x00007000) {
> +//             cpu_arch = (read_cpuid_id() & (1 << 23)) ? CPU_ARCH_ARMv4T : CPU_ARCH_ARMv3;
> +//     } else if ((read_cpuid_id() & 0x00080000) == 0x00000000) {
> +//             cpu_arch = (read_cpuid_id() >> 16) & 7;
> +//             if (cpu_arch)
> +//                     cpu_arch += CPU_ARCH_ARMv3;
> +//     } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
> +//             unsigned int mmfr0;
> +//
> +//             /* Revised CPUID format. Read the Memory Model Feature
> +//              * Register 0 and check for VMSAv7 or PMSAv7 */
> +//             asm("mrc        p15, 0, %0, c0, c1, 4"
> +//                 : "=r" (mmfr0));
> +//             if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
> +//                 (mmfr0 & 0x000000f0) >= 0x00000030)
> +//                     cpu_arch = CPU_ARCH_ARMv7;
> +//             else if ((mmfr0 & 0x0000000f) == 0x00000002 ||
> +//                      (mmfr0 & 0x000000f0) == 0x00000020)
> +//                     cpu_arch = CPU_ARCH_ARMv6;
> +//             else
> +//                     cpu_arch = CPU_ARCH_UNKNOWN;
> +//     } else
> +//             cpu_arch = CPU_ARCH_UNKNOWN;
> +
> +       cpu_arch = CPU_ARCH_ARMv8;
>
>         return cpu_arch;
>  }
> diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
> index e1c6f5b..5b9d4a5 100644
> --- a/arch/arm/lib/Makefile
> +++ b/arch/arm/lib/Makefile
> @@ -3,24 +3,11 @@ obj-$(CONFIG_BOOTM)   += bootm.o
>  obj-$(CONFIG_CMD_BOOTZ)        += bootz.o
>  obj-$(CONFIG_CMD_BOOTU)        += bootu.o
>  obj-y  += div0.o
> -obj-y  += findbit.o
> -obj-y  += io.o
> -obj-y  += io-readsb.o
> -obj-y  += io-readsw-armv4.o
> -obj-y  += io-readsl.o
> -obj-y  += io-writesb.o
> -obj-y  += io-writesw-armv4.o
> -obj-y  += io-writesl.o
> -obj-y  += lib1funcs.o
> -obj-y  += ashrdi3.o
> -obj-y  += ashldi3.o
> -obj-y  += lshrdi3.o
>  obj-y  += runtime-offset.o
>  pbl-y  += runtime-offset.o
>  obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS)    += memcpy.o
>  obj-$(CONFIG_ARM_OPTIMZED_STRING_FUNCTIONS)    += memset.o
>  obj-$(CONFIG_ARM_UNWIND) += unwind.o
> -obj-$(CONFIG_ARM_SEMIHOSTING) += semihosting-trap.o semihosting.o
>  obj-$(CONFIG_MODULES) += module.o
>  extra-y += barebox.lds
>
> diff --git a/arch/arm/lib/armlinux.c b/arch/arm/lib/armlinux.c
> index 47b9bd3..21a2292 100644
> --- a/arch/arm/lib/armlinux.c
> +++ b/arch/arm/lib/armlinux.c
> @@ -270,12 +270,6 @@ void start_linux(void *adr, int swap, unsigned long initrd_address,
>         architecture = armlinux_get_architecture();
>
>         shutdown_barebox();
> -       if (swap) {
> -               u32 reg;
> -               __asm__ __volatile__("mrc p15, 0, %0, c1, c0" : "=r" (reg));
> -               reg ^= CR_B; /* swap big-endian flag */
> -               __asm__ __volatile__("mcr p15, 0, %0, c1, c0" :: "r" (reg));
> -       }
>
>         kernel(0, architecture, params);
>  }
> diff --git a/arch/arm/lib/barebox.lds.S b/arch/arm/lib/barebox.lds.S
> index 6dc8bd2..240699f 100644
> --- a/arch/arm/lib/barebox.lds.S
> +++ b/arch/arm/lib/barebox.lds.S
> @@ -20,8 +20,8 @@
>
>  #include <asm-generic/barebox.lds.h>
>
> -OUTPUT_FORMAT("elf32-littlearm", "elf32-littlearm", "elf32-littlearm")
> -OUTPUT_ARCH(arm)
> +OUTPUT_FORMAT("elf64-littleaarch64", "elf64-littleaarch64", "elf64-littleaarch64")
> +OUTPUT_ARCH(aarch64)
>  ENTRY(start)
>  SECTIONS
>  {
> @@ -43,7 +43,6 @@ SECTIONS
>                 __bare_init_start = .;
>                 *(.text_bare_init*)
>                 __bare_init_end = .;
> -               . = ALIGN(4);
>                 __exceptions_start = .;
>                 KEEP(*(.text_exceptions*))
>                 __exceptions_stop = .;
> diff --git a/arch/arm/lib/bootm.c b/arch/arm/lib/bootm.c
> index f6024c8..1913d5f 100644
> --- a/arch/arm/lib/bootm.c
> +++ b/arch/arm/lib/bootm.c
> @@ -67,55 +67,6 @@ static int sdram_start_and_size(unsigned long *start, unsigned long *size)
>         return 0;
>  }
>
> -static int get_kernel_addresses(size_t image_size,
> -                                int verbose, unsigned long *load_address,
> -                                unsigned long *mem_free)
> -{
> -       unsigned long mem_start, mem_size;
> -       int ret;
> -       size_t image_decomp_size;
> -       unsigned long spacing;
> -
> -       ret = sdram_start_and_size(&mem_start, &mem_size);
> -       if (ret)
> -               return ret;
> -
> -       /*
> -        * We don't know the exact decompressed size so just use a conservative
> -        * default of 4 times the size of the compressed image.
> -        */
> -       image_decomp_size = PAGE_ALIGN(image_size * 4);
> -
> -       /*
> -        * By default put oftree/initrd close behind compressed kernel image to
> -        * avoid placing it outside of the kernels lowmem region.
> -        */
> -       spacing = SZ_1M;
> -
> -       if (*load_address == UIMAGE_INVALID_ADDRESS) {
> -               /*
> -                * Place the kernel at an address where it does not need to
> -                * relocate itself before decompression.
> -                */
> -               *load_address = mem_start + image_decomp_size;
> -               if (verbose)
> -                       printf("no OS load address, defaulting to 0x%08lx\n",
> -                               *load_address);
> -       } else if (*load_address <= mem_start + image_decomp_size) {
> -               /*
> -                * If the user/image specified an address where the kernel needs
> -                * to relocate itself before decompression we need to extend the
> -                * spacing to allow this relocation to happen without
> -                * overwriting anything placed behind the kernel.
> -                */
> -               spacing += image_decomp_size;
> -       }
> -
> -       *mem_free = PAGE_ALIGN(*load_address + image_size + spacing);
> -
> -       return 0;
> -}
> -
>  static int __do_bootm_linux(struct image_data *data, unsigned long free_mem, int swap)
>  {
>         unsigned long kernel;
> @@ -173,20 +124,38 @@ static int __do_bootm_linux(struct image_data *data, unsigned long free_mem, int
>
>  static int do_bootm_linux(struct image_data *data)
>  {
> -       unsigned long load_address, mem_free;
> +       unsigned long load_address, mem_start, mem_size, mem_free;
>         int ret;
>
> -       load_address = data->os_address;
> -
> -       ret = get_kernel_addresses(bootm_get_os_size(data),
> -                            bootm_verbose(data), &load_address, &mem_free);
> +       ret = sdram_start_and_size(&mem_start, &mem_size);
>         if (ret)
>                 return ret;
>
> +       load_address = data->os_address;
> +
> +       if (load_address == UIMAGE_INVALID_ADDRESS) {
> +               /*
> +                * Just use a conservative default of 4 times the size of the
> +                * compressed image, to avoid the need for the kernel to
> +                * relocate itself before decompression.
> +                */
> +               load_address = mem_start + PAGE_ALIGN(
> +                              bootm_get_os_size(data) * 4);
> +               if (bootm_verbose(data))
> +                       printf("no OS load address, defaulting to 0x%08lx\n",
> +                               load_address);
> +       }
> +
>         ret = bootm_load_os(data, load_address);
>         if (ret)
>                 return ret;
>
> +       /*
> +        * put oftree/initrd close behind compressed kernel image to avoid
> +        * placing it outside of the kernel's lowmem.
> +        */
> +       mem_free = PAGE_ALIGN(data->os_res->end + SZ_1M);
> +
>         return __do_bootm_linux(data, mem_free, 0);
>  }
>
> @@ -282,7 +251,11 @@ static int do_bootz_linux(struct image_data *data)
>         u32 end, start;
>         size_t image_size;
>         unsigned long load_address = data->os_address;
> -       unsigned long mem_free;
> +       unsigned long mem_start, mem_size, mem_free;
> +
> +       ret = sdram_start_and_size(&mem_start, &mem_size);
> +       if (ret)
> +               return ret;
>
>         fd = open(data->os_file, O_RDONLY);
>         if (fd < 0) {
> @@ -318,12 +291,20 @@ static int do_bootz_linux(struct image_data *data)
>         }
>
>         image_size = end - start;
> -       load_address = data->os_address;
>
> -       ret = get_kernel_addresses(image_size, bootm_verbose(data),
> -                            &load_address, &mem_free);
> -       if (ret)
> -               return ret;
> +       if (load_address == UIMAGE_INVALID_ADDRESS) {
> +               /*
> +                * Just use a conservative default of 4 times the size of the
> +                * compressed image, to avoid the need for the kernel to
> +                * relocate itself before decompression.
> +                */
> +               data->os_address = mem_start + PAGE_ALIGN(image_size * 4);
> +
> +               load_address = data->os_address;
> +               if (bootm_verbose(data))
> +                       printf("no OS load address, defaulting to 0x%08lx\n",
> +                               load_address);
> +       }
>
>         data->os_res = request_sdram_region("zimage", load_address, image_size);
>         if (!data->os_res) {
> @@ -359,6 +340,12 @@ static int do_bootz_linux(struct image_data *data)
>
>         close(fd);
>
> +       /*
> +        * put oftree/initrd close behind compressed kernel image to avoid
> +        * placing it outside of the kernel's lowmem.
> +        */
> +       mem_free = PAGE_ALIGN(data->os_res->end + SZ_1M);
> +
>         return __do_bootm_linux(data, mem_free, swap);
>
>  err_out:
> @@ -575,7 +562,7 @@ static int armlinux_register_image_handler(void)
>                 register_image_handler(&aimage_handler);
>                 binfmt_register(&binfmt_aimage_hook);
>         }
> -       if (IS_BUILTIN(CONFIG_FITIMAGE))
> +       if (IS_BUILTIN(CONFIG_CMD_BOOTM_FITIMAGE))
>                 register_image_handler(&arm_fit_handler);
>         binfmt_register(&binfmt_arm_zimage_hook);
>         binfmt_register(&binfmt_barebox_hook);
> diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
> index d8eb063..cc9a842 100644
> --- a/arch/arm/lib/copy_template.S
> +++ b/arch/arm/lib/copy_template.S
> @@ -1,268 +1,192 @@
>  /*
> - *  linux/arch/arm/lib/copy_template.s
> + * Copyright (C) 2013 ARM Ltd.
> + * Copyright (C) 2013 Linaro.
>   *
> - *  Code template for optimized memory copy functions
> + * This code is based on glibc cortex strings work originally authored by Linaro
> + * and re-licensed under GPLv2 for the Linux kernel. The original code can
> + * be found @
>   *
> - *  Author:    Nicolas Pitre
> - *  Created:   Sep 28, 2005
> - *  Copyright: MontaVista Software, Inc.
> + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
> + * files/head:/src/aarch64/
>   *
> - *  This program is free software; you can redistribute it and/or modify
> - *  it under the terms of the GNU General Public License version 2 as
> - *  published by the Free Software Foundation.
> - */
> -
> -/*
> - * Theory of operation
> - * -------------------
> - *
> - * This file provides the core code for a forward memory copy used in
> - * the implementation of memcopy(), copy_to_user() and copy_from_user().
> - *
> - * The including file must define the following accessor macros
> - * according to the need of the given function:
> - *
> - * ldr1w ptr reg abort
> - *
> - *     This loads one word from 'ptr', stores it in 'reg' and increments
> - *     'ptr' to the next word. The 'abort' argument is used for fixup tables.
> - *
> - * ldr4w ptr reg1 reg2 reg3 reg4 abort
> - * ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
> - *
> - *     This loads four or eight words starting from 'ptr', stores them
> - *     in provided registers and increments 'ptr' past those words.
> - *     The'abort' argument is used for fixup tables.
> - *
> - * ldr1b ptr reg cond abort
> - *
> - *     Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
> - *     It also must apply the condition code if provided, otherwise the
> - *     "al" condition is assumed by default.
> - *
> - * str1w ptr reg abort
> - * str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
> - * str1b ptr reg cond abort
> - *
> - *     Same as their ldr* counterparts, but data is stored to 'ptr' location
> - *     rather than being loaded.
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
>   *
> - * enter reg1 reg2
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
>   *
> - *     Preserve the provided registers on the stack plus any additional
> - *     data as needed by the implementation including this code. Called
> - *     upon code entry.
> - *
> - * exit reg1 reg2
> - *
> - *     Restore registers with the values previously saved with the
> - *     'preserv' macro. Called upon code termination.
> - *
> - * LDR1W_SHIFT
> - * STR1W_SHIFT
> - *
> - *     Correction to be applied to the "ip" register when branching into
> - *     the ldr1w or str1w instructions (some of these macros may expand to
> - *     than one 32bit instruction in Thumb-2)
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
>
> -               enter   r4, lr
> -
> -               subs    r2, r2, #4
> -               blt     8f
> -               ands    ip, r0, #3
> -       PLD(    pld     [r1, #0]                )
> -               bne     9f
> -               ands    ip, r1, #3
> -               bne     10f
> -
> -1:             subs    r2, r2, #(28)
> -               stmfd   sp!, {r5 - r8}
> -               blt     5f
> -
> -       CALGN(  ands    ip, r0, #31             )
> -       CALGN(  rsb     r3, ip, #32             )
> -       CALGN(  sbcnes  r4, r3, r2              )  @ C is always set here
> -       CALGN(  bcs     2f                      )
> -       CALGN(  adr     r4, 6f                  )
> -       CALGN(  subs    r2, r2, r3              )  @ C gets set
> -       CALGN(  add     pc, r4, ip              )
> -
> -       PLD(    pld     [r1, #0]                )
> -2:     PLD(    subs    r2, r2, #96             )
> -       PLD(    pld     [r1, #28]               )
> -       PLD(    blt     4f                      )
> -       PLD(    pld     [r1, #60]               )
> -       PLD(    pld     [r1, #92]               )
> -
> -3:     PLD(    pld     [r1, #124]              )
> -4:             ldr8w   r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
> -               subs    r2, r2, #32
> -               str8w   r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
> -               bge     3b
> -       PLD(    cmn     r2, #96                 )
> -       PLD(    bge     4b                      )
> -
> -5:             ands    ip, r2, #28
> -               rsb     ip, ip, #32
> -#if LDR1W_SHIFT > 0
> -               lsl     ip, ip, #LDR1W_SHIFT
> -#endif
> -               addne   pc, pc, ip              @ C is always clear here
> -               b       7f
> -6:
> -               .rept   (1 << LDR1W_SHIFT)
> -               W(nop)
> -               .endr
> -               ldr1w   r1, r3, abort=20f
> -               ldr1w   r1, r4, abort=20f
> -               ldr1w   r1, r5, abort=20f
> -               ldr1w   r1, r6, abort=20f
> -               ldr1w   r1, r7, abort=20f
> -               ldr1w   r1, r8, abort=20f
> -               ldr1w   r1, lr, abort=20f
> -
> -#if LDR1W_SHIFT < STR1W_SHIFT
> -               lsl     ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
> -#elif LDR1W_SHIFT > STR1W_SHIFT
> -               lsr     ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
> -#endif
> -               add     pc, pc, ip
> -               nop
> -               .rept   (1 << STR1W_SHIFT)
> -               W(nop)
> -               .endr
> -               str1w   r0, r3, abort=20f
> -               str1w   r0, r4, abort=20f
> -               str1w   r0, r5, abort=20f
> -               str1w   r0, r6, abort=20f
> -               str1w   r0, r7, abort=20f
> -               str1w   r0, r8, abort=20f
> -               str1w   r0, lr, abort=20f
> -
> -       CALGN(  bcs     2b                      )
> -
> -7:             ldmfd   sp!, {r5 - r8}
> -
> -8:             movs    r2, r2, lsl #31
> -               ldr1b   r1, r3, ne, abort=21f
> -               ldr1b   r1, r4, cs, abort=21f
> -               ldr1b   r1, ip, cs, abort=21f
> -               str1b   r0, r3, ne, abort=21f
> -               str1b   r0, r4, cs, abort=21f
> -               str1b   r0, ip, cs, abort=21f
> -
> -               exit    r4, pc
> -
> -9:             rsb     ip, ip, #4
> -               cmp     ip, #2
> -               ldr1b   r1, r3, gt, abort=21f
> -               ldr1b   r1, r4, ge, abort=21f
> -               ldr1b   r1, lr, abort=21f
> -               str1b   r0, r3, gt, abort=21f
> -               str1b   r0, r4, ge, abort=21f
> -               subs    r2, r2, ip
> -               str1b   r0, lr, abort=21f
> -               blt     8b
> -               ands    ip, r1, #3
> -               beq     1b
> -
> -10:            bic     r1, r1, #3
> -               cmp     ip, #2
> -               ldr1w   r1, lr, abort=21f
> -               beq     17f
> -               bgt     18f
> -
> -
> -               .macro  forward_copy_shift pull push
> -
> -               subs    r2, r2, #28
> -               blt     14f
> -
> -       CALGN(  ands    ip, r0, #31             )
> -       CALGN(  rsb     ip, ip, #32             )
> -       CALGN(  sbcnes  r4, ip, r2              )  @ C is always set here
> -       CALGN(  subcc   r2, r2, ip              )
> -       CALGN(  bcc     15f                     )
> -
> -11:            stmfd   sp!, {r5 - r9}
> -
> -       PLD(    pld     [r1, #0]                )
> -       PLD(    subs    r2, r2, #96             )
> -       PLD(    pld     [r1, #28]               )
> -       PLD(    blt     13f                     )
> -       PLD(    pld     [r1, #60]               )
> -       PLD(    pld     [r1, #92]               )
> -
> -12:    PLD(    pld     [r1, #124]              )
> -13:            ldr4w   r1, r4, r5, r6, r7, abort=19f
> -               mov     r3, lr, pull #\pull
> -               subs    r2, r2, #32
> -               ldr4w   r1, r8, r9, ip, lr, abort=19f
> -               orr     r3, r3, r4, push #\push
> -               mov     r4, r4, pull #\pull
> -               orr     r4, r4, r5, push #\push
> -               mov     r5, r5, pull #\pull
> -               orr     r5, r5, r6, push #\push
> -               mov     r6, r6, pull #\pull
> -               orr     r6, r6, r7, push #\push
> -               mov     r7, r7, pull #\pull
> -               orr     r7, r7, r8, push #\push
> -               mov     r8, r8, pull #\pull
> -               orr     r8, r8, r9, push #\push
> -               mov     r9, r9, pull #\pull
> -               orr     r9, r9, ip, push #\push
> -               mov     ip, ip, pull #\pull
> -               orr     ip, ip, lr, push #\push
> -               str8w   r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
> -               bge     12b
> -       PLD(    cmn     r2, #96                 )
> -       PLD(    bge     13b                     )
> -
> -               ldmfd   sp!, {r5 - r9}
> -
> -14:            ands    ip, r2, #28
> -               beq     16f
> -
> -15:            mov     r3, lr, pull #\pull
> -               ldr1w   r1, lr, abort=21f
> -               subs    ip, ip, #4
> -               orr     r3, r3, lr, push #\push
> -               str1w   r0, r3, abort=21f
> -               bgt     15b
> -       CALGN(  cmp     r2, #0                  )
> -       CALGN(  bge     11b                     )
> -
> -16:            sub     r1, r1, #(\push / 8)
> -               b       8b
> -
> -               .endm
> -
> -
> -               forward_copy_shift      pull=8  push=24
> -
> -17:            forward_copy_shift      pull=16 push=16
> -
> -18:            forward_copy_shift      pull=24 push=8
> -
> -
>  /*
> - * Abort preamble and completion macros.
> - * If a fixup handler is required then those macros must surround it.
> - * It is assumed that the fixup code will handle the private part of
> - * the exit macro.
> + * Copy a buffer from src to dest (alignment handled by the hardware)
> + *
> + * Parameters:
> + *     x0 - dest
> + *     x1 - src
> + *     x2 - n
> + * Returns:
> + *     x0 - dest
>   */
> -
> -       .macro  copy_abort_preamble
> -19:    ldmfd   sp!, {r5 - r9}
> -       b       21f
> -20:    ldmfd   sp!, {r5 - r8}
> -21:
> -       .endm
> -
> -       .macro  copy_abort_end
> -       ldmfd   sp!, {r4, pc}
> -       .endm
> -
> -
> +dstin  .req    x0
> +src    .req    x1
> +count  .req    x2
> +tmp1   .req    x3
> +tmp1w  .req    w3
> +tmp2   .req    x4
> +tmp2w  .req    w4
> +dst    .req    x6
> +
> +A_l    .req    x7
> +A_h    .req    x8
> +B_l    .req    x9
> +B_h    .req    x10
> +C_l    .req    x11
> +C_h    .req    x12
> +D_l    .req    x13
> +D_h    .req    x14
> +
> +       mov     dst, dstin
> +       cmp     count, #16
> +       /* When the length is less than 16, the accesses are not aligned. */
> +       b.lo    .Ltiny15
> +
> +       neg     tmp2, src
> +       ands    tmp2, tmp2, #15/* Bytes to reach alignment. */
> +       b.eq    .LSrcAligned
> +       sub     count, count, tmp2
> +       /*
> +       * Copy the leading memory data from src to dst in an increasing
> +       * address order. This way, the risk of overwriting the source
> +       * memory data is eliminated when the distance between src and
> +       * dst is less than 16. The memory accesses here are aligned.
> +       */
> +       tbz     tmp2, #0, 1f
> +       ldrb1   tmp1w, src, #1
> +       strb1   tmp1w, dst, #1
> +1:
> +       tbz     tmp2, #1, 2f
> +       ldrh1   tmp1w, src, #2
> +       strh1   tmp1w, dst, #2
> +2:
> +       tbz     tmp2, #2, 3f
> +       ldr1    tmp1w, src, #4
> +       str1    tmp1w, dst, #4
> +3:
> +       tbz     tmp2, #3, .LSrcAligned
> +       ldr1    tmp1, src, #8
> +       str1    tmp1, dst, #8
> +
> +.LSrcAligned:
> +       cmp     count, #64
> +       b.ge    .Lcpy_over64
> +       /*
> +       * Deal with small copies quickly by dropping straight into the
> +       * exit block.
> +       */
> +.Ltail63:
> +       /*
> +       * Copy up to 48 bytes of data. At this point we only need the
> +       * bottom 6 bits of count to be accurate.
> +       */
> +       ands    tmp1, count, #0x30
> +       b.eq    .Ltiny15
> +       cmp     tmp1w, #0x20
> +       b.eq    1f
> +       b.lt    2f
> +       ldp1    A_l, A_h, src, #16
> +       stp1    A_l, A_h, dst, #16
> +1:
> +       ldp1    A_l, A_h, src, #16
> +       stp1    A_l, A_h, dst, #16
> +2:
> +       ldp1    A_l, A_h, src, #16
> +       stp1    A_l, A_h, dst, #16
> +.Ltiny15:
> +       /*
> +       * Prefer to break one ldp/stp into several load/store to access
> +       * memory in an increasing address order, rather than to load/store 16
> +       * bytes from (src-16) to (dst-16) and to backward the src to aligned
> +       * address, which way is used in original cortex memcpy. If keeping
> +       * the original memcpy process here, memmove needs to satisfy the
> +       * precondition that src address is at least 16 bytes bigger than dst
> +       * address, otherwise some source data will be overwritten when memmove
> +       * calls memcpy directly. To make memmove simpler and decouple the
> +       * memcpy's dependency on memmove, withdrew the original process.
> +       */
> +       tbz     count, #3, 1f
> +       ldr1    tmp1, src, #8
> +       str1    tmp1, dst, #8
> +1:
> +       tbz     count, #2, 2f
> +       ldr1    tmp1w, src, #4
> +       str1    tmp1w, dst, #4
> +2:
> +       tbz     count, #1, 3f
> +       ldrh1   tmp1w, src, #2
> +       strh1   tmp1w, dst, #2
> +3:
> +       tbz     count, #0, .Lexitfunc
> +       ldrb1   tmp1w, src, #1
> +       strb1   tmp1w, dst, #1
> +
> +       b       .Lexitfunc
> +
> +.Lcpy_over64:
> +       subs    count, count, #128
> +       b.ge    .Lcpy_body_large
> +       /*
> +       * Less than 128 bytes to copy, so handle 64 here and then jump
> +       * to the tail.
> +       */
> +       ldp1    A_l, A_h, src, #16
> +       stp1    A_l, A_h, dst, #16
> +       ldp1    B_l, B_h, src, #16
> +       ldp1    C_l, C_h, src, #16
> +       stp1    B_l, B_h, dst, #16
> +       stp1    C_l, C_h, dst, #16
> +       ldp1    D_l, D_h, src, #16
> +       stp1    D_l, D_h, dst, #16
> +
> +       tst     count, #0x3f
> +       b.ne    .Ltail63
> +       b       .Lexitfunc
> +
> +       /*
> +       * Critical loop.  Start at a new cache line boundary.  Assuming
> +       * 64 bytes per line this ensures the entire loop is in one line.
> +       */
> +.Lcpy_body_large:
> +       /* pre-get 64 bytes data. */
> +       ldp1    A_l, A_h, src, #16
> +       ldp1    B_l, B_h, src, #16
> +       ldp1    C_l, C_h, src, #16
> +       ldp1    D_l, D_h, src, #16
> +1:
> +       /*
> +       * interlace the load of next 64 bytes data block with store of the last
> +       * loaded 64 bytes data.
> +       */
> +       stp1    A_l, A_h, dst, #16
> +       ldp1    A_l, A_h, src, #16
> +       stp1    B_l, B_h, dst, #16
> +       ldp1    B_l, B_h, src, #16
> +       stp1    C_l, C_h, dst, #16
> +       ldp1    C_l, C_h, src, #16
> +       stp1    D_l, D_h, dst, #16
> +       ldp1    D_l, D_h, src, #16
> +       subs    count, count, #64
> +       b.ge    1b
> +       stp1    A_l, A_h, dst, #16
> +       stp1    B_l, B_h, dst, #16
> +       stp1    C_l, C_h, dst, #16
> +       stp1    D_l, D_h, dst, #16
> +
> +       tst     count, #0x3f
> +       b.ne    .Ltail63
> +.Lexitfunc:
> diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
> index 5123691..cfed319 100644
> --- a/arch/arm/lib/memcpy.S
> +++ b/arch/arm/lib/memcpy.S
> @@ -1,64 +1,74 @@
>  /*
> - *  linux/arch/arm/lib/memcpy.S
> + * Copyright (C) 2013 ARM Ltd.
> + * Copyright (C) 2013 Linaro.
>   *
> - *  Author:    Nicolas Pitre
> - *  Created:   Sep 28, 2005
> - *  Copyright: MontaVista Software, Inc.
> + * This code is based on glibc cortex strings work originally authored by Linaro
> + * and re-licensed under GPLv2 for the Linux kernel. The original code can
> + * be found @
>   *
> - *  This program is free software; you can redistribute it and/or modify
> - *  it under the terms of the GNU General Public License version 2 as
> - *  published by the Free Software Foundation.
> + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
> + * files/head:/src/aarch64/
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
>
>  #include <linux/linkage.h>
>  #include <asm/assembler.h>
>
> -#define LDR1W_SHIFT    0
> -#define STR1W_SHIFT    0
> -
> -       .macro ldr1w ptr reg abort
> -       W(ldr) \reg, [\ptr], #4
> -       .endm
> -
> -       .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
> -       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
> +/*
> + * Copy a buffer from src to dest (alignment handled by the hardware)
> + *
> + * Parameters:
> + *     x0 - dest
> + *     x1 - src
> + *     x2 - n
> + * Returns:
> + *     x0 - dest
> + */
> +       .macro ldrb1 ptr, regB, val
> +       ldrb  \ptr, [\regB], \val
>         .endm
>
> -       .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
> -       ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
> +       .macro strb1 ptr, regB, val
> +       strb \ptr, [\regB], \val
>         .endm
>
> -       .macro ldr1b ptr reg cond=al abort
> -       ldr\cond\()b \reg, [\ptr], #1
> +       .macro ldrh1 ptr, regB, val
> +       ldrh  \ptr, [\regB], \val
>         .endm
>
> -       .macro str1w ptr reg abort
> -       W(str) \reg, [\ptr], #4
> +       .macro strh1 ptr, regB, val
> +       strh \ptr, [\regB], \val
>         .endm
>
> -       .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
> -       stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
> +       .macro ldr1 ptr, regB, val
> +       ldr \ptr, [\regB], \val
>         .endm
>
> -       .macro str1b ptr reg cond=al abort
> -       str\cond\()b \reg, [\ptr], #1
> +       .macro str1 ptr, regB, val
> +       str \ptr, [\regB], \val
>         .endm
>
> -       .macro enter reg1 reg2
> -       stmdb sp!, {r0, \reg1, \reg2}
> +       .macro ldp1 ptr, regB, regC, val
> +       ldp \ptr, \regB, [\regC], \val
>         .endm
>
> -       .macro exit reg1 reg2
> -       ldmfd sp!, {r0, \reg1, \reg2}
> +       .macro stp1 ptr, regB, regC, val
> +       stp \ptr, \regB, [\regC], \val
>         .endm
>
> -       .text
> -
> -/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
> -
> +       .weak memcpy
>  ENTRY(memcpy)
> -
>  #include "copy_template.S"
> -
> +       ret
>  ENDPROC(memcpy)
> -
> diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
> index c4d2672..380a540 100644
> --- a/arch/arm/lib/memset.S
> +++ b/arch/arm/lib/memset.S
> @@ -1,124 +1,215 @@
>  /*
> - *  linux/arch/arm/lib/memset.S
> + * Copyright (C) 2013 ARM Ltd.
> + * Copyright (C) 2013 Linaro.
>   *
> - *  Copyright (C) 1995-2000 Russell King
> + * This code is based on glibc cortex strings work originally authored by Linaro
> + * and re-licensed under GPLv2 for the Linux kernel. The original code can
> + * be found @
> + *
> + * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
> + * files/head:/src/aarch64/
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License version 2 as
>   * published by the Free Software Foundation.
>   *
> - *  ASM optimised string functions
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>   */
> +
>  #include <linux/linkage.h>
>  #include <asm/assembler.h>
>
> -       .text
> -       .align  5
> -
> -ENTRY(memset)
> -       ands    r3, r0, #3              @ 1 unaligned?
> -       mov     ip, r0                  @ preserve r0 as return value
> -       bne     6f                      @ 1
>  /*
> - * we know that the pointer in ip is aligned to a word boundary.
> - */
> -1:     orr     r1, r1, r1, lsl #8
> -       orr     r1, r1, r1, lsl #16
> -       mov     r3, r1
> -       cmp     r2, #16
> -       blt     4f
> -
> -#if ! CALGN(1)+0
> -
> -/*
> - * We need an 2 extra registers for this loop - use r8 and the LR
> - */
> -       stmfd   sp!, {r8, lr}
> -       mov     r8, r1
> -       mov     lr, r1
> -
> -2:     subs    r2, r2, #64
> -       stmgeia ip!, {r1, r3, r8, lr}   @ 64 bytes at a time.
> -       stmgeia ip!, {r1, r3, r8, lr}
> -       stmgeia ip!, {r1, r3, r8, lr}
> -       stmgeia ip!, {r1, r3, r8, lr}
> -       bgt     2b
> -       ldmeqfd sp!, {r8, pc}           @ Now <64 bytes to go.
> -/*
> - * No need to correct the count; we're only testing bits from now on
> + * Fill in the buffer with character c (alignment handled by the hardware)
> + *
> + * Parameters:
> + *     x0 - buf
> + *     x1 - c
> + *     x2 - n
> + * Returns:
> + *     x0 - buf
>   */
> -       tst     r2, #32
> -       stmneia ip!, {r1, r3, r8, lr}
> -       stmneia ip!, {r1, r3, r8, lr}
> -       tst     r2, #16
> -       stmneia ip!, {r1, r3, r8, lr}
> -       ldmfd   sp!, {r8, lr}
> -
> -#else
>
> +dstin          .req    x0
> +val            .req    w1
> +count          .req    x2
> +tmp1           .req    x3
> +tmp1w          .req    w3
> +tmp2           .req    x4
> +tmp2w          .req    w4
> +zva_len_x      .req    x5
> +zva_len                .req    w5
> +zva_bits_x     .req    x6
> +
> +A_l            .req    x7
> +A_lw           .req    w7
> +dst            .req    x8
> +tmp3w          .req    w9
> +tmp3           .req    x9
> +
> +       .weak memset
> +ENTRY(memset)
> +       mov     dst, dstin      /* Preserve return value.  */
> +       and     A_lw, val, #255
> +       orr     A_lw, A_lw, A_lw, lsl #8
> +       orr     A_lw, A_lw, A_lw, lsl #16
> +       orr     A_l, A_l, A_l, lsl #32
> +
> +       cmp     count, #15
> +       b.hi    .Lover16_proc
> +       /* All stores may be non-aligned. */
> +       tbz     count, #3, 1f
> +       str     A_l, [dst], #8
> +1:
> +       tbz     count, #2, 2f
> +       str     A_lw, [dst], #4
> +2:
> +       tbz     count, #1, 3f
> +       strh    A_lw, [dst], #2
> +3:
> +       tbz     count, #0, 4f
> +       strb    A_lw, [dst]
> +4:
> +       ret
> +
> +.Lover16_proc:
> +       /* Check whether the start address is 16-byte aligned. */
> +       neg     tmp2, dst
> +       ands    tmp2, tmp2, #15
> +       b.eq    .Laligned
>  /*
> - * This version aligns the destination pointer in order to write
> - * whole cache lines at once.
> - */
> -
> -       stmfd   sp!, {r4-r8, lr}
> -       mov     r4, r1
> -       mov     r5, r1
> -       mov     r6, r1
> -       mov     r7, r1
> -       mov     r8, r1
> -       mov     lr, r1
> -
> -       cmp     r2, #96
> -       tstgt   ip, #31
> -       ble     3f
> -
> -       and     r8, ip, #31
> -       rsb     r8, r8, #32
> -       sub     r2, r2, r8
> -       movs    r8, r8, lsl #(32 - 4)
> -       stmcsia ip!, {r4, r5, r6, r7}
> -       stmmiia ip!, {r4, r5}
> -       tst     r8, #(1 << 30)
> -       mov     r8, r1
> -       strne   r1, [ip], #4
> -
> -3:     subs    r2, r2, #64
> -       stmgeia ip!, {r1, r3-r8, lr}
> -       stmgeia ip!, {r1, r3-r8, lr}
> -       bgt     3b
> -       ldmeqfd sp!, {r4-r8, pc}
> -
> -       tst     r2, #32
> -       stmneia ip!, {r1, r3-r8, lr}
> -       tst     r2, #16
> -       stmneia ip!, {r4-r7}
> -       ldmfd   sp!, {r4-r8, lr}
> -
> -#endif
> -
> -4:     tst     r2, #8
> -       stmneia ip!, {r1, r3}
> -       tst     r2, #4
> -       strne   r1, [ip], #4
> +* The count is not less than 16, so we can use stp to store the first 16
> +* bytes and then advance dst to a 16-byte boundary. After this step the
> +* current memory address is at an alignment boundary.
> +*/
> +       stp     A_l, A_l, [dst] /*non-aligned store..*/
> +       /*make the dst aligned..*/
> +       sub     count, count, tmp2
> +       add     dst, dst, tmp2
> +
> +.Laligned:
> +       cbz     A_l, .Lzero_mem
> +
> +.Ltail_maybe_long:
> +       cmp     count, #64
> +       b.ge    .Lnot_short
> +.Ltail63:
> +       ands    tmp1, count, #0x30
> +       b.eq    3f
> +       cmp     tmp1w, #0x20
> +       b.eq    1f
> +       b.lt    2f
> +       stp     A_l, A_l, [dst], #16
> +1:
> +       stp     A_l, A_l, [dst], #16
> +2:
> +       stp     A_l, A_l, [dst], #16
>  /*
> - * When we get here, we've got less than 4 bytes to zero.  We
> - * may have an unaligned pointer as well.
> - */
> -5:     tst     r2, #2
> -       strneb  r1, [ip], #1
> -       strneb  r1, [ip], #1
> -       tst     r2, #1
> -       strneb  r1, [ip], #1
> -       mov     pc, lr
> -
> -6:     subs    r2, r2, #4              @ 1 do we have enough
> -       blt     5b                      @ 1 bytes to align with?
> -       cmp     r3, #2                  @ 1
> -       strltb  r1, [ip], #1            @ 1
> -       strleb  r1, [ip], #1            @ 1
> -       strb    r1, [ip], #1            @ 1
> -       add     r2, r2, r3              @ 1 (r2 = r2 - (4 - r3))
> -       b       1b
> +* The last store length is less than 16; use stp to write the last 16 bytes.
> +* Some bytes will be written twice and the access is non-aligned.
> +*/
> +3:
> +       ands    count, count, #15
> +       cbz     count, 4f
> +       add     dst, dst, count
> +       stp     A_l, A_l, [dst, #-16]   /* Repeat some/all of last store. */
> +4:
> +       ret
> +
> +       /*
> +       * Critical loop. Start at a new cache line boundary. Assuming
> +       * 64 bytes per line, this ensures the entire loop is in one line.
> +       */
> +.Lnot_short:
> +       sub     dst, dst, #16/* Pre-bias.  */
> +       sub     count, count, #64
> +1:
> +       stp     A_l, A_l, [dst, #16]
> +       stp     A_l, A_l, [dst, #32]
> +       stp     A_l, A_l, [dst, #48]
> +       stp     A_l, A_l, [dst, #64]!
> +       subs    count, count, #64
> +       b.ge    1b
> +       tst     count, #0x3f
> +       add     dst, dst, #16
> +       b.ne    .Ltail63
> +.Lexitfunc:
> +       ret
> +
> +       /*
> +       * For zeroing memory, check to see if we can use the ZVA feature to
> +       * zero entire 'cache' lines.
> +       */
> +.Lzero_mem:
> +       cmp     count, #63
> +       b.le    .Ltail63
> +       /*
> +       * For zeroing small amounts of memory, it's not worth setting up
> +       * the line-clear code.
> +       */
> +       cmp     count, #128
> +       b.lt    .Lnot_short /*count is at least  128 bytes*/
> +
> +       mrs     tmp1, dczid_el0
> +       tbnz    tmp1, #4, .Lnot_short
> +       mov     tmp3w, #4
> +       and     zva_len, tmp1w, #15     /* Safety: other bits reserved.  */
> +       lsl     zva_len, tmp3w, zva_len
> +
> +       ands    tmp3w, zva_len, #63
> +       /*
> +       * ensure the zva_len is not less than 64.
> +       * It is not meaningful to use ZVA if the block size is less than 64.
> +       */
> +       b.ne    .Lnot_short
> +.Lzero_by_line:
> +       /*
> +       * Compute how far we need to go to become suitably aligned. We're
> +       * already at quad-word alignment.
> +       */
> +       cmp     count, zva_len_x
> +       b.lt    .Lnot_short             /* Not enough to reach alignment.  */
> +       sub     zva_bits_x, zva_len_x, #1
> +       neg     tmp2, dst
> +       ands    tmp2, tmp2, zva_bits_x
> +       b.eq    2f                      /* Already aligned.  */
> +       /* Not aligned, check that there's enough to copy after alignment.*/
> +       sub     tmp1, count, tmp2
> +       /*
> +       * Guarantee that the remaining length to be zeroed by ZVA is at least
> +       * 64, so the processing at 2f does not run past the memory range.*/
> +       cmp     tmp1, #64
> +       ccmp    tmp1, zva_len_x, #8, ge /* NZCV=0b1000 */
> +       b.lt    .Lnot_short
> +       /*
> +       * We know that there's at least 64 bytes to zero and that it's safe
> +       * to overrun by 64 bytes.
> +       */
> +       mov     count, tmp1
> +1:
> +       stp     A_l, A_l, [dst]
> +       stp     A_l, A_l, [dst, #16]
> +       stp     A_l, A_l, [dst, #32]
> +       subs    tmp2, tmp2, #64
> +       stp     A_l, A_l, [dst, #48]
> +       add     dst, dst, #64
> +       b.ge    1b
> +       /* We've overrun a bit, so adjust dst downwards.*/
> +       add     dst, dst, tmp2
> +2:
> +       sub     count, count, zva_len_x
> +3:
> +       dc      zva, dst
> +       add     dst, dst, zva_len_x
> +       subs    count, count, zva_len_x
> +       b.ge    3b
> +       ands    count, count, zva_bits_x
> +       b.ne    .Ltail_maybe_long
> +       ret
>  ENDPROC(memset)
> -
> diff --git a/arch/arm/lib/runtime-offset.S b/arch/arm/lib/runtime-offset.S
> index f10c4c8..e368baa 100644
> --- a/arch/arm/lib/runtime-offset.S
> +++ b/arch/arm/lib/runtime-offset.S
> @@ -8,11 +8,11 @@
>   * we are currently running at.
>   */
>  ENTRY(get_runtime_offset)
> -1:     adr r0, 1b
> -       ldr r1, linkadr
> -       subs r0, r1, r0
> -THUMB( subs r0, r0, #1)
> -       mov pc, lr
> +1:     adr x0, 1b
> +       adr x1, get_runtime_offset
> +       subs x0, x1, x0
> +       subs x0, x0, #1
> +       ret
>
>  linkadr:
>  .word get_runtime_offset
> @@ -28,7 +28,7 @@ __ld_var_base:
>   */
>  .macro ld_var_entry name
>         ENTRY(__ld_var_\name)
> -               ldr r0, __\name
> +               ldr x0, __\name
>                 b 1f
>         __\name: .word \name - __ld_var_base
>         ENDPROC(__ld_var_\name)
> @@ -47,6 +47,6 @@ ld_var_entry __image_end
>  #endif
>
>  1:
> -       ldr r1, =__ld_var_base
> -       adds r0, r0, r1
> -       mov pc, lr
> +       ldr x1, =__ld_var_base
> +       adds x0, x0, x1
> +       ret
> diff --git a/arch/arm/mach-virt/Kconfig b/arch/arm/mach-virt/Kconfig
> new file mode 100644
> index 0000000..1f43606
> --- /dev/null
> +++ b/arch/arm/mach-virt/Kconfig
> @@ -0,0 +1,15 @@
> +if ARCH_VIRT
> +
> +config ARCH_TEXT_BASE
> +       hex
> +       default 0x40000000
> +
> +choice
> +       prompt "ARM Board type"
> +
> +config MACH_VIRT
> +       bool "ARM QEMU virt"
> +
> +endchoice
> +
> +endif
> diff --git a/arch/arm/mach-virt/Makefile b/arch/arm/mach-virt/Makefile
> new file mode 100644
> index 0000000..3924a10
> --- /dev/null
> +++ b/arch/arm/mach-virt/Makefile
> @@ -0,0 +1,3 @@
> +obj-y += devices.o reset.o
> +
> +lwl-y += lowlevel.o
> diff --git a/arch/arm/mach-virt/devices.c b/arch/arm/mach-virt/devices.c
> new file mode 100644
> index 0000000..999f463
> --- /dev/null
> +++ b/arch/arm/mach-virt/devices.c
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright (C) 2016 Raphaël Poggi <poggi.raph at gmail.com>
> + *
> + * GPLv2 only
> + */
> +
> +#include <common.h>
> +#include <linux/amba/bus.h>
> +#include <asm/memory.h>
> +#include <mach/devices.h>
> +#include <linux/ioport.h>
> +
> +void virt_add_ddram(u32 size)
> +{
> +       arm_add_mem_device("ram0", 0x40000000, size);
> +}
> +
> +void virt_register_uart(unsigned id)
> +{
> +       resource_size_t start;
> +
> +       switch (id) {
> +       case 0:
> +               start = 0x09000000;
> +               break;
> +       default:
> +               return;
> +       }
> +       amba_apb_device_add(NULL, "uart-pl011", id, start, 4096, NULL, 0);
> +}
> diff --git a/arch/arm/mach-virt/include/mach/debug_ll.h b/arch/arm/mach-virt/include/mach/debug_ll.h
> new file mode 100644
> index 0000000..89b0692
> --- /dev/null
> +++ b/arch/arm/mach-virt/include/mach/debug_ll.h
> @@ -0,0 +1,24 @@
> +/*
> + * Copyright 2013 Jean-Christophe PLAGNIOL-VILLARD <plagniol at jcrosoft.com>
> + *
> + * GPLv2 only
> + */
> +
> +#ifndef __MACH_DEBUG_LL_H__
> +#define   __MACH_DEBUG_LL_H__
> +
> +#include <linux/amba/serial.h>
> +#include <io.h>
> +
> +#define DEBUG_LL_PHYS_BASE             0x10000000
> +#define DEBUG_LL_PHYS_BASE_RS1         0x1c000000
> +
> +#ifdef MP
> +#define DEBUG_LL_UART_ADDR DEBUG_LL_PHYS_BASE
> +#else
> +#define DEBUG_LL_UART_ADDR DEBUG_LL_PHYS_BASE_RS1
> +#endif
> +
> +#include <asm/debug_ll_pl011.h>
> +
> +#endif
> diff --git a/arch/arm/mach-virt/include/mach/devices.h b/arch/arm/mach-virt/include/mach/devices.h
> new file mode 100644
> index 0000000..9872c61
> --- /dev/null
> +++ b/arch/arm/mach-virt/include/mach/devices.h
> @@ -0,0 +1,13 @@
> +/*
> + * Copyright (C) 2016 Raphaël Poggi <poggi.raph at gmail.com>
> + *
> + * GPLv2 only
> + */
> +
> +#ifndef __ASM_ARCH_DEVICES_H__
> +#define __ASM_ARCH_DEVICES_H__
> +
> +void virt_add_ddram(u32 size);
> +void virt_register_uart(unsigned id);
> +
> +#endif /* __ASM_ARCH_DEVICES_H__ */
> diff --git a/arch/arm/mach-virt/lowlevel.c b/arch/arm/mach-virt/lowlevel.c
> new file mode 100644
> index 0000000..6f695a5
> --- /dev/null
> +++ b/arch/arm/mach-virt/lowlevel.c
> @@ -0,0 +1,19 @@
> +/*
> + * Copyright (C) 2013 Jean-Christophe PLAGNIOL-VILLARD <plagnio at jcrosoft.com>
> + *
> + * GPLv2 only
> + */
> +
> +#include <common.h>
> +#include <linux/sizes.h>
> +#include <asm/barebox-arm-head.h>
> +#include <asm/barebox-arm.h>
> +#include <asm/system_info.h>
> +
> +void barebox_arm_reset_vector(void)
> +{
> +       arm_cpu_lowlevel_init();
> +       arm_setup_stack(STACK_BASE);
> +
> +       barebox_arm_entry(0x40000000, SZ_512M, NULL);
> +}
> diff --git a/arch/arm/mach-virt/reset.c b/arch/arm/mach-virt/reset.c
> new file mode 100644
> index 0000000..fb895eb
> --- /dev/null
> +++ b/arch/arm/mach-virt/reset.c
> @@ -0,0 +1,24 @@
> +/*
> + * Copyright (C) 2016 Raphaël Poggi <poggi.raph at gmail.com>
> + *
> + * GPLv2 only
> + */
> +
> +#include <common.h>
> +#include <io.h>
> +#include <init.h>
> +#include <restart.h>
> +#include <mach/devices.h>
> +
> +static void virt_reset_soc(struct restart_handler *rst)
> +{
> +       hang();
> +}
> +
> +static int restart_register_feature(void)
> +{
> +       restart_handler_register_fn(virt_reset_soc);
> +
> +       return 0;
> +}
> +coredevice_initcall(restart_register_feature);
> --
> 2.8.0.rc3
>
>
> --
> Pengutronix e.K.                           |                             |
> Industrial Linux Solutions                 | http://www.pengutronix.de/  |
> Peiner Str. 6-8, 31137 Hildesheim, Germany | Phone: +49-5121-206917-0    |
> Amtsgericht Hildesheim, HRA 2686           | Fax:   +49-5121-206917-5555 |
>
> _______________________________________________
> barebox mailing list
> barebox at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/barebox



More information about the barebox mailing list