[OpenWrt-Devel] [OpenWRT-Devel] mvebu: introduce mvneta buffer manager offload module

Sebastian Careba nitroshift at yahoo.com
Thu May 19 02:41:00 EDT 2016


Buffer manager (BM) is a dedicated hardware unit that can be used by all
ethernet ports of Armada XP and 38x SoC's. It allows to offload CPU on RX
path by sparing DRAM access on refilling buffer pool, hardware-based
filling of descriptor ring data and better memory utilization due to HW
arbitration for using 'short' pools for small packets.

Tests performed with A388 SoC working as a network bridge between two
packet generators showed increase of maximum processed 64B packets by
~20k (~555k packets with BM enabled vs ~535 packets without BM). Also
when pushing 1500B-packets with a line rate achieved, CPU load decreased
from around 25% without BM to 20% with BM.

BM comprise up to 4 buffer pointers' (BP) rings kept in DRAM, which
are called external BP pools - BPPE. Allocating and releasing buffer
pointers (BP) to/from BPPE is performed indirectly by write/read access
to a dedicated internal SRAM, where internal BP pools (BPPI) are placed.
BM hardware controls status of BPPE automatically, as well as assigning
proper buffers to RX descriptors. For more details please refer to
Functional Specification of Armada XP or 38x SoC.

In order to enable support for a separate hardware block, common for all
ports, a new driver has to be implemented ('mvneta_bm'). It provides
initialization sequence of address space, clocks, registers, SRAM,
empty pools' structures and also obtaining optional configuration
from DT (please refer to device tree binding documentation). mvneta_bm
exposes also a necessary API to mvneta driver, as well as a dedicated
structure with BM information (bm_priv), whose presence is used as a
flag notifying of BM usage by port. It has to be ensured that mvneta_bm
probe is executed prior to the ones in ports' driver. In case BM is not
used or its probe fails, mvneta falls back to use software buffer
management.

A sequence executed in mvneta_probe function is modified in order to have
an access to needed resources before possible port's BM initialization is
done. According to port-pools mapping provided by DT appropriate registers
are configured and the buffer pools are filled. RX path is modified
accordingly. Becaues the hardware allows a wide variety of configuration
options, following assumptions are made:
* using BM mechanisms can be selectively disabled/enabled basing on DT configuration among the ports
* 'long' pool's single buffer size is tied to port's MTU
* using 'long' pool by port is obligatory and it cannot be shared
* using 'short' pool for smaller packets is optional
* one 'short' pool can be shared among all ports

This commit enables hardware buffer management operation cooperating with
existing mvneta driver. New device tree binding documentation is added and
the one of mvneta is updated accordingly.

[gregory.clement at free-electrons.com: removed the suspend/resume part]
Signed-off-by: Gregory Clement <gregory.clement at free-electrons.com>
Signed-off-by: Marcin Wojtas <mw at semihalf.com>
Signed-off-by: Sebastian Careba <nitroshift at yahoo.com>
---
--- a/drivers/net/ethernet/marvell/Makefile
+++ b/drivers/net/ethernet/marvell/Makefile
@@ -4,6 +4,7 @@
 
 obj-$(CONFIG_MVMDIO) += mvmdio.o
 obj-$(CONFIG_MV643XX_ETH) += mv643xx_eth.o
+obj-$(CONFIG_MVNETA_BM) += mvneta_bm.o
 obj-$(CONFIG_MVNETA) += mvneta.o
 obj-$(CONFIG_MVPP2) += mvpp2.o
 obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -11,32 +11,38 @@
  * warranty of any kind, whether express or implied.
  */
 
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
+#include <linux/clk.h>
+#include <linux/cpu.h>
 #include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
 #include <linux/inetdevice.h>
-#include <linux/mbus.h>
-#include <linux/module.h>
 #include <linux/interrupt.h>
-#include <linux/if_vlan.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
 #include <linux/io.h>
-#include <net/tso.h>
+#include <linux/kernel.h>
+#include <linux/mbus.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-#include <linux/of_address.h>
 #include <linux/phy.h>
-#include <linux/clk.h>
-#include <linux/cpu.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <net/hwbm.h>
+#include "mvneta_bm.h"
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/tso.h>
 
 /* Registers */
 #define MVNETA_RXQ_CONFIG_REG(q)                (0x1400 + ((q) << 2))
 #define      MVNETA_RXQ_HW_BUF_ALLOC            BIT(0)
+#define      MVNETA_RXQ_SHORT_POOL_ID_SHIFT	4
+#define      MVNETA_RXQ_SHORT_POOL_ID_MASK	0x30
+#define      MVNETA_RXQ_LONG_POOL_ID_SHIFT	6
+#define      MVNETA_RXQ_LONG_POOL_ID_MASK	0xc0
 #define      MVNETA_RXQ_PKT_OFFSET_ALL_MASK     (0xf    << 8)
 #define      MVNETA_RXQ_PKT_OFFSET_MASK(offs)   ((offs) << 8)
 #define MVNETA_RXQ_THRESHOLD_REG(q)             (0x14c0 + ((q) << 2))
@@ -50,6 +56,9 @@
 #define MVNETA_RXQ_STATUS_UPDATE_REG(q)         (0x1500 + ((q) << 2))
 #define      MVNETA_RXQ_ADD_NON_OCCUPIED_SHIFT  16
 #define      MVNETA_RXQ_ADD_NON_OCCUPIED_MAX    255
+#define MVNETA_PORT_POOL_BUFFER_SZ_REG(pool)	(0x1700 + ((pool) << 2))
+#define      MVNETA_PORT_POOL_BUFFER_SZ_SHIFT	3
+#define      MVNETA_PORT_POOL_BUFFER_SZ_MASK	0xfff8
 #define MVNETA_PORT_RX_RESET                    0x1cc0
 #define      MVNETA_PORT_RX_DMA_RESET           BIT(0)
 #define MVNETA_PHY_ADDR                         0x2000
@@ -107,12 +116,21 @@
 #define MVNETA_GMAC_CLOCK_DIVIDER                0x24f4
 #define      MVNETA_GMAC_1MS_CLOCK_ENABLE        BIT(31)
 #define MVNETA_ACC_MODE                          0x2500
+#define MVNETA_BM_ADDRESS                        0x2504
 #define MVNETA_CPU_MAP(cpu)                      (0x2540 + ((cpu) << 2))
 #define      MVNETA_CPU_RXQ_ACCESS_ALL_MASK      0x000000ff
 #define      MVNETA_CPU_TXQ_ACCESS_ALL_MASK      0x0000ff00
+#define      MVNETA_CPU_RXQ_ACCESS(rxq)		 BIT(rxq)
+#define      MVNETA_CPU_TXQ_ACCESS(txq)		 BIT(txq + 8)
 #define MVNETA_RXQ_TIME_COAL_REG(q)              (0x2580 + ((q) << 2))
 
-/* Exception Interrupt Port/Queue Cause register */
+/* Exception Interrupt Port/Queue Cause register
+ *
+ * Their behavior depend of the mapping done using the PCPX2Q
+ * registers. For a given CPU if the bit associated to a queue is not
+ * set, then for the register a read from this CPU will always return
+ * 0 and a write won't do anything
+ */
 
 #define MVNETA_INTR_NEW_CAUSE                    0x25a0
 #define MVNETA_INTR_NEW_MASK                     0x25a4
@@ -245,7 +263,10 @@
 #define MVNETA_CPU_D_CACHE_LINE_SIZE    32
 #define MVNETA_TX_CSUM_DEF_SIZE		1600
 #define MVNETA_TX_CSUM_MAX_SIZE		9800
-#define MVNETA_ACC_MODE_EXT		1
+#define MVNETA_ACC_MODE_EXT1		1
+#define MVNETA_ACC_MODE_EXT2		2
+
+#define MVNETA_MAX_DECODE_WIN		6
 
 /* Timeout constants */
 #define MVNETA_TX_DISABLE_TIMEOUT_MSEC	1000
@@ -254,6 +275,11 @@
 
 #define MVNETA_TX_MTU_MAX		0x3ffff
 
+/* The RSS lookup table actually has 256 entries but we do not use
+ * them yet
+ */
+#define MVNETA_RSS_LU_TABLE_SIZE	1
+
 /* TSO header size */
 #define TSO_HEADER_SIZE 128
 
@@ -280,7 +306,8 @@
 	((addr >= txq->tso_hdrs_phys) && \
 	 (addr < txq->tso_hdrs_phys + txq->size * TSO_HEADER_SIZE))
 
-#define MVNETA_RX_BUF_SIZE(pkt_size)   ((pkt_size) + NET_SKB_PAD)
+#define MVNETA_RX_GET_BM_POOL_ID(rxd) \
+	(((rxd)->status & MVNETA_RXD_BM_POOL_MASK) >> MVNETA_RXD_BM_POOL_SHIFT)
 
 struct mvneta_statistic {
 	unsigned short offset;
@@ -346,6 +373,7 @@ struct mvneta_pcpu_port {
 };
 
 struct mvneta_port {
+	u8 id;
 	struct mvneta_pcpu_port __percpu	*ports;
 	struct mvneta_pcpu_stats __percpu	*stats;
 
@@ -356,9 +384,17 @@ struct mvneta_port {
 	struct mvneta_tx_queue *txqs;
 	struct net_device *dev;
 	struct notifier_block cpu_notifier;
+	int rxq_def;
+	/* Protect the access to the percpu interrupt registers,
+	 * ensuring that the configuration remains coherent.
+	 */
+	spinlock_t lock;
+	bool is_stopped;
 
 	/* Core clock */
 	struct clk *clk;
+	/* AXI clock */
+	struct clk *clk_bus;
 	u8 mcast_count[256];
 	u16 tx_ring_size;
 	u16 rx_ring_size;
@@ -371,9 +407,16 @@ struct mvneta_port {
 	unsigned int duplex;
 	unsigned int speed;
 	unsigned int tx_csum_limit;
-	int use_inband_status:1;
+	unsigned int use_inband_status:1;
+
+	struct mvneta_bm *bm_priv;
+	struct mvneta_bm_pool *pool_long;
+	struct mvneta_bm_pool *pool_short;
+	int bm_win_id;
 
 	u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)];
+
+	u32 indir[MVNETA_RSS_LU_TABLE_SIZE];
 };
 
 /* The mvneta_tx_desc and mvneta_rx_desc structures describe the
@@ -396,6 +439,8 @@ struct mvneta_port {
 #define MVNETA_TX_L4_CSUM_NOT	BIT(31)
 
 #define MVNETA_RXD_ERR_CRC		0x0
+#define MVNETA_RXD_BM_POOL_SHIFT	13
+#define MVNETA_RXD_BM_POOL_MASK		(BIT(13) | BIT(14))
 #define MVNETA_RXD_ERR_SUMMARY		BIT(16)
 #define MVNETA_RXD_ERR_OVERRUN		BIT(17)
 #define MVNETA_RXD_ERR_LEN		BIT(18)
@@ -499,6 +544,9 @@ struct mvneta_tx_queue {
 
 	/* DMA address of TSO headers */
 	dma_addr_t tso_hdrs_phys;
+
+	/* Affinity mask for CPUs*/
+	cpumask_t affinity_mask;
 };
 
 struct mvneta_rx_queue {
@@ -537,6 +585,9 @@ static int rxq_def;
 
 static int rx_copybreak __read_mostly = 256;
 
+/* HW BM need that each port be identify by a unique ID */
+static int global_port_id;
+
 #define MVNETA_DRIVER_NAME "mvneta"
 #define MVNETA_DRIVER_VERSION "1.0"
 
@@ -803,6 +854,215 @@ static void mvneta_rxq_bm_disable(struct
 	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
 }
 
+/* Enable buffer management (BM) */
+static void mvneta_rxq_bm_enable(struct mvneta_port *pp,
+				 struct mvneta_rx_queue *rxq)
+{
+	u32 val;
+
+	val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id));
+	val |= MVNETA_RXQ_HW_BUF_ALLOC;
+	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
+}
+
+/* Notify HW about port's assignment of pool for bigger packets */
+static void mvneta_rxq_long_pool_set(struct mvneta_port *pp,
+				     struct mvneta_rx_queue *rxq)
+{
+	u32 val;
+
+	val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id));
+	val &= ~MVNETA_RXQ_LONG_POOL_ID_MASK;
+	val |= (pp->pool_long->id << MVNETA_RXQ_LONG_POOL_ID_SHIFT);
+
+	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
+}
+
+/* Notify HW about port's assignment of pool for smaller packets */
+static void mvneta_rxq_short_pool_set(struct mvneta_port *pp,
+				      struct mvneta_rx_queue *rxq)
+{
+	u32 val;
+
+	val = mvreg_read(pp, MVNETA_RXQ_CONFIG_REG(rxq->id));
+	val &= ~MVNETA_RXQ_SHORT_POOL_ID_MASK;
+	val |= (pp->pool_short->id << MVNETA_RXQ_SHORT_POOL_ID_SHIFT);
+
+	mvreg_write(pp, MVNETA_RXQ_CONFIG_REG(rxq->id), val);
+}
+
+/* Set port's receive buffer size for assigned BM pool */
+static inline void mvneta_bm_pool_bufsize_set(struct mvneta_port *pp,
+					      int buf_size,
+					      u8 pool_id)
+{
+	u32 val;
+
+	if (!IS_ALIGNED(buf_size, 8)) {
+		dev_warn(pp->dev->dev.parent,
+			 "illegal buf_size value %d, round to %d\n",
+			 buf_size, ALIGN(buf_size, 8));
+		buf_size = ALIGN(buf_size, 8);
+	}
+
+	val = mvreg_read(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id));
+	val |= buf_size & MVNETA_PORT_POOL_BUFFER_SZ_MASK;
+	mvreg_write(pp, MVNETA_PORT_POOL_BUFFER_SZ_REG(pool_id), val);
+}
+
+/* Configure MBUS window in order to enable access BM internal SRAM */
+static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize,
+				  u8 target, u8 attr)
+{
+	u32 win_enable, win_protect;
+	int i;
+
+	win_enable = mvreg_read(pp, MVNETA_BASE_ADDR_ENABLE);
+
+	if (pp->bm_win_id < 0) {
+		/* Find first not occupied window */
+		for (i = 0; i < MVNETA_MAX_DECODE_WIN; i++) {
+			if (win_enable & (1 << i)) {
+				pp->bm_win_id = i;
+				break;
+			}
+		}
+		if (i == MVNETA_MAX_DECODE_WIN)
+			return -ENOMEM;
+	} else {
+		i = pp->bm_win_id;
+	}
+
+	mvreg_write(pp, MVNETA_WIN_BASE(i), 0);
+	mvreg_write(pp, MVNETA_WIN_SIZE(i), 0);
+
+	if (i < 4)
+		mvreg_write(pp, MVNETA_WIN_REMAP(i), 0);
+
+	mvreg_write(pp, MVNETA_WIN_BASE(i), (base & 0xffff0000) |
+		    (attr << 8) | target);
+
+	mvreg_write(pp, MVNETA_WIN_SIZE(i), (wsize - 1) & 0xffff0000);
+
+	win_protect = mvreg_read(pp, MVNETA_ACCESS_PROTECT_ENABLE);
+	win_protect |= 3 << (2 * i);
+	mvreg_write(pp, MVNETA_ACCESS_PROTECT_ENABLE, win_protect);
+
+	win_enable &= ~(1 << i);
+	mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
+
+	return 0;
+}
+
+/* Assign and initialize pools for port. In case of fail
+ * buffer manager will remain disabled for current port.
+ */
+static int mvneta_bm_port_init(struct platform_device *pdev,
+			       struct mvneta_port *pp)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	u32 long_pool_id, short_pool_id, wsize;
+	u8 target, attr;
+	int err;
+
+	/* Get BM window information */
+	err = mvebu_mbus_get_io_win_info(pp->bm_priv->bppi_phys_addr, &wsize,
+					 &target, &attr);
+	if (err < 0)
+		return err;
+
+	pp->bm_win_id = -1;
+
+	/* Open NETA -> BM window */
+	err = mvneta_mbus_io_win_set(pp, pp->bm_priv->bppi_phys_addr, wsize,
+				     target, attr);
+	if (err < 0) {
+		netdev_info(pp->dev, "fail to configure mbus window to BM\n");
+		return err;
+	}
+
+	if (of_property_read_u32(dn, "bm,pool-long", &long_pool_id)) {
+		netdev_info(pp->dev, "missing long pool id\n");
+		return -EINVAL;
+	}
+
+	/* Create port's long pool depending on mtu */
+	pp->pool_long = mvneta_bm_pool_use(pp->bm_priv, long_pool_id,
+					   MVNETA_BM_LONG, pp->id,
+					   MVNETA_RX_PKT_SIZE(pp->dev->mtu));
+	if (!pp->pool_long) {
+		netdev_info(pp->dev, "fail to obtain long pool for port\n");
+		return -ENOMEM;
+	}
+
+	pp->pool_long->port_map |= 1 << pp->id;
+
+	mvneta_bm_pool_bufsize_set(pp, pp->pool_long->buf_size,
+				   pp->pool_long->id);
+
+	/* If short pool id is not defined, assume using single pool */
+	if (of_property_read_u32(dn, "bm,pool-short", &short_pool_id))
+		short_pool_id = long_pool_id;
+
+	/* Create port's short pool */
+	pp->pool_short = mvneta_bm_pool_use(pp->bm_priv, short_pool_id,
+					    MVNETA_BM_SHORT, pp->id,
+					    MVNETA_BM_SHORT_PKT_SIZE);
+	if (!pp->pool_short) {
+		netdev_info(pp->dev, "fail to obtain short pool for port\n");
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+		return -ENOMEM;
+	}
+
+	if (short_pool_id != long_pool_id) {
+		pp->pool_short->port_map |= 1 << pp->id;
+		mvneta_bm_pool_bufsize_set(pp, pp->pool_short->buf_size,
+					   pp->pool_short->id);
+	}
+
+	return 0;
+}
+
+/* Update settings of a pool for bigger packets */
+static void mvneta_bm_update_mtu(struct mvneta_port *pp, int mtu)
+{
+	struct mvneta_bm_pool *bm_pool = pp->pool_long;
+	struct hwbm_pool *hwbm_pool = &bm_pool->hwbm_pool;
+	int num;
+
+	/* Release all buffers from long pool */
+	mvneta_bm_bufs_free(pp->bm_priv, bm_pool, 1 << pp->id);
+	if (hwbm_pool->buf_num) {
+		WARN(1, "cannot free all buffers in pool %d\n",
+		     bm_pool->id);
+		goto bm_mtu_err;
+	}
+
+	bm_pool->pkt_size = MVNETA_RX_PKT_SIZE(mtu);
+	bm_pool->buf_size = MVNETA_RX_BUF_SIZE(bm_pool->pkt_size);
+	hwbm_pool->frag_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+			SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(bm_pool->pkt_size));
+
+	/* Fill entire long pool */
+	num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
+	if (num != hwbm_pool->size) {
+		WARN(1, "pool %d: %d of %d allocated\n",
+		     bm_pool->id, num, hwbm_pool->size);
+		goto bm_mtu_err;
+	}
+	mvneta_bm_pool_bufsize_set(pp, bm_pool->buf_size, bm_pool->id);
+
+	return;
+
+bm_mtu_err:
+	mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+	mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short, 1 << pp->id);
+
+	pp->bm_priv = NULL;
+	mvreg_write(pp, MVNETA_ACC_MODE, MVNETA_ACC_MODE_EXT1);
+	netdev_info(pp->dev, "fail to update MTU, fall back to software BM\n");
+}
+
 /* Start the Ethernet port RX and TX activity */
 static void mvneta_port_up(struct mvneta_port *pp)
 {
@@ -819,7 +1079,13 @@ static void mvneta_port_up(struct mvneta
 	mvreg_write(pp, MVNETA_TXQ_CMD, q_map);
 
 	/* Enable all initialized RXQs. */
-	mvreg_write(pp, MVNETA_RXQ_CMD, BIT(rxq_def));
+	for (queue = 0; queue < rxq_number; queue++) {
+		struct mvneta_rx_queue *rxq = &pp->rxqs[queue];
+
+		if (rxq->descs != NULL)
+			q_map |= (1 << queue);
+	}
+	mvreg_write(pp, MVNETA_RXQ_CMD, q_map);
 }
 
 /* Stop the Ethernet port activity */
@@ -973,6 +1239,81 @@ static void mvneta_set_other_mcast_table
 		mvreg_write(pp, MVNETA_DA_FILT_OTH_MCAST + offset, val);
 }
 
+static void mvneta_set_autoneg(struct mvneta_port *pp, int enable)
+{
+	u32 val;
+
+	if (enable) {
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~(MVNETA_GMAC_FORCE_LINK_PASS |
+			 MVNETA_GMAC_FORCE_LINK_DOWN |
+			 MVNETA_GMAC_AN_FLOW_CTRL_EN);
+		val |= MVNETA_GMAC_INBAND_AN_ENABLE |
+		       MVNETA_GMAC_AN_SPEED_EN |
+		       MVNETA_GMAC_AN_DUPLEX_EN;
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
+		val |= MVNETA_GMAC_1MS_CLOCK_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+		val |= MVNETA_GMAC2_INBAND_AN_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+	} else {
+		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+		val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE |
+		       MVNETA_GMAC_AN_SPEED_EN |
+		       MVNETA_GMAC_AN_DUPLEX_EN);
+		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
+		val &= ~MVNETA_GMAC_1MS_CLOCK_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val);
+
+		val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
+		val &= ~MVNETA_GMAC2_INBAND_AN_ENABLE;
+		mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+	}
+}
+
+static void mvneta_percpu_unmask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are unmasked, but actually only the ones
+	 * mapped to this CPU will be unmasked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+		    MVNETA_RX_INTR_MASK_ALL |
+		    MVNETA_TX_INTR_MASK_ALL |
+		    MVNETA_MISCINTR_INTR_MASK);
+}
+
+static void mvneta_percpu_mask_interrupt(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are masked, but actually only the ones
+	 * mapped to this CPU will be masked
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+}
+
+static void mvneta_percpu_clear_intr_cause(void *arg)
+{
+	struct mvneta_port *pp = arg;
+
+	/* All the queue are cleared, but actually only the ones
+	 * mapped to this CPU will be cleared
+	 */
+	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+}
+
 /* This method sets defaults to the NETA port:
  *	Clears interrupt Cause and Mask registers.
  *	Clears all MAC tables.
@@ -987,28 +1328,45 @@ static void mvneta_defaults_set(struct m
 	int cpu;
 	int queue;
 	u32 val;
+	int max_cpu = num_present_cpus();
 
 	/* Clear all Cause registers */
-	mvreg_write(pp, MVNETA_INTR_NEW_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 	mvreg_write(pp, MVNETA_INTR_ENABLE, 0);
 
 	/* Enable MBUS Retry bit16 */
 	mvreg_write(pp, MVNETA_MBUS_RETRY, 0x20);
 
-	/* Set CPU queue access map - all CPUs have access to all RX
-	 * queues and to all TX queues
+	/* Set CPU queue access map. CPUs are assigned to the RX and
+	 * TX queues modulo their number. If there is only one TX
+	 * queue then it is assigned to the CPU associated to the
+	 * default RX queue.
 	 */
-	for_each_present_cpu(cpu)
-		mvreg_write(pp, MVNETA_CPU_MAP(cpu),
-			    (MVNETA_CPU_RXQ_ACCESS_ALL_MASK |
-			     MVNETA_CPU_TXQ_ACCESS_ALL_MASK));
+	for_each_present_cpu(cpu) {
+		int rxq_map = 0, txq_map = 0;
+		int rxq, txq;
+
+		for (rxq = 0; rxq < rxq_number; rxq++)
+			if ((rxq % max_cpu) == cpu)
+				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+		for (txq = 0; txq < txq_number; txq++)
+			if ((txq % max_cpu) == cpu)
+				txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
+
+		/* With only one TX queue we configure a special case
+		 * which will allow to get all the irq on a single
+		 * CPU
+		 */
+		if (txq_number == 1)
+			txq_map = (cpu == pp->rxq_def) ?
+				MVNETA_CPU_TXQ_ACCESS(1) : 0;
+
+		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
+	}
 
 	/* Reset RX and TX DMAs */
 	mvreg_write(pp, MVNETA_PORT_RX_RESET, MVNETA_PORT_RX_DMA_RESET);
@@ -1025,11 +1383,19 @@ static void mvneta_defaults_set(struct m
 	mvreg_write(pp, MVNETA_PORT_RX_RESET, 0);
 
 	/* Set Port Acceleration Mode */
-	val = MVNETA_ACC_MODE_EXT;
+	if (pp->bm_priv)
+		/* HW buffer management + legacy parser */
+		val = MVNETA_ACC_MODE_EXT2;
+	else
+		/* SW buffer management + legacy parser */
+		val = MVNETA_ACC_MODE_EXT1;
 	mvreg_write(pp, MVNETA_ACC_MODE, val);
 
+	if (pp->bm_priv)
+		mvreg_write(pp, MVNETA_BM_ADDRESS, pp->bm_priv->bppi_phys_addr);
+
 	/* Update val of portCfg register accordingly with all RxQueue types */
-	val = MVNETA_PORT_CONFIG_DEFL_VALUE(rxq_def);
+	val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def);
 	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
 
 	val = 0;
@@ -1058,26 +1424,7 @@ static void mvneta_defaults_set(struct m
 	val &= ~MVNETA_PHY_POLLING_ENABLE;
 	mvreg_write(pp, MVNETA_UNIT_CONTROL, val);
 
-	if (pp->use_inband_status) {
-		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
-		val &= ~(MVNETA_GMAC_FORCE_LINK_PASS |
-			 MVNETA_GMAC_FORCE_LINK_DOWN |
-			 MVNETA_GMAC_AN_FLOW_CTRL_EN);
-		val |= MVNETA_GMAC_INBAND_AN_ENABLE |
-		       MVNETA_GMAC_AN_SPEED_EN |
-		       MVNETA_GMAC_AN_DUPLEX_EN;
-		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
-		val = mvreg_read(pp, MVNETA_GMAC_CLOCK_DIVIDER);
-		val |= MVNETA_GMAC_1MS_CLOCK_ENABLE;
-		mvreg_write(pp, MVNETA_GMAC_CLOCK_DIVIDER, val);
-	} else {
-		val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
-		val &= ~(MVNETA_GMAC_INBAND_AN_ENABLE |
-		       MVNETA_GMAC_AN_SPEED_EN |
-		       MVNETA_GMAC_AN_DUPLEX_EN);
-		mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
-	}
-
+	mvneta_set_autoneg(pp, pp->use_inband_status);
 	mvneta_set_ucast_table(pp, -1);
 	mvneta_set_special_mcast_table(pp, -1);
 	mvneta_set_other_mcast_table(pp, -1);
@@ -1413,23 +1760,25 @@ static void mvneta_txq_done(struct mvnet
 	}
 }
 
-static void *mvneta_frag_alloc(const struct mvneta_port *pp)
+void *mvneta_frag_alloc(unsigned int frag_size)
 {
-	if (likely(pp->frag_size <= PAGE_SIZE))
-		return netdev_alloc_frag(pp->frag_size);
+	if (likely(frag_size <= PAGE_SIZE))
+		return netdev_alloc_frag(frag_size);
 	else
-		return kmalloc(pp->frag_size, GFP_ATOMIC);
+		return kmalloc(frag_size, GFP_ATOMIC);
 }
+EXPORT_SYMBOL_GPL(mvneta_frag_alloc);
 
-static void mvneta_frag_free(const struct mvneta_port *pp, void *data)
+void mvneta_frag_free(unsigned int frag_size, void *data)
 {
-	if (likely(pp->frag_size <= PAGE_SIZE))
+	if (likely(frag_size <= PAGE_SIZE))
 		skb_free_frag(data);
 	else
 		kfree(data);
 }
+EXPORT_SYMBOL_GPL(mvneta_frag_free);
 
-/* Refill processing */
+/* Refill processing for SW buffer management */
 static int mvneta_rx_refill(struct mvneta_port *pp,
 			    struct mvneta_rx_desc *rx_desc)
 
@@ -1437,7 +1786,7 @@ static int mvneta_rx_refill(struct mvnet
 	dma_addr_t phys_addr;
 	void *data;
 
-	data = mvneta_frag_alloc(pp);
+	data = mvneta_frag_alloc(pp->frag_size);
 	if (!data)
 		return -ENOMEM;
 
@@ -1445,7 +1794,7 @@ static int mvneta_rx_refill(struct mvnet
 				   MVNETA_RX_BUF_SIZE(pp->pkt_size),
 				   DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(pp->dev->dev.parent, phys_addr))) {
-		mvneta_frag_free(pp, data);
+		mvneta_frag_free(pp->frag_size, data);
 		return -ENOMEM;
 	}
 
@@ -1491,22 +1840,156 @@ static void mvneta_rxq_drop_pkts(struct
 	int rx_done, i;
 
 	rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
+	if (rx_done)
+		mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+
+	if (pp->bm_priv) {
+		for (i = 0; i < rx_done; i++) {
+			struct mvneta_rx_desc *rx_desc =
+						  mvneta_rxq_next_desc_get(rxq);
+			u8 pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
+			struct mvneta_bm_pool *bm_pool;
+
+			bm_pool = &pp->bm_priv->bm_pools[pool_id];
+			/* Return dropped buffer to the pool */
+			mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
+					      rx_desc->buf_phys_addr);
+		}
+		return;
+	}
+
 	for (i = 0; i < rxq->size; i++) {
 		struct mvneta_rx_desc *rx_desc = rxq->descs + i;
 		void *data = (void *)rx_desc->buf_cookie;
 
 		dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
 				 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
-		mvneta_frag_free(pp, data);
+		mvneta_frag_free(pp->frag_size, data);
 	}
+}
 
-	if (rx_done)
-		mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+/* Main rx processing when using software buffer management */
+static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
+			  struct mvneta_rx_queue *rxq)
+{
+	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
+	struct net_device *dev = pp->dev;
+	int rx_done;
+	u32 rcvd_pkts = 0;
+	u32 rcvd_bytes = 0;
+
+	/* Get number of received packets */
+	rx_done = mvneta_rxq_busy_desc_num_get(pp, rxq);
+
+	if (rx_todo > rx_done)
+		rx_todo = rx_done;
+
+	rx_done = 0;
+
+	/* Fairness NAPI loop */
+	while (rx_done < rx_todo) {
+		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
+		struct sk_buff *skb;
+		unsigned char *data;
+		dma_addr_t phys_addr;
+		u32 rx_status, frag_size;
+		int rx_bytes, err;
+
+		rx_done++;
+		rx_status = rx_desc->status;
+		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
+		data = (unsigned char *)rx_desc->buf_cookie;
+		phys_addr = rx_desc->buf_phys_addr;
+
+		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
+		    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
+err_drop_frame:
+			dev->stats.rx_errors++;
+			mvneta_rx_error(pp, rx_desc);
+			/* leave the descriptor untouched */
+			continue;
+		}
+
+		if (rx_bytes <= rx_copybreak) {
+		/* better copy a small frame and not unmap the DMA region */
+			skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
+			if (unlikely(!skb))
+				goto err_drop_frame;
+
+			dma_sync_single_range_for_cpu(dev->dev.parent,
+						      rx_desc->buf_phys_addr,
+						      MVNETA_MH_SIZE + NET_SKB_PAD,
+						      rx_bytes,
+						      DMA_FROM_DEVICE);
+			memcpy(skb_put(skb, rx_bytes),
+			       data + MVNETA_MH_SIZE + NET_SKB_PAD,
+			       rx_bytes);
+
+			skb->protocol = eth_type_trans(skb, dev);
+			mvneta_rx_csum(pp, rx_status, skb);
+			napi_gro_receive(&port->napi, skb);
+
+			rcvd_pkts++;
+			rcvd_bytes += rx_bytes;
+
+			/* leave the descriptor and buffer untouched */
+			continue;
+		}
+
+		/* Refill processing */
+		err = mvneta_rx_refill(pp, rx_desc);
+		if (err) {
+			netdev_err(dev, "Linux processing - Can't refill\n");
+			rxq->missed++;
+			goto err_drop_frame;
+		}
+
+		frag_size = pp->frag_size;
+
+		skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
+
+		/* After refill old buffer has to be unmapped regardless
+		 * the skb is successfully built or not.
+		 */
+		dma_unmap_single(dev->dev.parent, phys_addr,
+				 MVNETA_RX_BUF_SIZE(pp->pkt_size),
+				 DMA_FROM_DEVICE);
+
+		if (!skb)
+			goto err_drop_frame;
+
+		rcvd_pkts++;
+		rcvd_bytes += rx_bytes;
+
+		/* Linux processing */
+		skb_reserve(skb, MVNETA_MH_SIZE + NET_SKB_PAD);
+		skb_put(skb, rx_bytes);
+
+		skb->protocol = eth_type_trans(skb, dev);
+
+		mvneta_rx_csum(pp, rx_status, skb);
+
+		napi_gro_receive(&port->napi, skb);
+	}
+
+	if (rcvd_pkts) {
+		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+		u64_stats_update_begin(&stats->syncp);
+		stats->rx_packets += rcvd_pkts;
+		stats->rx_bytes   += rcvd_bytes;
+		u64_stats_update_end(&stats->syncp);
+	}
+
+	/* Update rxq management counters */
+	mvneta_rxq_desc_num_update(pp, rxq, rx_done, rx_done);
+
+	return rx_done;
 }
 
-/* Main rx processing */
-static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
-		     struct mvneta_rx_queue *rxq)
+/* Main rx processing when using hardware buffer management */
+static int mvneta_rx_hwbm(struct mvneta_port *pp, int rx_todo,
+			  struct mvneta_rx_queue *rxq)
 {
 	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 	struct net_device *dev = pp->dev;
@@ -1525,21 +2008,29 @@ static int mvneta_rx(struct mvneta_port
 	/* Fairness NAPI loop */
 	while (rx_done < rx_todo) {
 		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
+		struct mvneta_bm_pool *bm_pool = NULL;
 		struct sk_buff *skb;
 		unsigned char *data;
 		dma_addr_t phys_addr;
-		u32 rx_status;
+		u32 rx_status, frag_size;
 		int rx_bytes, err;
+		u8 pool_id;
 
 		rx_done++;
 		rx_status = rx_desc->status;
 		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
 		data = (unsigned char *)rx_desc->buf_cookie;
 		phys_addr = rx_desc->buf_phys_addr;
+		pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
+		bm_pool = &pp->bm_priv->bm_pools[pool_id];
 
 		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
 		    (rx_status & MVNETA_RXD_ERR_SUMMARY)) {
-		err_drop_frame:
+err_drop_frame_ret_pool:
+			/* Return the buffer to the pool */
+			mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
+					      rx_desc->buf_phys_addr);
+err_drop_frame:
 			dev->stats.rx_errors++;
 			mvneta_rx_error(pp, rx_desc);
 			/* leave the descriptor untouched */
@@ -1550,7 +2041,7 @@ static int mvneta_rx(struct mvneta_port
 			/* better copy a small frame and not unmap the DMA region */
 			skb = netdev_alloc_skb_ip_align(dev, rx_bytes);
 			if (unlikely(!skb))
-				goto err_drop_frame;
+				goto err_drop_frame_ret_pool;
 
 			dma_sync_single_range_for_cpu(dev->dev.parent,
 			                              rx_desc->buf_phys_addr,
@@ -1568,26 +2059,31 @@ static int mvneta_rx(struct mvneta_port
 			rcvd_pkts++;
 			rcvd_bytes += rx_bytes;
 
+			/* Return the buffer to the pool */
+			mvneta_bm_pool_put_bp(pp->bm_priv, bm_pool,
+					      rx_desc->buf_phys_addr);
+
 			/* leave the descriptor and buffer untouched */
 			continue;
 		}
 
 		/* Refill processing */
-		err = mvneta_rx_refill(pp, rx_desc);
+		err = hwbm_pool_refill(&bm_pool->hwbm_pool, GFP_ATOMIC);
 		if (err) {
 			netdev_err(dev, "Linux processing - Can't refill\n");
 			rxq->missed++;
-			goto err_drop_frame;
+			goto err_drop_frame_ret_pool;
 		}
 
-		skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size);
+		frag_size = bm_pool->hwbm_pool.frag_size;
+
+		skb = build_skb(data, frag_size > PAGE_SIZE ? 0 : frag_size);
 
 		/* After refill old buffer has to be unmapped regardless
 		 * the skb is successfully built or not.
 		 */
-		dma_unmap_single(dev->dev.parent, phys_addr,
-				 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
-
+		dma_unmap_single(&pp->bm_priv->pdev->dev, phys_addr,
+				 bm_pool->buf_size, DMA_FROM_DEVICE);
 		if (!skb)
 			goto err_drop_frame;
 
@@ -2082,19 +2578,19 @@ static void mvneta_set_rx_mode(struct ne
 	if (dev->flags & IFF_PROMISC) {
 		/* Accept all: Multicast + Unicast */
 		mvneta_rx_unicast_promisc_set(pp, 1);
-		mvneta_set_ucast_table(pp, rxq_def);
-		mvneta_set_special_mcast_table(pp, rxq_def);
-		mvneta_set_other_mcast_table(pp, rxq_def);
+		mvneta_set_ucast_table(pp, pp->rxq_def);
+		mvneta_set_special_mcast_table(pp, pp->rxq_def);
+		mvneta_set_other_mcast_table(pp, pp->rxq_def);
 	} else {
 		/* Accept single Unicast */
 		mvneta_rx_unicast_promisc_set(pp, 0);
 		mvneta_set_ucast_table(pp, -1);
-		mvneta_mac_addr_set(pp, dev->dev_addr, rxq_def);
+		mvneta_mac_addr_set(pp, dev->dev_addr, pp->rxq_def);
 
 		if (dev->flags & IFF_ALLMULTI) {
 			/* Accept all multicast */
-			mvneta_set_special_mcast_table(pp, rxq_def);
-			mvneta_set_other_mcast_table(pp, rxq_def);
+			mvneta_set_special_mcast_table(pp, pp->rxq_def);
+			mvneta_set_other_mcast_table(pp, pp->rxq_def);
 		} else {
 			/* Accept only initialized multicast */
 			mvneta_set_special_mcast_table(pp, -1);
@@ -2103,7 +2599,7 @@ static void mvneta_set_rx_mode(struct ne
 			if (!netdev_mc_empty(dev)) {
 				netdev_for_each_mc_addr(ha, dev) {
 					mvneta_mcast_addr_set(pp, ha->addr,
-							      rxq_def);
+							      pp->rxq_def);
 				}
 			}
 		}
@@ -2154,6 +2650,7 @@ static int mvneta_poll(struct napi_struc
 {
 	int rx_done = 0;
 	u32 cause_rx_tx;
+	int rx_queue;
 	struct mvneta_port *pp = netdev_priv(napi->dev);
 	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
@@ -2185,8 +2682,18 @@ static int mvneta_poll(struct napi_struc
 	/* For the case where the last mvneta_poll did not process all
 	 * RX packets
 	 */
+	rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
+
 	cause_rx_tx |= port->cause_rx_tx;
-	rx_done = mvneta_rx(pp, budget, &pp->rxqs[rxq_def]);
+
+	if (rx_queue) {
+		rx_queue = rx_queue - 1;
+		if (pp->bm_priv)
+			rx_done = mvneta_rx_hwbm(pp, budget, &pp->rxqs[rx_queue]);
+		else
+			rx_done = mvneta_rx_swbm(pp, budget, &pp->rxqs[rx_queue]);
+	}
+
 	budget -= rx_done;
 
 	if (budget > 0) {
@@ -2273,9 +2780,17 @@ static int mvneta_rxq_init(struct mvneta
 	mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
 	mvneta_rx_time_coal_set(pp, rxq, rxq->time_coal);
 
-	/* Fill RXQ with buffers from RX pool */
-	mvneta_rxq_buf_size_set(pp, rxq, MVNETA_RX_BUF_SIZE(pp->pkt_size));
-	mvneta_rxq_bm_disable(pp, rxq);
+	if (!pp->bm_priv) {
+		/* Fill RXQ with buffers from RX pool */
+		mvneta_rxq_buf_size_set(pp, rxq,
+					MVNETA_RX_BUF_SIZE(pp->pkt_size));
+		mvneta_rxq_bm_disable(pp, rxq);
+	} else {
+		mvneta_rxq_bm_enable(pp, rxq);
+		mvneta_rxq_long_pool_set(pp, rxq);
+		mvneta_rxq_short_pool_set(pp, rxq);
+	}
+
 	mvneta_rxq_fill(pp, rxq, rxq->size);
 
 	return 0;
@@ -2303,6 +2818,8 @@ static void mvneta_rxq_deinit(struct mvn
 static int mvneta_txq_init(struct mvneta_port *pp,
 			   struct mvneta_tx_queue *txq)
 {
+	int cpu;
+
 	txq->size = pp->tx_ring_size;
 
 	/* A queue must always have room for at least one skb.
@@ -2355,6 +2872,14 @@ static int mvneta_txq_init(struct mvneta
 	}
 	mvneta_tx_done_pkts_coal_set(pp, txq, txq->done_pkts_coal);
 
+	/* Setup XPS mapping */
+	if (txq_number > 1)
+		cpu = txq->id % num_present_cpus();
+	else
+		cpu = pp->rxq_def % num_present_cpus();
+	cpumask_set_cpu(cpu, &txq->affinity_mask);
+	netif_set_xps_queue(pp->dev, &txq->affinity_mask, txq->id);
+
 	return 0;
 }
 
@@ -2399,19 +2924,27 @@ static void mvneta_cleanup_txqs(struct m
 /* Cleanup all Rx queues */
 static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
 {
-	mvneta_rxq_deinit(pp, &pp->rxqs[rxq_def]);
+	int queue;
+
+	for (queue = 0; queue < txq_number; queue++)
+		mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
 }
 
 
 /* Init all Rx queues */
 static int mvneta_setup_rxqs(struct mvneta_port *pp)
 {
-	int err = mvneta_rxq_init(pp, &pp->rxqs[rxq_def]);
-	if (err) {
-		netdev_err(pp->dev, "%s: can't create rxq=%d\n",
-			   __func__, rxq_def);
-		mvneta_cleanup_rxqs(pp);
-		return err;
+	int queue;
+
+	for (queue = 0; queue < rxq_number; queue++) {
+		int err = mvneta_rxq_init(pp, &pp->rxqs[queue]);
+
+		if (err) {
+			netdev_err(pp->dev, "%s: can't create rxq=%d\n",
+				   __func__, queue);
+			mvneta_cleanup_rxqs(pp);
+			return err;
+		}
 	}
 
 	return 0;
@@ -2437,7 +2970,7 @@ static int mvneta_setup_txqs(struct mvne
 
 static void mvneta_start_dev(struct mvneta_port *pp)
 {
-	unsigned int cpu;
+	int cpu;
 
 	mvneta_max_rx_size_set(pp, pp->pkt_size);
 	mvneta_txq_max_tx_size_set(pp, pp->pkt_size);
@@ -2446,17 +2979,15 @@ static void mvneta_start_dev(struct mvne
 	mvneta_port_enable(pp);
 
 	/* Enable polling on the port */
-	for_each_present_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
 		napi_enable(&port->napi);
 	}
 
-	/* Unmask interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-		    MVNETA_RX_INTR_MASK(rxq_number) |
-		    MVNETA_TX_INTR_MASK(txq_number) |
-		    MVNETA_MISCINTR_INTR_MASK);
+	/* Unmask interrupts. It has to be done from each CPU */
+	on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
+
 	mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 		    MVNETA_CAUSE_PHY_STATUS_CHANGE |
 		    MVNETA_CAUSE_LINK_CHANGE |
@@ -2472,7 +3003,7 @@ static void mvneta_stop_dev(struct mvnet
 
 	phy_stop(pp->phy_dev);
 
-	for_each_present_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
 		napi_disable(&port->napi);
@@ -2487,13 +3018,10 @@ static void mvneta_stop_dev(struct mvnet
 	mvneta_port_disable(pp);
 
 	/* Clear all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_MISC_CAUSE, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_CAUSE, 0);
+	on_each_cpu(mvneta_percpu_clear_intr_cause, pp, true);
 
 	/* Mask all ethernet port interrupts */
-	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-	mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 
 	mvneta_tx_reset(pp);
 	mvneta_rx_reset(pp);
@@ -2535,6 +3063,9 @@ static int mvneta_change_mtu(struct net_
 	dev->mtu = mtu;
 
 	if (!netif_running(dev)) {
+		if (pp->bm_priv)
+			mvneta_bm_update_mtu(pp, mtu);
+
 		netdev_update_features(dev);
 		return 0;
 	}
@@ -2547,6 +3078,9 @@ static int mvneta_change_mtu(struct net_
 	mvneta_cleanup_txqs(pp);
 	mvneta_cleanup_rxqs(pp);
 
+	if (pp->bm_priv)
+		mvneta_bm_update_mtu(pp, mtu);
+
 	pp->pkt_size = MVNETA_RX_PKT_SIZE(dev->mtu);
 	pp->frag_size = SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(pp->pkt_size)) +
 	                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -2615,7 +3149,7 @@ static int mvneta_set_mac_addr(struct ne
 	mvneta_mac_addr_set(pp, dev->dev_addr, -1);
 
 	/* Set new addr in hw */
-	mvneta_mac_addr_set(pp, sockaddr->sa_data, rxq_def);
+	mvneta_mac_addr_set(pp, sockaddr->sa_data, pp->rxq_def);
 
 	eth_commit_mac_addr_change(dev, addr);
 	return 0;
@@ -2730,24 +3264,56 @@ static void mvneta_percpu_disable(void *
 	disable_percpu_irq(pp->dev->irq);
 }
 
+/* Electing a CPU must be done in an atomic way: it should be done
+ * after or before the removal/insertion of a CPU and this function is
+ * not reentrant.
+ */
 static void mvneta_percpu_elect(struct mvneta_port *pp)
 {
-	int online_cpu_idx, cpu, i = 0;
+	int elected_cpu = 0, max_cpu, cpu, i = 0;
 
-	online_cpu_idx = rxq_def % num_online_cpus();
+	/* Use the cpu associated to the rxq when it is online, in all
+	 * the other cases, use the cpu 0 which can't be offline.
+	 */
+	if (cpu_online(pp->rxq_def))
+		elected_cpu = pp->rxq_def;
+
+	max_cpu = num_present_cpus();
 
 	for_each_online_cpu(cpu) {
-		if (i == online_cpu_idx)
-			/* Enable per-CPU interrupt on the one CPU we
-			 * just elected
+		int rxq_map = 0, txq_map = 0;
+		int rxq;
+
+		for (rxq = 0; rxq < rxq_number; rxq++)
+			if ((rxq % max_cpu) == cpu)
+				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+		if (cpu == elected_cpu)
+			/* Map the default receive queue queue to the
+			 * elected CPU
 			 */
-			smp_call_function_single(cpu, mvneta_percpu_enable,
-						pp, true);
+			rxq_map |= MVNETA_CPU_RXQ_ACCESS(pp->rxq_def);
+
+		/* We update the TX queue map only if we have one
+		 * queue. In this case we associate the TX queue to
+		 * the CPU bound to the default RX queue
+		 */
+		if (txq_number == 1)
+			txq_map = (cpu == elected_cpu) ?
+				MVNETA_CPU_TXQ_ACCESS(1) : 0;
 		else
-			/* Disable per-CPU interrupt on all the other CPU */
-			smp_call_function_single(cpu, mvneta_percpu_disable,
-						pp, true);
+			txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
+				MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+
+		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
+
+		/* Update the interrupt mask on each CPU according the
+		 * new mapping
+		 */
+		smp_call_function_single(cpu, mvneta_percpu_unmask_interrupt,
+					 pp, true);
 		i++;
+
 	}
 };
 
@@ -2762,6 +3328,14 @@ static int mvneta_percpu_notifier(struct
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
+		spin_lock(&pp->lock);
+		/* Configuring the driver for a new CPU while the
+		 * driver is stopping is racy, so just avoid it.
+		 */
+		if (pp->is_stopped) {
+			spin_unlock(&pp->lock);
+			break;
+		}
 		netif_tx_stop_all_queues(pp->dev);
 
 		/* We have to synchronise on tha napi of each CPU
@@ -2777,34 +3351,40 @@ static int mvneta_percpu_notifier(struct
 		}
 
 		/* Mask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
 		napi_enable(&port->napi);
 
+
+		/* Enable per-CPU interrupts on the CPU that is
+		 * brought up.
+		 */
+		smp_call_function_single(cpu, mvneta_percpu_enable,
+					 pp, true);
+
 		/* Enable per-CPU interrupt on the one CPU we care
 		 * about.
 		 */
 		mvneta_percpu_elect(pp);
 
 		/* Unmask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			MVNETA_RX_INTR_MASK(rxq_number) |
-			MVNETA_TX_INTR_MASK(txq_number) |
-			MVNETA_MISCINTR_INTR_MASK);
+		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
 		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 			MVNETA_CAUSE_PHY_STATUS_CHANGE |
 			MVNETA_CAUSE_LINK_CHANGE |
 			MVNETA_CAUSE_PSC_SYNC_CHANGE);
 		netif_tx_start_all_queues(pp->dev);
+		spin_unlock(&pp->lock);
 		break;
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		netif_tx_stop_all_queues(pp->dev);
+		/* Thanks to this lock we are sure that any pending
+		 * cpu election is done
+		 */
+		spin_lock(&pp->lock);
 		/* Mask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_OLD_MASK, 0);
-		mvreg_write(pp, MVNETA_INTR_MISC_MASK, 0);
+		on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+		spin_unlock(&pp->lock);
 
 		napi_synchronize(&port->napi);
 		napi_disable(&port->napi);
@@ -2818,12 +3398,11 @@ static int mvneta_percpu_notifier(struct
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		/* Check if a new CPU must be elected now this on is down */
+		spin_lock(&pp->lock);
 		mvneta_percpu_elect(pp);
+		spin_unlock(&pp->lock);
 		/* Unmask all ethernet port interrupts */
-		mvreg_write(pp, MVNETA_INTR_NEW_MASK,
-			MVNETA_RX_INTR_MASK(rxq_number) |
-			MVNETA_TX_INTR_MASK(txq_number) |
-			MVNETA_MISCINTR_INTR_MASK);
+		on_each_cpu(mvneta_percpu_unmask_interrupt, pp, true);
 		mvreg_write(pp, MVNETA_INTR_MISC_MASK,
 			MVNETA_CAUSE_PHY_STATUS_CHANGE |
 			MVNETA_CAUSE_LINK_CHANGE |
@@ -2860,17 +3439,12 @@ static int mvneta_open(struct net_device
 		goto err_cleanup_txqs;
 	}
 
-	/* Even though the documentation says that request_percpu_irq
-	 * doesn't enable the interrupts automatically, it actually
-	 * does so on the local CPU.
-	 *
-	 * Make sure it's disabled.
+	/* Enable per-CPU interrupt on all the CPU to handle our RX
+	 * queue interrupts
 	 */
-	mvneta_percpu_disable(pp);
-
-	/* Elect a CPU to handle our RX queue interrupt */
-	mvneta_percpu_elect(pp);
+	on_each_cpu(mvneta_percpu_enable, pp, true);
 
+	pp->is_stopped = false;
 	/* Register a CPU notifier to handle the case where our CPU
 	 * might be taken offline.
 	 */
@@ -2902,13 +3476,20 @@ err_cleanup_rxqs:
 static int mvneta_stop(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
-	int cpu;
 
+	/* Inform that we are stopping so we don't want to setup the
+	 * driver for new CPUs in the notifiers
+	 */
+	spin_lock(&pp->lock);
+	pp->is_stopped = true;
 	mvneta_stop_dev(pp);
 	mvneta_mdio_remove(pp);
 	unregister_cpu_notifier(&pp->cpu_notifier);
-	for_each_present_cpu(cpu)
-		smp_call_function_single(cpu, mvneta_percpu_disable, pp, true);
+	/* Now that the notifier are unregistered, we can release le
+	 * lock
+	 */
+	spin_unlock(&pp->lock);
+	on_each_cpu(mvneta_percpu_disable, pp, true);
 	free_percpu_irq(dev->irq, pp->ports);
 	mvneta_cleanup_rxqs(pp);
 	mvneta_cleanup_txqs(pp);
@@ -2943,10 +3524,43 @@ int mvneta_ethtool_get_settings(struct n
 int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
+	struct phy_device *phydev = pp->phy_dev;
 
-	if (!pp->phy_dev)
+	if (!phydev)
 		return -ENODEV;
 
+	if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) {
+		u32 val;
+
+		mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE);
+
+		if (cmd->autoneg == AUTONEG_DISABLE) {
+			val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
+			val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED |
+				 MVNETA_GMAC_CONFIG_GMII_SPEED |
+				 MVNETA_GMAC_CONFIG_FULL_DUPLEX);
+
+			if (phydev->duplex)
+				val |= MVNETA_GMAC_CONFIG_FULL_DUPLEX;
+
+			if (phydev->speed == SPEED_1000)
+				val |= MVNETA_GMAC_CONFIG_GMII_SPEED;
+			else if (phydev->speed == SPEED_100)
+				val |= MVNETA_GMAC_CONFIG_MII_SPEED;
+
+			mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val);
+		}
+
+		pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE);
+		netdev_info(pp->dev, "autoneg status set to %i\n",
+			    pp->use_inband_status);
+
+		if (netif_running(dev)) {
+			mvneta_port_down(pp);
+			mvneta_port_up(pp);
+		}
+	}
+
 	return phy_ethtool_sset(pp->phy_dev, cmd);
 }
 
@@ -3056,26 +3670,25 @@ static void mvneta_ethtool_update_stats(
 	const struct mvneta_statistic *s;
 	void __iomem *base = pp->base;
 	u32 high, low, val;
+	u64 val64;
 	int i;
 
 	for (i = 0, s = mvneta_statistics;
 	     s < mvneta_statistics + ARRAY_SIZE(mvneta_statistics);
 	     s++, i++) {
-		val = 0;
-
 		switch (s->type) {
 		case T_REG_32:
 			val = readl_relaxed(base + s->offset);
+			pp->ethtool_stats[i] += val;
 			break;
 		case T_REG_64:
 			/* Docs say to read low 32-bit then high */
 			low = readl_relaxed(base + s->offset);
 			high = readl_relaxed(base + s->offset + 4);
-			val = (u64)high << 32 | low;
+			val64 = (u64)high << 32 | low;
+			pp->ethtool_stats[i] += val64;
 			break;
 		}
-
-		pp->ethtool_stats[i] += val;
 	}
 }
 
@@ -3098,6 +3711,106 @@ static int mvneta_ethtool_get_sset_count
 	return -EOPNOTSUPP;
 }
 
+static u32 mvneta_ethtool_get_rxfh_indir_size(struct net_device *dev)
+{
+	return MVNETA_RSS_LU_TABLE_SIZE;
+}
+
+static int mvneta_ethtool_get_rxnfc(struct net_device *dev,
+				    struct ethtool_rxnfc *info,
+				    u32 *rules __always_unused)
+{
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data =  rxq_number;
+		return 0;
+	case ETHTOOL_GRXFH:
+		return -EOPNOTSUPP;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int  mvneta_config_rss(struct mvneta_port *pp)
+{
+	int cpu;
+	u32 val;
+
+	netif_tx_stop_all_queues(pp->dev);
+
+	on_each_cpu(mvneta_percpu_mask_interrupt, pp, true);
+
+	/* We have to synchronise on the napi of each CPU */
+	for_each_online_cpu(cpu) {
+		struct mvneta_pcpu_port *pcpu_port =
+			per_cpu_ptr(pp->ports, cpu);
+
+		napi_synchronize(&pcpu_port->napi);
+		napi_disable(&pcpu_port->napi);
+	}
+
+	pp->rxq_def = pp->indir[0];
+
+	/* Update unicast mapping */
+	mvneta_set_rx_mode(pp->dev);
+
+	/* Update val of portCfg register accordingly with all RxQueue types */
+	val = MVNETA_PORT_CONFIG_DEFL_VALUE(pp->rxq_def);
+	mvreg_write(pp, MVNETA_PORT_CONFIG, val);
+
+	/* Update the elected CPU matching the new rxq_def */
+	spin_lock(&pp->lock);
+	mvneta_percpu_elect(pp);
+	spin_unlock(&pp->lock);
+
+	/* We have to synchronise on the napi of each CPU */
+	for_each_online_cpu(cpu) {
+		struct mvneta_pcpu_port *pcpu_port =
+			per_cpu_ptr(pp->ports, cpu);
+
+		napi_enable(&pcpu_port->napi);
+	}
+
+	netif_tx_start_all_queues(pp->dev);
+
+	return 0;
+}
+
+static int mvneta_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
+				   const u8 *key, const u8 hfunc)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+	/* We require at least one supported parameter to be changed
+	 * and no change in any of the unsupported parameters
+	 */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+
+	if (!indir)
+		return 0;
+
+	memcpy(pp->indir, indir, MVNETA_RSS_LU_TABLE_SIZE);
+
+	return mvneta_config_rss(pp);
+}
+
+static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
+				   u8 *hfunc)
+{
+	struct mvneta_port *pp = netdev_priv(dev);
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+
+	if (!indir)
+		return 0;
+
+	memcpy(indir, pp->indir, MVNETA_RSS_LU_TABLE_SIZE);
+
+	return 0;
+}
+
 static const struct net_device_ops mvneta_netdev_ops = {
 	.ndo_open            = mvneta_open,
 	.ndo_stop            = mvneta_stop,
@@ -3122,6 +3835,10 @@ const struct ethtool_ops mvneta_eth_tool
 	.get_strings	= mvneta_ethtool_get_strings,
 	.get_ethtool_stats = mvneta_ethtool_get_stats,
 	.get_sset_count	= mvneta_ethtool_get_sset_count,
+	.get_rxfh_indir_size = mvneta_ethtool_get_rxfh_indir_size,
+	.get_rxnfc	= mvneta_ethtool_get_rxnfc,
+	.get_rxfh	= mvneta_ethtool_get_rxfh,
+	.set_rxfh	= mvneta_ethtool_set_rxfh,
 };
 
 /* Initialize hw */
@@ -3230,9 +3947,6 @@ static int mvneta_port_power_up(struct m
 		return -EINVAL;
 	}
 
-	if (pp->use_inband_status)
-		ctrl |= MVNETA_GMAC2_INBAND_AN_ENABLE;
-
 	/* Cancel Port Reset */
 	ctrl &= ~MVNETA_GMAC2_PORT_RESET;
 	mvreg_write(pp, MVNETA_GMAC_CTRL_2, ctrl);
@@ -3251,6 +3965,7 @@ static int mvneta_probe(struct platform_
 	struct resource *res;
 	struct device_node *dn = pdev->dev.of_node;
 	struct device_node *phy_node;
+	struct device_node *bm_node;
 	struct mvneta_port *pp;
 	struct net_device *dev;
 	const char *dt_mac_addr;
@@ -3314,7 +4029,13 @@ static int mvneta_probe(struct platform_
 				 strcmp(managed, "in-band-status") == 0);
 	pp->cpu_notifier.notifier_call = mvneta_percpu_notifier;
 
-	pp->clk = devm_clk_get(&pdev->dev, NULL);
+	pp->rxq_def = rxq_def;
+
+	pp->indir[0] = rxq_def;
+
+	pp->clk = devm_clk_get(&pdev->dev, "core");
+	if (IS_ERR(pp->clk))
+		pp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pp->clk)) {
 		err = PTR_ERR(pp->clk);
 		goto err_put_phy_node;
@@ -3322,6 +4043,10 @@ static int mvneta_probe(struct platform_
 
 	clk_prepare_enable(pp->clk);
 
+	pp->clk_bus = devm_clk_get(&pdev->dev, "bus");
+	if (!IS_ERR(pp->clk_bus))
+		clk_prepare_enable(pp->clk_bus);
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	pp->base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(pp->base)) {
@@ -3374,26 +4099,39 @@ static int mvneta_probe(struct platform_
 
 	pp->tx_csum_limit = tx_csum_limit;
 
+	dram_target_info = mv_mbus_dram_info();
+	if (dram_target_info)
+		mvneta_conf_mbus_windows(pp, dram_target_info);
+
 	pp->tx_ring_size = MVNETA_MAX_TXD;
 	pp->rx_ring_size = MVNETA_MAX_RXD;
 
 	pp->dev = dev;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
+	pp->id = global_port_id++;
+
+	/* Obtain access to BM resources if enabled and already initialized */
+	bm_node = of_parse_phandle(dn, "buffer-manager", 0);
+	if (bm_node && bm_node->data) {
+		pp->bm_priv = bm_node->data;
+		err = mvneta_bm_port_init(pdev, pp);
+		if (err < 0) {
+			dev_info(&pdev->dev, "use SW buffer management\n");
+			pp->bm_priv = NULL;
+		}
+	}
+
 	err = mvneta_init(&pdev->dev, pp);
 	if (err < 0)
-		goto err_free_stats;
+		goto err_netdev;
 
 	err = mvneta_port_power_up(pp, phy_mode);
 	if (err < 0) {
 		dev_err(&pdev->dev, "can't power up port\n");
-		goto err_free_stats;
+		goto err_netdev;
 	}
 
-	dram_target_info = mv_mbus_dram_info();
-	if (dram_target_info)
-		mvneta_conf_mbus_windows(pp, dram_target_info);
-
 	for_each_present_cpu(cpu) {
 		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
 
@@ -3423,16 +4161,24 @@ static int mvneta_probe(struct platform_
 
 		mvneta_fixed_link_update(pp, phy);
 
-		put_device(&phy->dev);
+		put_device(&phy->dev);
 	}
 
 	return 0;
 
+err_netdev:
+	unregister_netdev(dev);
+	if (pp->bm_priv) {
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short,
+				       1 << pp->id);
+	}
 err_free_stats:
 	free_percpu(pp->stats);
 err_free_ports:
 	free_percpu(pp->ports);
 err_clk:
+	clk_disable_unprepare(pp->clk_bus);
 	clk_disable_unprepare(pp->clk);
 err_put_phy_node:
 	of_node_put(phy_node);
@@ -3450,6 +4196,7 @@ static int mvneta_remove(struct platform
 	struct mvneta_port *pp = netdev_priv(dev);
 
 	unregister_netdev(dev);
+	clk_disable_unprepare(pp->clk_bus);
 	clk_disable_unprepare(pp->clk);
 	free_percpu(pp->ports);
 	free_percpu(pp->stats);
@@ -3457,6 +4204,12 @@ static int mvneta_remove(struct platform
 	of_node_put(pp->phy_node);
 	free_netdev(dev);
 
+	if (pp->bm_priv) {
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_long, 1 << pp->id);
+		mvneta_bm_pool_destroy(pp->bm_priv, pp->pool_short,
+				       1 << pp->id);
+	}
+
 	return 0;
 }
 
--- a/dev/null
+++ b/drivers/net/ethernet/marvell/mvneta_bm.c
@@ -0,0 +1,487 @@
+/*
+ * Driver for Marvell NETA network controller Buffer Manager.
+ *
+ * Copyright (C) 2015 Marvell
+ *
+ * Marcin Wojtas <mw at semihalf.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk.h>
+#include <linux/genalloc.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mbus.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/skbuff.h>
+#include <net/hwbm.h>
+#include "mvneta_bm.h"
+
+#define MVNETA_BM_DRIVER_NAME "mvneta_bm"
+#define MVNETA_BM_DRIVER_VERSION "1.0"
+
+static void mvneta_bm_write(struct mvneta_bm *priv, u32 offset, u32 data)
+{
+	writel(data, priv->reg_base + offset);
+}
+
+static u32 mvneta_bm_read(struct mvneta_bm *priv, u32 offset)
+{
+	return readl(priv->reg_base + offset);
+}
+
+static void mvneta_bm_pool_enable(struct mvneta_bm *priv, int pool_id)
+{
+	u32 val;
+
+	val = mvneta_bm_read(priv, MVNETA_BM_POOL_BASE_REG(pool_id));
+	val |= MVNETA_BM_POOL_ENABLE_MASK;
+	mvneta_bm_write(priv, MVNETA_BM_POOL_BASE_REG(pool_id), val);
+
+	/* Clear BM cause register */
+	mvneta_bm_write(priv, MVNETA_BM_INTR_CAUSE_REG, 0);
+}
+
+static void mvneta_bm_pool_disable(struct mvneta_bm *priv, int pool_id)
+{
+	u32 val;
+
+	val = mvneta_bm_read(priv, MVNETA_BM_POOL_BASE_REG(pool_id));
+	val &= ~MVNETA_BM_POOL_ENABLE_MASK;
+	mvneta_bm_write(priv, MVNETA_BM_POOL_BASE_REG(pool_id), val);
+}
+
+static inline void mvneta_bm_config_set(struct mvneta_bm *priv, u32 mask)
+{
+	u32 val;
+
+	val = mvneta_bm_read(priv, MVNETA_BM_CONFIG_REG);
+	val |= mask;
+	mvneta_bm_write(priv, MVNETA_BM_CONFIG_REG, val);
+}
+
+static inline void mvneta_bm_config_clear(struct mvneta_bm *priv, u32 mask)
+{
+	u32 val;
+
+	val = mvneta_bm_read(priv, MVNETA_BM_CONFIG_REG);
+	val &= ~mask;
+	mvneta_bm_write(priv, MVNETA_BM_CONFIG_REG, val);
+}
+
+static void mvneta_bm_pool_target_set(struct mvneta_bm *priv, int pool_id,
+				      u8 target_id, u8 attr)
+{
+	u32 val;
+
+	val = mvneta_bm_read(priv, MVNETA_BM_XBAR_POOL_REG(pool_id));
+	val &= ~MVNETA_BM_TARGET_ID_MASK(pool_id);
+	val &= ~MVNETA_BM_XBAR_ATTR_MASK(pool_id);
+	val |= MVNETA_BM_TARGET_ID_VAL(pool_id, target_id);
+	val |= MVNETA_BM_XBAR_ATTR_VAL(pool_id, attr);
+
+	mvneta_bm_write(priv, MVNETA_BM_XBAR_POOL_REG(pool_id), val);
+}
+
+int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf)
+{
+	struct mvneta_bm_pool *bm_pool =
+		(struct mvneta_bm_pool *)hwbm_pool->priv;
+	struct mvneta_bm *priv = bm_pool->priv;
+	dma_addr_t phys_addr;
+
+	/* In order to update buf_cookie field of RX descriptor properly,
+	 * BM hardware expects buf virtual address to be placed in the
+	 * first four bytes of mapped buffer.
+	 */
+	*(u32 *)buf = (u32)buf;
+	phys_addr = dma_map_single(&priv->pdev->dev, buf, bm_pool->buf_size,
+				   DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(&priv->pdev->dev, phys_addr)))
+		return -ENOMEM;
+
+	mvneta_bm_pool_put_bp(priv, bm_pool, phys_addr);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mvneta_bm_construct);
+
+/* Create pool */
+static int mvneta_bm_pool_create(struct mvneta_bm *priv,
+				 struct mvneta_bm_pool *bm_pool)
+{
+	struct platform_device *pdev = priv->pdev;
+	u8 target_id, attr;
+	int size_bytes, err;
+	size_bytes = sizeof(u32) * bm_pool->hwbm_pool.size;
+	bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, size_bytes,
+						&bm_pool->phys_addr,
+						GFP_KERNEL);
+	if (!bm_pool->virt_addr)
+		return -ENOMEM;
+
+	if (!IS_ALIGNED((u32)bm_pool->virt_addr, MVNETA_BM_POOL_PTR_ALIGN)) {
+		dma_free_coherent(&pdev->dev, size_bytes, bm_pool->virt_addr,
+				  bm_pool->phys_addr);
+		dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
+			bm_pool->id, MVNETA_BM_POOL_PTR_ALIGN);
+		return -ENOMEM;
+	}
+
+	err = mvebu_mbus_get_dram_win_info(bm_pool->phys_addr, &target_id,
+					   &attr);
+	if (err < 0) {
+		dma_free_coherent(&pdev->dev, size_bytes, bm_pool->virt_addr,
+				  bm_pool->phys_addr);
+		return err;
+	}
+
+	/* Set pool address */
+	mvneta_bm_write(priv, MVNETA_BM_POOL_BASE_REG(bm_pool->id),
+			bm_pool->phys_addr);
+
+	mvneta_bm_pool_target_set(priv, bm_pool->id, target_id,  attr);
+	mvneta_bm_pool_enable(priv, bm_pool->id);
+
+	return 0;
+}
+
+/* Notify the driver that BM pool is being used as specific type and return the
+ * pool pointer on success
+ */
+struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
+					  enum mvneta_bm_type type, u8 port_id,
+					  int pkt_size)
+{
+	struct mvneta_bm_pool *new_pool = &priv->bm_pools[pool_id];
+	int num, err;
+
+	if (new_pool->type == MVNETA_BM_LONG &&
+	    new_pool->port_map != 1 << port_id) {
+		dev_err(&priv->pdev->dev,
+			"long pool cannot be shared by the ports\n");
+		return NULL;
+	}
+
+	if (new_pool->type == MVNETA_BM_SHORT && new_pool->type != type) {
+		dev_err(&priv->pdev->dev,
+			"mixing pools' types between the ports is forbidden\n");
+		return NULL;
+	}
+
+	if (new_pool->pkt_size == 0 || type != MVNETA_BM_SHORT)
+		new_pool->pkt_size = pkt_size;
+
+	/* Allocate buffers in case BM pool hasn't been used yet */
+	if (new_pool->type == MVNETA_BM_FREE) {
+		struct hwbm_pool *hwbm_pool = &new_pool->hwbm_pool;
+
+		new_pool->priv = priv;
+		new_pool->type = type;
+		new_pool->buf_size = MVNETA_RX_BUF_SIZE(new_pool->pkt_size);
+		hwbm_pool->frag_size =
+			SKB_DATA_ALIGN(MVNETA_RX_BUF_SIZE(new_pool->pkt_size)) +
+			SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+		hwbm_pool->construct = mvneta_bm_construct;
+		hwbm_pool->priv = new_pool;
+
+		/* Create new pool */
+		err = mvneta_bm_pool_create(priv, new_pool);
+		if (err) {
+			dev_err(&priv->pdev->dev, "fail to create pool %d\n",
+				new_pool->id);
+			return NULL;
+		}
+
+		/* Allocate buffers for this pool */
+		num = hwbm_pool_add(hwbm_pool, hwbm_pool->size, GFP_ATOMIC);
+		if (num != hwbm_pool->size) {
+			WARN(1, "pool %d: %d of %d allocated\n",
+			     new_pool->id, num, hwbm_pool->size);
+			return NULL;
+		}
+	}
+
+	return new_pool;
+}
+EXPORT_SYMBOL_GPL(mvneta_bm_pool_use);
+
+/* Free all buffers from the pool */
+void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+			 u8 port_map)
+{
+	int i;
+
+	bm_pool->port_map &= ~port_map;
+	if (bm_pool->port_map)
+		return;
+
+	mvneta_bm_config_set(priv, MVNETA_BM_EMPTY_LIMIT_MASK);
+
+	for (i = 0; i < bm_pool->hwbm_pool.buf_num; i++) {
+		dma_addr_t buf_phys_addr;
+		u32 *vaddr;
+
+		/* Get buffer physical address (indirect access) */
+		buf_phys_addr = mvneta_bm_pool_get_bp(priv, bm_pool);
+
+		/* Work-around to the problems when destroying the pool,
+		 * when it occurs that a read access to BPPI returns 0.
+		 */
+		if (buf_phys_addr == 0)
+			continue;
+
+		vaddr = phys_to_virt(buf_phys_addr);
+		if (!vaddr)
+			break;
+
+		dma_unmap_single(&priv->pdev->dev, buf_phys_addr,
+				 bm_pool->buf_size, DMA_FROM_DEVICE);
+		hwbm_buf_free(&bm_pool->hwbm_pool, vaddr);
+	}
+
+	mvneta_bm_config_clear(priv, MVNETA_BM_EMPTY_LIMIT_MASK);
+
+	/* Update BM driver with number of buffers removed from pool */
+	bm_pool->hwbm_pool.buf_num -= i;
+}
+EXPORT_SYMBOL_GPL(mvneta_bm_bufs_free);
+
+/* Cleanup pool */
+void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
+			    struct mvneta_bm_pool *bm_pool, u8 port_map)
+{
+	struct hwbm_pool *hwbm_pool = &bm_pool->hwbm_pool;
+	bm_pool->port_map &= ~port_map;
+	if (bm_pool->port_map)
+		return;
+
+	bm_pool->type = MVNETA_BM_FREE;
+
+	mvneta_bm_bufs_free(priv, bm_pool, port_map);
+	if (hwbm_pool->buf_num)
+		WARN(1, "cannot free all buffers in pool %d\n", bm_pool->id);
+
+	if (bm_pool->virt_addr) {
+		dma_free_coherent(&priv->pdev->dev,
+				  sizeof(u32) * hwbm_pool->size,
+				  bm_pool->virt_addr, bm_pool->phys_addr);
+		bm_pool->virt_addr = NULL;
+	}
+
+	mvneta_bm_pool_disable(priv, bm_pool->id);
+}
+EXPORT_SYMBOL_GPL(mvneta_bm_pool_destroy);
+
+static void mvneta_bm_pools_init(struct mvneta_bm *priv)
+{
+	struct device_node *dn = priv->pdev->dev.of_node;
+	struct mvneta_bm_pool *bm_pool;
+	char prop[15];
+	u32 size;
+	int i;
+
+	/* Activate BM unit */
+	mvneta_bm_write(priv, MVNETA_BM_COMMAND_REG, MVNETA_BM_START_MASK);
+
+	/* Create all pools with maximum size */
+	for (i = 0; i < MVNETA_BM_POOLS_NUM; i++) {
+		bm_pool = &priv->bm_pools[i];
+		bm_pool->id = i;
+		bm_pool->type = MVNETA_BM_FREE;
+
+		/* Reset read pointer */
+		mvneta_bm_write(priv, MVNETA_BM_POOL_READ_PTR_REG(i), 0);
+
+		/* Reset write pointer */
+		mvneta_bm_write(priv, MVNETA_BM_POOL_WRITE_PTR_REG(i), 0);
+
+		/* Configure pool size according to DT or use default value */
+		sprintf(prop, "pool%d,capacity", i);
+		if (of_property_read_u32(dn, prop, &size)) {
+			size = MVNETA_BM_POOL_CAP_DEF;
+		} else if (size > MVNETA_BM_POOL_CAP_MAX) {
+			dev_warn(&priv->pdev->dev,
+				 "Illegal pool %d capacity %d, set to %d\n",
+				 i, size, MVNETA_BM_POOL_CAP_MAX);
+			size = MVNETA_BM_POOL_CAP_MAX;
+		} else if (size < MVNETA_BM_POOL_CAP_MIN) {
+			dev_warn(&priv->pdev->dev,
+				 "Illegal pool %d capacity %d, set to %d\n",
+				 i, size, MVNETA_BM_POOL_CAP_MIN);
+			size = MVNETA_BM_POOL_CAP_MIN;
+		} else if (!IS_ALIGNED(size, MVNETA_BM_POOL_CAP_ALIGN)) {
+			dev_warn(&priv->pdev->dev,
+				 "Illegal pool %d capacity %d, round to %d\n",
+				 i, size, ALIGN(size,
+				 MVNETA_BM_POOL_CAP_ALIGN));
+			size = ALIGN(size, MVNETA_BM_POOL_CAP_ALIGN);
+		}
+		bm_pool->hwbm_pool.size = size;
+
+		mvneta_bm_write(priv, MVNETA_BM_POOL_SIZE_REG(i),
+				bm_pool->hwbm_pool.size);
+
+		/* Obtain custom pkt_size from DT */
+		sprintf(prop, "pool%d,pkt-size", i);
+		if (of_property_read_u32(dn, prop, &bm_pool->pkt_size))
+			bm_pool->pkt_size = 0;
+	}
+}
+
+static void mvneta_bm_default_set(struct mvneta_bm *priv)
+{
+	u32 val;
+
+	/* Mask BM all interrupts */
+	mvneta_bm_write(priv, MVNETA_BM_INTR_MASK_REG, 0);
+
+	/* Clear BM cause register */
+	mvneta_bm_write(priv, MVNETA_BM_INTR_CAUSE_REG, 0);
+
+	/* Set BM configuration register */
+	val = mvneta_bm_read(priv, MVNETA_BM_CONFIG_REG);
+
+	/* Reduce MaxInBurstSize from 32 BPs to 16 BPs */
+	val &= ~MVNETA_BM_MAX_IN_BURST_SIZE_MASK;
+	val |= MVNETA_BM_MAX_IN_BURST_SIZE_16BP;
+	mvneta_bm_write(priv, MVNETA_BM_CONFIG_REG, val);
+}
+
+static int mvneta_bm_init(struct mvneta_bm *priv)
+{
+	mvneta_bm_default_set(priv);
+
+	/* Allocate and initialize BM pools structures */
+	priv->bm_pools = devm_kcalloc(&priv->pdev->dev, MVNETA_BM_POOLS_NUM,
+				      sizeof(struct mvneta_bm_pool),
+				      GFP_KERNEL);
+	if (!priv->bm_pools)
+		return -ENOMEM;
+
+	mvneta_bm_pools_init(priv);
+
+	return 0;
+}
+
+static int mvneta_bm_get_sram(struct device_node *dn,
+			      struct mvneta_bm *priv)
+{
+	priv->bppi_pool = of_gen_pool_get(dn, "internal-mem", 0);
+	if (!priv->bppi_pool)
+		return -ENOMEM;
+
+	priv->bppi_virt_addr = gen_pool_dma_alloc(priv->bppi_pool,
+						  MVNETA_BM_BPPI_SIZE,
+						  &priv->bppi_phys_addr);
+	if (!priv->bppi_virt_addr)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void mvneta_bm_put_sram(struct mvneta_bm *priv)
+{
+	gen_pool_free(priv->bppi_pool, priv->bppi_phys_addr,
+		      MVNETA_BM_BPPI_SIZE);
+}
+
+static int mvneta_bm_probe(struct platform_device *pdev)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	struct mvneta_bm *priv;
+	struct resource *res;
+	int err;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(struct mvneta_bm), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	priv->reg_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(priv->reg_base))
+		return PTR_ERR(priv->reg_base);
+
+	priv->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(priv->clk))
+		return PTR_ERR(priv->clk);
+	err = clk_prepare_enable(priv->clk);
+	if (err < 0)
+		return err;
+
+	err = mvneta_bm_get_sram(dn, priv);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to allocate internal memory\n");
+		goto err_clk;
+	}
+
+	priv->pdev = pdev;
+
+	/* Initialize buffer manager internals */
+	err = mvneta_bm_init(priv);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to initialize controller\n");
+		goto err_sram;
+	}
+
+	dn->data = priv;
+	platform_set_drvdata(pdev, priv);
+
+	dev_info(&pdev->dev, "Buffer Manager for network controller enabled\n");
+
+	return 0;
+
+err_sram:
+	mvneta_bm_put_sram(priv);
+err_clk:
+	clk_disable_unprepare(priv->clk);
+	return err;
+}
+
+static int mvneta_bm_remove(struct platform_device *pdev)
+{
+	struct mvneta_bm *priv = platform_get_drvdata(pdev);
+	u8 all_ports_map = 0xff;
+	int i = 0;
+
+	for (i = 0; i < MVNETA_BM_POOLS_NUM; i++) {
+		struct mvneta_bm_pool *bm_pool = &priv->bm_pools[i];
+
+		mvneta_bm_pool_destroy(priv, bm_pool, all_ports_map);
+	}
+
+	mvneta_bm_put_sram(priv);
+
+	/* Dectivate BM unit */
+	mvneta_bm_write(priv, MVNETA_BM_COMMAND_REG, MVNETA_BM_STOP_MASK);
+
+	clk_disable_unprepare(priv->clk);
+
+	return 0;
+}
+
+static const struct of_device_id mvneta_bm_match[] = {
+	{ .compatible = "marvell,armada-380-neta-bm" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, mvneta_bm_match);
+
+static struct platform_driver mvneta_bm_driver = {
+	.probe = mvneta_bm_probe,
+	.remove = mvneta_bm_remove,
+	.driver = {
+		.name = MVNETA_BM_DRIVER_NAME,
+		.of_match_table = mvneta_bm_match,
+	},
+};
+
+module_platform_driver(mvneta_bm_driver);
+
+MODULE_DESCRIPTION("Marvell NETA Buffer Manager Driver - www.marvell.com");
+MODULE_AUTHOR("Marcin Wojtas <mw at semihalf.com>");
+MODULE_LICENSE("GPL v2");
--- a/dev/null
+++ b/drivers/net/ethernet/marvell/mvneta_bm.h
@@ -0,0 +1,182 @@
+/*
+ * Driver for Marvell NETA network controller Buffer Manager.
+ *
+ * Copyright (C) 2015 Marvell
+ *
+ * Marcin Wojtas <mw at semihalf.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef _MVNETA_BM_H_
+#define _MVNETA_BM_H_
+
+/* BM Configuration Register */
+#define MVNETA_BM_CONFIG_REG			0x0
+#define    MVNETA_BM_STATUS_MASK		0x30
+#define    MVNETA_BM_ACTIVE_MASK		BIT(4)
+#define    MVNETA_BM_MAX_IN_BURST_SIZE_MASK	0x60000
+#define    MVNETA_BM_MAX_IN_BURST_SIZE_16BP	BIT(18)
+#define    MVNETA_BM_EMPTY_LIMIT_MASK		BIT(19)
+
+/* BM Activation Register */
+#define MVNETA_BM_COMMAND_REG			0x4
+#define    MVNETA_BM_START_MASK			BIT(0)
+#define    MVNETA_BM_STOP_MASK			BIT(1)
+#define    MVNETA_BM_PAUSE_MASK			BIT(2)
+
+/* BM Xbar interface Register */
+#define MVNETA_BM_XBAR_01_REG			0x8
+#define MVNETA_BM_XBAR_23_REG			0xc
+#define MVNETA_BM_XBAR_POOL_REG(pool)		\
+		(((pool) < 2) ? MVNETA_BM_XBAR_01_REG : MVNETA_BM_XBAR_23_REG)
+#define     MVNETA_BM_TARGET_ID_OFFS(pool)	(((pool) & 1) ? 16 : 0)
+#define     MVNETA_BM_TARGET_ID_MASK(pool)	\
+		(0xf << MVNETA_BM_TARGET_ID_OFFS(pool))
+#define     MVNETA_BM_TARGET_ID_VAL(pool, id)	\
+		((id) << MVNETA_BM_TARGET_ID_OFFS(pool))
+#define     MVNETA_BM_XBAR_ATTR_OFFS(pool)	(((pool) & 1) ? 20 : 4)
+#define     MVNETA_BM_XBAR_ATTR_MASK(pool)	\
+		(0xff << MVNETA_BM_XBAR_ATTR_OFFS(pool))
+#define     MVNETA_BM_XBAR_ATTR_VAL(pool, attr)	\
+		((attr) << MVNETA_BM_XBAR_ATTR_OFFS(pool))
+
+/* Address of External Buffer Pointers Pool Register */
+#define MVNETA_BM_POOL_BASE_REG(pool)		(0x10 + ((pool) << 4))
+#define     MVNETA_BM_POOL_ENABLE_MASK		BIT(0)
+
+/* External Buffer Pointers Pool RD pointer Register */
+#define MVNETA_BM_POOL_READ_PTR_REG(pool)	(0x14 + ((pool) << 4))
+#define     MVNETA_BM_POOL_SET_READ_PTR_MASK	0xfffc
+#define     MVNETA_BM_POOL_GET_READ_PTR_OFFS	16
+#define     MVNETA_BM_POOL_GET_READ_PTR_MASK	0xfffc0000
+
+/* External Buffer Pointers Pool WR pointer */
+#define MVNETA_BM_POOL_WRITE_PTR_REG(pool)	(0x18 + ((pool) << 4))
+#define     MVNETA_BM_POOL_SET_WRITE_PTR_OFFS	0
+#define     MVNETA_BM_POOL_SET_WRITE_PTR_MASK	0xfffc
+#define     MVNETA_BM_POOL_GET_WRITE_PTR_OFFS	16
+#define     MVNETA_BM_POOL_GET_WRITE_PTR_MASK	0xfffc0000
+
+/* External Buffer Pointers Pool Size Register */
+#define MVNETA_BM_POOL_SIZE_REG(pool)		(0x1c + ((pool) << 4))
+#define     MVNETA_BM_POOL_SIZE_MASK		0x3fff
+
+/* BM Interrupt Cause Register */
+#define MVNETA_BM_INTR_CAUSE_REG		(0x50)
+
+/* BM interrupt Mask Register */
+#define MVNETA_BM_INTR_MASK_REG			(0x54)
+
+/* Other definitions */
+#define MVNETA_BM_SHORT_PKT_SIZE		256
+#define MVNETA_BM_POOLS_NUM			4
+#define MVNETA_BM_POOL_CAP_MIN			128
+#define MVNETA_BM_POOL_CAP_DEF			2048
+#define MVNETA_BM_POOL_CAP_MAX			\
+		(16 * 1024 - MVNETA_BM_POOL_CAP_ALIGN)
+#define MVNETA_BM_POOL_CAP_ALIGN		32
+#define MVNETA_BM_POOL_PTR_ALIGN		32
+
+#define MVNETA_BM_POOL_ACCESS_OFFS		8
+
+#define MVNETA_BM_BPPI_SIZE			0x100000
+
+#define MVNETA_RX_BUF_SIZE(pkt_size)   ((pkt_size) + NET_SKB_PAD)
+
+enum mvneta_bm_type {
+	MVNETA_BM_FREE,
+	MVNETA_BM_LONG,
+	MVNETA_BM_SHORT
+};
+
+struct mvneta_bm {
+	void __iomem *reg_base;
+	struct clk *clk;
+	struct platform_device *pdev;
+
+	struct gen_pool *bppi_pool;
+	/* BPPI virtual base address */
+	void __iomem *bppi_virt_addr;
+	/* BPPI physical base address */
+	dma_addr_t bppi_phys_addr;
+
+	/* BM pools */
+	struct mvneta_bm_pool *bm_pools;
+};
+
+struct mvneta_bm_pool {
+	struct hwbm_pool hwbm_pool;
+	/* Pool number in the range 0-3 */
+	u8 id;
+	enum mvneta_bm_type type;
+
+	/* Packet size */
+	int pkt_size;
+	/* Size of the buffer acces through DMA*/
+	u32 buf_size;
+
+	/* BPPE virtual base address */
+	u32 *virt_addr;
+	/* BPPE physical base address */
+	dma_addr_t phys_addr;
+
+	/* Ports using BM pool */
+	u8 port_map;
+
+	struct mvneta_bm *priv;
+};
+
+/* Declarations and definitions */
+void *mvneta_frag_alloc(unsigned int frag_size);
+void mvneta_frag_free(unsigned int frag_size, void *data);
+
+#if defined(CONFIG_MVNETA_BM) || defined(CONFIG_MVNETA_BM_MODULE)
+void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
+			    struct mvneta_bm_pool *bm_pool, u8 port_map);
+void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+			 u8 port_map);
+int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf);
+int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+			  struct mvneta_bm_pool *bm_pool);
+struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
+					  enum mvneta_bm_type type, u8 port_id,
+					  int pkt_size);
+
+static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
+					 struct mvneta_bm_pool *bm_pool,
+					 dma_addr_t buf_phys_addr)
+{
+	writel_relaxed(buf_phys_addr, priv->bppi_virt_addr +
+		       (bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS));
+}
+
+static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
+					struct mvneta_bm_pool *bm_pool)
+{
+	return readl_relaxed(priv->bppi_virt_addr +
+			     (bm_pool->id << MVNETA_BM_POOL_ACCESS_OFFS));
+}
+#else
+void mvneta_bm_pool_destroy(struct mvneta_bm *priv,
+			    struct mvneta_bm_pool *bm_pool, u8 port_map) {}
+void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool,
+			 u8 port_map) {}
+int mvneta_bm_construct(struct hwbm_pool *hwbm_pool, void *buf) { return 0; }
+int mvneta_bm_pool_refill(struct mvneta_bm *priv,
+			  struct mvneta_bm_pool *bm_pool) {return 0; }
+struct mvneta_bm_pool *mvneta_bm_pool_use(struct mvneta_bm *priv, u8 pool_id,
+					  enum mvneta_bm_type type, u8 port_id,
+					  int pkt_size) { return NULL; }
+
+static inline void mvneta_bm_pool_put_bp(struct mvneta_bm *priv,
+					 struct mvneta_bm_pool *bm_pool,
+					 dma_addr_t buf_phys_addr) {}
+
+static inline u32 mvneta_bm_pool_get_bp(struct mvneta_bm *priv,
+					struct mvneta_bm_pool *bm_pool)
+{ return 0; }
+#endif /* CONFIG_MVNETA_BM */
+#endif
--- a/arch/arm/boot/dts/armada-xp-db.dts
+++ b/arch/arm/boot/dts/armada-xp-db.dts
@@ -77,7 +77,8 @@
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
 			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
+			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+		          MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
 
 		devbus-bootcs {
 			status = "okay";
@@ -181,21 +182,33 @@
 				status = "okay";
 				phy = <&phy0>;
 				phy-mode = "rgmii-id";
+				buffer-manager = <&bm>;
+				bm,pool-long = <0>;
 			};
 			ethernet at 74000 {
 				status = "okay";
 				phy = <&phy1>;
 				phy-mode = "rgmii-id";
+				buffer-manager = <&bm>;
+				bm,pool-long = <1>;
 			};
 			ethernet at 30000 {
 				status = "okay";
 				phy = <&phy2>;
 				phy-mode = "sgmii";
+				buffer-manager = <&bm>;
+				bm,pool-long = <2>;
 			};
 			ethernet at 34000 {
 				status = "okay";
 				phy = <&phy3>;
 				phy-mode = "sgmii";
+				buffer-manager = <&bm>;
+				bm,pool-long = <3>;
+			};
+
+				bm at c0000 {
+				    status = "okay";
 			};
 
 			mvsdio at d4000 {
@@ -230,5 +243,9 @@
 				};
 			};
 		};
+
+	bm-bppi {
+	    status = "okay";
 	};
+    };
 };
--- a/arch/arm/boot/dts/armada-xp-gp.dts
+++ b/arch/arm/boot/dts/armada-xp-gp.dts
@@ -96,7 +96,8 @@
 			  MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
 			  MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x1000000
 			  MBUS_ID(0x09, 0x09) 0 0 0xf1100000 0x10000
-			  MBUS_ID(0x09, 0x05) 0 0 0xf1110000 0x10000>;
+			  MBUS_ID(0x09, 0x05) 0 0 0xf8110000 0x10000
+			  MBUS_ID(0x0c, 0x04) 0 0 0xf1200000 0x100000>;
 
 		devbus-bootcs {
 			status = "okay";
@@ -196,21 +197,29 @@
 				status = "okay";
 				phy = <&phy0>;
 				phy-mode = "qsgmii";
+				buffer-manager = <&bm>;
+				bm,pool-long = <0>;
 			};
 			ethernet at 74000 {
 				status = "okay";
 				phy = <&phy1>;
 				phy-mode = "qsgmii";
+				buffer-manager = <&bm>;
+				bm,pool-long = <1>;
 			};
 			ethernet at 30000 {
 				status = "okay";
 				phy = <&phy2>;
 				phy-mode = "qsgmii";
+				buffer-manager = <&bm>;
+				bm,pool-long = <2>;
 			};
 			ethernet at 34000 {
 				status = "okay";
 				phy = <&phy3>;
 				phy-mode = "qsgmii";
+				buffer-manager = <&bm>;
+				bm,pool-long = <3>;
 			};
 
 			/* Front-side USB slot */
@@ -235,6 +244,10 @@
 				};
 			};
 
+		bm at c0000 {
+		    status = "okay";
+		};
+
 			nand at d0000 {
 				status = "okay";
 				num-cs = <1>;
@@ -243,5 +256,10 @@
 				nand-on-flash-bbt;
 			};
 		};
+
+	bm-bppi {
+            status = "okay";
+
 	};
+    };
 };
--- a/arch/arm/boot/dts/armada-xp.dtsi
+++ b/arch/arm/boot/dts/armada-xp.dtsi
@@ -251,6 +251,14 @@
 				marvell,crypto-sram-size = <0x800>;
 			};
 
+            bm: bm at c0000 {
+                compatible = "marvell,armada-380-neta-bm";
+                reg = <0xc0000 0xac>;
+                clocks = <&gateclk 13>;
+                internal-mem = <&bm_bppi>;
+                status = "disabled";
+            };
+
 			xor at f0900 {
 				compatible = "marvell,orion-xor";
 				reg = <0xF0900 0x100
@@ -289,7 +297,18 @@
 			#size-cells = <1>;
 			ranges = <0 MBUS_ID(0x09, 0x05) 0 0x800>;
 		};
-	};
+
+        bm_bppi: bm-bppi {
+            compatible = "mmio-sram";
+            reg = <MBUS_ID(0x0c, 0x04) 0 0x100000>;
+            ranges = <0 MBUS_ID(0x0c, 0x04) 0 0x100000>;
+            #address-cells = <1>;
+            #size-cells = <1>;
+            clocks = <&gateclk 13>;
+            no-memory-wc;
+            status = "disabled";
+        };
+   };
 
 	clocks {
 		/* 25 MHz reference crystal */
--- a/dev/null
+++ b/include/net/hwbm.h
@@ -0,0 +1,28 @@
+#ifndef _HWBM_H
+#define _HWBM_H
+
+struct hwbm_pool {
+	/* Capacity of the pool */
+	int size;
+	/* Size of the buffers managed */
+	int frag_size;
+	/* Number of buffers currently used by this pool */
+	int buf_num;
+	/* constructor called during alocation */
+	int (*construct)(struct hwbm_pool *bm_pool, void *buf);
+	/* protect acces to the buffer counter*/
+	spinlock_t lock;
+	/* private data */
+	void *priv;
+};
+#ifdef CONFIG_HWBM
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf);
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp);
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp);
+#else
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf) {}
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp) { return 0; }
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+{ return 0; }
+#endif /* CONFIG_HWBM */
+#endif /* _HWBM_H */
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -259,6 +259,9 @@ config XPS
 	depends on SMP
 	default y
 
+config HWBM
+	bool
+
 config CGROUP_NET_PRIO
 	bool "Network priority cgroup"
 	depends on CGROUPS
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -25,3 +25,4 @@ obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_cl
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
+obj-$(CONFIG_HWBM) += hwbm.o
--- a/dev/null
+++ b/net/core/hwbm.c
@@ -0,0 +1,87 @@
+/* Support for hardware buffer manager.
+ *
+ * Copyright (C) 2016 Marvell
+ *
+ * Gregory CLEMENT <gregory.clement at free-electrons.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ */
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/skbuff.h>
+#include <net/hwbm.h>
+
+void hwbm_buf_free(struct hwbm_pool *bm_pool, void *buf)
+{
+	if (likely(bm_pool->frag_size <= PAGE_SIZE))
+		skb_free_frag(buf);
+	else
+		kfree(buf);
+}
+EXPORT_SYMBOL_GPL(hwbm_buf_free);
+
+/* Refill processing for HW buffer management */
+int hwbm_pool_refill(struct hwbm_pool *bm_pool, gfp_t gfp)
+{
+	int frag_size = bm_pool->frag_size;
+	void *buf;
+
+	if (likely(frag_size <= PAGE_SIZE))
+		buf = netdev_alloc_frag(frag_size);
+	else
+		buf = kmalloc(frag_size, gfp);
+
+	if (!buf)
+		return -ENOMEM;
+
+	if (bm_pool->construct)
+		if (bm_pool->construct(bm_pool, buf)) {
+			hwbm_buf_free(bm_pool, buf);
+			return -ENOMEM;
+		}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hwbm_pool_refill);
+
+int hwbm_pool_add(struct hwbm_pool *bm_pool, unsigned int buf_num, gfp_t gfp)
+{
+	int err, i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&bm_pool->lock, flags);
+	if (bm_pool->buf_num == bm_pool->size) {
+		pr_warn("pool already filled\n");
+		return bm_pool->buf_num;
+	}
+
+	if (buf_num + bm_pool->buf_num > bm_pool->size) {
+		pr_warn("cannot allocate %d buffers for pool\n",
+			buf_num);
+		return 0;
+	}
+
+	if ((buf_num + bm_pool->buf_num) < bm_pool->buf_num) {
+		pr_warn("Adding %d buffers to the %d current buffers will overflow\n",
+			buf_num,  bm_pool->buf_num);
+		return 0;
+	}
+
+	for (i = 0; i < buf_num; i++) {
+		err = hwbm_pool_refill(bm_pool, gfp);
+		if (err < 0)
+			break;
+	}
+
+	/* Update BM driver with number of buffers added to pool */
+	bm_pool->buf_num += i;
+
+	pr_debug("hwpm pool: %d of %d buffers added\n", i, buf_num);
+	spin_unlock_irqrestore(&bm_pool->lock, flags);
+
+	return i;
+}
+EXPORT_SYMBOL_GPL(hwbm_pool_add);
--- a/drivers/bus/mvebu-mbus.c
+++ b/drivers/bus/mvebu-mbus.c
@@ -948,6 +948,58 @@ void mvebu_mbus_get_pcie_io_aperture(str
 	*res = mbus_state.pcie_io_aperture;
 }
 
+int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr)
+{
+    const struct mbus_dram_target_info *dram;
+    int i;
+
+    /* Get dram info */
+    dram = mv_mbus_dram_info();
+    if (!dram) {
+        pr_err("missing DRAM information\n");
+        return -ENODEV;
+    }
+
+    /* Try to find matching DRAM window for phyaddr */
+    for (i = 0; i < dram->num_cs; i++) {
+        const struct mbus_dram_window *cs = dram->cs + i;
+
+        if (cs->base <= phyaddr &&
+            phyaddr <= (cs->base + cs->size - 1)) {
+            *target = dram->mbus_dram_target_id;
+            *attr = cs->mbus_attr;
+            return 0;
+        }
+    }
+
+    pr_err("invalid dram address 0x%x\n", phyaddr);
+    return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_dram_win_info);
+
+int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target,
+                   u8 *attr)
+{
+    int win;
+
+    for (win = 0; win < mbus_state.soc->num_wins; win++) {
+        u64 wbase;
+        int enabled;
+
+        mvebu_mbus_read_window(&mbus_state, win, &enabled, &wbase,
+                       size, target, attr, NULL);
+
+        if (!enabled)
+            continue;
+
+        if (wbase <= phyaddr && phyaddr <= wbase + *size)
+            return win;
+    }
+
+    return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(mvebu_mbus_get_io_win_info);
+
 static __init int mvebu_mbus_debugfs_init(void)
 {
 	struct mvebu_mbus_state *s = &mbus_state;
--- a/include/linux/mbus.h
+++ b/include/linux/mbus.h
@@ -69,6 +69,9 @@ static inline const struct mbus_dram_tar
 int mvebu_mbus_save_cpu_target(u32 *store_addr);
 void mvebu_mbus_get_pcie_mem_aperture(struct resource *res);
 void mvebu_mbus_get_pcie_io_aperture(struct resource *res);
+int mvebu_mbus_get_dram_win_info(phys_addr_t phyaddr, u8 *target, u8 *attr);
+int mvebu_mbus_get_io_win_info(phys_addr_t phyaddr, u32 *size, u8 *target,
+                   u8 *attr);
 int mvebu_mbus_add_window_remap_by_id(unsigned int target,
 				      unsigned int attribute,
 				      phys_addr_t base, size_t size,
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -360,7 +360,10 @@ static int sram_probe(struct platform_de
 		return -EBUSY;
 	}
 
-	sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size);
+	if (of_property_read_bool(pdev->dev.of_node, "no-memory-wc"))
+		sram->virt_base = devm_ioremap(sram->dev, res->start, size);
+	else
+		sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size);
 	if (IS_ERR(sram->virt_base))
 		return PTR_ERR(sram->virt_base);

--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -40,6 +40,19 @@ config MVMDIO
 
 	  This driver is used by the MV643XX_ETH and MVNETA drivers.
 
+config MVNETA_BM_ENABLE
+	tristate "Marvell Armada 38x/XP network interface BM support"
+	depends on MVNETA
+	---help---
+	  This driver supports auxiliary block of the network
+	  interface units in the Marvell ARMADA XP and ARMADA 38x SoC
+	  family, which is called buffer manager.
+
+	  This driver, when enabled, strictly cooperates with mvneta
+	  driver and is common for all network ports of the devices,
+	  even for Armada 370 SoC, which doesn't support hardware
+	  buffer management.
+
 config MVNETA
 	tristate "Marvell Armada 370/38x/XP network interface support"
 	depends on PLAT_ORION
@@ -53,6 +66,15 @@ config MVNETA
 	  driver, which should be used for the older Marvell SoCs
 	  (Dove, Orion, Discovery, Kirkwood).
 
+config MVNETA_BM
+	tristate
+	default y if MVNETA=y && MVNETA_BM_ENABLE
+	default MVNETA_BM_ENABLE
+	select HWBM
+	help
+	  MVNETA_BM must not be 'm' if MVNETA=y, so this symbol ensures
+	  that all dependencies are met.
+
 config MVPP2
 	tristate "Marvell Armada 375 network interface support"
 	depends on MACH_ARMADA_375
_______________________________________________
openwrt-devel mailing list
openwrt-devel at lists.openwrt.org
https://lists.openwrt.org/cgi-bin/mailman/listinfo/openwrt-devel


More information about the openwrt-devel mailing list