[PATCH v4 0/2] nvme/pci: PRP list DMA pool partitioning

Keith Busch kbusch at kernel.org
Tue Apr 22 10:48:00 PDT 2025


On Tue, Apr 22, 2025 at 10:19:57AM -0600, Caleb Sander Mateos wrote:
> This reduces the _raw_spin_lock_irqsave overhead by about half, to
> 1.2%.

Could you try this atop your series? I hope to see if we can squeeze a
little more out by keeping the spinlock and list links local to the node
using them.

---
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cbd7734922f91..08a1488155084 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -412,9 +412,10 @@ nvme_setup_prp_pools(struct nvme_dev *dev, unsigned numa_node)
 	if (prp_pools->small)
 		return prp_pools; /* already initialized */
 
-	prp_pools->large = dma_pool_create("prp list page", dev->dev,
+	prp_pools->large = dma_pool_create_node("prp list page", dev->dev,
 						NVME_CTRL_PAGE_SIZE,
-						NVME_CTRL_PAGE_SIZE, 0);
+						NVME_CTRL_PAGE_SIZE, 0,
+						numa_node);
 	if (!prp_pools->large)
 		return ERR_PTR(-ENOMEM);
 
@@ -422,8 +423,9 @@ nvme_setup_prp_pools(struct nvme_dev *dev, unsigned numa_node)
 		small_align = 512;
 
 	/* Optimisation for I/Os between 4k and 128k */
-	prp_pools->small = dma_pool_create("prp list 256", dev->dev,
-						256, small_align, 0);
+	prp_pools->small = dma_pool_create_node("prp list 256", dev->dev,
+						256, small_align, 0,
+						numa_node);
 	if (!prp_pools->small) {
 		dma_pool_destroy(prp_pools->large);
 		prp_pools->large = NULL;
diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h
index f632ecfb42384..36cb5f66111c6 100644
--- a/include/linux/dmapool.h
+++ b/include/linux/dmapool.h
@@ -18,8 +18,16 @@ struct device;
 
 #ifdef CONFIG_HAS_DMA
 
-struct dma_pool *dma_pool_create(const char *name, struct device *dev, 
-			size_t size, size_t align, size_t allocation);
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+			size_t size, size_t align, size_t allocation, int node);
+
+/* Wrapper around dma_pool_create_node() that passes NUMA_NO_NODE as the node. */
+static inline struct dma_pool *
+dma_pool_create(const char *name, struct device *dev, size_t size,
+		size_t align, size_t allocation)
+{
+	return dma_pool_create_node(name, dev, size, align, allocation, NUMA_NO_NODE);
+}
 
 void dma_pool_destroy(struct dma_pool *pool);
 
@@ -35,6 +43,10 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev,
 void dmam_pool_destroy(struct dma_pool *pool);
 
 #else /* !CONFIG_HAS_DMA */
+/* Stub for !CONFIG_HAS_DMA builds: no pool is created, always returns NULL. */
+static inline struct dma_pool *dma_pool_create_node(const char *name,
+	struct device *dev, size_t size, size_t align, size_t allocation, int node)
+{ return NULL; }
 static inline struct dma_pool *dma_pool_create(const char *name,
 	struct device *dev, size_t size, size_t align, size_t allocation)
 { return NULL; }
diff --git a/mm/dmapool.c b/mm/dmapool.c
index f0bfc6c490f4e..e07242b18c576 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -56,6 +56,7 @@ struct dma_pool {		/* the pool */
 	unsigned int size;
 	unsigned int allocation;
 	unsigned int boundary;
+	int node;
 	char name[32];
 	struct list_head pools;
 };
@@ -199,12 +200,13 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
 
 
 /**
- * dma_pool_create - Creates a pool of consistent memory blocks, for dma.
+ * dma_pool_create_node - Creates a pool of consistent memory blocks, for dma.
  * @name: name of pool, for diagnostics
  * @dev: device that will be doing the DMA
  * @size: size of the blocks in this pool.
  * @align: alignment requirement for blocks; must be a power of two
  * @boundary: returned blocks won't cross this power of two boundary
+ * @node: NUMA node to use when allocating structs 'dma_pool' and 'dma_page'
  * Context: not in_interrupt()
  *
  * Given one of these pools, dma_pool_alloc()
@@ -221,8 +223,8 @@ static void pool_block_push(struct dma_pool *pool, struct dma_block *block,
  * Return: a dma allocation pool with the requested characteristics, or
  * %NULL if one can't be created.
  */
-struct dma_pool *dma_pool_create(const char *name, struct device *dev,
-				 size_t size, size_t align, size_t boundary)
+struct dma_pool *dma_pool_create_node(const char *name, struct device *dev,
+			size_t size, size_t align, size_t boundary, int node)
 {
 	struct dma_pool *retval;
 	size_t allocation;
@@ -251,13 +253,14 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
 
 	boundary = min(boundary, allocation);
 
-	retval = kzalloc(sizeof(*retval), GFP_KERNEL);
+	retval = kzalloc_node(sizeof(*retval), GFP_KERNEL, node);
 	if (!retval)
 		return retval;
 
 	strscpy(retval->name, name, sizeof(retval->name));
 
 	retval->dev = dev;
+	retval->node = node;
 
 	INIT_LIST_HEAD(&retval->page_list);
 	spin_lock_init(&retval->lock);
@@ -335,7 +338,7 @@ static struct dma_page *pool_alloc_page(struct dma_pool *pool, gfp_t mem_flags)
 {
 	struct dma_page *page;
 
-	page = kmalloc(sizeof(*page), mem_flags);
+	page = kmalloc_node(sizeof(*page), mem_flags, pool->node);
 	if (!page)
 		return NULL;
 
--



More information about the Linux-nvme mailing list