[PATCHv5 8/8] nvme-pci: convert metadata mapping to dma iter

Christoph Hellwig hch at lst.de
Sun Aug 10 07:27:48 PDT 2025


On Fri, Aug 08, 2025 at 08:58:26AM -0700, Keith Busch wrote:
>  
>  struct nvme_dma_vec {
> @@ -281,13 +285,14 @@ struct nvme_iod {
>  	u8 nr_descriptors;
>  
>  	unsigned int total_len;
> +	unsigned int meta_total_len;
>  	struct dma_iova_state dma_state;
> +	struct dma_iova_state meta_dma_state;
>  	void *descriptors[NVME_MAX_NR_DESCRIPTORS];
>  	struct nvme_dma_vec *dma_vecs;
>  	unsigned int nr_dma_vecs;
>  
>  	dma_addr_t meta_dma;
> -	struct sg_table meta_sgt;
>  	struct nvme_sgl_desc *meta_descriptor;

Maybe keep the meta fields together as much as we can to ensure they
are in the same cacheline(s)?
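
Something like this maybe, just shuffling the fields from the hunk quoted
above so the metadata state sits together (untested, layout only):

	unsigned int total_len;
	struct dma_iova_state dma_state;
	void *descriptors[NVME_MAX_NR_DESCRIPTORS];
	struct nvme_dma_vec *dma_vecs;
	unsigned int nr_dma_vecs;

	/* metadata mapping state, grouped to share cacheline(s) */
	unsigned int meta_total_len;
	struct dma_iova_state meta_dma_state;
	dma_addr_t meta_dma;
	struct nvme_sgl_desc *meta_descriptor;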

> +static void nvme_unmap_metadata(struct request *req)
> +{
> +	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
> +	enum dma_data_direction dir = rq_dma_dir(req);
> +	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> +	struct device *dma_dev = nvmeq->dev->dev;
> +
> +	if (iod->flags & IOD_META_MPTR) {
> +		dma_unmap_page(dma_dev, iod->meta_dma,
> +			       rq_integrity_vec(req).bv_len,
> +			       rq_dma_dir(req));
> +		return;
> +	}
> +
> +	if (!blk_rq_dma_unmap(req, dma_dev, &iod->meta_dma_state,
> +				iod->meta_total_len,
> +				iod->flags & IOD_META_P2P_BUS_ADDR)) {
> +		if (nvme_pci_cmd_use_meta_sgl(&iod->cmd))
> +			nvme_free_meta_sgls(iod, dma_dev, dir);
> +		else
> +			dma_unmap_page(dma_dev, iod->meta_dma,
> +				       iod->meta_total_len, dir);
> +	}

IOD_META_MPTR above really should be named IOD_SINGLE_META_SEGMENT, as
it's all about avoiding the dma iterator, which can also produce a
single-segment case like the one handled just above.
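
I.e. the check in nvme_unmap_metadata() would read something like this
(same body as quoted above, only the flag renamed; untested):

	if (iod->flags & IOD_SINGLE_META_SEGMENT) {
		dma_unmap_page(dma_dev, iod->meta_dma,
			       rq_integrity_vec(req).bv_len, dir);
		return;
	}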

>  static blk_status_t nvme_pci_setup_meta_sgls(struct request *req)
>  {
>  	struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
> +	unsigned int entries = req->nr_integrity_segments;
>  	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
> +	struct nvme_dev *dev = nvmeq->dev;
>  	struct nvme_sgl_desc *sg_list;
> +	struct blk_dma_iter iter;
>  	dma_addr_t sgl_dma;
> +	int i = 0;
>  
> +	if (!blk_rq_integrity_dma_map_iter_start(req, dev->dev,
> +						&iod->meta_dma_state, &iter))
> +		return iter.status;
>  
> +	if (iter.p2pdma.map == PCI_P2PDMA_MAP_BUS_ADDR)
> +		iod->flags |= IOD_META_P2P_BUS_ADDR;
> +	else if (blk_rq_dma_map_coalesce(&iod->meta_dma_state))
> +		entries = 1;
> +
> +	if (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD)) {
> +		iod->cmd.common.metadata = cpu_to_le64(iter.addr);
> +		iod->meta_total_len = iter.len;
> +		iod->meta_dma = iter.addr;
> +		iod->meta_descriptor = NULL;
> +		return BLK_STS_OK;

Maybe throw in a comment explaining that we fall back to a single metadata
pointer here if we can, and why we don't for passthrough requests?
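
Something along these lines perhaps, with the actual reason for the
NVME_REQ_USERCMD exclusion filled in by you:

	if (entries == 1 && !(nvme_req(req)->flags & NVME_REQ_USERCMD)) {
		/*
		 * Fall back to the plain metadata pointer (MPTR) when
		 * everything fits into a single segment, instead of
		 * building a metadata SGL.  Passthrough requests stay
		 * on the SGL path because <reason goes here>.
		 */
		iod->cmd.common.metadata = cpu_to_le64(iter.addr);
		...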



