[PATCH] nvme: improve check for attached metadata buffers

Keith Busch keith.busch at intel.com
Mon Nov 6 16:00:06 PST 2017


On Mon, Nov 06, 2017 at 09:26:16PM +0100, Christoph Hellwig wrote:
> How about something like this should do the right thing.  But while
> looking over that I'm not sure our !CONFIG_BLK_DEV_INTEGRITY case
> for inserting/stripping PI really works - as far as I can tell we
> should never set NVME_RW_PRINFO_PRCHK_GUARD in that case, or am I
> missing something?

I think it should be okay as-is. With PRACT, the PRCHK fields should
tell the controller what to generate on writes and what to check on
reads before stripping. See section 8.3.1.1 and 8.3.1.2.

Just noticed potential ECN material in the above sections: 8.3.1.1
lables the two cases '1' and '2', where 8.3.1.2 labels them 'a' and
'b'. Poor consistency, there. :)

> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 66211617e7e8..3a58a07d2a53 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -137,6 +137,11 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
>  }
>  EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync);
>  
> +static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
> +{
> +	return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
> +}
> +
>  static blk_status_t nvme_error_status(struct request *req)
>  {
>  	switch (nvme_req(req)->status & 0x7ff) {
> @@ -472,16 +477,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
>  	u16 control = 0;
>  	u32 dsmgmt = 0;
>  
> -	/*
> -	 * If formated with metadata, require the block layer provide a buffer
> -	 * unless this namespace is formated such that the metadata can be
> -	 * stripped/generated by the controller with PRACT=1.
> -	 */
> -	if (ns && ns->ms &&
> -	    (!ns->pi_type || ns->ms != sizeof(struct t10_pi_tuple)) &&
> -	    !blk_integrity_rq(req) && !blk_rq_is_passthrough(req))
> -		return BLK_STS_NOTSUPP;
> -
>  	if (req->cmd_flags & REQ_FUA)
>  		control |= NVME_RW_FUA;
>  	if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
> @@ -500,6 +495,18 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
>  		nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
>  
>  	if (ns->ms) {
> +		/*
> +		 * If formated with metadata, the block layer always provides a
> +		 * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled.  Else
> +		 * we enable the PRACT bit for protection information or set the
> +		 * namespace capacity to zero to prevent any I/O.
> +		 */
> +		if (!blk_integrity_rq(req)) {
> +			if (WARN_ON_ONCE(!nvme_ns_has_pi(ns)))
> +				return BLK_STS_NOTSUPP;
> +			control |= NVME_RW_PRINFO_PRACT;
> +		}
> +
>  		switch (ns->pi_type) {
>  		case NVME_NS_DPS_PI_TYPE3:
>  			control |= NVME_RW_PRINFO_PRCHK_GUARD;
> @@ -512,8 +519,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
>  					nvme_block_nr(ns, blk_rq_pos(req)));
>  			break;
>  		}
> -		if (!blk_integrity_rq(req))
> -			control |= NVME_RW_PRINFO_PRACT;
>  	}
>  
>  	cmnd->rw.control = cpu_to_le16(control);
> @@ -1173,7 +1178,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
>  	if (ns->ms && !ns->ext &&
>  	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
>  		nvme_init_integrity(disk, ns->ms, ns->pi_type);
> -	if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
> +	if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
>  		capacity = 0;
>  	set_capacity(disk, capacity);
>  

This looks good.



More information about the Linux-nvme mailing list