[PATCH 2/3] nvme: multipath: only update ctrl->nr_active when using queue-depth iopolicy

Ewan Milne emilne at redhat.com
Mon Nov 13 13:16:33 PST 2023


I think quiescing all the queues associated with the nvme_ctrl would be
undesirable just to handle changing the iopolicy, which only needs to
affect I/O that has not yet gone through the path selection.  It would
be better not to make the sysfs write have to wait, as well (although
this could be done with a work item, it seems like overkill).

Probably there is not enough contention for the atomic_dec_if_positive()
to loop, but I take your point, I'll look at it.  The equipment I have
here is likely not fast enough to be sure.

-Ewan



On Thu, Nov 9, 2023 at 8:18 PM Uday Shankar <ushankar at purestorage.com> wrote:
>
> On Tue, Nov 07, 2023 at 04:23:30PM -0500, Ewan D. Milne wrote:
> > The atomic updates of ctrl->nr_active are unnecessary when using
> > numa or round-robin iopolicy, so avoid that cost on a per-request basis.
> > Clear nr_active when changing iopolicy and do not decrement below zero.
> > (This handles changing the iopolicy while requests are in flight.)
>
> Instead of trying to handle a changing iopolicy while requests are in
> flight, can we quiesce I/O when we change the iopolicy? That should let
> us simplify/speed up the logic in the I/O path a bit
> (atomic_dec_if_positive seems to hide a cmpxchg loop on most
> architectures, which can be slower than an atomic_dec).
>
> >
> > Signed-off-by: Ewan D. Milne <emilne at redhat.com>
> > ---
> >  drivers/nvme/host/core.c      |  2 +-
> >  drivers/nvme/host/multipath.c | 21 ++++++++++++++++++---
> >  drivers/nvme/host/nvme.h      |  2 ++
> >  3 files changed, 21 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> > index 75a1b58a7a43..9bc19755be77 100644
> > --- a/drivers/nvme/host/core.c
> > +++ b/drivers/nvme/host/core.c
> > @@ -110,7 +110,7 @@ struct workqueue_struct *nvme_delete_wq;
> >  EXPORT_SYMBOL_GPL(nvme_delete_wq);
> >
> >  static LIST_HEAD(nvme_subsystems);
> > -static DEFINE_MUTEX(nvme_subsystems_lock);
> > +DEFINE_MUTEX(nvme_subsystems_lock);
> >
> >  static DEFINE_IDA(nvme_instance_ida);
> >  static dev_t nvme_ctrl_base_chr_devt;
> > diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
> > index 4c2690cddef3..e184e7c377bc 100644
> > --- a/drivers/nvme/host/multipath.c
> > +++ b/drivers/nvme/host/multipath.c
> > @@ -133,7 +133,8 @@ void nvme_mpath_start_request(struct request *rq)
> >       if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
> >               return;
> >
> > -     atomic_inc(&ns->ctrl->nr_active);
> > +     if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD)
> > +             atomic_inc(&ns->ctrl->nr_active);
> >       nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
> >       nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
> >                                                     jiffies);
> > @@ -147,7 +148,8 @@ void nvme_mpath_end_request(struct request *rq)
> >       if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
> >               return;
> >
> > -     atomic_dec(&ns->ctrl->nr_active);
> > +     if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD)
> > +             atomic_dec_if_positive(&ns->ctrl->nr_active);
> >       bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
> >                        blk_rq_bytes(rq) >> SECTOR_SHIFT,
> >                        nvme_req(rq)->start_time);
> > @@ -848,6 +850,19 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
> >                         nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
> >  }
> >
> > +void nvme_subsys_iopolicy_update(struct nvme_subsystem *subsys, int iopolicy)
> > +{
> > +     struct nvme_ctrl *ctrl;
> > +
> > +     WRITE_ONCE(subsys->iopolicy, iopolicy);
> > +
> > +     mutex_lock(&nvme_subsystems_lock);
> > +     list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
> > +             atomic_set(&ctrl->nr_active, 0);
> > +     }
> > +     mutex_unlock(&nvme_subsystems_lock);
> > +}
> > +
> >  static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
> >               struct device_attribute *attr, const char *buf, size_t count)
> >  {
> > @@ -857,7 +872,7 @@ static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
> >
> >       for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
> >               if (sysfs_streq(buf, nvme_iopolicy_names[i])) {
> > -                     WRITE_ONCE(subsys->iopolicy, i);
> > +                     nvme_subsys_iopolicy_update(subsys, i);
> >                       return count;
> >               }
> >       }
> > diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
> > index f0f3fd8b4197..c4469bc38d89 100644
> > --- a/drivers/nvme/host/nvme.h
> > +++ b/drivers/nvme/host/nvme.h
> > @@ -49,6 +49,8 @@ extern struct workqueue_struct *nvme_wq;
> >  extern struct workqueue_struct *nvme_reset_wq;
> >  extern struct workqueue_struct *nvme_delete_wq;
> >
> > +extern struct mutex nvme_subsystems_lock;
> > +
> >  /*
> >   * List of workarounds for devices that required behavior not specified in
> >   * the standard.
> > --
> > 2.20.1
> >
> >
>




More information about the Linux-nvme mailing list