[bug report] blktests nvme/022 leads to kernel WARNING and NULL pointer

Yi Zhang yi.zhang at redhat.com
Fri May 21 01:38:33 BST 2021


> >
> > What about this?

Hi Hannes
With this patch, the WARNING/NULL pointer no longer occurs, but I still
see the 'keep-alive timer expired' and reset failure issues. Here is the
full log:

# ./check nvme/022
nvme/022 (test NVMe reset command on NVMeOF file-backed ns)  [failed]
    runtime  10.646s  ...  11.087s
    --- tests/nvme/022.out 2021-05-20 20:16:31.384068807 -0400
    +++ /root/blktests/results/nodev/nvme/022.out.bad 2021-05-20
20:24:27.874250466 -0400
    @@ -1,4 +1,5 @@
     Running nvme/022
     91fdba0d-f87b-4c25-b80f-db7be1418b9e
     uuid.91fdba0d-f87b-4c25-b80f-db7be1418b9e
    +ERROR: reset failed
     Test complete
# cat results/nodev/nvme/022.full
Reset: Network dropped connection on reset
NQN:blktests-subsystem-1 disconnected 1 controller(s)

[37353.068448] run blktests nvme/022 at 2021-05-20 20:24:16
[37353.146301] nvmet: adding nsid 1 to subsystem blktests-subsystem-1
[37353.161765] nvmet: creating controller 1 for subsystem
blktests-subsystem-1 for NQN
nqn.2014-08.org.nvmexpress:uuid:6a70d220-bfde-1000-03ce-ea40b8730904.
[37353.175796] nvme nvme0: creating 128 I/O queues.
[37353.189734] nvme nvme0: new ctrl: "blktests-subsystem-1"
[37354.216686] nvme nvme0: resetting controller
[37363.270607] nvmet: ctrl 1 keep-alive timer (5 seconds) expired!
[37363.276521] nvmet: ctrl 1 fatal error occurred!
[37363.281058] nvme nvme0: Removing ctrl: NQN "blktests-subsystem-1"

# ./check nvme/021
nvme/021 (test NVMe list command on NVMeOF file-backed ns)   [passed]
    runtime  10.958s  ...  11.382s
# dmesg
[38142.862881] run blktests nvme/021 at 2021-05-20 20:37:26
[38142.941038] nvmet: adding nsid 1 to subsystem blktests-subsystem-1
[38142.956621] nvmet: creating controller 1 for subsystem
blktests-subsystem-1 for NQN
nqn.2014-08.org.nvmexpress:uuid:6a70d220-bfde-1000-03ce-ea40b8730904.
[38142.970524] nvme nvme0: creating 128 I/O queues.
[38142.984356] nvme nvme0: new ctrl: "blktests-subsystem-1"
[38144.014601] nvme nvme0: Removing ctrl: NQN "blktests-subsystem-1"
[38153.030107] nvmet: ctrl 1 keep-alive timer (5 seconds) expired!
[38153.036018] nvmet: ctrl 1 fatal error occurred!


>
> diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
> index 74b3b150e1a5..9838a7d27bc1 100644
> --- a/drivers/nvme/target/loop.c
> +++ b/drivers/nvme/target/loop.c
> @@ -263,7 +263,8 @@ static const struct blk_mq_ops
> nvme_loop_admin_mq_ops = {
>
>   static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
>   {
> -       clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
> +       if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags))
> +               return;
>          nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
>          blk_cleanup_queue(ctrl->ctrl.admin_q);
>          blk_cleanup_queue(ctrl->ctrl.fabrics_q);
> @@ -299,6 +300,7 @@ static void nvme_loop_destroy_io_queues(struct
> nvme_loop_ctrl *ctrl)
>                  clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
>                  nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
>          }
> +       ctrl->ctrl.queue_count = 1;
>   }
>
>   static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
> @@ -405,6 +407,7 @@ static int nvme_loop_configure_admin_queue(struct
> nvme_loop_ctrl *ctrl)
>          return 0;
>
>   out_cleanup_queue:
> +       clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
>          blk_cleanup_queue(ctrl->ctrl.admin_q);
>   out_cleanup_fabrics_q:
>          blk_cleanup_queue(ctrl->ctrl.fabrics_q);
> @@ -462,8 +465,10 @@ static void nvme_loop_reset_ctrl_work(struct
> work_struct *work)
>          nvme_loop_shutdown_ctrl(ctrl);
>
>          if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
> -               /* state change failure should never happen */
> -               WARN_ON_ONCE(1);
> +               if (ctrl->ctrl.state != NVME_CTRL_DELETING &&
> +                   ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO)
> +                       /* state change failure for non-deleted ctrl? */
> +                       WARN_ON_ONCE(1);
>                  return;
>          }
>
>
> (I'll be sending out a formal patchset once it's confirmed).
>
> Cheers,
>
> Hannes
> --
> Dr. Hannes Reinecke                Kernel Storage Architect
> hare at suse.de                              +49 911 74053 688
> SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
> HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
>


-- 
Best Regards,
  Yi Zhang




More information about the Linux-nvme mailing list