[PATCH] tests/nvme: Add admin-passthru+reset race test
Jonathan Derrick
jonathan.derrick at linux.dev
Mon Nov 14 12:34:12 PST 2022
Add a test that runs many format commands and controller resets in parallel.
The intent is to expose timing holes in the controller state machine
that lead to hung-task timeouts and to the controller becoming
unavailable.
Reported by https://bugzilla.kernel.org/show_bug.cgi?id=216354
Signed-off-by: Jonathan Derrick <jonathan.derrick at linux.dev>
---
tests/nvme/046 | 85 ++++++++++++++++++++++++++++++++++++++++++++++
tests/nvme/046.out | 2 ++
2 files changed, 87 insertions(+)
create mode 100755 tests/nvme/046
create mode 100644 tests/nvme/046.out
diff --git a/tests/nvme/046 b/tests/nvme/046
new file mode 100755
index 0000000..4b47783
--- /dev/null
+++ b/tests/nvme/046
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-3.0+
+# Copyright (C) 2022 Jonathan Derrick <jonathan.derrick at linux.dev>
+#
+# Test nvme reset controller during admin passthru
+#
+# Regression for issue reported by
+# https://bugzilla.kernel.org/show_bug.cgi?id=216354
+
+. tests/nvme/rc
+
+# Restrict this test to the nvme-pci transport only.
+nvme_trtype=pci
+
+DESCRIPTION="test nvme reset controller during admin passthru"
+QUICK=1
+CAN_BE_ZONED=1
+
+requires() {
+ _nvme_requires # not defined in this file; presumably provided by the sourced tests/nvme/rc
+}
+
+device_requires() {
+ _require_test_dev_is_nvme # defined outside this file; by its name, checks the test device is NVMe
+}
+
+test_device() {
+ echo "Running ${TEST_NAME}"
+
+ local sysfs
+ local attr
+ local m
+
+ sysfs="$TEST_DEV_SYSFS/device"
+ timeout=$(($(cat /proc/sys/kernel/hung_task_timeout_secs) / 2))
+
+ sleep 5
+
+ if [[ ! -d "$sysfs" ]]; then
+ echo "$sysfs doesn't exist"
+ fi
+
+ # do reset controller/format loops
+ # don't check status now because a timing race is desired
+ i=0
+ start=0
+ timing_out=false
+ while [[ $i -le 1000 ]]; do
+ start=$SECONDS
+ if [[ -f "$sysfs/reset_controller" ]]; then
+ echo 1 > "$sysfs/reset_controller" 2>/dev/null &
+ i=$((i+1))
+ fi
+ nvme format -l 0 -f $TEST_DEV 2>/dev/null &
+
+ #Assume the controller is hung and unrecoverable
+ if [[ $(($SECONDS - $start)) -gt $timeout ]]; then
+ echo "nvme controller timing out"
+ timing_out=true
+ break
+ fi
+ done
+
+ { kill $!; wait; } &> /dev/null
+
+ # at this point it may have waited hung_task_timeout / 2 already, so
+ # only wait 25% longer for a total of about 75% of allowed timeout
+ m=0
+ while [[ $m -le $((timeout / 2)) ]]; do
+ if [[ $timing_out == true ]]; then
+ break
+ fi
+ if grep -q live "$sysfs/state"; then
+ break
+ fi
+ sleep 1
+ m=$((m+1))
+ done
+ if ! grep -q live "$sysfs/state"; then
+ echo "nvme still not live after $(($SECONDS - $start)) seconds!"
+ fi
+ udevadm settle
+
+ echo "Test complete"
+}
diff --git a/tests/nvme/046.out b/tests/nvme/046.out
new file mode 100644
index 0000000..2b5fa6a
--- /dev/null
+++ b/tests/nvme/046.out
@@ -0,0 +1,2 @@
+Running nvme/046
+Test complete
--
2.31.1
More information about the Linux-nvme
mailing list