[PATCH v2 blktests] tests/nvme: add tests for error logging

Sagi Grimberg sagi at grimberg.me
Tue May 10 15:25:37 PDT 2022



On 5/10/22 19:43, Alan Adamson wrote:
> Test nvme error logging by injecting errors. Kernel must have FAULT_INJECTION
> and FAULT_INJECTION_DEBUG_FS configured to use error injector. Tests can be
> run with or without NVME_VERBOSE_ERRORS configured.
> 
> These test verify the functionality delivered by the follow:
> 	commit bd83fe6f2cd2 ("nvme: add verbose error logging")
> 
> Signed-off-by: Alan Adamson <alan.adamson at oracle.com>
> Signed-off-by: Chaitanya Kulkarni <kch at nvidia.com>
> ---
>   tests/nvme/039     | 185 +++++++++++++++++++++++++++++++++++++++++++++
>   tests/nvme/039.out |   7 ++
>   2 files changed, 192 insertions(+)
>   create mode 100755 tests/nvme/039
>   create mode 100644 tests/nvme/039.out
> 
> diff --git a/tests/nvme/039 b/tests/nvme/039
> new file mode 100755
> index 000000000000..e6d45a6e3fe5
> --- /dev/null
> +++ b/tests/nvme/039
> @@ -0,0 +1,185 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-3.0+
> +# Copyright (C) 2022 Oracle and/or its affiliates
> +#
> +# Test nvme error logging by injecting errors. Kernel must have FAULT_INJECTION
> +# and FAULT_INJECTION_DEBUG_FS configured to use error injector. Tests can be
> +# run with or without NVME_VERBOSE_ERRORS configured.
> +#
> +# Test for commit bd83fe6f2cd2 ("nvme: add verbose error logging").
> +
> +. tests/nvme/rc
> +DESCRIPTION="test error logging"
> +QUICK=1
> +
> +requires() {
> +	_nvme_requires
> +	_have_kernel_option FAULT_INJECTION && \
> +	    _have_kernel_option FAULT_INJECTION_DEBUG_FS
> +}
> +
> +declare -A NS_DEV_FAULT_INJECT_SAVE
> +declare -A CTRL_DEV_FAULT_INJECT_SAVE
> +
> +save_err_inject_attr()
> +{
> +	local a
> +
> +	for a in /sys/kernel/debug/"${ns_dev}"/fault_inject/*; do
> +		NS_DEV_FAULT_INJECT_SAVE[${a}]=$(<"${a}")
> +	done
> +	for a in /sys/kernel/debug/"${ctrl_dev}"/fault_inject/*; do
> +		CTRL_DEV_FAULT_INJECT_SAVE[${a}]=$(<"${a}")
> +	done
> +}
> +
> +restore_err_inject_attr()
> +{
> +	local a
> +
> +	for a in /sys/kernel/debug/"${ns_dev}"/fault_inject/*; do
> +		echo "${NS_DEV_FAULT_INJECT_SAVE[${a}]}" > "${a}"
> +	done
> +	for a in /sys/kernel/debug/"${ctrl_dev}"/fault_inject/*; do
> +		echo "${CTRL_DEV_FAULT_INJECT_SAVE[${a}]}" > "${a}"
> +	done
> +}
> +
> +set_verbose_prob_retry()
> +{
> +	echo 0 > /sys/kernel/debug/"$1"/fault_inject/verbose
> +	echo 100 > /sys/kernel/debug/"$1"/fault_inject/probability
> +	echo 1 > /sys/kernel/debug/"$1"/fault_inject/dont_retry
> +}
> +
> +set_status_time()
> +{
> +	echo "$1" > /sys/kernel/debug/"$3"/fault_inject/status
> +	echo "$2" > /sys/kernel/debug/"$3"/fault_inject/times
> +}
> +
> +inject_unrec_read_err()
> +{
> +	# Inject a 'Unrecovered Read Error' error on a READ
> +	set_status_time 0x281 1 "$1"
> +
> +	dd if=/dev/"$1" of=/dev/null bs=512 count=1 iflag=direct \
> +	    2> /dev/null 1>&2
> +
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -2 | grep "Unrecovered Read Error (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Read/g' | \
> +		    sed 's/I\/O Error/Unrecovered Read Error/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_invalid_read_err()
> +{
> +	# Inject a valid invalid error status (0x375) on a READ
> +	set_status_time 0x375 1 "$1"
> +
> +	dd if=/dev/"$1" of=/dev/null bs=512 count=1 iflag=direct \
> +	    2> /dev/null 1>&2
> +
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -2 | grep "Unknown (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Read/g' | \
> +		    sed 's/I\/O Error/Unknown/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_write_fault()
> +{
> +	# Inject a 'Write Fault' error on a WRITE
> +	set_status_time 0x280 1 "$1"
> +
> +	dd if=/dev/zero of=/dev/"$1" bs=512 count=1 oflag=direct \
> +	    2> /dev/null 1>&2
> +
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -2 | grep "Write Fault (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -2 | grep "Cmd(" | sed 's/I\/O Cmd/Write/g' | \
> +		    sed 's/I\/O Error/Write Fault/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_id_admin()
> +{
> +	# Inject a valid (Identify) Admin command
> +	set_status_time 0x286 1000 "$1"
> +
> +	nvme admin-passthru /dev/"$1" --opcode=0x06 --data-len=4096 \
> +	    --cdw10=1 -r 2> /dev/null 1>&2
> +
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -1 | grep "Access Denied (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -1 | grep "Admin Cmd(" | \
> +		    sed 's/Admin Cmd/Identify/g' | \
> +		    sed 's/I\/O Error/Access Denied/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +
> +inject_invalid_cmd()
> +{
> +	# Inject an invalid command (0x96)
> +	set_status_time 0x1 1 "$1"
> +
> +	nvme admin-passthru /dev/"$1" --opcode=0x96 --data-len=4096 \
> +	    --cdw10=1 -r 2> /dev/null 1>&2
> +	if ${nvme_verbose_errors}; then
> +		dmesg -t | tail -1 | grep "Invalid Command Opcode (" | \
> +		    sed 's/nvme.*://g'
> +	else
> +		dmesg -t | tail -1 | grep "Admin Cmd(" | \
> +		    sed 's/Admin Cmd/Unknown/g' | \
> +		    sed 's/I\/O Error/Invalid Command Opcode/g' | \
> +		    sed 's/nvme.*://g'
> +	fi
> +}
> +

All of the above seems like they belong in common code...

> +test_device() {
> +	local nvme_verbose_errors
> +	local ns_dev
> +	local ctrl_dev
> +
> +	echo "Running ${TEST_NAME}"
> +
> +	if _have_kernel_option NVME_VERBOSE_ERRORS; then
> +		nvme_verbose_errors=true
> +	else
> +		unset SKIP_REASON
> +		nvme_verbose_errors=false
> +	fi
> +
> +	ns_dev=${TEST_DEV##*/}
> +	ctrl_dev=${ns_dev%n*}
> +
> +	save_err_inject_attr
> +
> +	set_verbose_prob_retry "${ns_dev}"
> +
> +	inject_unrec_read_err "${ns_dev}"
> +	inject_invalid_read_err "${ns_dev}"
> +	inject_write_fault "${ns_dev}"
> +
> +	set_verbose_prob_retry "${ctrl_dev}"
> +
> +	inject_id_admin "${ctrl_dev}"
> +	inject_invalid_cmd "${ctrl_dev}"
> +
> +	restore_err_inject_attr
> +
> +	echo "Test complete"
> +}
> diff --git a/tests/nvme/039.out b/tests/nvme/039.out
> new file mode 100644
> index 000000000000..162935eb1d7b
> --- /dev/null
> +++ b/tests/nvme/039.out
> @@ -0,0 +1,7 @@
> +Running nvme/039
> + Read(0x2) @ LBA 0, 1 blocks, Unrecovered Read Error (sct 0x2 / sc 0x81) DNR
> + Read(0x2) @ LBA 0, 1 blocks, Unknown (sct 0x3 / sc 0x75) DNR
> + Write(0x1) @ LBA 0, 1 blocks, Write Fault (sct 0x2 / sc 0x80) DNR
> + Identify(0x6), Access Denied (sct 0x2 / sc 0x86) DNR
> + Unknown(0x96), Invalid Command Opcode (sct 0x0 / sc 0x1) DNR
> +Test complete



More information about the Linux-nvme mailing list