[RFC PATCH] dm: fix excessive dm-mq context switching
Mike Snitzer
snitzer at redhat.com
Sun Feb 7 08:53:40 PST 2016
On Sun, Feb 07 2016 at 11:43am -0500,
Sagi Grimberg <sagig at dev.mellanox.co.il> wrote:
>
> >Hello Sagi,
>
> Hey Bart,
>
> >Did you run your test on a NUMA system ?
>
> I did.
>
> >If so, can you check with e.g.
> >perf record -ags -e LLC-load-misses sleep 10 && perf report whether this
> >workload triggers perhaps lock contention ? What you need to look for in
> >the perf output is whether any functions occupy more than 10% CPU time.
>
> I will, thanks for the tip!
Also, I found ftrace's function_graph tracer very helpful (it is how I
found the various issues fixed by the first context switch patch). Here
is my latest script:
#!/bin/sh
# Benchmark harness: run fio randread against a null_blk device and a dm-mq
# multipath device layered on it, optionally under perf or ftrace, to measure
# dm-mq context-switch overhead.
# NOTE(review): POSIX sh shebang — keep the script free of bashisms (arrays, [[ ]]).
set -xv
# null_blk module tunables (see modprobe line below)
NULL_BLK_HW_QUEUES=4
NULL_BLK_QUEUE_DEPTH=4096
# dm-mq module parameters (written to /sys/module/dm_mod/parameters)
DM_MQ_HW_QUEUES=4
DM_MQ_QUEUE_DEPTH=2048
# Path to the fio binary (here: a local git build)
FIO=/root/snitm/git/fio/fio
# fio workload shape: per-job iodepth, seconds to run, number of jobs
FIO_QUEUE_DEPTH=32
FIO_RUNTIME=10
FIO_NUMJOBS=12
# perf binary; uncomment the second line to use an in-tree build instead
PERF=perf
#PERF=/root/snitm/git/linux/tools/perf/perf
# Run the fixed fio randread workload against a device, optionally wrapped
# by a profiler command.
#   $1 - device node to exercise (e.g. /dev/nullb0, /dev/mapper/dm_mq)
#   $2 - optional multi-word profiler prefix (e.g. "perf record -ag");
#        when set, fio runs under it and perf.data is renamed per-device.
run_fio() {
	DEVICE=$1
	TASK_NAME=$(basename "${DEVICE}")
	PERF_RECORD=$2
	# RUN_CMD is deliberately a flat string: it is expanded unquoted below so
	# the shell word-splits it into the fio argv (POSIX sh has no arrays).
	# All substituted values are option-style tokens without whitespace.
	RUN_CMD="${FIO} --cpus_allowed_policy=split --group_reporting --rw=randread --bs=4k --numjobs=${FIO_NUMJOBS} \
	  --iodepth=${FIO_QUEUE_DEPTH} --runtime=${FIO_RUNTIME} --time_based --loops=1 --ioengine=libaio \
	  --direct=1 --invalidate=1 --randrepeat=1 --norandommap --exitall --name task_${TASK_NAME} --filename=${DEVICE}"
	# -n with a quoted operand is the robust spelling of the old `! -z $2`
	# test (which broke word-splitting when $2 was unset or multi-word).
	if [ -n "${PERF_RECORD}" ]; then
		# PERF_RECORD is intentionally unquoted: it is a command prefix
		# that must split into words (e.g. "perf record -ag -e cs").
		${PERF_RECORD} ${RUN_CMD}
		mv perf.data "perf.data.${TASK_NAME}"
	else
		${RUN_CMD}
	fi
}
# Run the fio workload on a device with the ftrace function_graph tracer
# enabled, then save the trace buffer to trace.<device-basename>.
# Requires debugfs mounted at /sys/kernel/debug and root privileges.
#   $1 - device node to exercise
run_fio_with_ftrace() {
	DEVICE=$1
	TASK_NAME=$(basename "${DEVICE}")
	TRACING=/sys/kernel/debug/tracing
	# Clear the ring buffer and make sure tracing is off before switching tracer.
	echo > ${TRACING}/trace
	echo 0 > ${TRACING}/tracing_on
	echo function_graph > ${TRACING}/current_tracer
	echo 1 > ${TRACING}/tracing_on
	# Quote the device path (the original passed it unquoted).
	run_fio "${DEVICE}"
	echo 0 > ${TRACING}/tracing_on
	# Snapshot the trace before restoring the nop tracer (which clears it).
	cat ${TRACING}/trace > "trace.${TASK_NAME}"
	echo nop > ${TRACING}/current_tracer
}
# --- top-level setup and run (requires root) ---
# Tear down any dm device left from a previous run, then reload null_blk.
# NOTE(review): both commands fail harmlessly on a clean system; the script
# does not use `set -e`, so execution continues regardless.
dmsetup remove dm_mq
modprobe -r null_blk
# 4 GiB null block device, blk-mq mode (queue_mode=2), soft-irq completion
# (irqmode=1) with a 1 ns simulated completion latency.
modprobe null_blk gb=4 bs=512 hw_queue_depth=${NULL_BLK_QUEUE_DEPTH} nr_devices=1 queue_mode=2 irqmode=1 completion_nsec=1 submit_queues=${NULL_BLK_HW_QUEUES}
# Baseline runs against raw null_blk (uncomment to enable):
#run_fio /dev/nullb0 "${PERF} record -ag -e cs"
#run_fio /dev/nullb0 "${PERF} stat"
# Configure dm-mq before the mapped device is created; these module
# parameters are read at table load time.
echo Y > /sys/module/dm_mod/parameters/use_blk_mq
echo ${DM_MQ_QUEUE_DEPTH} > /sys/module/dm_mod/parameters/blk_mq_queue_depth
echo ${DM_MQ_HW_QUEUES} > /sys/module/dm_mod/parameters/blk_mq_nr_hw_queues
# 8388608 sectors = 4 GiB multipath table with a single path (/dev/nullb0).
echo "0 8388608 multipath 0 0 1 1 service-time 0 1 2 /dev/nullb0 1000 1" | dmsetup create dm_mq
# Alternative: plain linear target instead of multipath.
#echo "0 8388608 linear /dev/nullb0 0" | dmsetup create dm_mq
# Active measurement; swap in one of the commented variants for perf/trace-cmd.
run_fio_with_ftrace /dev/mapper/dm_mq
#run_fio /dev/mapper/dm_mq
#run_fio /dev/mapper/dm_mq "${PERF} record -ag -e cs"
#run_fio /dev/mapper/dm_mq "${PERF} record -ag"
#run_fio /dev/mapper/dm_mq "${PERF} stat"
#run_fio /dev/mapper/dm_mq "trace-cmd record -e all"
More information about the Linux-nvme
mailing list