[PATCH] blk-cgroup: Replace u64 sync with spinlock for iostat update

boy.wu boy.wu at mediatek.com
Tue Jul 16 00:46:53 PDT 2024


From: Boy Wu <boy.wu at mediatek.com>

In 32bit SMP systems, if multiple CPUs call blkcg_print_stat,
it may cause blkcg_fill_root_iostats to have a concurrent problem
on the seqlock in u64_stats_update, which will cause a deadlock
on u64_stats_fetch_begin in blkcg_print_one_stat.

Thus, replace u64 sync with spinlock to protect iostat update.

Fixes: ef45fe470e1e ("blk-cgroup: show global disk stats in root cgroup io.stat")
Signed-off-by: Boy Wu <boy.wu at mediatek.com>
---
Change in v2:
 - update commit message
 - Remove u64_sync
 - Replace spin_lock_irq with guard statement
 - Replace blkg->q->queue_lock with blkg_stat_lock
Change in v3:
 - update commit message
 - Add spinlock in blkg_iostat_set structure
 - Replace all u64_sync with spinlock for iostat
 - Replace blkg_stat_lock with iostat.spinlock
---
 block/blk-cgroup.c | 62 +++++++++++++++++++---------------------------
 block/blk-cgroup.h |  1 +
 2 files changed, 26 insertions(+), 37 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 37e6cc91d576..4b66f37c45a0 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -329,7 +329,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,
 	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
 #endif
 
-	u64_stats_init(&blkg->iostat.sync);
+	spin_lock_init(&blkg->iostat.spinlock);
 	for_each_possible_cpu(cpu) {
 		u64_stats_init(&per_cpu_ptr(blkg->iostat_cpu, cpu)->sync);
 		per_cpu_ptr(blkg->iostat_cpu, cpu)->blkg = blkg;
@@ -995,15 +995,13 @@ static void blkcg_iostat_update(struct blkcg_gq *blkg, struct blkg_iostat *cur,
 				struct blkg_iostat *last)
 {
 	struct blkg_iostat delta;
-	unsigned long flags;
 
 	/* propagate percpu delta to global */
-	flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+	guard(spinlock_irqsave)(&blkg->iostat.spinlock);
 	blkg_iostat_set(&delta, cur);
 	blkg_iostat_sub(&delta, last);
 	blkg_iostat_add(&blkg->iostat.cur, &delta);
 	blkg_iostat_add(last, &delta);
-	u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
 }
 
 static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
@@ -1034,7 +1032,6 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
 		struct blkcg_gq *blkg = bisc->blkg;
 		struct blkcg_gq *parent = blkg->parent;
 		struct blkg_iostat cur;
-		unsigned int seq;
 
 		/*
 		 * Order assignment of `next_bisc` from `bisc->lnode.next` in
@@ -1051,10 +1048,8 @@ static void __blkcg_rstat_flush(struct blkcg *blkcg, int cpu)
 			goto propagate_up; /* propagate up to parent only */
 
 		/* fetch the current per-cpu values */
-		do {
-			seq = u64_stats_fetch_begin(&bisc->sync);
+		scoped_guard(spinlock_irqsave, &bisc->spinlock)
 			blkg_iostat_set(&cur, &bisc->cur);
-		} while (u64_stats_fetch_retry(&bisc->sync, seq));
 
 		blkcg_iostat_update(blkg, &cur, &bisc->last);
 
@@ -1112,7 +1107,6 @@ static void blkcg_fill_root_iostats(void)
 		struct blkcg_gq *blkg = bdev->bd_disk->queue->root_blkg;
 		struct blkg_iostat tmp;
 		int cpu;
-		unsigned long flags;
 
 		memset(&tmp, 0, sizeof(tmp));
 		for_each_possible_cpu(cpu) {
@@ -1134,9 +1128,8 @@ static void blkcg_fill_root_iostats(void)
 				cpu_dkstats->sectors[STAT_DISCARD] << 9;
 		}
 
-		flags = u64_stats_update_begin_irqsave(&blkg->iostat.sync);
+		guard(spinlock_irqsave)(&blkg->iostat.spinlock);
 		blkg_iostat_set(&blkg->iostat.cur, &tmp);
-		u64_stats_update_end_irqrestore(&blkg->iostat.sync, flags);
 	}
 }
 
@@ -1145,7 +1138,6 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
 	struct blkg_iostat_set *bis = &blkg->iostat;
 	u64 rbytes, wbytes, rios, wios, dbytes, dios;
 	const char *dname;
-	unsigned seq;
 	int i;
 
 	if (!blkg->online)
@@ -1157,16 +1149,14 @@ static void blkcg_print_one_stat(struct blkcg_gq *blkg, struct seq_file *s)
 
 	seq_printf(s, "%s ", dname);
 
-	do {
-		seq = u64_stats_fetch_begin(&bis->sync);
-
+	scoped_guard(spinlock_irqsave, &bis->spinlock) {
 		rbytes = bis->cur.bytes[BLKG_IOSTAT_READ];
 		wbytes = bis->cur.bytes[BLKG_IOSTAT_WRITE];
 		dbytes = bis->cur.bytes[BLKG_IOSTAT_DISCARD];
 		rios = bis->cur.ios[BLKG_IOSTAT_READ];
 		wios = bis->cur.ios[BLKG_IOSTAT_WRITE];
 		dios = bis->cur.ios[BLKG_IOSTAT_DISCARD];
-	} while (u64_stats_fetch_retry(&bis->sync, seq));
+	}
 
 	if (rbytes || wbytes || rios || wios) {
 		seq_printf(s, "rbytes=%llu wbytes=%llu rios=%llu wios=%llu dbytes=%llu dios=%llu",
@@ -2141,7 +2131,6 @@ void blk_cgroup_bio_start(struct bio *bio)
 	struct blkcg *blkcg = bio->bi_blkg->blkcg;
 	int rwd = blk_cgroup_io_type(bio), cpu;
 	struct blkg_iostat_set *bis;
-	unsigned long flags;
 
 	if (!cgroup_subsys_on_dfl(io_cgrp_subsys))
 		return;
@@ -2152,30 +2141,29 @@ void blk_cgroup_bio_start(struct bio *bio)
 
 	cpu = get_cpu();
 	bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
-	flags = u64_stats_update_begin_irqsave(&bis->sync);
-
-	/*
-	 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
-	 * bio and we would have already accounted for the size of the bio.
-	 */
-	if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
-		bio_set_flag(bio, BIO_CGROUP_ACCT);
-		bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
-	}
-	bis->cur.ios[rwd]++;
+	scoped_guard(spinlock_irqsave, &bis->spinlock) {
+		/*
+		 * If the bio is flagged with BIO_CGROUP_ACCT it means this is a split
+		 * bio and we would have already accounted for the size of the bio.
+		 */
+		if (!bio_flagged(bio, BIO_CGROUP_ACCT)) {
+			bio_set_flag(bio, BIO_CGROUP_ACCT);
+			bis->cur.bytes[rwd] += bio->bi_iter.bi_size;
+		}
+		bis->cur.ios[rwd]++;
 
-	/*
-	 * If the iostat_cpu isn't in a lockless list, put it into the
-	 * list to indicate that a stat update is pending.
-	 */
-	if (!READ_ONCE(bis->lqueued)) {
-		struct llist_head *lhead = this_cpu_ptr(blkcg->lhead);
+		/*
+		 * If the iostat_cpu isn't in a lockless list, put it into the
+		 * list to indicate that a stat update is pending.
+		 */
+		if (!READ_ONCE(bis->lqueued)) {
+			struct llist_head *lhead = this_cpu_ptr(blkcg->lhead);
 
-		llist_add(&bis->lnode, lhead);
-		WRITE_ONCE(bis->lqueued, true);
+			llist_add(&bis->lnode, lhead);
+			WRITE_ONCE(bis->lqueued, true);
+		}
 	}
 
-	u64_stats_update_end_irqrestore(&bis->sync, flags);
 	cgroup_rstat_updated(blkcg->css.cgroup, cpu);
 	put_cpu();
 }
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index bd472a30bc61..b9544969a131 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -44,6 +44,7 @@ struct blkg_iostat {
 };
 
 struct blkg_iostat_set {
+	spinlock_t			spinlock;
 	struct u64_stats_sync		sync;
 	struct blkcg_gq		       *blkg;
 	struct llist_node		lnode;
-- 
2.18.0




More information about the linux-arm-kernel mailing list