[PATCH v4] blk-cgroup: Replace u64 sync with spinlock for iostat

Boy Wu (吳勃誼) Boy.Wu at mediatek.com
Thu Jul 25 20:43:27 PDT 2024


On Fri, 2024-07-19 at 07:31 -1000, tj at kernel.org wrote:
>  	 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>  Hello, Boy.
> 
> On Fri, Jul 19, 2024 at 01:47:35AM +0000, Boy Wu (吳勃誼) wrote:
> ...
> > If it is for readability, I think keeping using u64 sync instead of
> > replacing it with spinlock is better, because what u64 sync
> protects is
> > 64-bit data for 32-bit systems, while spinlock can be used for many
> > other reasons. The root cause of this issue is just the incorrect
> use
> 
> Yeah, but it can't be used when there are multiple updaters.
> 
> > of u64 sync. Adding back the missing spinlock for the correct usage
> of
> > u64 sync is simpler. Is there any benefit to replacing u64 sync
> with
> > spinlock?
> 
> It doesn't make sense to protect u64_sync with a spinlock. Both are
> only
> needed on 32bit. What's the point of having both? Also, note that
> iostat_cpu
> is also updated from two paths - bio issue and stat reset. If you
> want to
> keep that u64_sync, the only way to avoid possible deadlocks is
> adding
> spinlock in the bio issue path too, which will be pretty expensive.
> 

The use of a spinlock with u64 sync is suggested in
include/linux/u64_stats_sync.h:33.

 * Usage :
 *
 * Stats producer (writer) should use following template granted it
already got
 * an exclusive access to counters (a lock is already taken, or per cpu
 * data is used [in a non preemptable context])
 *
 *   spin_lock_bh(...) or other synchronization to get exclusive access
 *   ...
 *   u64_stats_update_begin(&stats->syncp);
 *   u64_stats_add(&stats->bytes64, len); // non atomic operation
 *   u64_stats_inc(&stats->packets64);    // non atomic operation
 *   u64_stats_update_end(&stats->syncp);

Is this a incorrect statment?

> > > Also, for blkg_clear_stat(), we're running a slight chance of
> > > clearing of
> > > iostat_cpu racing against state updates from the hot path. Given
> the
> > > circumstances - stat reset is an cgroup1-only feature which isn't
> > > used
> > > widely and a problematic interface to begin with, I believe this
> is a
> > > valid
> > > choice but it needs to be noted.
> > 
> > I don't get this part, but if this is another issue, maybe another
> > patch would be better?
> 
> It's the same issue. Reset is another writer and whenever you have
> more than
> one writers w/ u64_sync, there's a chance of deadlocks. So,
> iostat_cpu also
> has two writers - bio issue and stat reset. If you want to keep using
> u64_sync in both places, the only way to do it is adding spinlock
> protection
> to both paths, which is an expensive thing to do for the bio issue
> path. So,
> here, we'd rather just give up and let stat resetting be racy on
> 32bit.
> 
> Thanks.
> 
> -- 
> tejun

There are three places update iostat and two places update iostat_cpu
in blk-cgroup.c in version 6.10.1.

I assume the suggestion in u64_stats_sync.h is correct. For
readability, how about the following change?

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 85b3b9051455..7472cfa9a506 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -62,6 +62,10 @@ static struct workqueue_struct *blkcg_punt_bio_wq;

 static DEFINE_RAW_SPINLOCK(blkg_stat_lock);

+#if BITS_PER_LONG == 32
+static DEFINE_RAW_SPINLOCK(blkg_stat_cpu_lock);
+#endif
+
 #define BLKG_DESTROY_BATCH_SIZE  64

 /*
@@ -535,5 +539,55 @@ static void blkg_destroy_all(struct gendisk *disk)
        spin_unlock_irq(&q->queue_lock);
 }

+static inline unsigned long blkcg_iostats_update_begin_irqsave(struct
u64_stats_sync *syncp)
+{
+       unsigned long flags;
+
+#if BITS_PER_LONG == 64
+       flags = u64_stats_update_begin_irqsave(syncp);
+#else /* 64 bit */
+       raw_spin_lock_irqsave(&blkg_stat_lock, flags);
+       u64_stats_update_begin(syncp);
+#endif
+
+       return flags;
+}
+
+static inline void blkcg_iostats_update_end_irqrestore(struct
u64_stats_sync *syncp,
+                                                      unsigned long
flags)
+{
+#if BITS_PER_LONG == 64
+       u64_stats_update_end_irqrestore(syncp, flags);
+#else /* 64 bit */
+       u64_stats_update_end(syncp);
+       raw_spin_unlock_irqrestore(&blkg_stat_lock, flags);
+#endif
+}
+
+static unsigned long blkcg_iostats_cpu_update_begin_irqsave(struct
u64_stats_sync *syncp)
+{
+       unsigned long flags;
+
+#if BITS_PER_LONG == 64
+       flags = u64_stats_update_begin_irqsave(syncp);
+#else /* 64 bit */
+       raw_spin_lock_irqsave(&blkg_stat_cpu_lock, flags);
+       u64_stats_update_begin(syncp);
+#endif
+
+       return flags;
+}
+
+static inline void blkcg_iostats_cpu_update_end_irqrestore(struct
u64_stats_sync *syncp,
+                                                          unsigned
long flags)
+{
+#if BITS_PER_LONG == 64
+       u64_stats_update_end_irqrestore(syncp, flags);
+#else /* 64 bit */
+       u64_stats_update_end(syncp);
+       raw_spin_unlock_irqrestore(&blkg_stat_cpu_lock, flags);
+#endif
+}
+
 static void blkg_iostat_set(struct blkg_iostat *dst, struct
blkg_iostat *src)
 {
        int i;
@@ -632,24 +686,26 @@ static void blkg_iostat_set(struct blkg_iostat
*dst, struct blkg_iostat *src)
 static void __blkg_clear_stat(struct blkg_iostat_set *bis)
 {
        struct blkg_iostat cur = {0};
-       unsigned long flags;

-       flags = u64_stats_update_begin_irqsave(&bis->sync);
        blkg_iostat_set(&bis->cur, &cur);
        blkg_iostat_set(&bis->last, &cur);
-       u64_stats_update_end_irqrestore(&bis->sync, flags);
 }

 static void blkg_clear_stat(struct blkcg_gq *blkg)
 {
        int cpu;
+       unsigned long flags;

        for_each_possible_cpu(cpu) {
                struct blkg_iostat_set *s = per_cpu_ptr(blkg-
>iostat_cpu, cpu);

+               flags = blkcg_iostats_cpu_update_begin_irqsave(&s-
>sync);
                __blkg_clear_stat(s);
+               blkcg_iostats_cpu_update_end_irqrestore(&s->sync,
flags);
        }
+       flags = blkcg_iostats_update_begin_irqsave(&blkg->iostat.sync);
        __blkg_clear_stat(&blkg->iostat);
+       blkcg_iostats_update_end_irqrestore(&blkg->iostat.sync, flags);
 }

 static int blkcg_reset_stats(struct cgroup_subsys_state *css,
@@ -1134,9 +1190,9 @@ static void blkcg_fill_root_iostats(void)
                                cpu_dkstats->sectors[STAT_DISCARD] <<
9;
                }

-               flags = u64_stats_update_begin_irqsave(&blkg-
>iostat.sync);
+               flags = blkcg_iostats_update_begin_irqsave(&blkg-
>iostat.sync);
                blkg_iostat_set(&blkg->iostat.cur, &tmp);
-               u64_stats_update_end_irqrestore(&blkg->iostat.sync,
flags);
+               blkcg_iostats_update_end_irqrestore(&blkg->iostat.sync, 
flags);
        }
 }

@@ -2152,7 +2208,7 @@ void blk_cgroup_bio_start(struct bio *bio)

        cpu = get_cpu();
        bis = per_cpu_ptr(bio->bi_blkg->iostat_cpu, cpu);
-       flags = u64_stats_update_begin_irqsave(&bis->sync);
+       flags = blkcg_iostats_cpu_update_begin_irqsave(&bis->sync);

        /*
         * If the bio is flagged with BIO_CGROUP_ACCT it means this is
a split
@@ -2175,7 +2231,7 @@ void blk_cgroup_bio_start(struct bio *bio)
                WRITE_ONCE(bis->lqueued, true);
        }

-       u64_stats_update_end_irqrestore(&bis->sync, flags);
+       blkcg_iostats_cpu_update_end_irqrestore(&bis->sync, flags);
        cgroup_rstat_updated(blkcg->css.cgroup, cpu);
        put_cpu();
 }

--
boy.wu



More information about the linux-arm-kernel mailing list