[RFC PATCH 1/3] mm/zsmalloc: encode class index in obj value for lockless class lookup
Barry Song
baohua at kernel.org
Mon May 11 17:58:23 PDT 2026
On Fri, May 8, 2026 at 2:19 PM Wenchao Hao <haowenchao22 at gmail.com> wrote:
>
> Encode the size class index (class_idx) into the obj value so that
> zs_free() can determine the correct size_class without dereferencing
> the handle->obj->PFN->zpdesc->zspage->class chain under pool->lock.
>
> OBJ_INDEX_BITS is over-provisioned on 64-bit systems. For example on
> arm64 with default chain_size=8: OBJ_INDEX_BITS=24 but only 10 bits
> are actually needed for obj_idx. We dynamically compute OBJ_CLASS_BITS
> as ilog2(ZS_SIZE_CLASSES - 1) + 1 (8 bits for 4K pages, 9 for 64K)
> and verify at compile time via static_assert that the three fields
> (PFN + class_idx + obj_idx) fit within BITS_PER_LONG.
>
> This encoding is gated by ZS_OBJ_CLASS_IDX, defined only when
> BITS_PER_LONG >= 64. On 32-bit systems the bits do not fit, so
> the feature is disabled and the original OBJ_INDEX layout is preserved.
>
> Split OBJ_INDEX into class_idx and obj_idx:
>
> obj: [PFN | class_idx | obj_idx]
> [_PFN_BITS | OBJ_CLASS_BITS | OBJ_IDX_BITS]
>
> class_idx is invariant across page migration (only PFN changes), so a
> lockless read always yields a valid class_idx.
>
> Update obj_to_location(), location_to_obj() and callers accordingly.
> Add obj_to_class_idx() helper. Adjust ZS_MIN_ALLOC_SIZE to use
> OBJ_IDX_BITS.
>
> Signed-off-by: Wenchao Hao <haowenchao at xiaomi.com>
> ---
> mm/zsmalloc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 88 insertions(+), 7 deletions(-)
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 63128ddb7959..bccadf0a27f2 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -96,11 +96,74 @@
> #define CLASS_BITS 8
> #define MAGIC_VAL_BITS 8
>
> +/*
> + * Optionally encode the size class index in the obj value so that
> + * zs_free() can look up the correct class without holding pool->lock.
> + *
> + * Rather than fixing a hard CLASS_BITS constant for the class_idx field,
> + * we compute the minimum bits needed from the actual number of size classes
> + * and the actual maximum obj_idx, then check whether they all fit:
> + *
> + * _PFN_BITS + OBJ_CLASS_BITS_NEEDED + OBJ_IDX_BITS_NEEDED <= BITS_PER_LONG
> + *
> + * This naturally handles all architectures and PAGE_SIZE configurations:
> + *
> + * - 32-bit: BITS_PER_LONG=32, sum easily exceeds 32 --> disabled.
> + * - powerpc64 64K pages: ZS_SIZE_CLASSES=257 --> OBJ_CLASS_BITS_NEEDED=9,
> + * but the sum still fits in 64 bits --> enabled.
> + * - riscv64 Sv57: _PFN_BITS=44, tight but still fits --> enabled.
> + *
> + * When enabled, obj layout is:
> + *
> + * 63 0
> + * +-----------+--------------+-------------+
> + * | PFN | class_idx | obj_idx |
> + * | _PFN_BITS |OBJ_CLASS_BITS| OBJ_IDX_BITS|
> + * +-----------+--------------+-------------+
> + *
> + * Migration only rewrites PFN; class_idx and obj_idx are invariant,
> + * so a lockless read of obj always yields a valid class_idx.
> + */
> +
> +#if BITS_PER_LONG >= 64
> +#define ZS_OBJ_CLASS_IDX
> +#endif
> +
> +#ifdef ZS_OBJ_CLASS_IDX
> +
> +/* ZS_SIZE_CLASSES computed conservatively with original OBJ_INDEX_BITS */
> +#define ZS_MIN_ALLOC_SIZE_FULL \
> + MAX(32, (CONFIG_ZSMALLOC_CHAIN_SIZE << PAGE_SHIFT >> OBJ_INDEX_BITS))
> +#define ZS_SIZE_CLASSES_FULL \
> + (DIV_ROUND_UP(PAGE_SIZE - ZS_MIN_ALLOC_SIZE_FULL, \
> + PAGE_SIZE >> CLASS_BITS) + 1)
> +
> +#define ZS_MAX_OBJ_COUNT_FULL \
> + (CONFIG_ZSMALLOC_CHAIN_SIZE * PAGE_SIZE / 32)
> +#define OBJ_CLASS_BITS_NEEDED (ilog2(ZS_SIZE_CLASSES_FULL - 1) + 1)
> +#define OBJ_IDX_BITS_NEEDED (ilog2(ZS_MAX_OBJ_COUNT_FULL - 1) + 1)
> +
> +static_assert(_PFN_BITS + OBJ_CLASS_BITS_NEEDED + OBJ_IDX_BITS_NEEDED
> + <= BITS_PER_LONG,
> + "zsmalloc: class_idx + obj_idx + PFN do not fit in obj on this config");
> +
> +#define OBJ_CLASS_BITS OBJ_CLASS_BITS_NEEDED
> +#define OBJ_IDX_BITS (OBJ_INDEX_BITS - OBJ_CLASS_BITS)
> +#define OBJ_IDX_MASK ((_AC(1, UL) << OBJ_IDX_BITS) - 1)
> +#define OBJ_CLASS_MASK ((_AC(1, UL) << OBJ_CLASS_BITS) - 1)
> +
> +#else /* !ZS_OBJ_CLASS_IDX */
> +
> +#define OBJ_IDX_BITS OBJ_INDEX_BITS
> +#define OBJ_IDX_MASK OBJ_INDEX_MASK
> +
> +#endif /* ZS_OBJ_CLASS_IDX */
This is quite confusing: OBJ_INDEX_BITS and OBJ_IDX_BITS differ by only
a couple of letters yet mean different things. Can we rename them to
something more clearly distinguishable?
Also, I guess OBJ_CLASS_BITS could simply be defined as 0 for 32-bit
builds, which would let us remove many of these #ifdefs?
> +
> #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
>
> /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
> #define ZS_MIN_ALLOC_SIZE \
> - MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
> + MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_IDX_BITS))
> /* each chunk includes extra space to keep handle */
> #define ZS_MAX_ALLOC_SIZE PAGE_SIZE
>
> @@ -722,7 +785,7 @@ static void obj_to_location(unsigned long obj, struct zpdesc **zpdesc,
> unsigned int *obj_idx)
> {
> *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
> - *obj_idx = (obj & OBJ_INDEX_MASK);
> + *obj_idx = (obj & OBJ_IDX_MASK);
> }
>
> static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
> @@ -730,17 +793,29 @@ static void obj_to_zpdesc(unsigned long obj, struct zpdesc **zpdesc)
> *zpdesc = pfn_zpdesc(obj >> OBJ_INDEX_BITS);
> }
>
> +#ifdef ZS_OBJ_CLASS_IDX
> +static unsigned int obj_to_class_idx(unsigned long obj)
> +{
> + return (obj >> OBJ_IDX_BITS) & OBJ_CLASS_MASK;
> +}
> +#endif
> +
> /**
> - * location_to_obj - get obj value encoded from (<zpdesc>, <obj_idx>)
> + * location_to_obj - encode (<zpdesc>, <obj_idx>, <class_idx>) into obj value
> * @zpdesc: zpdesc object resides in zspage
> * @obj_idx: object index
> + * @class_idx: size class index (used only when ZS_OBJ_CLASS_IDX is defined)
> */
> -static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx)
> +static unsigned long location_to_obj(struct zpdesc *zpdesc, unsigned int obj_idx,
> + unsigned int class_idx)
> {
> unsigned long obj;
>
> obj = zpdesc_pfn(zpdesc) << OBJ_INDEX_BITS;
> - obj |= obj_idx & OBJ_INDEX_MASK;
> +#ifdef ZS_OBJ_CLASS_IDX
> + obj |= (unsigned long)(class_idx & OBJ_CLASS_MASK) << OBJ_IDX_BITS;
> +#endif
> + obj |= obj_idx & OBJ_IDX_MASK;
Can we avoid using these "#ifdef ZS_OBJ_CLASS_IDX" blocks entirely? If
OBJ_CLASS_BITS is 0 on 32-bit, the shift and mask compile away and the
line can stay unconditional.
>
> return obj;
> }
> @@ -1276,7 +1351,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
> kunmap_local(vaddr);
> mod_zspage_inuse(zspage, 1);
>
> - obj = location_to_obj(m_zpdesc, obj);
> + obj = location_to_obj(m_zpdesc, obj, zspage->class);
> record_obj(handle, obj);
>
> return obj;
> @@ -1762,7 +1837,13 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
>
> old_obj = handle_to_obj(handle);
> obj_to_location(old_obj, &dummy, &obj_idx);
> - new_obj = (unsigned long)location_to_obj(newzpdesc, obj_idx);
> +#ifdef ZS_OBJ_CLASS_IDX
> + new_obj = (unsigned long)location_to_obj(newzpdesc,
> + obj_idx, obj_to_class_idx(old_obj));
> +#else
> + new_obj = (unsigned long)location_to_obj(newzpdesc,
> + obj_idx, 0);
> +#endif
Maybe we can define __obj_to_class_idx() as a static inline that
returns 0 for 32-bit builds; then this #ifdef/#else at the call site
collapses to a single unconditional location_to_obj() call.
> record_obj(handle, new_obj);
> }
> }
> --
> 2.34.1
>
More information about the linux-riscv
mailing list