[PATCH v4 3/5] arm64: mte: implement CONFIG_ARM64_MTE_COMP

Andy Shevchenko andriy.shevchenko at linux.intel.com
Fri Jul 21 04:22:05 PDT 2023


On Thu, Jul 20, 2023 at 07:39:54PM +0200, Alexander Potapenko wrote:
> The config implements the algorithm compressing memory tags for ARM MTE
> during swapping.
> 
> The algorithm is based on RLE and specifically targets 128-byte buffers
> of tags corresponding to a single page. In the common case a buffer
> can be compressed into 63 bits, making it possible to store it without
> additional memory allocation.

...

> +Programming Interface
> +=====================
> +
> + .. kernel-doc:: arch/arm64/mm/mtecomp.c

:export:

> +

Is it dangling trailing blank line? Drop it.

...

> +#include <linux/bitmap.h>

> +#include <linux/bitops.h>

This is guaranteed to be included by bitmap.h.

> +#include <linux/export.h>
> +#include <linux/gfp.h>
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/string.h>
> +#include <linux/types.h>

...

> +/*
> + * Sizes of compressed values. These depend on MTE_TAG_SIZE and

of the

> + * MTE_GRANULES_PER_PAGE.
> + */

...

> +	u8 prev_tag = tags[0] / 16; /* First tag in the array. */
> +	unsigned int cur_idx = 0, i, j;
> +	u8 cur_tag;


> +	out_tags[0] = prev_tag;

out_tags[cur_idx] ?

> +	for (i = 0; i < MTE_PAGE_TAG_STORAGE; i++) {
> +		for (j = 0; j < 2; j++) {
> +			cur_tag = j ? (tags[i] % 16) : (tags[i] / 16);
> +			if (cur_tag == prev_tag) {
> +				out_sizes[cur_idx]++;

> +			} else {
> +				cur_idx++;
> +				prev_tag = cur_tag;
> +				out_tags[cur_idx] = prev_tag;
> +				out_sizes[cur_idx] = 1;

Looking more at this I think there is still a room for improvement. I can't
come up right now with a proposal (lunch time :-), but I would look into

	do {
		...
	} while (i < MTE_...);

approach.

> +			}
> +		}
> +	}

...

> +static size_t mte_size_to_ranges(size_t size)
> +{
> +	size_t largest_bits;

> +	size_t ret = 0;

Redundant assignment. Please, check again all of them.

> +
> +	largest_bits = (size == 8) ? MTE_BITS_PER_LARGEST_IDX_INLINE :
> +				     MTE_BITS_PER_LARGEST_IDX;
> +	ret = (size * 8 + MTE_BITS_PER_SIZE - largest_bits) /

Hmm... I thought that we moved BYTES_TO_BITS() to the generic header...
Okay, never mind.

> +	      (MTE_BITS_PER_TAG + MTE_BITS_PER_SIZE);
> +	return ret;

	return (...) / ...;

> +}

...

> +static size_t mte_alloc_size(unsigned int num_ranges)
> +{
> +	size_t sizes[4] = { 8, 16, 32, 64 };

Hooray! And now it's not needed anymore...

> +	unsigned int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(sizes); i++) {

...as sizes[i] is equivalent of (8 << i).

> +		if (num_ranges <= mte_size_to_ranges(sizes[i]))
> +			return sizes[i];
> +	}
> +	return 128;
> +}

> +}

...

> +/**
> + * mte_compress() - compress the given tag array.
> + * @tags: 128-byte array to read the tags from.
> + *
> + * Compresses the tags and returns a 64-bit opaque handle pointing to the
> + * tag storage. May allocate memory, which is freed by @mte_release_handle().

+ blank line here.

> + * Returns: 64-bit tag storage handle.
> + */

...

> +	/*
> +	 * mte_compress_to_buf() only initializes the bits that mte_decompress()
> +	 * will read. But when the tags are stored in the handle itself, it must
> +	 * have all its bits initialized.
> +	 */
> +	unsigned long result = 0;

	// Actually it's interesting how it's supposed to work on 32-bit
	// builds...
	DECLARE_BITMAP(result, BITS_PER_LONG);

and then

	bitmap_zero();

?

...

> +static unsigned long mte_bitmap_read(const unsigned long *bitmap,
> +				     unsigned long *pos, unsigned long bits)
> +{
> +	unsigned long result;
> +
> +	result = bitmap_read(bitmap, *pos, bits);
> +	*pos += bits;
> +	return result;

	unsigned long start = *pos;

	*pos += bits;
	return bitmap_read(bitmap, start, bits);

> +}

...

> +	unsigned short r_sizes[46], sum = 0;

See below.

...

It's cleaner and more robust to have

	sum = 0;

here.

> +	for (i = 0; i < max_ranges; i++) {
> +		if (i == largest_idx)
> +			continue;
> +		r_sizes[i] =
> +			mte_bitmap_read(bitmap, &bit_pos, MTE_BITS_PER_SIZE);
> +		if (!r_sizes[i]) {
> +			max_ranges = i;
> +			break;
> +		}
> +		sum += r_sizes[i];
> +	}
> +	if (sum >= 256)
> +		return false;

-- 
With Best Regards,
Andy Shevchenko





More information about the linux-arm-kernel mailing list