[PATCH v4] Set the scratch allocation to alignment to cacheline size.
Raj Vishwanathan
raj.vishwanathan at gmail.com
Fri Mar 21 10:42:11 PDT 2025
Andrew
Thanks for the explanation. I would prefer to do this in two different
patches. The first one being the one that I sent out and when it is
ready in Linux I can add this to OpenSBI.
The current patch cleanly resolves the livelock issue we have and
maintains the default for the scratch allocation. If we add the Zic64b
check, this could change the default value for scratch allocation.
Thanks
Raj
On Fri, Mar 21, 2025 at 12:50 AM Andrew Jones <ajones at ventanamicro.com> wrote:
>
> On Thu, Mar 20, 2025 at 10:46:02PM -0700, Raj Vishwanathan wrote:
> > Andrew,
> >
> > Thanks for the tip. It may be the way to in the future. Just out of
> > curiosity, does the absence of Zic64b extension mean that cache blocks
> > are not 64 bytes in size?
>
> Nope, it just means the cache block sizes will need to be determined in
> some other way, such as with the riscv,cbom/cbo{m,p,z}-block-size nodes.
>
> My comment on the patch is to not give up when riscv,cbom-block-size
> is missing, but rather to also search the extension list for zic64b.
> Or vice versa, search for zic64b, if it's not present, then search
> for the block-size nodes.
>
> Thanks,
> drew
>
> >
> > We are testing on a system that does not have this but a 64byte is
> > still the cacheline size
> >
> > Thanks
> >
> > Raj
> >
> > On Wed, Mar 19, 2025 at 4:59 AM Andrew Jones <ajones at ventanamicro.com> wrote:
> > >
> > > On Tue, Mar 18, 2025 at 11:51:11AM -0700, Raj Vishwanathan wrote:
> > > > We set the scratch allocation alignment to cacheline size,specified by
> > > > riscv,cbom-block-size in the dts file to avoid two atomic variables from
> > > > the same cache line causing livelock on some platforms. If the cacheline
> > > > is not specified, we set it a default value.
> > > >
> > > > Signed-off-by: Raj Vishwanathan <Raj.Vishwanathan at gmail.com>
> > > > ---
> > > > Changes in V3:
> > > > Remove platform specific references to 64 Bytes.
> > > > Changes in V2:
> > > > Added a new configuration to get the alignment size.
> > > > Change in V1:
> > > > Original Patch
> > > > ---
> > > > include/sbi/sbi_platform.h | 2 ++
> > > > include/sbi_utils/fdt/fdt_helper.h | 1 +
> > > > lib/sbi/sbi_scratch.c | 27 +++++++++++++++++++++++++--
> > > > lib/utils/fdt/fdt_helper.c | 24 ++++++++++++++++++++++++
> > > > platform/generic/platform.c | 7 +++++++
> > > > 5 files changed, 59 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/include/sbi/sbi_platform.h b/include/sbi/sbi_platform.h
> > > > index 6d5fbc7..0cea0fe 100644
> > > > --- a/include/sbi/sbi_platform.h
> > > > +++ b/include/sbi/sbi_platform.h
> > > > @@ -197,6 +197,8 @@ struct sbi_platform {
> > > > * 2. HART id < SBI_HARTMASK_MAX_BITS
> > > > */
> > > > const u32 *hart_index2id;
> > > > + /** Allocation alignment for Scratch */
> > > > + u32 cbom_block_size;
> > > > };
> > > >
> > > > /**
> > > > diff --git a/include/sbi_utils/fdt/fdt_helper.h b/include/sbi_utils/fdt/fdt_helper.h
> > > > index 7329b84..0b82159 100644
> > > > --- a/include/sbi_utils/fdt/fdt_helper.h
> > > > +++ b/include/sbi_utils/fdt/fdt_helper.h
> > > > @@ -55,6 +55,7 @@ bool fdt_node_is_enabled(const void *fdt, int nodeoff);
> > > >
> > > > int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid);
> > > >
> > > > +int fdt_parse_cbom_block_size(const void *fdt, int cpu_offset, u32 *cbom_block_size);
> > > > int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid);
> > > >
> > > > int fdt_parse_timebase_frequency(const void *fdt, unsigned long *freq);
> > > > diff --git a/lib/sbi/sbi_scratch.c b/lib/sbi/sbi_scratch.c
> > > > index ccbbc68..fdb9e20 100644
> > > > --- a/lib/sbi/sbi_scratch.c
> > > > +++ b/lib/sbi/sbi_scratch.c
> > > > @@ -14,6 +14,8 @@
> > > > #include <sbi/sbi_scratch.h>
> > > > #include <sbi/sbi_string.h>
> > > >
> > > > +#define DEFAULT_SCRATCH_ALLOC_ALIGN __SIZEOF_POINTER__
> > > > +
> > > > u32 last_hartindex_having_scratch = 0;
> > > > u32 hartindex_to_hartid_table[SBI_HARTMASK_MAX_BITS + 1] = { -1U };
> > > > struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0 };
> > > > @@ -21,6 +23,20 @@ struct sbi_scratch *hartindex_to_scratch_table[SBI_HARTMASK_MAX_BITS + 1] = { 0
> > > > static spinlock_t extra_lock = SPIN_LOCK_INITIALIZER;
> > > > static unsigned long extra_offset = SBI_SCRATCH_EXTRA_SPACE_OFFSET;
> > > >
> > > > +static u32 sbi_get_scratch_alloc_align(void)
> > > > +{
> > > > + const struct sbi_platform *plat;
> > > > + /*
> > > > + * Get the alignment size. We will return DEFAULT_SCRATCH_ALLOC_ALIGNMENT
> > > > + * or riscv,cbom_block_size
> > > > + */
> > > > + plat = sbi_platform_thishart_ptr();
> > > > + if (!plat)
> > > > + return DEFAULT_SCRATCH_ALLOC_ALIGN;
> > > > + return plat->cbom_block_size ? plat->cbom_block_size : \
> > > > + DEFAULT_SCRATCH_ALLOC_ALIGN;
> > > > +
> > > > +}
> > > > u32 sbi_hartid_to_hartindex(u32 hartid)
> > > > {
> > > > u32 i;
> > > > @@ -57,6 +73,7 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> > > > void *ptr;
> > > > unsigned long ret = 0;
> > > > struct sbi_scratch *rscratch;
> > > > + u32 scratch_alloc_align = 0;
> > > >
> > > > /*
> > > > * We have a simple brain-dead allocator which never expects
> > > > @@ -70,8 +87,14 @@ unsigned long sbi_scratch_alloc_offset(unsigned long size)
> > > > if (!size)
> > > > return 0;
> > > >
> > > > - size += __SIZEOF_POINTER__ - 1;
> > > > - size &= ~((unsigned long)__SIZEOF_POINTER__ - 1);
> > > > + scratch_alloc_align = sbi_get_scratch_alloc_align();
> > > > +
> > > > + /*
> > > > + * We let the allocation align to cacheline bytes to avoid livelock on
> > > > + * certain platforms due to atomic variables from the same cache line.
> > > > + */
> > > > + size += scratch_alloc_align - 1;
> > > > + size &= ~((unsigned long)scratch_alloc_align - 1);
> > > >
> > > > spin_lock(&extra_lock);
> > > >
> > > > diff --git a/lib/utils/fdt/fdt_helper.c b/lib/utils/fdt/fdt_helper.c
> > > > index cb350e5..bea4fdc 100644
> > > > --- a/lib/utils/fdt/fdt_helper.c
> > > > +++ b/lib/utils/fdt/fdt_helper.c
> > > > @@ -287,6 +287,30 @@ int fdt_parse_hart_id(const void *fdt, int cpu_offset, u32 *hartid)
> > > >
> > > > return 0;
> > > > }
> > > > +int fdt_parse_cbom_block_size(const void *fdt,int cpu_offset,u32 *cbom_block_size)
> > > > +{
> > > > + int len;
> > > > + const void *prop;
> > > > + const fdt32_t *val;
> > > > +
> > > > + if (!fdt || cpu_offset < 0)
> > > > + return SBI_EINVAL;
> > > > +
> > > > + prop = fdt_getprop(fdt, cpu_offset, "device_type", &len);
> > > > + if (!prop || !len)
> > > > + return SBI_EINVAL;
> > > > + if (strncmp (prop, "cpu", strlen ("cpu")))
> > > > + return SBI_EINVAL;
> > > > +
> > > > + val = fdt_getprop(fdt, cpu_offset, "riscv,cbom-block-size", &len);
> > > > + if (!val || len < sizeof(fdt32_t))
> > > > + return SBI_EINVAL;
> > >
> > > There's another way to get the CBO block sizes, which we haven't started
> > > doing in Linux yet, but at some point maybe we should. If we don't have
> > > the riscv,cbom-block-size nodes, then we can still check for the existence
> > > of the Zic64b extension in the isa-string/isa-extensions. The existence of
> > > that extension states that all cache blocks (cbom/cboz) are 64 bytes in
> > > size.
> > >
> > > Thanks,
> > > drew
> > >
> > > > +
> > > > + if (cbom_block_size)
> > > > + *cbom_block_size = fdt32_to_cpu(*val);
> > > > + return 0;
> > > > +
> > > > +}
> > > >
> > > > int fdt_parse_max_enabled_hart_id(const void *fdt, u32 *max_hartid)
> > > > {
> > > > diff --git a/platform/generic/platform.c b/platform/generic/platform.c
> > > > index c03ed88..0ff8d46 100644
> > > > --- a/platform/generic/platform.c
> > > > +++ b/platform/generic/platform.c
> > > > @@ -174,6 +174,8 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> > > > const void *fdt = (void *)arg1;
> > > > u32 hartid, hart_count = 0;
> > > > int rc, root_offset, cpus_offset, cpu_offset, len;
> > > > + u32 cbom_block_size = 0;
> > > > + u32 tmp=0;
> > > >
> > > > root_offset = fdt_path_offset(fdt, "/");
> > > > if (root_offset < 0)
> > > > @@ -207,11 +209,16 @@ unsigned long fw_platform_init(unsigned long arg0, unsigned long arg1,
> > > > continue;
> > > >
> > > > generic_hart_index2id[hart_count++] = hartid;
> > > > + rc = fdt_parse_cbom_block_size(fdt, cpu_offset,&tmp);
> > > > + if (rc)
> > > > + continue;
> > > > + cbom_block_size = MAX(tmp,cbom_block_size);
> > > > }
> > > >
> > > > platform.hart_count = hart_count;
> > > > platform.heap_size = fw_platform_get_heap_size(fdt, hart_count);
> > > > platform_has_mlevel_imsic = fdt_check_imsic_mlevel(fdt);
> > > > + platform.cbom_block_size = cbom_block_size;
> > > >
> > > > fw_platform_coldboot_harts_init(fdt);
> > > >
> > > > --
> > > > 2.43.0
> > > >
> > > >
> > > > --
> > > > opensbi mailing list
> > > > opensbi at lists.infradead.org
> > > > http://lists.infradead.org/mailman/listinfo/opensbi
More information about the opensbi
mailing list