[PATCH v3] irqchip/gicv3-its: Avoid memory over allocation for ITEs

Shanker Donthineni shankerd at codeaurora.org
Mon Mar 20 04:15:50 PDT 2017


Hi Marc,


On 03/20/2017 05:14 AM, Shanker Donthineni wrote:
> Hi Marc,
>
>
> On 03/17/2017 10:33 AM, Marc Zyngier wrote:
>> On 17/03/17 14:18, Shanker Donthineni wrote:
>>> Hi Marc,
>>>
>>>
>>> On 03/17/2017 08:50 AM, Marc Zyngier wrote:
>>>> On 07/03/17 14:25, Shanker Donthineni wrote:
>>>>> We are always allocating an extra 255 bytes of memory to handle the ITE
>>>>> physical address alignment requirement. The kmalloc() satisfies
>>>>> the ITE alignment since the ITS driver is requesting a minimum
>>>>> size of ITS_ITT_ALIGN bytes.
>>>>>
>>>>> Let's try to allocate the exact amount of memory that is required
>>>>> for ITEs to avoid wastage.
>>>>>
>>>>> Signed-off-by: Shanker Donthineni <shankerd at codeaurora.org>
>>>>> ---
>>>>> v2: removed 'Change-Id: Ia8084189833f2081ff13c392deb5070c46a64038' from commit.
>>>>> v3: changed from IITE to ITE.
>>>>>
>>>>>  drivers/irqchip/irq-gic-v3-its.c | 7 ++++++-
>>>>>  1 file changed, 6 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
>>>>> index 86bd428..5aeca78 100644
>>>>> --- a/drivers/irqchip/irq-gic-v3-its.c
>>>>> +++ b/drivers/irqchip/irq-gic-v3-its.c
>>>>> @@ -1329,8 +1329,13 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>>>>>  	 */
>>>>>  	nr_ites = max(2UL, roundup_pow_of_two(nvecs));
>>>>>  	sz = nr_ites * its->ite_size;
>>>>> -	sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
>>>>> +	sz = max(sz, ITS_ITT_ALIGN);
>>>>>  	itt = kzalloc(sz, GFP_KERNEL);
>>>>> +	if (itt && !IS_ALIGNED(virt_to_phys(itt), ITS_ITT_ALIGN)) {
>>>>> +		kfree(itt);
>>>>> +		itt = kzalloc(sz + ITS_ITT_ALIGN - 1, GFP_KERNEL);
>>>>> +	}
>>>>> +
>>>> Is this really worth the complexity? Are you aware of a system where the
>>>> accumulation of overallocation actually shows up as being an issue?
>>> As such, there is no issue with over-allocation. Actually, this change masked the QDF2400 bug 'irqchip/gicv3-its: Add workaround for QDF2400 ITS erratum 0065' until now; it was found and fixed recently while looking at the code for possible memory optimizations.
>>>  
>>>> If you want to be absolutely exact in your allocation, then I'd suggest
>>>> doing it all the time, and have a proper dedicated allocator that always
>>>> do the right thing, without a wasteful fallback like you still have here.
>>> We don't need the fallback, and it can be removed safely. Looking for
>>> your suggestion: should I implement a dedicated allocator or remove the
>>> fallback for simpler code?
>> Are you saying that kmalloc is guaranteed to give us something that is
>> 256 byte aligned? If so, why do we test for alignment (with free +
>> over-allocate if it fails)?
> I've verified that, on my system, kmalloc() always allocates memory with 256-byte alignment. kmalloc() uses the generic slab caches available in the kernel to allocate memory based on the input size.
>
>> I'd rather have only one way of allocating the ITT. Either we always
>> overallocate in order to guarantee right alignment (and my personal view
>> is that for most system, this doesn't matter at all), or we create our
>> own allocator. The issue with the latter is that we don't really have a
>> good story for allocating arrays of objects with a given alignment
>> (kmem_cache_* only deals with single objects).
> Adding a dedicated function to allocate memory is preferable, but it needs a few more lines of code.
>
> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
> index a27a074..f0125e5 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -90,6 +90,8 @@ struct its_node {
>         u32                     ite_size;
>         u32                     device_ids;
>         int                     numa_node;
> +       struct page             *ite_page;
> +       u32                     ite_psz;
>  };
>
>  #define ITS_ITT_ALIGN          SZ_256
> @@ -266,7 +268,6 @@ static struct its_collection *its_build_mapd_cmd(struct its_cmd_block *cmd,
>         u8 size = ilog2(desc->its_mapd_cmd.dev->nr_ites);
>
>         itt_addr = virt_to_phys(desc->its_mapd_cmd.dev->itt);
> -       itt_addr = ALIGN(itt_addr, ITS_ITT_ALIGN);
>
>         its_encode_cmd(cmd, GITS_CMD_MAPD);
>         its_encode_devid(cmd, desc->its_mapd_cmd.dev->device_id);
> @@ -1319,6 +1320,42 @@ static bool its_alloc_device_table(struct its_node *its, u32 dev_id)
>         return true;
>  }
>
> +static void *its_alloc_memory_ites(struct its_node *its, int nr_ites)
> +{
> +       unsigned long flags;
> +       struct page *page;
> +       void *ite;
> +       u32 size;
> +
> +       size = ALIGN(nr_ites * its->ite_size, ITS_ITT_ALIGN);
> +       raw_spin_lock_irqsave(&its->lock, flags);
> +
> +       /* Try to reuse the current page if enough space is available */
> +       if (size > its->ite_psz) {
> +               /* Allocate a new compound page with minimum order 1 */
> +               page = alloc_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
> +                                  max(get_order(size), 1));
> +               if (!page) {
> +                       raw_spin_unlock_irqrestore(&its->lock, flags);
> +                       return NULL;
> +               }
> +
> +               /* Free current page, decrement page count */
> +               if (its->ite_page)
> +                       put_page(its->ite_page);
> +               its->ite_psz = PAGE_ORDER_TO_SIZE(compound_order(page));
> +               its->ite_page = page;
> +       }
> +
> +       get_page(its->ite_page); /* increment page count */
> +       its->ite_psz -= size;    /* update free space  */
> +       ite = page_address(its->ite_page) + its->ite_psz;
> +       raw_spin_unlock_irqrestore(&its->lock, flags);
> +       gic_flush_dcache_to_poc(ite, size);
> +
> +       return ite;
> +}
> +
>  static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>                                             int nvecs)
>  {
> @@ -1330,7 +1367,6 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>         int lpi_base;
>         int nr_lpis;
>         int nr_ites;
> -       int sz;
>
>         if (!its_alloc_device_table(its, dev_id))
>                 return NULL;
> @@ -1342,22 +1378,22 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
>          * express an ITT with a single entry.
>          */
>         nr_ites = max(2UL, roundup_pow_of_two(nvecs));
> -       sz = nr_ites * its->ite_size;
> -       sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
> -       itt = kzalloc(sz, GFP_KERNEL);
> +       itt = its_alloc_memory_ites(its, nr_ites);
> +       if (!itt)
> +               return NULL;
> +
>         lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis);
>         if (lpi_map)
>                 col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL);
>
> -       if (!dev || !itt || !lpi_map || !col_map) {
> +       if (!dev || !lpi_map || !col_map) {
>                 kfree(dev);
> -               kfree(itt);
> +               put_page(virt_to_page(itt));
>                 kfree(lpi_map);
>                 kfree(col_map);
>                 return NULL;
>         }
>
> -       gic_flush_dcache_to_poc(itt, sz);
>
>         dev->its = its;
>         dev->itt = itt;
> @@ -1386,7 +1422,7 @@ static void its_free_device(struct its_device *its_dev)
>         raw_spin_lock_irqsave(&its_dev->its->lock, flags);
>         list_del(&its_dev->entry);
>         raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
> -       kfree(its_dev->itt);
> +       put_page(virt_to_page(its_dev->itt));
>         kfree(its_dev);
>  }
>
>
>

This patch is not urgent; if you want, we can revisit it at a later time.

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 86bd428..5aeca78 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1329,8 +1329,13 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
         */
        nr_ites = max(2UL, roundup_pow_of_two(nvecs));
        sz = nr_ites * its->ite_size;
-       sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
+       sz = max(sz, ITS_ITT_ALIGN);
        itt = kzalloc(sz, GFP_KERNEL);
+       if (itt && !IS_ALIGNED(virt_to_phys(itt), ITS_ITT_ALIGN)) {
+               kfree(itt);
+               itt = kzalloc(sz + ITS_ITT_ALIGN - 1, GFP_KERNEL);
+       }
+
        lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis);
        if (lpi_map)
                col_map = kzalloc(sizeof(*col_map) * nr_lpis, GFP_KERNEL);

-- 
Shanker Donthineni
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum, a Linux Foundation Collaborative Project.




More information about the linux-arm-kernel mailing list