[PATCH] mm: kmemleak: Skip check in kmemleak_*_phys when pfn bound is not ready
Patrick Wang
patrick.wang.shcn at gmail.com
Mon May 30 06:32:18 PDT 2022
On 2022/5/30 10:27, Yee Lee wrote:
> On Fri, 2022-05-27 at 21:39 +0800, patrick wang wrote:
>> On Fri, May 27, 2022 at 11:25 AM <yee.lee at mediatek.com> wrote:
>>>
>>> From: Yee Lee <yee.lee at mediatek.com>
>>>
>>> In some archs (arm64), memblock allocates memory in boot time when
>>> the pfn boundary (max_pfn/min_pfn) is not ready. The lowmen checks
>>> in
>>> kmemleak_*_phys() drop those blocks and cause some false leak
>>> alarms
>>> on common kernel objects.
>>>
>>> Kmemleak output: (Qemu/arm64)
>>> unreferenced object 0xffff0000c0170a00 (size 128):
>>> comm "swapper/0", pid 1, jiffies 4294892404 (age 126.208s)
>>> hex dump (first 32 bytes):
>>> 62 61 73 65 00 00 00 00 00 00 00 00 00 00 00
>>> 00 base............
>>> 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
>>> 00 ................
>>> backtrace:
>>> [<(____ptrval____)>] __kmalloc_track_caller+0x1b0/0x2e4
>>> [<(____ptrval____)>] kstrdup_const+0x8c/0xc4
>>> [<(____ptrval____)>] kvasprintf_const+0xbc/0xec
>>> [<(____ptrval____)>] kobject_set_name_vargs+0x58/0xe4
>>> [<(____ptrval____)>] kobject_add+0x84/0x100
>>> [<(____ptrval____)>] __of_attach_node_sysfs+0x78/0xec
>>> [<(____ptrval____)>] of_core_init+0x68/0x104
>>> [<(____ptrval____)>] driver_init+0x28/0x48
>>> [<(____ptrval____)>] do_basic_setup+0x14/0x28
>>> [<(____ptrval____)>] kernel_init_freeable+0x110/0x178
>>> [<(____ptrval____)>] kernel_init+0x20/0x1a0
>>> [<(____ptrval____)>] ret_from_fork+0x10/0x20
>>>
>>> This patch relaxs the boundary checking in kmemleak_*_phys api
>>> if max_low_pfn is uninitialzed.
>>>
>>> Fixes: 23c2d4 (mm: kmemleak: take a full lowmem check in
>>> kmemleak_*_phy)
>>> Signed-off-by: Yee Lee <yee.lee at mediatek.com>
>>> ---
>>> mm/kmemleak.c | 8 ++++----
>>> 1 file changed, 4 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/mm/kmemleak.c b/mm/kmemleak.c
>>> index a182f5ddaf68..6b2af544aa0f 100644
>>> --- a/mm/kmemleak.c
>>> +++ b/mm/kmemleak.c
>>> @@ -1132,7 +1132,7 @@ EXPORT_SYMBOL(kmemleak_no_scan);
>>> void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int
>>> min_count,
>>> gfp_t gfp)
>>> {
>>> - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) <
>>> max_low_pfn)
>>> + if (!max_low_pfn || (PHYS_PFN(phys) >= min_low_pfn &&
>>> PHYS_PFN(phys) < max_low_pfn))
>>
>> Just skip checking will bring the crash possibility back. Seems it's
>> beyond
>> these interfaces' handle scope for this situation, since
>> "min_low_pfn" and
>> "max_low_pfn" are depending on arches.
>>
> Yes, for the cases beyond the pfn guard, users have to take care the
> boundary by themselves.
>
Could we record these early calling and deal with them when it's
ready? Is this appropriate?
I have an implementation based on this approach. Could you please
help to have a test on your machine as well? And someone to take
a look or review?
From 82cae75dfaa78f414faf85bb49133e95159c041a Mon Sep 17 00:00:00 2001
From: Patrick Wang <patrick.wang.shcn at gmail.com>
Date: Mon, 30 May 2022 18:38:23 +0800
Subject: [PATCH] mm: kmemleak: record early operations and handle later
The kmemleak_*_phys() interface uses "min_low_pfn" and
"max_low_pfn" to check address. But on some architectures,
kmemleak_*_phys() is called before those two variables
initialized. Record these early operations and handle them
when kmemleak_*_phys() are ready.
Signed-off-by: Patrick Wang <patrick.wang.shcn at gmail.com>
---
mm/kmemleak.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 158 insertions(+)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index a182f5ddaf68..a71e41b49ebc 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -164,6 +164,26 @@ struct kmemleak_object {
char comm[TASK_COMM_LEN]; /* executable name */
};
+/* maximum early recording count */
+#define EARLY_RECORDS 20
+
+/* early recording operation type */
+enum early_record_type {
+ record_alloc = 0,
+ record_free_part,
+ record_not_leak,
+ record_ignore
+};
+
+/* early recording operation */
+struct early_record_op {
+ enum early_record_type record_type;
+ phys_addr_t arg1;
+ size_t arg2;
+ int arg3;
+ gfp_t arg4;
+};
+
/* flag representing the memory block allocation status */
#define OBJECT_ALLOCATED (1 << 0)
/* flag set after the first reporting of an unreference object */
@@ -230,11 +250,26 @@ static int kmemleak_skip_disable;
/* If there are leaks that can be reported */
static bool kmemleak_found_leaks;
+/*
+ * Used to record early ops. Could recorded ops remain unhandled after
+ * initmem freed? Not likely.
+ */
+static struct early_record_op early_record_ops[EARLY_RECORDS] __initdata;
+static int early_record_op_count;
+/* indicate if recorded ops being handled */
+static bool early_record_in_handle;
+static DEFINE_RAW_SPINLOCK(early_record_lock);
+
static bool kmemleak_verbose;
module_param_named(verbose, kmemleak_verbose, bool, 0600);
static void kmemleak_disable(void);
+static void record_early_ops(enum early_record_type record_type,
+ phys_addr_t arg1, size_t arg2,
+ int arg3, gfp_t arg4) __init;
+static void handle_early_ops(void) __init;
+
/*
* Print a warning and dump the stack trace.
*/
@@ -1132,6 +1167,26 @@ EXPORT_SYMBOL(kmemleak_no_scan);
void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
gfp_t gfp)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ /*
+ * record_early_ops is in __init section, make sure
+ * text executable.
+ */
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_alloc, phys, size, min_count, gfp);
+ return;
+ }
+ /* Ready, handle recorded ops if they exit and not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ /*
+ * handle_early_ops is in __init section, make sure
+ * text executable.
+ */
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_alloc(__va(phys), size, min_count, gfp);
}
@@ -1146,6 +1201,18 @@ EXPORT_SYMBOL(kmemleak_alloc_phys);
*/
void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_free_part, phys, size, 0, 0);
+ return;
+ }
+ /* Ready, handle recorded ops if they exit and not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_free_part(__va(phys), size);
}
@@ -1158,6 +1225,18 @@ EXPORT_SYMBOL(kmemleak_free_part_phys);
*/
void __ref kmemleak_not_leak_phys(phys_addr_t phys)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_not_leak, phys, 0, 0, 0);
+ return;
+ }
+ /* Ready, handle recorded ops if they exit and not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_not_leak(__va(phys));
}
@@ -1170,11 +1249,90 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys);
*/
void __ref kmemleak_ignore_phys(phys_addr_t phys)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_ignore, phys, 0, 0, 0);
+ return;
+ }
+ /* Ready, handle recorded ops if they exit and not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_ignore(__va(phys));
}
EXPORT_SYMBOL(kmemleak_ignore_phys);
+/*
+ * Record early operation to early_record_ops array.
+ */
+static void __init record_early_ops(enum early_record_type record_type,
+ phys_addr_t arg1, size_t arg2,
+ int arg3, gfp_t arg4)
+{
+ struct early_record_op *op;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&early_record_lock, flags);
+ /* early_record_ops array full */
+ if (early_record_op_count >= EARLY_RECORDS)
+ goto out;
+
+ op = &early_record_ops[early_record_op_count];
+ op->record_type = record_type;
+ op->arg1 = arg1;
+ op->arg2 = arg2;
+ op->arg3 = arg3;
+ op->arg4 = arg4;
+
+ early_record_op_count++;
+out:
+ raw_spin_unlock_irqrestore(&early_record_lock, flags);
+}
+
+/*
+ * Handle the whole recorded operations.
+ */
+static void __init handle_early_ops(void)
+{
+ struct early_record_op *op = early_record_ops;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&early_record_lock, flags);
+ /* Indicate operations are being handled. */
+ early_record_in_handle = true;
+
+ while (early_record_op_count) {
+ /* Deal with operations according to their type. */
+ switch (op->record_type) {
+ case record_alloc:
+ kmemleak_alloc_phys(op->arg1, op->arg2,
+ op->arg3, op->arg4);
+ break;
+ case record_free_part:
+ kmemleak_free_part_phys(op->arg1, op->arg2);
+ break;
+ case record_not_leak:
+ kmemleak_not_leak_phys(op->arg1);
+ break;
+ case record_ignore:
+ kmemleak_ignore_phys(op->arg1);
+ break;
+ default:
+ break;
+ }
+
+ early_record_op_count--;
+ op++;
+ }
+
+ early_record_in_handle = false;
+ raw_spin_unlock_irqrestore(&early_record_lock, flags);
+}
+
/*
* Update an object's checksum and return true if it was modified.
*/
--
2.25.1
Thanks,
Patrick
More information about the linux-arm-kernel
mailing list