[openwrt/openwrt] kernel: mtk_bmt: refactor to avoid deep recursion

LEDE Commits lede-commits at lists.infradead.org
Sat Apr 29 03:59:53 PDT 2023


ynezz pushed a commit to openwrt/openwrt.git, branch master:
https://git.openwrt.org/626c84340d54c07f46f6ae111ff4389980a86c64

commit 626c84340d54c07f46f6ae111ff4389980a86c64
Author: Michał Kępień <openwrt at kempniu.pl>
AuthorDate: Sat Apr 29 07:41:32 2023 +0100

    kernel: mtk_bmt: refactor to avoid deep recursion
    
    A Linksys E8450 (mt7622) device running current master has recently
    started crashing:
    
        [    0.562900] mtk-ecc 1100e000.ecc: probed
        [    0.570254] spi-nand spi2.0: Fidelix SPI NAND was found.
        [    0.575576] spi-nand spi2.0: 128 MiB, block size: 128 KiB, page size: 2048, OOB size: 64
        [    0.583780] mtk-snand 1100d000.spi: ECC strength: 4 bits per 512 bytes
        [    0.682930] Insufficient stack space to handle exception!
        [    0.682939] ESR: 0x0000000096000047 -- DABT (current EL)
        [    0.682946] FAR: 0xffffffc008c47fe0
        [    0.682948] Task stack:     [0xffffffc008c48000..0xffffffc008c4c000]
        [    0.682951] IRQ stack:      [0xffffffc008008000..0xffffffc00800c000]
        [    0.682954] Overflow stack: [0xffffff801feb00a0..0xffffff801feb10a0]
        [    0.682959] CPU: 1 PID: 1 Comm: swapper/0 Tainted: G S                5.15.107 #0
        [    0.682966] Hardware name: Linksys E8450 (DT)
        [    0.682969] pstate: 800000c5 (Nzcv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
        [    0.682975] pc : dequeue_entity+0x0/0x250
        [    0.682988] lr : dequeue_task_fair+0x98/0x290
        [    0.682992] sp : ffffffc008c48030
        [    0.682994] x29: ffffffc008c48030 x28: 0000000000000001 x27: ffffff801feb6380
        [    0.683004] x26: 0000000000000001 x25: ffffff801feb6300 x24: ffffff8000068000
        [    0.683011] x23: 0000000000000001 x22: 0000000000000009 x21: 0000000000000000
        [    0.683017] x20: ffffff801feb6380 x19: ffffff8000068080 x18: 0000000017a740a6
        [    0.683024] x17: ffffffc008bae748 x16: ffffffc008bae6d8 x15: ffffffffffffffff
        [    0.683031] x14: ffffffffffffffff x13: 0000000000000000 x12: 0000000f00000101
        [    0.683038] x11: 0000000000000449 x10: 0000000000000127 x9 : 0000000000000000
        [    0.683044] x8 : 0000000000000125 x7 : 0000000000116da1 x6 : 0000000000116da1
        [    0.683051] x5 : 00000000001165a1 x4 : ffffff801feb6e00 x3 : 0000000000000000
        [    0.683058] x2 : 0000000000000009 x1 : ffffff8000068080 x0 : ffffff801feb6380
        [    0.683066] Kernel panic - not syncing: kernel stack overflow
        [    0.683069] SMP: stopping secondary CPUs
        [    1.648361] SMP: failed to stop secondary CPUs 0-1
        [    1.648366] Kernel Offset: disabled
        [    1.648368] CPU features: 0x00003000,00000802
        [    1.648372] Memory Limit: none
    
    Several factors contributed to this issue:
    
     1. The mtk_bmt driver recursively calls its scan_bmt() helper function
        during device initialization, while looking for a valid block
        mapping table (BMT).
    
     2. Commit fa4dc86e98 ("kernel: backport MEMREAD ioctl"):
    
          - increased the size of some stack-allocated structures (like
            struct mtd_oob_ops, used in bbt_nand_read(), which is indirectly
            called from scan_bmt()),
    
          - increased the stack size for some functions (for example,
            spinand_mtd_read(), which is indirectly called from scan_bmt(),
            now uses an extra stack-allocated struct mtd_ecc_stats).
    
     3. OpenWrt currently compiles the kernel with the
        -fno-optimize-sibling-calls flag, which prevents tail-call
        optimization.
    
    Collectively, all of these factors caused stack usage in the mtk_bmt
    driver to grow excessively large, triggering stack overflows.
    
    Recursion is not really necessary in scan_bmt() as it simply iterates
    over flash memory blocks in reverse order, looking for a valid BMT.
    Refactor the logic contained in the scan_bmt() and read_bmt() functions
    in target/linux/generic/files/drivers/mtd/nand/mtk_bmt_v2.c so that deep
    recursion is prevented (and therefore also any potential stack overflows
    it may cause).
    
    Link: https://lists.openwrt.org/pipermail/openwrt-devel/2023-April/040872.html
    Signed-off-by: Michał Kępień <openwrt at kempniu.pl>
---
 .../generic/files/drivers/mtd/nand/mtk_bmt_v2.c    | 55 ++++++++++------------
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/target/linux/generic/files/drivers/mtd/nand/mtk_bmt_v2.c b/target/linux/generic/files/drivers/mtd/nand/mtk_bmt_v2.c
index 2770376e98..6b06948c0f 100644
--- a/target/linux/generic/files/drivers/mtd/nand/mtk_bmt_v2.c
+++ b/target/linux/generic/files/drivers/mtd/nand/mtk_bmt_v2.c
@@ -191,36 +191,6 @@ static u16 get_bmt_index(struct bbmt *bmt)
 	return 0;
 }
 
-static int
-read_bmt(u16 block, unsigned char *dat, unsigned char *fdm, int fdm_len)
-{
-	u32 len = bmtd.bmt_pgs << bmtd.pg_shift;
-
-	return bbt_nand_read(blk_pg(block), dat, len, fdm, fdm_len);
-}
-
-static struct bbbt *scan_bmt(u16 block)
-{
-	u8 fdm[4];
-
-	if (block < bmtd.pool_lba)
-		return NULL;
-
-	if (read_bmt(block, bmtd.bbt_buf, fdm, sizeof(fdm)))
-		return scan_bmt(block - 1);
-
-	if (is_valid_bmt(bmtd.bbt_buf, fdm)) {
-		bmtd.bmt_blk_idx = get_bmt_index(bmt_tbl((struct bbbt *)bmtd.bbt_buf));
-		if (bmtd.bmt_blk_idx == 0) {
-			pr_info("[BBT] FATAL ERR: bmt block index is wrong!\n");
-			return NULL;
-		}
-		pr_info("[BBT] BMT.v2 is found at 0x%x\n", block);
-		return (struct bbbt *)bmtd.bbt_buf;
-	} else
-		return scan_bmt(block - 1);
-}
-
 /* Write the Burner Bad Block Table to Nand Flash
  * n - write BMT to bmt_tbl[n]
  */
@@ -472,7 +442,30 @@ static int mtk_bmt_init_v2(struct device_node *np)
 	/* Scanning start from the first page of the last block
 	 * of whole flash
 	 */
-	bmtd.bbt = scan_bmt(bmtd.total_blks - 1);
+	bmtd.bbt = NULL;
+	for (u16 block = bmtd.total_blks - 1; !bmtd.bbt && block >= bmtd.pool_lba; block--) {
+		u8 fdm[4];
+
+		if (bbt_nand_read(blk_pg(block), bmtd.bbt_buf, bufsz, fdm, sizeof(fdm))) {
+			/* Read failed, try the previous block */
+			continue;
+		}
+
+		if (!is_valid_bmt(bmtd.bbt_buf, fdm)) {
+			/* No valid BMT found, try the previous block */
+			continue;
+		}
+
+		bmtd.bmt_blk_idx = get_bmt_index(bmt_tbl((struct bbbt *)bmtd.bbt_buf));
+		if (bmtd.bmt_blk_idx == 0) {
+			pr_info("[BBT] FATAL ERR: bmt block index is wrong!\n");
+			break;
+		}
+
+		pr_info("[BBT] BMT.v2 is found at 0x%x\n", block);
+		bmtd.bbt = (struct bbbt *)bmtd.bbt_buf;
+	}
+
 	if (!bmtd.bbt) {
 		/* BMT not found */
 		if (bmtd.total_blks > BB_TABLE_MAX + BMT_TABLE_MAX) {




More information about the lede-commits mailing list