LCOV - code coverage report
Current view: top level - libubifs - recovery.c (source / functions) Hit Total Coverage
Test: a simple test Lines: 408 483 84.5 %
Date: 2024-06-05 20:10:43 Functions: 20 21 95.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-only
       2             : /*
       3             :  * This file is part of UBIFS.
       4             :  *
       5             :  * Copyright (C) 2006-2008 Nokia Corporation
       6             :  *
       7             :  * Authors: Adrian Hunter
       8             :  *          Artem Bityutskiy (Битюцкий Артём)
       9             :  */
      10             : 
      11             : /*
      12             :  * This file implements functions needed to recover from unclean un-mounts.
      13             :  * When UBIFS is mounted, it checks a flag on the master node to determine if
      14             :  * an un-mount was completed successfully. If not, the process of mounting
      15             :  * incorporates additional checking and fixing of on-flash data structures.
      16             :  * UBIFS always cleans away all remnants of an unclean un-mount, so that
      17             :  * errors do not accumulate. However UBIFS defers recovery if it is mounted
      18             :  * read-only, and the flash is not modified in that case.
      19             :  *
      20             :  * The general UBIFS approach to the recovery is that it recovers from
      21             :  * corruptions which could be caused by power cuts, but it refuses to recover
      22             :  * from corruption caused by other reasons. And UBIFS tries to distinguish
      23             :  * between these 2 reasons of corruptions and silently recover in the former
      24             :  * case and loudly complain in the latter case.
      25             :  *
      26             :  * UBIFS writes only to erased LEBs, so it writes only to the flash space
      27             :  * containing only 0xFFs. UBIFS also always writes strictly from the beginning
      28             :  * of the LEB to the end. And UBIFS assumes that the underlying flash media
      29             :  * writes in @c->max_write_size bytes at a time.
      30             :  *
      31             :  * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
      32             :  * I/O unit corresponding to offset X to contain corrupted data, all the
      33             :  * following min. I/O units have to contain empty space (all 0xFFs). If this is
      34             :  * not true, the corruption cannot be the result of a power cut, and UBIFS
      35             :  * refuses to mount.
      36             :  */
      37             : 
      38             : #include <sys/types.h>
      39             : 
      40             : #include "linux_err.h"
      41             : #include "bitops.h"
      42             : #include "kmem.h"
      43             : #include "crc32.h"
      44             : #include "ubifs.h"
      45             : #include "defs.h"
      46             : #include "debug.h"
      47             : #include "key.h"
      48             : #include "misc.h"
      49             : 
      50             : /**
      51             :  * is_empty - determine whether a buffer is empty (contains all 0xff).
      52             :  * @buf: buffer to clean
      53             :  * @len: length of buffer
      54             :  *
      55             :  * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
      56             :  * %0 is returned.
      57             :  */
      58             : static int is_empty(void *buf, int len)
      59             : {
      60        6404 :         uint8_t *p = buf;
      61             :         int i;
      62             : 
      63   237336282 :         for (i = 0; i < len; i++)
      64   237333718 :                 if (*p++ != 0xff)
      65             :                         return 0;
      66             :         return 1;
      67             : }
      68             : 
      69             : /**
      70             :  * first_non_ff - find offset of the first non-0xff byte.
      71             :  * @buf: buffer to search in
      72             :  * @len: length of buffer
      73             :  *
      74             :  * This function returns offset of the first non-0xff byte in @buf or %-1 if
      75             :  * the buffer contains only 0xff bytes.
      76             :  */
      77             : static int first_non_ff(void *buf, int len)
      78             : {
      79             :         uint8_t *p = buf;
      80             :         int i;
      81             : 
      82      158576 :         for (i = 0; i < len; i++)
      83      158593 :                 if (*p++ != 0xff)
      84             :                         return i;
      85             :         return -1;
      86             : }
      87             : 
      88             : /**
      89             :  * get_master_node - get the last valid master node allowing for corruption.
      90             :  * @c: UBIFS file-system description object
      91             :  * @lnum: LEB number
      92             :  * @pbuf: buffer containing the LEB read, is returned here
      93             :  * @mst: master node, if found, is returned here
      94             :  * @cor: corruption, if found, is returned here
      95             :  *
      96             :  * This function allocates a buffer, reads the LEB into it, and finds and
      97             :  * returns the last valid master node allowing for one area of corruption.
      98             :  * The corrupt area, if there is one, must be consistent with the assumption
      99             :  * that it is the result of an unclean unmount while the master node was being
     100             :  * written. Under those circumstances, it is valid to use the previously written
     101             :  * master node.
     102             :  *
     103             :  * This function returns %0 on success and a negative error code on failure.
     104             :  */
     105          58 : static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
     106             :                            struct ubifs_mst_node **mst, void **cor)
     107             : {
     108          58 :         const int sz = c->mst_node_alsz;
     109             :         int err, offs, len;
     110             :         void *sbuf, *buf;
     111             : 
     112          58 :         sbuf = vmalloc(c->leb_size);
     113          58 :         if (!sbuf)
     114             :                 return -ENOMEM;
     115             : 
     116          58 :         err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
     117          58 :         if (err && err != -EBADMSG)
     118             :                 goto out_free;
     119             : 
     120             :         /* Find the first position that is definitely not a node */
     121          58 :         offs = 0;
     122          58 :         buf = sbuf;
     123          58 :         len = c->leb_size;
     124        2515 :         while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
     125        2455 :                 struct ubifs_ch *ch = buf;
     126             : 
     127        2455 :                 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
     128             :                         break;
     129        2399 :                 offs += sz;
     130        2399 :                 buf  += sz;
     131        2399 :                 len  -= sz;
     132             :         }
     133             :         /* See if there was a valid master node before that */
     134          58 :         if (offs) {
     135             :                 int ret;
     136             : 
     137          55 :                 offs -= sz;
     138          55 :                 buf  -= sz;
     139          55 :                 len  += sz;
     140          55 :                 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
     141          55 :                 if (ret != SCANNED_A_NODE && offs) {
     142             :                         /* Could have been corruption so check one place back */
     143           9 :                         offs -= sz;
     144           9 :                         buf  -= sz;
     145           9 :                         len  += sz;
     146           9 :                         ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
     147           9 :                         if (ret != SCANNED_A_NODE)
     148             :                                 /*
     149             :                                  * We accept only one area of corruption because
     150             :                                  * we are assuming that it was caused while
     151             :                                  * trying to write a master node.
     152             :                                  */
     153             :                                 goto out_err;
     154             :                 }
     155          55 :                 if (ret == SCANNED_A_NODE) {
     156          55 :                         struct ubifs_ch *ch = buf;
     157             : 
     158          55 :                         if (ch->node_type != UBIFS_MST_NODE)
     159             :                                 goto out_err;
     160          55 :                         dbg_rcvry("found a master node at %d:%d", lnum, offs);
     161          55 :                         *mst = buf;
     162          55 :                         offs += sz;
     163          55 :                         buf  += sz;
     164          55 :                         len  -= sz;
     165             :                 }
     166             :         }
     167             :         /* Check for corruption */
     168          58 :         if (offs < c->leb_size) {
     169         112 :                 if (!is_empty(buf, min_t(int, len, sz))) {
     170           9 :                         *cor = buf;
     171           9 :                         dbg_rcvry("found corruption at %d:%d", lnum, offs);
     172             :                 }
     173          56 :                 offs += sz;
     174          56 :                 buf  += sz;
     175          56 :                 len  -= sz;
     176             :         }
     177             :         /* Check remaining empty space */
     178          58 :         if (offs < c->leb_size)
     179          56 :                 if (!is_empty(buf, len))
     180             :                         goto out_err;
     181          58 :         *pbuf = sbuf;
     182          58 :         return 0;
     183             : 
     184           0 : out_err:
     185             :         set_failure_reason_callback(c, FR_DATA_CORRUPTED);
     186             :         err = -EINVAL;
     187           0 : out_free:
     188           0 :         vfree(sbuf);
     189           0 :         *mst = NULL;
     190           0 :         *cor = NULL;
     191           0 :         return err;
     192             : }
     193             : 
     194             : /**
     195             :  * write_rcvrd_mst_node - write recovered master node.
     196             :  * @c: UBIFS file-system description object
     197             :  * @mst: master node
     198             :  *
     199             :  * This function returns %0 on success and a negative error code on failure.
     200             :  */
     201          28 : static int write_rcvrd_mst_node(struct ubifs_info *c,
     202             :                                 struct ubifs_mst_node *mst)
     203             : {
     204          28 :         int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
     205             :         __le32 save_flags;
     206             : 
     207          28 :         dbg_rcvry("recovery");
     208             : 
     209          28 :         save_flags = mst->flags;
     210          28 :         mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
     211             : 
     212          28 :         err = ubifs_prepare_node_hmac(c, mst, UBIFS_MST_NODE_SZ,
     213             :                                       offsetof(struct ubifs_mst_node, hmac), 1);
     214          28 :         if (err)
     215             :                 goto out;
     216          28 :         err = ubifs_leb_change(c, lnum, mst, sz);
     217          28 :         if (err)
     218             :                 goto out;
     219          28 :         err = ubifs_leb_change(c, lnum + 1, mst, sz);
     220             :         if (err)
     221             :                 goto out;
     222          28 : out:
     223          28 :         mst->flags = save_flags;
     224          28 :         return err;
     225             : }
     226             : 
     227             : /**
     228             :  * ubifs_recover_master_node - recover the master node.
     229             :  * @c: UBIFS file-system description object
     230             :  *
     231             :  * This function recovers the master node from corruption that may occur due to
     232             :  * an unclean unmount.
     233             :  *
     234             :  * This function returns %0 on success and a negative error code on failure.
     235             :  */
     236          29 : int ubifs_recover_master_node(struct ubifs_info *c)
     237             : {
     238          29 :         void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
     239          29 :         struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
     240          29 :         const int sz = c->mst_node_alsz;
     241             :         int err, offs1, offs2;
     242             : 
     243          29 :         dbg_rcvry("recovery");
     244             : 
     245          29 :         err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
     246          29 :         if (err)
     247             :                 goto out_free;
     248             : 
     249          29 :         err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
     250          29 :         if (err)
     251             :                 goto out_free;
     252             : 
     253          29 :         if (mst1) {
     254          27 :                 offs1 = (void *)mst1 - buf1;
     255          27 :                 if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
     256           0 :                     (offs1 == 0 && !cor1)) {
     257             :                         /*
     258             :                          * mst1 was written by recovery at offset 0 with no
     259             :                          * corruption.
     260             :                          */
     261           0 :                         dbg_rcvry("recovery recovery");
     262           0 :                         mst = mst1;
     263          27 :                 } else if (mst2) {
     264          26 :                         offs2 = (void *)mst2 - buf2;
     265          26 :                         if (offs1 == offs2) {
     266             :                                 /* Same offset, so must be the same */
     267           4 :                                 if (ubifs_compare_master_node(c, mst1, mst2))
     268             :                                         goto out_err;
     269           4 :                                 mst = mst1;
     270          22 :                         } else if (offs2 + sz == offs1) {
     271             :                                 /* 1st LEB was written, 2nd was not */
     272          22 :                                 if (cor1)
     273             :                                         goto out_err;
     274             :                                 mst = mst1;
     275           0 :                         } else if (offs1 == 0 &&
     276           0 :                                    c->leb_size - offs2 - sz < sz) {
     277             :                                 /* 1st LEB was unmapped and written, 2nd not */
     278           0 :                                 if (cor1)
     279             :                                         goto out_err;
     280             :                                 mst = mst1;
     281             :                         } else
     282             :                                 goto out_err;
     283             :                 } else {
     284             :                         /*
     285             :                          * 2nd LEB was unmapped and about to be written, so
     286             :                          * there must be only one master node in the first LEB
     287             :                          * and no corruption.
     288             :                          */
     289           1 :                         if (offs1 != 0 || cor1)
     290             :                                 goto out_err;
     291             :                         mst = mst1;
     292             :                 }
     293             :         } else {
     294           2 :                 if (!mst2)
     295             :                         goto out_err;
     296             :                 /*
     297             :                  * 1st LEB was unmapped and about to be written, so there must
     298             :                  * be no room left in 2nd LEB.
     299             :                  */
     300           2 :                 offs2 = (void *)mst2 - buf2;
     301           2 :                 if (offs2 + sz + sz <= c->leb_size)
     302             :                         goto out_err;
     303             :                 mst = mst2;
     304             :         }
     305             : 
     306          28 :         ubifs_msg(c, "recovered master node from LEB %d",
     307             :                   (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
     308             : 
     309          28 :         memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
     310             : 
     311          28 :         if (c->ro_mount) {
     312             :                 /* Read-only mode. Keep a copy for switching to rw mode */
     313           0 :                 c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
     314           0 :                 if (!c->rcvrd_mst_node) {
     315             :                         err = -ENOMEM;
     316             :                         goto out_free;
     317             :                 }
     318           0 :                 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
     319             : 
     320             :                 /*
     321             :                  * We had to recover the master node, which means there was an
     322             :                  * unclean reboot. However, it is possible that the master node
     323             :                  * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
     324             :                  * E.g., consider the following chain of events:
     325             :                  *
     326             :                  * 1. UBIFS was cleanly unmounted, so the master node is clean
     327             :                  * 2. UBIFS is being mounted R/W and starts changing the master
     328             :                  *    node in the first (%UBIFS_MST_LNUM). A power cut happens,
     329             :                  *    so this LEB ends up with some amount of garbage at the
     330             :                  *    end.
     331             :                  * 3. UBIFS is being mounted R/O. We reach this place and
     332             :                  *    recover the master node from the second LEB
     333             :                  *    (%UBIFS_MST_LNUM + 1). But we cannot update the media
     334             :                  *    because we are being mounted R/O. We have to defer the
     335             :                  *    operation.
     336             :                  * 4. However, this master node (@c->mst_node) is marked as
     337             :                  *    clean (since the step 1). And if we just return, the
     338             :                  *    mount code will be confused and won't recover the master
     339             :                  *    node when it is re-mounter R/W later.
     340             :                  *
     341             :                  *    Thus, to force the recovery by marking the master node as
     342             :                  *    dirty.
     343             :                  */
     344           0 :                 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
     345             :         } else {
     346             :                 /* Write the recovered master node */
     347          28 :                 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
     348          28 :                 err = write_rcvrd_mst_node(c, c->mst_node);
     349          28 :                 if (err)
     350             :                         goto out_free;
     351             :         }
     352             : 
     353          28 :         vfree(buf2);
     354          28 :         vfree(buf1);
     355             : 
     356          28 :         return 0;
     357             : 
     358           1 : out_err:
     359             :         set_failure_reason_callback(c, FR_DATA_CORRUPTED);
     360             :         err = -EINVAL;
     361           1 : out_free:
     362           1 :         ubifs_err(c, "failed to recover master node");
     363           1 :         if (mst1) {
     364           1 :                 ubifs_err(c, "dumping first master node");
     365           1 :                 ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1));
     366             :         }
     367           1 :         if (mst2) {
     368           0 :                 ubifs_err(c, "dumping second master node");
     369           0 :                 ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2));
     370             :         }
     371           1 :         vfree(buf2);
     372           1 :         vfree(buf1);
     373           1 :         return err;
     374             : }
     375             : 
     376             : /**
     377             :  * is_last_write - determine if an offset was in the last write to a LEB.
     378             :  * @c: UBIFS file-system description object
     379             :  * @buf: buffer to check
     380             :  * @offs: offset to check
     381             :  *
     382             :  * This function returns %1 if @offs was in the last write to the LEB whose data
     383             :  * is in @buf, otherwise %0 is returned. The determination is made by checking
     384             :  * for subsequent empty space starting from the next @c->max_write_size
     385             :  * boundary.
     386             :  */
     387             : static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
     388             : {
     389             :         int empty_offs, check_len;
     390             :         uint8_t *p;
     391             : 
     392             :         /*
     393             :          * Round up to the next @c->max_write_size boundary i.e. @offs is in
     394             :          * the last wbuf written. After that should be empty space.
     395             :          */
     396         476 :         empty_offs = ALIGN(offs + 1, c->max_write_size);
     397         476 :         check_len = c->leb_size - empty_offs;
     398         476 :         p = buf + empty_offs - offs;
     399         476 :         return is_empty(p, check_len);
     400             : }
     401             : 
     402             : /**
     403             :  * clean_buf - clean the data from an LEB sitting in a buffer.
     404             :  * @c: UBIFS file-system description object
     405             :  * @buf: buffer to clean
     406             :  * @lnum: LEB number to clean
     407             :  * @offs: offset from which to clean
     408             :  * @len: length of buffer
     409             :  *
     410             :  * This function pads up to the next min_io_size boundary (if there is one) and
     411             :  * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
     412             :  * @c->min_io_size boundary.
     413             :  */
     414        2764 : static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
     415             :                       int *offs, int *len)
     416             : {
     417             :         int empty_offs, pad_len;
     418             : 
     419        2764 :         dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
     420             : 
     421        2764 :         ubifs_assert(c, !(*offs & 7));
     422        2764 :         empty_offs = ALIGN(*offs, c->min_io_size);
     423        2764 :         pad_len = empty_offs - *offs;
     424        2764 :         ubifs_pad(c, *buf, pad_len);
     425        2764 :         *offs += pad_len;
     426        2764 :         *buf += pad_len;
     427        2764 :         *len -= pad_len;
     428        2764 :         memset(*buf, 0xff, c->leb_size - empty_offs);
     429        2764 : }
     430             : 
     431             : /**
     432             :  * no_more_nodes - determine if there are no more nodes in a buffer.
     433             :  * @c: UBIFS file-system description object
     434             :  * @buf: buffer to check
     435             :  * @len: length of buffer
     436             :  * @lnum: LEB number of the LEB from which @buf was read
     437             :  * @offs: offset from which @buf was read
     438             :  *
     439             :  * This function ensures that the corrupted node at @offs is the last thing
     440             :  * written to a LEB. This function returns %1 if more data is not found and
     441             :  * %0 if more data is found.
     442             :  */
     443        1837 : static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
     444             :                         int lnum, int offs)
     445             : {
     446        1837 :         struct ubifs_ch *ch = buf;
     447        1837 :         int skip, dlen = le32_to_cpu(ch->len);
     448             : 
     449             :         /* Check for empty space after the corrupt node's common header */
     450        1837 :         skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
     451        3674 :         if (is_empty(buf + skip, len - skip))
     452             :                 return 1;
     453             :         /*
     454             :          * The area after the common header size is not empty, so the common
     455             :          * header must be intact. Check it.
     456             :          */
     457        1665 :         if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) {
     458         210 :                 dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
     459             :                 return 0;
     460             :         }
     461             :         /* Now we know the corrupt node's length we can skip over it */
     462        1455 :         skip = ALIGN(offs + dlen, c->max_write_size) - offs;
     463             :         /* After which there should be empty space */
     464        2910 :         if (is_empty(buf + skip, len - skip))
     465             :                 return 1;
     466        1426 :         dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
     467             :         return 0;
     468             : }
     469             : 
     470             : /**
     471             :  * fix_unclean_leb - fix an unclean LEB.
     472             :  * @c: UBIFS file-system description object
     473             :  * @sleb: scanned LEB information
     474             :  * @start: offset where scan started
     475             :  */
     476        2764 : static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
     477             :                            int start)
     478             : {
     479        2764 :         int lnum = sleb->lnum, endpt = start;
     480             : 
     481             :         /* Get the end offset of the last node we are keeping */
     482        5528 :         if (!list_empty(&sleb->nodes)) {
     483             :                 struct ubifs_scan_node *snod;
     484             : 
     485        1125 :                 snod = list_entry(sleb->nodes.prev,
     486             :                                   struct ubifs_scan_node, list);
     487        1125 :                 endpt = snod->offs + snod->len;
     488             :         }
     489             : 
     490        2764 :         if (c->ro_mount && !c->remounting_rw) {
     491             :                 /* Add to recovery list */
     492             :                 struct ubifs_unclean_leb *ucleb;
     493             : 
     494         294 :                 dbg_rcvry("need to fix LEB %d start %d endpt %d",
     495             :                           lnum, start, sleb->endpt);
     496         294 :                 ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
     497         294 :                 if (!ucleb)
     498             :                         return -ENOMEM;
     499         294 :                 ucleb->lnum = lnum;
     500         294 :                 ucleb->endpt = endpt;
     501         294 :                 list_add_tail(&ucleb->list, &c->unclean_leb_list);
     502             :         } else {
     503             :                 /* Write the fixed LEB back to flash */
     504             :                 int err;
     505             : 
     506        2470 :                 dbg_rcvry("fixing LEB %d start %d endpt %d",
     507             :                           lnum, start, sleb->endpt);
     508        2470 :                 if (endpt == 0) {
     509          28 :                         err = ubifs_leb_unmap(c, lnum);
     510          28 :                         if (err)
     511             :                                 return err;
     512             :                 } else {
     513        2442 :                         int len = ALIGN(endpt, c->min_io_size);
     514             : 
     515        2442 :                         if (start) {
     516        2083 :                                 err = ubifs_leb_read(c, lnum, sleb->buf, 0,
     517             :                                                      start, 1);
     518        2083 :                                 if (err && err != -EBADMSG)
     519             :                                         return err;
     520             :                         }
     521             :                         /* Pad to min_io_size */
     522        2442 :                         if (len > endpt) {
     523         896 :                                 int pad_len = len - ALIGN(endpt, 8);
     524             : 
     525         896 :                                 if (pad_len > 0) {
     526         860 :                                         void *buf = sleb->buf + len - pad_len;
     527             : 
     528         860 :                                         ubifs_pad(c, buf, pad_len);
     529             :                                 }
     530             :                         }
     531        2442 :                         err = ubifs_leb_change(c, lnum, sleb->buf, len);
     532        2442 :                         if (err)
     533             :                                 return err;
     534             :                 }
     535             :         }
     536             :         return 0;
     537             : }
     538             : 
     539             : /**
     540             :  * drop_last_group - drop the last group of nodes.
     541             :  * @sleb: scanned LEB information
     542             :  * @offs: offset of dropped nodes is returned here
     543             :  *
     544             :  * This is a helper function for 'ubifs_recover_leb()' which drops the last
     545             :  * group of nodes of the scanned LEB.
     546             :  */
     547        2261 : static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
     548             : {
     549        6909 :         while (!list_empty(&sleb->nodes)) {
     550             :                 struct ubifs_scan_node *snod;
     551             :                 struct ubifs_ch *ch;
     552             : 
     553        1030 :                 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
     554             :                                   list);
     555        1030 :                 ch = snod->node;
     556        1030 :                 if (ch->group_type != UBIFS_IN_NODE_GROUP)
     557             :                         break;
     558             : 
     559          63 :                 dbg_rcvry("dropping grouped node at %d:%d",
     560             :                           sleb->lnum, snod->offs);
     561          63 :                 *offs = snod->offs;
     562         126 :                 list_del(&snod->list);
     563          63 :                 kfree(snod);
     564          63 :                 sleb->nodes_cnt -= 1;
     565             :         }
     566        2261 : }
     567             : 
     568             : /**
     569             :  * drop_last_node - drop the last node.
     570             :  * @sleb: scanned LEB information
     571             :  * @offs: offset of dropped nodes is returned here
     572             :  *
     573             :  * This is a helper function for 'ubifs_recover_leb()' which drops the last
     574             :  * node of the scanned LEB.
     575             :  */
     576         889 : static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
     577             : {
     578             :         struct ubifs_scan_node *snod;
     579             : 
     580        1778 :         if (!list_empty(&sleb->nodes)) {
     581         889 :                 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
     582             :                                   list);
     583             : 
     584         889 :                 dbg_rcvry("dropping last node at %d:%d",
     585             :                           sleb->lnum, snod->offs);
     586         889 :                 *offs = snod->offs;
     587        1778 :                 list_del(&snod->list);
     588         889 :                 kfree(snod);
     589         889 :                 sleb->nodes_cnt -= 1;
     590             :         }
     591         889 : }
     592             : 
     593             : /**
     594             :  * ubifs_recover_leb - scan and recover a LEB.
     595             :  * @c: UBIFS file-system description object
     596             :  * @lnum: LEB number
     597             :  * @offs: offset
     598             :  * @sbuf: LEB-sized buffer to use
     599             :  * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
     600             :  *         belong to any journal head)
     601             :  *
     602             :  * This function does a scan of a LEB, but caters for errors that might have
     603             :  * been caused by the unclean unmount from which we are attempting to recover.
     604             :  * Returns the scanned information on success and a negative error code on
     605             :  * failure.
     606             :  */
     607        4876 : struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
     608             :                                          int offs, void *sbuf, int jhead)
     609             : {
     610        4876 :         int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
     611        4876 :         int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
     612             :         struct ubifs_scan_leb *sleb;
     613        4876 :         void *buf = sbuf + offs;
     614             : 
     615        4876 :         dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
     616             : 
     617        4876 :         sleb = ubifs_start_scan(c, lnum, offs, sbuf);
     618        4876 :         if (IS_ERR(sleb))
     619             :                 return sleb;
     620             : 
     621        4876 :         ubifs_assert(c, len >= 8);
     622      864133 :         while (len >= 8) {
     623      864051 :                 dbg_scan("look at LEB %d:%d (%d bytes left)",
     624             :                          lnum, offs, len);
     625             : 
     626             :                 cond_resched();
     627             : 
     628             :                 /*
     629             :                  * Scan quietly until there is an error from which we cannot
     630             :                  * recover
     631             :                  */
     632      864051 :                 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
     633      864051 :                 if (ret == SCANNED_A_NODE) {
     634             :                         /* A valid node, and not a padding node */
     635      840970 :                         struct ubifs_ch *ch = buf;
     636             :                         int node_len;
     637             : 
     638      840970 :                         err = ubifs_add_snod(c, sleb, buf, offs);
     639      840970 :                         if (err)
     640             :                                 goto error;
     641      840970 :                         node_len = ALIGN(le32_to_cpu(ch->len), 8);
     642      840970 :                         offs += node_len;
     643      840970 :                         buf += node_len;
     644      840970 :                         len -= node_len;
     645       23081 :                 } else if (ret > 0) {
     646             :                         /* Padding bytes or a valid padding node */
     647       18287 :                         offs += ret;
     648       18287 :                         buf += ret;
     649       18287 :                         len -= ret;
     650        9588 :                 } else if (ret == SCANNED_EMPTY_SPACE ||
     651             :                            ret == SCANNED_GARBAGE     ||
     652        4794 :                            ret == SCANNED_A_BAD_PAD_NODE ||
     653             :                            ret == SCANNED_A_CORRUPT_NODE) {
     654        4794 :                         dbg_rcvry("found corruption (%d) at %d:%d",
     655             :                                   ret, lnum, offs);
     656             :                         break;
     657             :                 } else {
     658           0 :                         ubifs_err(c, "unexpected return value %d", ret);
     659             :                         err = -EINVAL;
     660             :                         goto error;
     661             :                 }
     662             :         }
     663             : 
     664        4876 :         if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
     665         918 :                 if (!is_last_write(c, buf, offs))
     666             :                         goto corrupted_rescan;
     667        4417 :         } else if (ret == SCANNED_A_CORRUPT_NODE) {
     668        1837 :                 if (!no_more_nodes(c, buf, len, lnum, offs))
     669             :                         goto corrupted_rescan;
     670        5160 :         } else if (!is_empty(buf, len)) {
     671          34 :                 if (!is_last_write(c, buf, offs)) {
     672          17 :                         int corruption = first_non_ff(buf, len);
     673             : 
     674             :                         /*
     675             :                          * See header comment for this file for more
     676             :                          * explanations about the reasons we have this check.
     677             :                          */
     678          17 :                         ubifs_err(c, "corrupt empty space LEB %d:%d, corruption starts at %d",
     679             :                                   lnum, offs, corruption);
     680             :                         /* Make sure we dump interesting non-0xFF data */
     681          17 :                         offs += corruption;
     682          17 :                         buf += corruption;
     683          17 :                         goto corrupted;
     684             :                 }
     685             :         }
     686             : 
     687        2764 :         min_io_unit = round_down(offs, c->min_io_size);
     688        2764 :         if (grouped)
     689             :                 /*
     690             :                  * If nodes are grouped, always drop the incomplete group at
     691             :                  * the end.
     692             :                  */
     693        2261 :                 drop_last_group(sleb, &offs);
     694             : 
     695        2764 :         if (jhead == GCHD) {
     696             :                 /*
     697             :                  * If this LEB belongs to the GC head then while we are in the
     698             :                  * middle of the same min. I/O unit keep dropping nodes. So
     699             :                  * basically, what we want is to make sure that the last min.
     700             :                  * I/O unit where we saw the corruption is dropped completely
     701             :                  * with all the uncorrupted nodes which may possibly sit there.
     702             :                  *
     703             :                  * In other words, let's name the min. I/O unit where the
     704             :                  * corruption starts B, and the previous min. I/O unit A. The
     705             :                  * below code tries to deal with a situation when half of B
     706             :                  * contains valid nodes or the end of a valid node, and the
     707             :                  * second half of B contains corrupted data or garbage. This
     708             :                  * means that UBIFS had been writing to B just before the power
     709             :                  * cut happened. I do not know how realistic is this scenario
     710             :                  * that half of the min. I/O unit had been written successfully
     711             :                  * and the other half not, but this is possible in our 'failure
     712             :                  * mode emulation' infrastructure at least.
     713             :                  *
     714             :                  * So what is the problem, why we need to drop those nodes? Why
     715             :                  * can't we just clean-up the second half of B by putting a
     716             :                  * padding node there? We can, and this works fine with one
     717             :                  * exception which was reproduced with power cut emulation
     718             :                  * testing and happens extremely rarely.
     719             :                  *
     720             :                  * Imagine the file-system is full, we run GC which starts
     721             :                  * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
     722             :                  * the current GC head LEB). The @c->gc_lnum is -1, which means
     723             :                  * that GC will retain LEB X and will try to continue. Imagine
     724             :                  * that LEB X is currently the dirtiest LEB, and the amount of
     725             :                  * used space in LEB Y is exactly the same as amount of free
     726             :                  * space in LEB X.
     727             :                  *
     728             :                  * And a power cut happens when nodes are moved from LEB X to
     729             :                  * LEB Y. We are here trying to recover LEB Y which is the GC
     730             :                  * head LEB. We find the min. I/O unit B as described above.
     731             :                  * Then we clean-up LEB Y by padding min. I/O unit. And later
     732             :                  * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
     733             :                  * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
     734             :                  * does not match because the amount of valid nodes there does
     735             :                  * not fit the free space in LEB Y any more! And this is
     736             :                  * because of the padding node which we added to LEB Y. The
     737             :                  * user-visible effect of this which I once observed and
     738             :                  * analysed is that we cannot mount the file-system with
     739             :                  * -ENOSPC error.
     740             :                  *
     741             :                  * So obviously, to make sure that situation does not happen we
     742             :                  * should free min. I/O unit B in LEB Y completely and the last
     743             :                  * used min. I/O unit in LEB Y should be A. This is basically
     744             :                  * what the below code tries to do.
     745             :                  */
     746        1392 :                 while (offs > min_io_unit)
     747         889 :                         drop_last_node(sleb, &offs);
     748             :         }
     749             : 
     750        2764 :         buf = sbuf + offs;
     751        2764 :         len = c->leb_size - offs;
     752             : 
     753        2764 :         clean_buf(c, &buf, lnum, &offs, &len);
     754        2764 :         ubifs_end_scan(c, sleb, lnum, offs);
     755             : 
     756        2764 :         err = fix_unclean_leb(c, sleb, start);
     757        2764 :         if (err)
     758             :                 goto error;
     759             : 
     760             :         return sleb;
     761             : 
     762        2095 : corrupted_rescan:
     763             :         /* Re-scan the corrupted data with verbose messages */
     764        2095 :         ubifs_err(c, "corruption %d", ret);
     765        2095 :         ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
     766        2112 : corrupted:
     767        2112 :         set_failure_reason_callback(c, FR_DATA_CORRUPTED);
     768        2112 :         ubifs_scanned_corruption(c, lnum, offs, buf);
     769        2112 :         err = -EUCLEAN;
     770        2112 : error:
     771        2112 :         ubifs_err(c, "LEB %d scanning failed", lnum);
     772        2112 :         ubifs_scan_destroy(sleb);
     773        4224 :         return ERR_PTR(err);
     774             : }
     775             : 
     776             : /**
     777             :  * get_cs_sqnum - get commit start sequence number.
     778             :  * @c: UBIFS file-system description object
     779             :  * @lnum: LEB number of commit start node
     780             :  * @offs: offset of commit start node
     781             :  * @cs_sqnum: commit start sequence number is returned here
     782             :  *
     783             :  * This function returns %0 on success and a negative error code on failure.
     784             :  */
     785           0 : static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
     786             :                         unsigned long long *cs_sqnum)
     787             : {
     788           0 :         struct ubifs_cs_node *cs_node = NULL;
     789             :         int err, ret;
     790             : 
     791           0 :         dbg_rcvry("at %d:%d", lnum, offs);
     792           0 :         cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
     793           0 :         if (!cs_node)
     794             :                 return -ENOMEM;
     795           0 :         if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
     796             :                 goto out_err;
     797           0 :         err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
     798             :                              UBIFS_CS_NODE_SZ, 0);
     799           0 :         if (err && err != -EBADMSG)
     800             :                 goto out_free;
     801           0 :         ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
     802           0 :         if (ret != SCANNED_A_NODE) {
     803           0 :                 ubifs_err(c, "Not a valid node");
     804             :                 goto out_err;
     805             :         }
     806           0 :         if (cs_node->ch.node_type != UBIFS_CS_NODE) {
     807           0 :                 ubifs_err(c, "Not a CS node, type is %d", cs_node->ch.node_type);
     808             :                 goto out_err;
     809             :         }
     810           0 :         if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
     811           0 :                 ubifs_err(c, "CS node cmt_no %llu != current cmt_no %llu",
     812             :                           (unsigned long long)le64_to_cpu(cs_node->cmt_no),
     813             :                           c->cmt_no);
     814             :                 goto out_err;
     815             :         }
     816           0 :         *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
     817           0 :         dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
     818           0 :         kfree(cs_node);
     819           0 :         return 0;
     820             : 
     821           0 : out_err:
     822           0 :         err = -EINVAL;
     823             :         set_failure_reason_callback(c, FR_DATA_CORRUPTED);
     824           0 : out_free:
     825           0 :         ubifs_err(c, "failed to get CS sqnum");
     826           0 :         kfree(cs_node);
     827           0 :         return err;
     828             : }
     829             : 
     830             : /**
     831             :  * ubifs_recover_log_leb - scan and recover a log LEB.
     832             :  * @c: UBIFS file-system description object
     833             :  * @lnum: LEB number
     834             :  * @offs: offset
     835             :  * @sbuf: LEB-sized buffer to use
     836             :  *
     837             :  * This function does a scan of a LEB, but caters for errors that might have
     838             :  * been caused by unclean reboots from which we are attempting to recover
     839             :  * (assume that only the last log LEB can be corrupted by an unclean reboot).
     840             :  *
     841             :  * This function returns %0 on success and a negative error code on failure.
     842             :  */
     843           9 : struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
     844             :                                              int offs, void *sbuf)
     845             : {
     846             :         struct ubifs_scan_leb *sleb;
     847             :         int next_lnum;
     848             : 
     849           9 :         dbg_rcvry("LEB %d", lnum);
     850           9 :         next_lnum = lnum + 1;
     851           9 :         if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
     852           0 :                 next_lnum = UBIFS_LOG_LNUM;
     853           9 :         if (next_lnum != c->ltail_lnum) {
     854             :                 /*
     855             :                  * We can only recover at the end of the log, so check that the
     856             :                  * next log LEB is empty or out of date.
     857             :                  */
     858           9 :                 sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
     859           9 :                 if (IS_ERR(sleb))
     860             :                         return sleb;
     861           0 :                 if (sleb->nodes_cnt) {
     862             :                         struct ubifs_scan_node *snod;
     863           0 :                         unsigned long long cs_sqnum = c->cs_sqnum;
     864             : 
     865           0 :                         snod = list_entry(sleb->nodes.next,
     866             :                                           struct ubifs_scan_node, list);
     867           0 :                         if (cs_sqnum == 0) {
     868             :                                 int err;
     869             : 
     870           0 :                                 err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
     871           0 :                                 if (err) {
     872           0 :                                         ubifs_scan_destroy(sleb);
     873           0 :                                         return ERR_PTR(err);
     874             :                                 }
     875             :                         }
     876           0 :                         if (snod->sqnum > cs_sqnum) {
     877           0 :                                 ubifs_err(c, "unrecoverable log corruption in LEB %d",
     878             :                                           lnum);
     879           0 :                                 ubifs_scan_destroy(sleb);
     880           0 :                                 return ERR_PTR(-EUCLEAN);
     881             :                         }
     882             :                 }
     883           0 :                 ubifs_scan_destroy(sleb);
     884             :         }
     885           0 :         return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
     886             : }
     887             : 
     888             : /**
     889             :  * recover_head - recover a head.
     890             :  * @c: UBIFS file-system description object
     891             :  * @lnum: LEB number of head to recover
     892             :  * @offs: offset of head to recover
     893             :  * @sbuf: LEB-sized buffer to use
     894             :  *
     895             :  * This function ensures that there is no data on the flash at a head location.
     896             :  *
     897             :  * This function returns %0 on success and a negative error code on failure.
     898             :  */
     899        2066 : static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
     900             : {
     901        2066 :         int len = c->max_write_size, err;
     902             : 
     903        2066 :         if (offs + len > c->leb_size)
     904           9 :                 len = c->leb_size - offs;
     905             : 
     906        2066 :         if (!len)
     907             :                 return 0;
     908             : 
     909             :         /* Read at the head location and check it is empty flash */
     910        2058 :         err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
     911        4116 :         if (err || !is_empty(sbuf, len)) {
     912         247 :                 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
     913         247 :                 if (offs == 0)
     914           0 :                         return ubifs_leb_unmap(c, lnum);
     915         247 :                 err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
     916         247 :                 if (err && err != -EBADMSG)
     917             :                         return err;
     918         247 :                 return ubifs_leb_change(c, lnum, sbuf, offs);
     919             :         }
     920             : 
     921             :         return 0;
     922             : }
     923             : 
     924             : /**
     925             :  * ubifs_recover_inl_heads - recover index and LPT heads.
     926             :  * @c: UBIFS file-system description object
     927             :  * @sbuf: LEB-sized buffer to use
     928             :  *
     929             :  * This function ensures that there is no data on the flash at the index and
     930             :  * LPT head locations.
     931             :  *
     932             :  * This deals with the recovery of a half-completed journal commit. UBIFS is
     933             :  * careful never to overwrite the last version of the index or the LPT. Because
     934             :  * the index and LPT are wandering trees, data from a half-completed commit will
     935             :  * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
     936             :  * assumed to be empty and will be unmapped anyway before use, or in the index
     937             :  * and LPT heads.
     938             :  *
     939             :  * This function returns %0 on success and a negative error code on failure.
     940             :  */
     941        1033 : int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
     942             : {
     943             :         int err;
     944             : 
     945        1033 :         ubifs_assert(c, !c->ro_mount || c->remounting_rw);
     946             : 
     947        1033 :         dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
     948        1033 :         err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
     949        1033 :         if (err)
     950             :                 return err;
     951             : 
     952        1033 :         dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
     953             : 
     954        1033 :         return recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
     955             : }
     956             : 
     957             : /**
     958             :  * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
     959             :  * @c: UBIFS file-system description object
     960             :  *
     961             :  * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
     962             :  * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
     963             :  * zero in case of success and a negative error code in case of failure.
     964             :  */
     965         265 : static int grab_empty_leb(struct ubifs_info *c)
     966             : {
     967             :         int lnum, err;
     968             : 
     969             :         /*
     970             :          * Note, it is very important to first search for an empty LEB and then
     971             :          * run the commit, not vice-versa. The reason is that there might be
     972             :          * only one empty LEB at the moment, the one which has been the
     973             :          * @c->gc_lnum just before the power cut happened. During the regular
     974             :          * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
     975             :          * one but GC can grab it. But at this moment this single empty LEB is
     976             :          * not marked as taken, so if we run commit - what happens? Right, the
     977             :          * commit will grab it and write the index there. Remember that the
     978             :          * index always expands as long as there is free space, and it only
     979             :          * starts consolidating when we run out of space.
     980             :          *
     981             :          * IOW, if we run commit now, we might not be able to find a free LEB
     982             :          * after this.
     983             :          */
     984         265 :         lnum = ubifs_find_free_leb_for_idx(c);
     985         265 :         if (lnum < 0) {
     986           1 :                 ubifs_err(c, "could not find an empty LEB");
     987           1 :                 ubifs_dump_lprops(c);
     988           1 :                 ubifs_dump_budg(c, &c->bi);
     989           1 :                 return lnum;
     990             :         }
     991             : 
     992             :         /* Reset the index flag */
     993         264 :         err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
     994             :                                   LPROPS_INDEX, 0);
     995         264 :         if (err)
     996             :                 return err;
     997             : 
     998         264 :         c->gc_lnum = lnum;
     999         264 :         dbg_rcvry("found empty LEB %d, run commit", lnum);
    1000             : 
    1001         264 :         return ubifs_run_commit(c);
    1002             : }
    1003             : 
    1004             : /**
    1005             :  * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
    1006             :  * @c: UBIFS file-system description object
    1007             :  *
    1008             :  * Out-of-place garbage collection requires always one empty LEB with which to
    1009             :  * start garbage collection. The LEB number is recorded in c->gc_lnum and is
    1010             :  * written to the master node on unmounting. In the case of an unclean unmount
    1011             :  * the value of gc_lnum recorded in the master node is out of date and cannot
    1012             :  * be used. Instead, recovery must allocate an empty LEB for this purpose.
    1013             :  * However, there may not be enough empty space, in which case it must be
    1014             :  * possible to GC the dirtiest LEB into the GC head LEB.
    1015             :  *
    1016             :  * This function also runs the commit which causes the TNC updates from
    1017             :  * size-recovery and orphans to be written to the flash. That is important to
    1018             :  * ensure correct replay order for subsequent mounts.
    1019             :  *
    1020             :  * This function returns %0 on success and a negative error code on failure.
    1021             :  */
    1022         537 : int ubifs_rcvry_gc_commit(struct ubifs_info *c)
    1023             : {
    1024         537 :         struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
    1025             :         struct ubifs_lprops lp;
    1026             :         int err;
    1027             : 
    1028         537 :         dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
    1029             : 
    1030         537 :         c->gc_lnum = -1;
    1031         537 :         if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
    1032         250 :                 return grab_empty_leb(c);
    1033             : 
    1034         287 :         err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
    1035         287 :         if (err) {
    1036          15 :                 if (err != -ENOSPC)
    1037             :                         return err;
    1038             : 
    1039          15 :                 dbg_rcvry("could not find a dirty LEB");
    1040          15 :                 return grab_empty_leb(c);
    1041             :         }
    1042             : 
    1043         272 :         ubifs_assert(c, !(lp.flags & LPROPS_INDEX));
    1044         272 :         ubifs_assert(c, lp.free + lp.dirty >= wbuf->offs);
    1045             : 
    1046             :         /*
    1047             :          * We run the commit before garbage collection otherwise subsequent
    1048             :          * mounts will see the GC and orphan deletion in a different order.
    1049             :          */
    1050         272 :         dbg_rcvry("committing");
    1051         272 :         err = ubifs_run_commit(c);
    1052         272 :         if (err)
    1053             :                 return err;
    1054             : 
    1055         272 :         dbg_rcvry("GC'ing LEB %d", lp.lnum);
    1056         272 :         mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
    1057         272 :         err = ubifs_garbage_collect_leb(c, &lp);
    1058         272 :         if (err >= 0) {
    1059         272 :                 int err2 = ubifs_wbuf_sync_nolock(wbuf);
    1060             : 
    1061         272 :                 if (err2)
    1062           0 :                         err = err2;
    1063             :         }
    1064         272 :         mutex_unlock(&wbuf->io_mutex);
    1065         272 :         if (err < 0) {
    1066           0 :                 ubifs_err(c, "GC failed, error %d", err);
    1067           0 :                 if (err == -EAGAIN)
    1068           0 :                         err = -EINVAL;
    1069             :                 return err;
    1070             :         }
    1071             : 
    1072         272 :         ubifs_assert(c, err == LEB_RETAINED);
    1073         272 :         if (err != LEB_RETAINED)
    1074             :                 return -EINVAL;
    1075             : 
    1076         272 :         err = ubifs_leb_unmap(c, c->gc_lnum);
    1077         272 :         if (err)
    1078             :                 return err;
    1079             : 
    1080         272 :         dbg_rcvry("allocated LEB %d for GC", lp.lnum);
    1081             :         return 0;
    1082             : }
    1083             : 
    1084             : /**
    1085             :  * add_ino - add an entry to the size tree.
    1086             :  * @c: UBIFS file-system description object
    1087             :  * @inum: inode number
    1088             :  * @i_size: size on inode
    1089             :  * @d_size: maximum size based on data nodes
    1090             :  * @exists: indicates whether the inode exists
    1091             :  */
    1092      517787 : static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
    1093             :                    loff_t d_size, int exists)
    1094             : {
    1095      517787 :         struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
    1096             :         struct size_entry *e;
    1097             : 
    1098     8989271 :         while (*p) {
    1099     7953697 :                 parent = *p;
    1100     7953697 :                 e = rb_entry(parent, struct size_entry, rb);
    1101     7953697 :                 if (inum < e->inum)
    1102     1726785 :                         p = &(*p)->rb_left;
    1103             :                 else
    1104     6226912 :                         p = &(*p)->rb_right;
    1105             :         }
    1106             : 
    1107      517787 :         e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
    1108      517787 :         if (!e)
    1109             :                 return -ENOMEM;
    1110             : 
    1111      517787 :         e->inum = inum;
    1112      517787 :         e->i_size = i_size;
    1113      517787 :         e->d_size = d_size;
    1114      517787 :         e->exists = exists;
    1115             : 
    1116     1035574 :         rb_link_node(&e->rb, parent, p);
    1117      517787 :         rb_insert_color(&e->rb, &c->size_tree);
    1118             : 
    1119      517787 :         return 0;
    1120             : }
    1121             : 
    1122             : /**
    1123             :  * find_ino - find an entry on the size tree.
    1124             :  * @c: UBIFS file-system description object
    1125             :  * @inum: inode number
    1126             :  */
    1127             : static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
    1128             : {
    1129     2795371 :         struct rb_node *p = c->size_tree.rb_node;
    1130             :         struct size_entry *e;
    1131             : 
    1132    40518649 :         while (p) {
    1133    39993687 :                 e = rb_entry(p, struct size_entry, rb);
    1134    39993687 :                 if (inum < e->inum)
    1135    12088115 :                         p = p->rb_left;
    1136    27905572 :                 else if (inum > e->inum)
    1137    25635163 :                         p = p->rb_right;
    1138             :                 else
    1139             :                         return e;
    1140             :         }
    1141             :         return NULL;
    1142             : }
    1143             : 
    1144             : /**
    1145             :  * remove_ino - remove an entry from the size tree.
    1146             :  * @c: UBIFS file-system description object
    1147             :  * @inum: inode number
    1148             :  */
    1149        8007 : static void remove_ino(struct ubifs_info *c, ino_t inum)
    1150             : {
    1151        8007 :         struct size_entry *e = find_ino(c, inum);
    1152             : 
    1153        8007 :         if (!e)
    1154             :                 return;
    1155         832 :         rb_erase(&e->rb, &c->size_tree);
    1156             :         kfree(e);
    1157             : }
    1158             : 
    1159             : /**
    1160             :  * ubifs_destroy_size_tree - free resources related to the size tree.
    1161             :  * @c: UBIFS file-system description object
    1162             :  */
    1163        1951 : void ubifs_destroy_size_tree(struct ubifs_info *c)
    1164             : {
    1165             :         struct size_entry *e, *n;
    1166             : 
    1167        2141 :         rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) {
    1168         190 :                 kfree(e);
    1169             :         }
    1170             : 
    1171        1951 :         c->size_tree = RB_ROOT;
    1172        1951 : }
    1173             : 
    1174             : /**
    1175             :  * ubifs_recover_size_accum - accumulate inode sizes for recovery.
    1176             :  * @c: UBIFS file-system description object
    1177             :  * @key: node key
    1178             :  * @deletion: node is for a deletion
    1179             :  * @new_size: inode size
    1180             :  *
    1181             :  * This function has two purposes:
    1182             :  *     1) to ensure there are no data nodes that fall outside the inode size
    1183             :  *     2) to ensure there are no data nodes for inodes that do not exist
    1184             :  * To accomplish those purposes, a rb-tree is constructed containing an entry
    1185             :  * for each inode number in the journal that has not been deleted, and recording
    1186             :  * the size from the inode node, the maximum size of any data node (also altered
    1187             :  * by truncations) and a flag indicating a inode number for which no inode node
    1188             :  * was present in the journal.
    1189             :  *
    1190             :  * Note that there is still the possibility that there are data nodes that have
    1191             :  * been committed that are beyond the inode size, however the only way to find
    1192             :  * them would be to scan the entire index. Alternatively, some provision could
    1193             :  * be made to record the size of inodes at the start of commit, which would seem
    1194             :  * very cumbersome for a scenario that is quite unlikely and the only negative
    1195             :  * consequence of which is wasted space.
    1196             :  *
    1197             :  * This functions returns %0 on success and a negative error code on failure.
    1198             :  */
    1199     2795371 : int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
    1200             :                              int deletion, loff_t new_size)
    1201             : {
    1202     5590742 :         ino_t inum = key_inum(c, key);
    1203             :         struct size_entry *e;
    1204             :         int err;
    1205             : 
    1206     5590742 :         switch (key_type(c, key)) {
    1207      482283 :         case UBIFS_INO_KEY:
    1208      482283 :                 if (deletion)
    1209        8007 :                         remove_ino(c, inum);
    1210             :                 else {
    1211      474276 :                         e = find_ino(c, inum);
    1212      474276 :                         if (e) {
    1213       57042 :                                 e->i_size = new_size;
    1214       57042 :                                 e->exists = 1;
    1215             :                         } else {
    1216      417234 :                                 err = add_ino(c, inum, new_size, 0, 1);
    1217      417234 :                                 if (err)
    1218             :                                         return err;
    1219             :                         }
    1220             :                 }
    1221             :                 break;
    1222     2309469 :         case UBIFS_DATA_KEY:
    1223     2309469 :                 e = find_ino(c, inum);
    1224     2309469 :                 if (e) {
    1225     2208916 :                         if (new_size > e->d_size)
    1226     1966405 :                                 e->d_size = new_size;
    1227             :                 } else {
    1228      100553 :                         err = add_ino(c, inum, 0, new_size, 0);
    1229      100553 :                         if (err)
    1230             :                                 return err;
    1231             :                 }
    1232             :                 break;
    1233        3619 :         case UBIFS_TRUN_KEY:
    1234        3619 :                 e = find_ino(c, inum);
    1235        3619 :                 if (e)
    1236        3619 :                         e->d_size = new_size;
    1237             :                 break;
    1238             :         }
    1239             :         return 0;
    1240             : }
    1241             : 
    1242             : /**
    1243             :  * fix_size_in_place - fix inode size in place on flash.
    1244             :  * @c: UBIFS file-system description object
    1245             :  * @e: inode size information for recovery
    1246             :  */
    1247          53 : static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
    1248             : {
    1249          53 :         struct ubifs_ino_node *ino = c->sbuf;
    1250             :         unsigned char *p;
    1251             :         union ubifs_key key;
    1252             :         int err, lnum, offs, len;
    1253             :         loff_t i_size;
    1254             :         uint32_t crc;
    1255             : 
    1256             :         /* Locate the inode node LEB number and offset */
    1257         106 :         ino_key_init(c, &key, e->inum);
    1258          53 :         err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
    1259          53 :         if (err) {
    1260           0 :                 unsigned int reason = get_failure_reason_callback(c);
    1261             : 
    1262           0 :                 if (reason & FR_DATA_CORRUPTED) {
    1263           0 :                         test_and_clear_failure_reason_callback(c, FR_DATA_CORRUPTED);
    1264           0 :                         if (handle_failure_callback(c, FR_H_TNC_DATA_CORRUPTED, NULL)) {
    1265             :                                 /* Leave the inode to be deleted by subsequent steps */
    1266             :                                 return 0;
    1267             :                         }
    1268             :                 }
    1269             :                 goto out;
    1270             :         }
    1271             :         /*
    1272             :          * If the size recorded on the inode node is greater than the size that
    1273             :          * was calculated from nodes in the journal then don't change the inode.
    1274             :          */
    1275          53 :         i_size = le64_to_cpu(ino->size);
    1276          53 :         if (i_size >= e->d_size)
    1277             :                 return 0;
    1278             :         /* Read the LEB */
    1279          53 :         err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
    1280          53 :         if (err && err != -EBADMSG)
    1281             :                 goto out;
    1282             :         /* Change the size field and recalculate the CRC */
    1283          53 :         ino = c->sbuf + offs;
    1284          53 :         ino->size = cpu_to_le64(e->d_size);
    1285          53 :         len = le32_to_cpu(ino->ch.len);
    1286         106 :         crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
    1287          53 :         ino->ch.crc = cpu_to_le32(crc);
    1288             :         /* Work out where data in the LEB ends and free space begins */
    1289          53 :         p = c->sbuf;
    1290          53 :         len = c->leb_size - 1;
    1291      508426 :         while (p[len] == 0xff)
    1292      508320 :                 len -= 1;
    1293          53 :         len = ALIGN(len + 1, c->min_io_size);
    1294             :         /* Atomically write the fixed LEB back again */
    1295          53 :         err = ubifs_leb_change(c, lnum, c->sbuf, len);
    1296          53 :         if (err)
    1297             :                 goto out;
    1298          53 :         dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
    1299             :                   (unsigned long)e->inum, lnum, offs, (long long)i_size,
    1300             :                   (long long)e->d_size);
    1301             :         return 0;
    1302             : 
    1303           0 : out:
    1304           0 :         ubifs_warn(c, "inode %lu failed to fix size %lld -> %lld error %d",
    1305             :                    (unsigned long)e->inum, (long long)e->i_size,
    1306             :                    (long long)e->d_size, err);
    1307             :         return err;
    1308             : }
    1309             : 
    1310             : /**
    1311             :  * inode_fix_size - fix inode size
    1312             :  * @c: UBIFS file-system description object
    1313             :  * @e: inode size information for recovery
    1314             :  */
    1315             : static int inode_fix_size(struct ubifs_info *c, __unused struct size_entry *e)
    1316             : {
    1317             :         /* Don't remove entry, keep it in the size tree. */
    1318             :         /* Remove this assertion after supporting authentication. */
    1319           4 :         ubifs_assert(c, c->ro_mount);
    1320             :         return 0;
    1321             : }
    1322             : 
    1323             : /**
    1324             :  * ubifs_recover_size - recover inode size.
    1325             :  * @c: UBIFS file-system description object
    1326             :  * @in_place: If true, do a in-place size fixup
    1327             :  *
    1328             :  * This function attempts to fix inode size discrepancies identified by the
    1329             :  * 'ubifs_recover_size_accum()' function.
    1330             :  *
    1331             :  * This functions returns %0 on success and a negative error code on failure.
    1332             :  */
    1333        1088 : int ubifs_recover_size(struct ubifs_info *c, bool in_place)
    1334             : {
    1335        1088 :         struct rb_node *this = rb_first(&c->size_tree);
    1336             : 
    1337      518359 :         while (this) {
    1338             :                 struct size_entry *e;
    1339             :                 int err;
    1340             : 
    1341      516183 :                 e = rb_entry(this, struct size_entry, rb);
    1342             : 
    1343      516183 :                 this = rb_next(this);
    1344             : 
    1345      516183 :                 if (!e->exists) {
    1346             :                         union ubifs_key key;
    1347             : 
    1348      147552 :                         ino_key_init(c, &key, e->inum);
    1349      147552 :                         err = ubifs_tnc_lookup(c, &key, c->sbuf);
    1350       73776 :                         if (err && err != -ENOENT) {
    1351             :                                 unsigned int reason;
    1352             : 
    1353           0 :                                 reason = get_failure_reason_callback(c);
    1354           0 :                                 if (reason & FR_DATA_CORRUPTED) {
    1355           0 :                                         test_and_clear_failure_reason_callback(c, FR_DATA_CORRUPTED);
    1356           0 :                                         if (handle_failure_callback(c, FR_H_TNC_DATA_CORRUPTED, NULL)) {
    1357             :                                                 /* Leave the inode to be deleted by subsequent steps */
    1358             :                                                 goto delete_entry;
    1359             :                                         }
    1360             :                                 }
    1361           0 :                                 return err;
    1362             :                         }
    1363       73776 :                         if (err == -ENOENT) {
    1364             :                                 /* Remove data nodes that have no inode */
    1365          30 :                                 dbg_rcvry("removing ino %lu",
    1366             :                                           (unsigned long)e->inum);
    1367          30 :                                 err = ubifs_tnc_remove_ino(c, e->inum);
    1368          30 :                                 if (err)
    1369             :                                         return err;
    1370             :                         } else {
    1371       73746 :                                 struct ubifs_ino_node *ino = c->sbuf;
    1372             : 
    1373       73746 :                                 e->exists = 1;
    1374       73746 :                                 e->i_size = le64_to_cpu(ino->size);
    1375             :                         }
    1376             :                 }
    1377             : 
    1378      516183 :                 if (e->exists && e->i_size < e->d_size) {
    1379          57 :                         ubifs_assert(c, !(c->ro_mount && in_place));
    1380             : 
    1381             :                         /*
    1382             :                          * We found data that is outside the found inode size,
    1383             :                          * fixup the inode size
    1384             :                          */
    1385             : 
    1386          57 :                         if (in_place) {
    1387          53 :                                 err = fix_size_in_place(c, e);
    1388          53 :                                 if (err)
    1389             :                                         return err;
    1390             :                         } else {
    1391           8 :                                 err = inode_fix_size(c, e);
    1392             :                                 if (err)
    1393             :                                         return err;
    1394           4 :                                 continue;
    1395             :                         }
    1396             :                 }
    1397             : 
    1398      516126 : delete_entry:
    1399      516179 :                 rb_erase(&e->rb, &c->size_tree);
    1400             :                 kfree(e);
    1401             :         }
    1402             : 
    1403             :         return 0;
    1404             : }

Generated by: LCOV version 1.13