Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * This file is part of UBIFS.
4 : *
5 : * Copyright (C) 2006-2008 Nokia Corporation.
6 : *
7 : * Authors: Adrian Hunter
8 : * Artem Bityutskiy (Битюцкий Артём)
9 : */
10 :
11 : /*
12 : * This file contains journal replay code. It runs when the file-system is being
13 : * mounted and requires no locking.
14 : *
15 : * The larger is the journal, the longer it takes to scan it, so the longer it
16 : * takes to mount UBIFS. This is why the journal has limited size which may be
17 : * changed depending on the system requirements. But a larger journal gives
18 : * faster I/O speed because it writes the index less frequently. So this is a
19 : * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
20 : * larger is the journal, the more memory its index may consume.
21 : */
22 :
23 : #include "linux_err.h"
24 : #include "bitops.h"
25 : #include "kmem.h"
26 : #include "ubifs.h"
27 : #include "defs.h"
28 : #include "debug.h"
29 : #include "key.h"
30 : #include "misc.h"
31 :
32 : /**
33 : * struct replay_entry - replay list entry.
34 : * @lnum: logical eraseblock number of the node
35 : * @offs: node offset
36 : * @len: node length
37 : * @deletion: non-zero if this entry corresponds to a node deletion
38 : * @sqnum: node sequence number
39 : * @list: links the replay list
40 : * @key: node key
41 : * @nm: directory entry name
42 : * @old_size: truncation old size
43 : * @new_size: truncation new size
44 : *
45 : * The replay process first scans all buds and builds the replay list, then
46 : * sorts the replay list in nodes sequence number order, and then inserts all
47 : * the replay entries to the TNC.
48 : */
49 : struct replay_entry {
50 : int lnum;
51 : int offs;
52 : int len;
53 : u8 hash[UBIFS_HASH_ARR_SZ];
54 : unsigned int deletion:1;
55 : unsigned long long sqnum;
56 : struct list_head list;
57 : union ubifs_key key;
58 : union {
59 : struct fscrypt_name nm;
60 : struct {
61 : loff_t old_size;
62 : loff_t new_size;
63 : };
64 : };
65 : };
66 :
67 : /**
68 : * struct bud_entry - entry in the list of buds to replay.
69 : * @list: next bud in the list
70 : * @bud: bud description object
71 : * @sqnum: reference node sequence number
72 : * @free: free bytes in the bud
73 : * @dirty: dirty bytes in the bud
74 : */
75 : struct bud_entry {
76 : struct list_head list;
77 : struct ubifs_bud *bud;
78 : unsigned long long sqnum;
79 : int free;
80 : int dirty;
81 : };
82 :
83 : /**
84 : * set_bud_lprops - set free and dirty space used by a bud.
85 : * @c: UBIFS file-system description object
86 : * @b: bud entry which describes the bud
87 : *
88 : * This function makes sure the LEB properties of bud @b are set correctly
89 : * after the replay. Returns zero in case of success and a negative error code
90 : * in case of failure.
91 : */
92 6245 : static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
93 : {
94 : const struct ubifs_lprops *lp;
95 6245 : int err = 0, dirty;
96 :
97 12490 : if (!test_lpt_valid_callback(c, b->bud->lnum, LPROPS_NC, LPROPS_NC,
98 : LPROPS_NC, LPROPS_NC))
99 : return 0;
100 :
101 6108 : ubifs_get_lprops(c);
102 :
103 6108 : lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
104 6108 : if (IS_ERR(lp)) {
105 0 : err = PTR_ERR(lp);
106 0 : if (test_and_clear_failure_reason_callback(c, FR_LPT_CORRUPTED) &&
107 0 : can_ignore_failure_callback(c, FR_LPT_CORRUPTED))
108 0 : err = 0;
109 : goto out;
110 : }
111 :
112 6108 : dirty = lp->dirty;
113 6108 : if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
114 : /*
115 : * The LEB was added to the journal with a starting offset of
116 : * zero which means the LEB must have been empty. The LEB
117 : * property values should be @lp->free == @c->leb_size and
118 : * @lp->dirty == 0, but that is not the case. The reason is that
119 : * the LEB had been garbage collected before it became the bud,
120 : * and there was no commit in between. The garbage collector
121 : * resets the free and dirty space without recording it
122 : * anywhere except lprops, so if there was no commit then
123 : * lprops does not have that information.
124 : *
125 : * We do not need to adjust free space because the scan has told
126 : * us the exact value which is recorded in the replay entry as
127 : * @b->free.
128 : *
129 : * However we do need to subtract from the dirty space the
130 : * amount of space that the garbage collector reclaimed, which
131 : * is the whole LEB minus the amount of space that was free.
132 : */
133 2571 : dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
134 : lp->free, lp->dirty);
135 2571 : dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
136 : lp->free, lp->dirty);
137 2571 : dirty -= c->leb_size - lp->free;
138 : /*
139 : * If the replay order was perfect the dirty space would now be
140 : * zero. The order is not perfect because the journal heads
141 : * race with each other. This is not a problem but is does mean
142 : * that the dirty space may temporarily exceed c->leb_size
143 : * during the replay.
144 : */
145 2571 : if (dirty != 0)
146 1189 : dbg_mnt("LEB %d lp: %d free %d dirty replay: %d free %d dirty",
147 : b->bud->lnum, lp->free, lp->dirty, b->free,
148 : b->dirty);
149 : }
150 12216 : if (!test_lpt_valid_callback(c, b->bud->lnum, lp->free, lp->dirty,
151 6108 : b->free, dirty + b->dirty))
152 : goto out;
153 :
154 6108 : lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
155 6108 : lp->flags | LPROPS_TAKEN, 0);
156 6108 : if (IS_ERR(lp)) {
157 0 : err = PTR_ERR(lp);
158 0 : goto out;
159 : }
160 :
161 : /* Make sure the journal head points to the latest bud */
162 12216 : err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
163 12216 : b->bud->lnum, c->leb_size - b->free);
164 :
165 6108 : out:
166 6108 : ubifs_release_lprops(c);
167 6108 : return err;
168 : }
169 :
170 : /**
171 : * set_buds_lprops - set free and dirty space for all replayed buds.
172 : * @c: UBIFS file-system description object
173 : *
174 : * This function sets LEB properties for all replayed buds. Returns zero in
175 : * case of success and a negative error code in case of failure.
176 : */
177 : static int set_buds_lprops(struct ubifs_info *c)
178 : {
179 : struct bud_entry *b;
180 : int err;
181 :
182 8444 : list_for_each_entry(b, &c->replay_buds, list) {
183 6245 : err = set_bud_lprops(c, b);
184 6245 : if (err)
185 : return err;
186 : }
187 :
188 : return 0;
189 : }
190 :
191 : /**
192 : * trun_remove_range - apply a replay entry for a truncation to the TNC.
193 : * @c: UBIFS file-system description object
194 : * @r: replay entry of truncation
195 : */
196 3619 : static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
197 : {
198 : unsigned min_blk, max_blk;
199 : union ubifs_key min_key, max_key;
200 : ino_t ino;
201 :
202 3619 : min_blk = r->new_size / UBIFS_BLOCK_SIZE;
203 3619 : if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
204 813 : min_blk += 1;
205 :
206 3619 : max_blk = r->old_size / UBIFS_BLOCK_SIZE;
207 3619 : if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
208 0 : max_blk -= 1;
209 :
210 7238 : ino = key_inum(c, &r->key);
211 :
212 3619 : data_key_init(c, &min_key, ino, min_blk);
213 3619 : data_key_init(c, &max_key, ino, max_blk);
214 :
215 3619 : return ubifs_tnc_remove_range(c, &min_key, &max_key);
216 : }
217 :
218 : /**
219 : * inode_still_linked - check whether inode in question will be re-linked.
220 : * @c: UBIFS file-system description object
221 : * @rino: replay entry to test
222 : *
223 : * O_TMPFILE files can be re-linked, this means link count goes from 0 to 1.
224 : * This case needs special care, otherwise all references to the inode will
225 : * be removed upon the first replay entry of an inode with link count 0
226 : * is found.
227 : */
228 8007 : static bool inode_still_linked(struct ubifs_info *c, struct replay_entry *rino)
229 : {
230 : struct replay_entry *r;
231 :
232 8007 : ubifs_assert(c, rino->deletion);
233 16014 : ubifs_assert(c, key_type(c, &rino->key) == UBIFS_INO_KEY);
234 :
235 : /*
236 : * Find the most recent entry for the inode behind @rino and check
237 : * whether it is a deletion.
238 : */
239 5536509 : list_for_each_entry_reverse(r, &c->replay_list, list) {
240 5536509 : ubifs_assert(c, r->sqnum >= rino->sqnum);
241 16617835 : if (key_inum(c, &r->key) == key_inum(c, &rino->key) &&
242 8308 : key_type(c, &r->key) == UBIFS_INO_KEY)
243 8007 : return r->deletion == 0;
244 :
245 : }
246 :
247 0 : ubifs_assert(c, 0);
248 0 : return false;
249 : }
250 :
251 : /**
252 : * apply_replay_entry - apply a replay entry to the TNC.
253 : * @c: UBIFS file-system description object
254 : * @r: replay entry to apply
255 : *
256 : * Apply a replay entry to the TNC.
257 : */
258 3152496 : static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
259 : {
260 : int err;
261 :
262 3152496 : dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ",
263 : r->lnum, r->offs, r->len, r->deletion, r->sqnum);
264 :
265 6304992 : if (is_hash_key(c, &r->key)) {
266 357117 : if (r->deletion)
267 7061 : err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
268 : else
269 350056 : err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
270 350056 : r->len, r->hash, &r->nm);
271 : } else {
272 2795379 : if (r->deletion)
273 23252 : switch (key_type(c, &r->key)) {
274 8007 : case UBIFS_INO_KEY:
275 : {
276 16014 : ino_t inum = key_inum(c, &r->key);
277 :
278 8007 : if (inode_still_linked(c, r)) {
279 : err = 0;
280 : break;
281 : }
282 :
283 4717 : err = ubifs_tnc_remove_ino(c, inum);
284 4717 : break;
285 : }
286 3619 : case UBIFS_TRUN_KEY:
287 3619 : err = trun_remove_range(c, r);
288 3619 : break;
289 0 : default:
290 0 : err = ubifs_tnc_remove(c, &r->key);
291 0 : break;
292 : }
293 : else
294 2783753 : err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
295 2783753 : r->len, r->hash);
296 2792089 : if (err)
297 : return err;
298 :
299 2795371 : if (c->need_recovery)
300 2795371 : err = ubifs_recover_size_accum(c, &r->key, r->deletion,
301 : r->new_size);
302 : }
303 :
304 : return err;
305 : }
306 :
307 : /**
308 : * replay_entries_cmp - compare 2 replay entries.
309 : * @priv: UBIFS file-system description object
310 : * @a: first replay entry
311 : * @b: second replay entry
312 : *
313 : * This is a comparios function for 'list_sort()' which compares 2 replay
314 : * entries @a and @b by comparing their sequence number. Returns %1 if @a has
315 : * greater sequence number and %-1 otherwise.
316 : */
317 38201566 : static int replay_entries_cmp(void *priv, const struct list_head *a,
318 : const struct list_head *b)
319 : {
320 38201566 : struct ubifs_info *c = priv;
321 : struct replay_entry *ra, *rb;
322 :
323 : cond_resched();
324 38201566 : if (a == b)
325 : return 0;
326 :
327 38201566 : ra = list_entry(a, struct replay_entry, list);
328 38201566 : rb = list_entry(b, struct replay_entry, list);
329 38201566 : ubifs_assert(c, ra->sqnum != rb->sqnum);
330 38201566 : if (ra->sqnum > rb->sqnum)
331 : return 1;
332 21954126 : return -1;
333 : }
334 :
335 : /**
336 : * apply_replay_list - apply the replay list to the TNC.
337 : * @c: UBIFS file-system description object
338 : *
339 : * Apply all entries in the replay list to the TNC. Returns zero in case of
340 : * success and a negative error code in case of failure.
341 : */
342 2227 : static int apply_replay_list(struct ubifs_info *c)
343 : {
344 : struct replay_entry *r;
345 : int err;
346 :
347 2227 : list_sort(c, &c->replay_list, &replay_entries_cmp);
348 :
349 3154695 : list_for_each_entry(r, &c->replay_list, list) {
350 : cond_resched();
351 :
352 3152496 : err = apply_replay_entry(c, r);
353 3152496 : if (err)
354 : return err;
355 : }
356 :
357 : return 0;
358 : }
359 :
360 : /**
361 : * destroy_replay_list - destroy the replay.
362 : * @c: UBIFS file-system description object
363 : *
364 : * Destroy the replay list.
365 : */
366 2248 : static void destroy_replay_list(struct ubifs_info *c)
367 : {
368 : struct replay_entry *r, *tmp;
369 :
370 3166064 : list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
371 6327632 : if (is_hash_key(c, &r->key))
372 360877 : kfree(fname_name(&r->nm));
373 6327632 : list_del(&r->list);
374 3163816 : kfree(r);
375 : }
376 2248 : }
377 :
378 : /**
379 : * insert_node - insert a node to the replay list
380 : * @c: UBIFS file-system description object
381 : * @lnum: node logical eraseblock number
382 : * @offs: node offset
383 : * @len: node length
384 : * @key: node key
385 : * @sqnum: sequence number
386 : * @deletion: non-zero if this is a deletion
387 : * @used: number of bytes in use in a LEB
388 : * @old_size: truncation old size
389 : * @new_size: truncation new size
390 : *
391 : * This function inserts a scanned non-direntry node to the replay list. The
392 : * replay list contains @struct replay_entry elements, and we sort this list in
393 : * sequence number order before applying it. The replay list is applied at the
394 : * very end of the replay process. Since the list is sorted in sequence number
395 : * order, the older modifications are applied first. This function returns zero
396 : * in case of success and a negative error code in case of failure.
397 : */
398 2803689 : static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
399 : const u8 *hash, union ubifs_key *key,
400 : unsigned long long sqnum, int deletion, int *used,
401 : loff_t old_size, loff_t new_size)
402 : {
403 : struct replay_entry *r;
404 :
405 2803689 : dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
406 :
407 5607378 : if (key_inum(c, key) >= c->highest_inum)
408 27878 : c->highest_inum = key_inum(c, key);
409 :
410 2803689 : r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
411 2803689 : if (!r)
412 : return -ENOMEM;
413 :
414 2803689 : if (!deletion)
415 2788283 : *used += ALIGN(len, 8);
416 2803689 : r->lnum = lnum;
417 2803689 : r->offs = offs;
418 2803689 : r->len = len;
419 5607378 : ubifs_copy_hash(c, hash, r->hash);
420 2803689 : r->deletion = !!deletion;
421 2803689 : r->sqnum = sqnum;
422 5607378 : key_copy(c, key, &r->key);
423 2803689 : r->old_size = old_size;
424 2803689 : r->new_size = new_size;
425 :
426 5607378 : list_add_tail(&r->list, &c->replay_list);
427 2803689 : return 0;
428 : }
429 :
430 : /**
431 : * insert_dent - insert a directory entry node into the replay list.
432 : * @c: UBIFS file-system description object
433 : * @lnum: node logical eraseblock number
434 : * @offs: node offset
435 : * @len: node length
436 : * @key: node key
437 : * @name: directory entry name
438 : * @nlen: directory entry name length
439 : * @sqnum: sequence number
440 : * @deletion: non-zero if this is a deletion
441 : * @used: number of bytes in use in a LEB
442 : *
443 : * This function inserts a scanned directory entry node or an extended
444 : * attribute entry to the replay list. Returns zero in case of success and a
445 : * negative error code in case of failure.
446 : */
447 361159 : static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
448 : const u8 *hash, union ubifs_key *key,
449 : const char *name, int nlen, unsigned long long sqnum,
450 : int deletion, int *used)
451 : {
452 : struct replay_entry *r;
453 : char *nbuf;
454 :
455 361159 : dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs);
456 722318 : if (key_inum(c, key) >= c->highest_inum)
457 6744 : c->highest_inum = key_inum(c, key);
458 :
459 361159 : r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
460 361159 : if (!r)
461 : return -ENOMEM;
462 :
463 361159 : nbuf = kmalloc(nlen + 1, GFP_KERNEL);
464 361159 : if (!nbuf) {
465 0 : kfree(r);
466 0 : return -ENOMEM;
467 : }
468 :
469 361159 : if (!deletion)
470 350338 : *used += ALIGN(len, 8);
471 361159 : r->lnum = lnum;
472 361159 : r->offs = offs;
473 361159 : r->len = len;
474 722318 : ubifs_copy_hash(c, hash, r->hash);
475 361159 : r->deletion = !!deletion;
476 361159 : r->sqnum = sqnum;
477 722318 : key_copy(c, key, &r->key);
478 361159 : fname_len(&r->nm) = nlen;
479 361159 : memcpy(nbuf, name, nlen);
480 361159 : nbuf[nlen] = '\0';
481 361159 : fname_name(&r->nm) = nbuf;
482 :
483 722318 : list_add_tail(&r->list, &c->replay_list);
484 361159 : return 0;
485 : }
486 :
487 : /**
488 : * ubifs_validate_entry - validate directory or extended attribute entry node.
489 : * @c: UBIFS file-system description object
490 : * @dent: the node to validate
491 : *
492 : * This function validates directory or extended attribute entry node @dent.
493 : * Returns zero if the node is all right and a %-EINVAL if not.
494 : */
495 717483 : int ubifs_validate_entry(struct ubifs_info *c,
496 : const struct ubifs_dent_node *dent)
497 : {
498 1434966 : int key_type = key_type_flash(c, dent->key);
499 717483 : int nlen = le16_to_cpu(dent->nlen);
500 :
501 1434966 : if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
502 1434966 : dent->type >= UBIFS_ITYPES_CNT ||
503 717483 : nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
504 258086 : (key_type == UBIFS_XENT_KEY &&
505 975569 : strnlen((const char*)dent->name, nlen) != nlen) ||
506 717483 : le64_to_cpu(dent->inum) > MAX_INUM) {
507 0 : ubifs_err(c, "bad %s node", key_type == UBIFS_DENT_KEY ?
508 : "directory entry" : "extended attribute entry");
509 : return -EINVAL;
510 : }
511 :
512 717483 : if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
513 0 : ubifs_err(c, "bad key type %d", key_type);
514 : return -EINVAL;
515 : }
516 :
517 : return 0;
518 : }
519 :
520 : /**
521 : * is_last_bud - check if the bud is the last in the journal head.
522 : * @c: UBIFS file-system description object
523 : * @bud: bud description object
524 : *
525 : * This function checks if bud @bud is the last bud in its journal head. This
526 : * information is then used by 'replay_bud()' to decide whether the bud can
527 : * have corruptions or not. Indeed, only last buds can be corrupted by power
528 : * cuts. Returns %1 if this is the last bud, and %0 if not.
529 : */
530 6438 : static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
531 : {
532 6438 : struct ubifs_jhead *jh = &c->jheads[bud->jhead];
533 : struct ubifs_bud *next;
534 : uint32_t data;
535 : int err;
536 :
537 6438 : if (list_is_last(&bud->list, &jh->buds_list))
538 : return 1;
539 :
540 : /*
541 : * The following is a quirk to make sure we work correctly with UBIFS
542 : * images used with older UBIFS.
543 : *
544 : * Normally, the last bud will be the last in the journal head's list
545 : * of bud. However, there is one exception if the UBIFS image belongs
546 : * to older UBIFS. This is fairly unlikely: one would need to use old
547 : * UBIFS, then have a power cut exactly at the right point, and then
548 : * try to mount this image with new UBIFS.
549 : *
550 : * The exception is: it is possible to have 2 buds A and B, A goes
551 : * before B, and B is the last, bud B is contains no data, and bud A is
552 : * corrupted at the end. The reason is that in older versions when the
553 : * journal code switched the next bud (from A to B), it first added a
554 : * log reference node for the new bud (B), and only after this it
555 : * synchronized the write-buffer of current bud (A). But later this was
556 : * changed and UBIFS started to always synchronize the write-buffer of
557 : * the bud (A) before writing the log reference for the new bud (B).
558 : *
559 : * But because older UBIFS always synchronized A's write-buffer before
560 : * writing to B, we can recognize this exceptional situation but
561 : * checking the contents of bud B - if it is empty, then A can be
562 : * treated as the last and we can recover it.
563 : *
564 : * TODO: remove this piece of code in a couple of years (today it is
565 : * 16.05.2011).
566 : */
567 2953 : next = list_entry(bud->list.next, struct ubifs_bud, list);
568 2953 : if (!list_is_last(&next->list, &jh->buds_list))
569 : return 0;
570 :
571 485 : err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
572 485 : if (err)
573 : return 0;
574 :
575 485 : return data == 0xFFFFFFFF;
576 : }
577 :
578 : /**
579 : * authenticate_sleb - authenticate one scan LEB
580 : * @c: UBIFS file-system description object
581 : * @sleb: the scan LEB to authenticate
582 : * @log_hash:
583 : * @is_last: if true, this is the last LEB
584 : *
585 : * This function iterates over the buds of a single LEB authenticating all buds
586 : * with the authentication nodes on this LEB. Authentication nodes are written
587 : * after some buds and contain a HMAC covering the authentication node itself
588 : * and the buds between the last authentication node and the current
589 : * authentication node. It can happen that the last buds cannot be authenticated
590 : * because a powercut happened when some nodes were written but not the
591 : * corresponding authentication node. This function returns the number of nodes
592 : * that could be authenticated or a negative error code.
593 : */
594 : static int authenticate_sleb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
595 : __unused struct shash_desc *log_hash,
596 : __unused int is_last)
597 : {
598 6416 : if (!ubifs_authenticated(c))
599 6416 : return sleb->nodes_cnt;
600 :
601 : // To be implemented
602 : return -EINVAL;
603 : }
604 :
605 : /**
606 : * replay_bud - replay a bud logical eraseblock.
607 : * @c: UBIFS file-system description object
608 : * @b: bud entry which describes the bud
609 : *
610 : * This function replays bud @bud, recovers it if needed, and adds all nodes
611 : * from this bud to the replay list. Returns zero in case of success and a
612 : * negative error code in case of failure.
613 : */
614 6438 : static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
615 : {
616 6438 : int is_last = is_last_bud(c, b->bud);
617 6438 : int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
618 6438 : int n_nodes, n = 0;
619 : struct ubifs_scan_leb *sleb;
620 : struct ubifs_scan_node *snod;
621 :
622 6438 : dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
623 : lnum, b->bud->jhead, offs, is_last);
624 :
625 6438 : if (c->need_recovery && is_last)
626 : /*
627 : * Recover only last LEBs in the journal heads, because power
628 : * cuts may cause corruptions only in these LEBs, because only
629 : * these LEBs could possibly be written to at the power cut
630 : * time.
631 : */
632 2778 : sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
633 : else
634 3660 : sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
635 6438 : if (IS_ERR(sleb))
636 22 : return PTR_ERR(sleb);
637 :
638 12832 : n_nodes = authenticate_sleb(c, sleb, b->bud->log_hash, is_last);
639 6416 : if (n_nodes < 0) {
640 : err = n_nodes;
641 : goto out;
642 : }
643 :
644 6416 : ubifs_shash_copy_state(c, b->bud->log_hash,
645 6416 : c->jheads[b->bud->jhead].log_hash);
646 :
647 : /*
648 : * The bud does not have to start from offset zero - the beginning of
649 : * the 'lnum' LEB may contain previously committed data. One of the
650 : * things we have to do in replay is to correctly update lprops with
651 : * newer information about this LEB.
652 : *
653 : * At this point lprops thinks that this LEB has 'c->leb_size - offs'
654 : * bytes of free space because it only contain information about
655 : * committed data.
656 : *
657 : * But we know that real amount of free space is 'c->leb_size -
658 : * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
659 : * 'sleb->endpt' is used by bud data. We have to correctly calculate
660 : * how much of these data are dirty and update lprops with this
661 : * information.
662 : *
663 : * The dirt in that LEB region is comprised of padding nodes, deletion
664 : * nodes, truncation nodes and nodes which are obsoleted by subsequent
665 : * nodes in this LEB. So instead of calculating clean space, we
666 : * calculate used space ('used' variable).
667 : */
668 :
669 6334490 : list_for_each_entry(snod, &sleb->nodes, list) {
670 : u8 hash[UBIFS_HASH_ARR_SZ];
671 3164848 : int deletion = 0;
672 :
673 : cond_resched();
674 :
675 3164848 : if (snod->sqnum >= SQNUM_WATERMARK) {
676 0 : ubifs_err(c, "file system's life ended");
677 0 : goto out_dump;
678 : }
679 :
680 6329696 : ubifs_node_calc_hash(c, snod->node, hash);
681 :
682 3164848 : if (snod->sqnum > c->max_sqnum)
683 21890 : c->max_sqnum = snod->sqnum;
684 :
685 3164848 : switch (snod->type) {
686 490331 : case UBIFS_INO_NODE:
687 : {
688 490331 : struct ubifs_ino_node *ino = snod->node;
689 490331 : loff_t new_size = le64_to_cpu(ino->size);
690 :
691 490331 : if (le32_to_cpu(ino->nlink) == 0)
692 11787 : deletion = 1;
693 490331 : err = insert_node(c, lnum, snod->offs, snod->len, hash,
694 : &snod->key, snod->sqnum, deletion,
695 : &used, 0, new_size);
696 490331 : break;
697 : }
698 2309739 : case UBIFS_DATA_NODE:
699 : {
700 2309739 : struct ubifs_data_node *dn = snod->node;
701 4619478 : loff_t new_size = le32_to_cpu(dn->size) +
702 4619478 : key_block(c, &snod->key) *
703 : UBIFS_BLOCK_SIZE;
704 :
705 2309739 : err = insert_node(c, lnum, snod->offs, snod->len, hash,
706 : &snod->key, snod->sqnum, deletion,
707 : &used, 0, new_size);
708 2309739 : break;
709 : }
710 361159 : case UBIFS_DENT_NODE:
711 : case UBIFS_XENT_NODE:
712 : {
713 361159 : struct ubifs_dent_node *dent = snod->node;
714 :
715 361159 : err = ubifs_validate_entry(c, dent);
716 361159 : if (err)
717 : goto out_dump;
718 :
719 722318 : err = insert_dent(c, lnum, snod->offs, snod->len, hash,
720 361159 : &snod->key, (const char *)dent->name,
721 361159 : le16_to_cpu(dent->nlen), snod->sqnum,
722 361159 : !le64_to_cpu(dent->inum), &used);
723 361159 : break;
724 : }
725 3619 : case UBIFS_TRUN_NODE:
726 : {
727 3619 : struct ubifs_trun_node *trun = snod->node;
728 3619 : loff_t old_size = le64_to_cpu(trun->old_size);
729 3619 : loff_t new_size = le64_to_cpu(trun->new_size);
730 : union ubifs_key key;
731 :
732 : /* Validate truncation node */
733 3619 : if (old_size < 0 || old_size > c->max_inode_sz ||
734 3619 : new_size < 0 || new_size > c->max_inode_sz ||
735 : old_size <= new_size) {
736 0 : ubifs_err(c, "bad truncation node");
737 0 : goto out_dump;
738 : }
739 :
740 : /*
741 : * Create a fake truncation key just to use the same
742 : * functions which expect nodes to have keys.
743 : */
744 7238 : trun_key_init(c, &key, le32_to_cpu(trun->inum));
745 3619 : err = insert_node(c, lnum, snod->offs, snod->len, hash,
746 : &key, snod->sqnum, 1, &used,
747 : old_size, new_size);
748 3619 : break;
749 : }
750 : case UBIFS_AUTH_NODE:
751 : break;
752 0 : default:
753 0 : ubifs_err(c, "unexpected node type %d in bud LEB %d:%d",
754 : snod->type, lnum, snod->offs);
755 : err = -EINVAL;
756 : goto out_dump;
757 : }
758 3164848 : if (err)
759 : goto out;
760 :
761 3164848 : n++;
762 3164848 : if (n == n_nodes)
763 : break;
764 : }
765 :
766 6416 : ubifs_assert(c, ubifs_search_bud(c, lnum));
767 6416 : ubifs_assert(c, sleb->endpt - offs >= used);
768 6416 : ubifs_assert(c, sleb->endpt % c->min_io_size == 0);
769 :
770 6416 : b->dirty = sleb->endpt - offs - used;
771 6416 : b->free = c->leb_size - sleb->endpt;
772 8030 : dbg_mnt("bud LEB %d replied: dirty %d, free %d",
773 : lnum, b->dirty, b->free);
774 :
775 11218 : out:
776 6416 : ubifs_scan_destroy(sleb);
777 6416 : return err;
778 :
779 0 : out_dump:
780 0 : set_failure_reason_callback(c, FR_DATA_CORRUPTED);
781 0 : ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
782 0 : ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
783 0 : ubifs_scan_destroy(sleb);
784 0 : return -EINVAL;
785 : }
786 :
787 : /**
788 : * replay_buds - replay all buds.
789 : * @c: UBIFS file-system description object
790 : *
791 : * This function returns zero in case of success and a negative error code in
792 : * case of failure.
793 : */
794 2242 : static int replay_buds(struct ubifs_info *c)
795 : {
796 : struct bud_entry *b, *tmp_b;
797 : int err;
798 2242 : unsigned long long prev_sqnum = 0;
799 :
800 8665 : list_for_each_entry_safe(b, tmp_b, &c->replay_buds, list) {
801 6438 : err = replay_bud(c, b);
802 6438 : if (err) {
803 29 : if (test_and_clear_failure_reason_callback(c, FR_DATA_CORRUPTED) &&
804 29 : handle_failure_callback(c, FR_H_BUD_CORRUPTED, b->bud)) {
805 : /* Set %FR_LPT_INCORRECT for lpt status. */
806 7 : set_lpt_invalid_callback(c, FR_LPT_INCORRECT);
807 : /* Skip replaying the bud LEB. */
808 14 : list_del(&b->list);
809 7 : kfree(b);
810 7 : continue;
811 : }
812 : return err;
813 : }
814 :
815 6416 : ubifs_assert(c, b->sqnum > prev_sqnum);
816 6416 : prev_sqnum = b->sqnum;
817 : }
818 :
819 : return 0;
820 : }
821 :
822 : /**
823 : * destroy_bud_list - destroy the list of buds to replay.
824 : * @c: UBIFS file-system description object
825 : */
826 : static void destroy_bud_list(struct ubifs_info *c)
827 : {
828 : struct bud_entry *b;
829 :
830 17250 : while (!list_empty(&c->replay_buds)) {
831 6377 : b = list_entry(c->replay_buds.next, struct bud_entry, list);
832 12754 : list_del(&b->list);
833 : kfree(b);
834 : }
835 : }
836 :
837 : /**
838 : * add_replay_bud - add a bud to the list of buds to replay.
839 : * @c: UBIFS file-system description object
840 : * @lnum: bud logical eraseblock number to replay
841 : * @offs: bud start offset
842 : * @jhead: journal head to which this bud belongs
843 : * @sqnum: reference node sequence number
844 : *
845 : * This function returns zero in case of success and a negative error code in
846 : * case of failure.
847 : */
848 6450 : static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
849 : unsigned long long sqnum)
850 : {
851 : struct ubifs_bud *bud;
852 : struct bud_entry *b;
853 : int err;
854 :
855 6450 : dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
856 :
857 6450 : bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
858 6450 : if (!bud)
859 : return -ENOMEM;
860 :
861 6450 : b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
862 6450 : if (!b) {
863 : err = -ENOMEM;
864 : goto out;
865 : }
866 :
867 6450 : bud->lnum = lnum;
868 6450 : bud->start = offs;
869 6450 : bud->jhead = jhead;
870 6450 : bud->log_hash = ubifs_hash_get_desc(c);
871 12900 : if (IS_ERR(bud->log_hash)) {
872 0 : err = PTR_ERR(bud->log_hash);
873 0 : goto out;
874 : }
875 :
876 6450 : ubifs_shash_copy_state(c, c->log_hash, bud->log_hash);
877 :
878 6450 : ubifs_add_bud(c, bud);
879 :
880 6450 : b->bud = bud;
881 6450 : b->sqnum = sqnum;
882 12900 : list_add_tail(&b->list, &c->replay_buds);
883 :
884 6450 : return 0;
885 0 : out:
886 0 : kfree(bud);
887 0 : kfree(b);
888 :
889 0 : return err;
890 : }
891 :
892 : /**
893 : * validate_ref - validate a reference node.
894 : * @c: UBIFS file-system description object
895 : * @ref: the reference node to validate
896 : *
897 : * This function returns %1 if a bud reference already exists for the LEB. %0 is
898 : * returned if the reference node is new, otherwise %-EINVAL is returned if
899 : * validation failed.
900 : */
901 7114 : static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
902 : {
903 : struct ubifs_bud *bud;
904 7114 : int lnum = le32_to_cpu(ref->lnum);
905 7114 : unsigned int offs = le32_to_cpu(ref->offs);
906 7114 : unsigned int jhead = le32_to_cpu(ref->jhead);
907 :
908 : /*
909 : * ref->offs may point to the end of LEB when the journal head points
910 : * to the end of LEB and we write reference node for it during commit.
911 : * So this is why we require 'offs > c->leb_size'.
912 : */
913 14228 : if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
914 21342 : lnum < c->main_first || offs > c->leb_size ||
915 7114 : offs & (c->min_io_size - 1))
916 : return -EINVAL;
917 :
918 : /* Make sure we have not already looked at this bud */
919 7114 : bud = ubifs_search_bud(c, lnum);
920 7114 : if (bud) {
921 664 : if (bud->jhead == jhead && bud->start <= offs)
922 : return 1;
923 0 : ubifs_err(c, "bud at LEB %d:%d was already referred", lnum, offs);
924 : return -EINVAL;
925 : }
926 :
927 : return 0;
928 : }
929 :
930 : /**
931 : * replay_log_leb - replay a log logical eraseblock.
932 : * @c: UBIFS file-system description object
933 : * @lnum: log logical eraseblock to replay
934 : * @offs: offset to start replaying from
935 : * @sbuf: scan buffer
936 : *
937 : * This function replays a log LEB and returns zero in case of success, %1 if
938 : * this is the last LEB in the log, and a negative error code in case of
939 : * failure.
940 : */
941 4791 : static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
942 : {
943 : int err;
944 : struct ubifs_scan_leb *sleb;
945 : struct ubifs_scan_node *snod;
946 : const struct ubifs_cs_node *node;
947 :
948 4791 : dbg_mnt("replay log LEB %d:%d", lnum, offs);
949 4791 : sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
950 4791 : if (IS_ERR(sleb)) {
951 9 : if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
952 0 : return PTR_ERR(sleb);
953 9 : clear_failure_reason_callback(c);
954 : /*
955 : * Note, the below function will recover this log LEB only if
956 : * it is the last, because unclean reboots can possibly corrupt
957 : * only the tail of the log.
958 : */
959 9 : sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
960 9 : if (IS_ERR(sleb))
961 9 : return PTR_ERR(sleb);
962 : }
963 :
964 4782 : if (sleb->nodes_cnt == 0) {
965 : err = 1;
966 : goto out;
967 : }
968 :
969 2559 : node = sleb->buf;
970 2559 : snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
971 2559 : if (c->cs_sqnum == 0) {
972 : /*
973 : * This is the first log LEB we are looking at, make sure that
974 : * the first node is a commit start node. Also record its
975 : * sequence number so that UBIFS can determine where the log
976 : * ends, because all nodes which were have higher sequence
977 : * numbers.
978 : */
979 2242 : if (snod->type != UBIFS_CS_NODE) {
980 0 : ubifs_err(c, "first log node at LEB %d:%d is not CS node",
981 : lnum, offs);
982 : goto out_dump;
983 : }
984 2242 : if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
985 0 : ubifs_err(c, "first CS node at LEB %d:%d has wrong commit number %llu expected %llu",
986 : lnum, offs,
987 : (unsigned long long)le64_to_cpu(node->cmt_no),
988 : c->cmt_no);
989 : goto out_dump;
990 : }
991 :
992 2242 : c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
993 2242 : dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
994 :
995 2242 : err = ubifs_shash_init(c, c->log_hash);
996 2242 : if (err)
997 : goto out;
998 :
999 2242 : err = ubifs_shash_update(c, c->log_hash, node, UBIFS_CS_NODE_SZ);
1000 2242 : if (err < 0)
1001 : goto out;
1002 : }
1003 :
1004 2559 : if (snod->sqnum < c->cs_sqnum) {
1005 : /*
1006 : * This means that we reached end of log and now
1007 : * look to the older log data, which was already
1008 : * committed but the eraseblock was not erased (UBIFS
1009 : * only un-maps it). So this basically means we have to
1010 : * exit with "end of log" code.
1011 : */
1012 : err = 1;
1013 : goto out;
1014 : }
1015 :
1016 : /* Make sure the first node sits at offset zero of the LEB */
1017 2549 : if (snod->offs != 0) {
1018 0 : ubifs_err(c, "first node is not at zero offset");
1019 : goto out_dump;
1020 : }
1021 :
1022 12209 : list_for_each_entry(snod, &sleb->nodes, list) {
1023 : cond_resched();
1024 :
1025 9660 : if (snod->sqnum >= SQNUM_WATERMARK) {
1026 0 : ubifs_err(c, "file system's life ended");
1027 : goto out_dump;
1028 : }
1029 :
1030 9660 : if (snod->sqnum < c->cs_sqnum) {
1031 0 : ubifs_err(c, "bad sqnum %llu, commit sqnum %llu",
1032 : snod->sqnum, c->cs_sqnum);
1033 : goto out_dump;
1034 : }
1035 :
1036 9660 : if (snod->sqnum > c->max_sqnum)
1037 4220 : c->max_sqnum = snod->sqnum;
1038 :
1039 9660 : switch (snod->type) {
1040 7114 : case UBIFS_REF_NODE: {
1041 7114 : const struct ubifs_ref_node *ref = snod->node;
1042 :
1043 7114 : err = validate_ref(c, ref);
1044 7114 : if (err == 1)
1045 : break; /* Already have this bud */
1046 6450 : if (err)
1047 : goto out_dump;
1048 :
1049 6450 : err = ubifs_shash_update(c, c->log_hash, ref,
1050 : UBIFS_REF_NODE_SZ);
1051 6450 : if (err)
1052 : goto out;
1053 :
1054 19350 : err = add_replay_bud(c, le32_to_cpu(ref->lnum),
1055 6450 : le32_to_cpu(ref->offs),
1056 6450 : le32_to_cpu(ref->jhead),
1057 : snod->sqnum);
1058 6450 : if (err)
1059 : goto out;
1060 :
1061 : break;
1062 : }
1063 2546 : case UBIFS_CS_NODE:
1064 : /* Make sure it sits at the beginning of LEB */
1065 2546 : if (snod->offs != 0) {
1066 0 : ubifs_err(c, "unexpected node in log");
1067 : goto out_dump;
1068 : }
1069 : break;
1070 0 : default:
1071 0 : ubifs_err(c, "unexpected node in log");
1072 : goto out_dump;
1073 : }
1074 664 : }
1075 :
1076 2549 : if (sleb->endpt || c->lhead_offs >= c->leb_size) {
1077 2549 : c->lhead_lnum = lnum;
1078 2549 : c->lhead_offs = sleb->endpt;
1079 : }
1080 :
1081 2549 : err = !sleb->endpt;
1082 4782 : out:
1083 4782 : ubifs_scan_destroy(sleb);
1084 4782 : return err;
1085 :
1086 0 : out_dump:
1087 0 : set_failure_reason_callback(c, FR_DATA_CORRUPTED);
1088 0 : ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
1089 : lnum, offs + snod->offs);
1090 0 : ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
1091 0 : ubifs_scan_destroy(sleb);
1092 0 : return -EINVAL;
1093 : }
1094 :
1095 : /**
1096 : * take_ihead - update the status of the index head in lprops to 'taken'.
1097 : * @c: UBIFS file-system description object
1098 : *
1099 : * This function returns the amount of free space in the index head LEB or a
1100 : * negative error code.
1101 : */
1102 2358 : int take_ihead(struct ubifs_info *c)
1103 : {
1104 : const struct ubifs_lprops *lp;
1105 : int err, free;
1106 :
1107 2358 : ubifs_get_lprops(c);
1108 :
1109 2358 : lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
1110 2358 : if (IS_ERR(lp)) {
1111 30 : err = PTR_ERR(lp);
1112 60 : if (test_and_clear_failure_reason_callback(c, FR_LPT_CORRUPTED) &&
1113 30 : can_ignore_failure_callback(c, FR_LPT_CORRUPTED))
1114 30 : err = 0;
1115 : goto out;
1116 : }
1117 :
1118 2328 : free = lp->free;
1119 :
1120 4656 : if (!test_lpt_valid_callback(c, c->ihead_lnum, LPROPS_NC, LPROPS_NC,
1121 : LPROPS_NC, LPROPS_NC)) {
1122 : err = free;
1123 : goto out;
1124 : }
1125 :
1126 2224 : lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
1127 2224 : lp->flags | LPROPS_TAKEN, 0);
1128 2224 : if (IS_ERR(lp)) {
1129 0 : err = PTR_ERR(lp);
1130 0 : goto out;
1131 : }
1132 :
1133 : err = free;
1134 2358 : out:
1135 2358 : ubifs_release_lprops(c);
1136 2358 : return err;
1137 : }
1138 :
1139 : /**
1140 : * ubifs_replay_journal - replay journal.
1141 : * @c: UBIFS file-system description object
1142 : *
1143 : * This function scans the journal, replays and cleans it up. It makes sure all
1144 : * memory data structures related to uncommitted journal are built (dirty TNC
1145 : * tree, tree of buds, modified lprops, etc).
1146 : */
1147 2263 : int ubifs_replay_journal(struct ubifs_info *c)
1148 : {
1149 : int err, lnum, free;
1150 :
1151 : BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1152 :
1153 : /* Update the status of the index head in lprops to 'taken' */
1154 2263 : free = take_ihead(c);
1155 2263 : if (free < 0)
1156 : return free; /* Error code */
1157 :
1158 2263 : if (c->program_type != FSCK_PROGRAM_TYPE) {
1159 : /*
1160 : * Skip index head checking for fsck, it is hard to check it
1161 : * caused by possible corrupted/incorrect lpt, tnc updating
1162 : * will report error code if index tree is really corrupted.
1163 : */
1164 0 : if (c->ihead_offs != c->leb_size - free) {
1165 0 : ubifs_err(c, "bad index head LEB %d:%d", c->ihead_lnum,
1166 : c->ihead_offs);
1167 : return -EINVAL;
1168 : }
1169 : }
1170 :
1171 2263 : dbg_mnt("start replaying the journal");
1172 2263 : c->replaying = 1;
1173 2263 : lnum = c->ltail_lnum = c->lhead_lnum;
1174 :
1175 : do {
1176 4791 : err = replay_log_leb(c, lnum, 0, c->sbuf);
1177 4791 : if (err == 1) {
1178 2233 : if (lnum != c->lhead_lnum)
1179 : /* We hit the end of the log */
1180 : break;
1181 :
1182 : /*
1183 : * The head of the log must always start with the
1184 : * "commit start" node on a properly formatted UBIFS.
1185 : * But we found no nodes at all, which means that
1186 : * something went wrong and we cannot proceed mounting
1187 : * the file-system.
1188 : */
1189 12 : set_failure_reason_callback(c, FR_DATA_CORRUPTED);
1190 12 : ubifs_err(c, "no UBIFS nodes found at the log head LEB %d:%d, possibly corrupted",
1191 : lnum, 0);
1192 : err = -EINVAL;
1193 : }
1194 2558 : if (err)
1195 : goto out;
1196 2549 : lnum = ubifs_next_log_lnum(c, lnum);
1197 2549 : } while (lnum != c->ltail_lnum);
1198 :
1199 2242 : err = replay_buds(c);
1200 2227 : if (err)
1201 : goto out;
1202 :
1203 2227 : err = apply_replay_list(c);
1204 2227 : if (err)
1205 : goto out;
1206 :
1207 2199 : err = set_buds_lprops(c);
1208 2199 : if (err)
1209 : goto out;
1210 :
1211 : /*
1212 : * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1213 : * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1214 : * depend on it. This means we have to initialize it to make sure
1215 : * budgeting works properly.
1216 : */
1217 2199 : c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1218 2199 : c->bi.uncommitted_idx *= c->max_idx_node_sz;
1219 :
1220 2199 : ubifs_assert(c, c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1221 2389 : dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, highest_inum %lu",
1222 : c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1223 : (unsigned long)c->highest_inum);
1224 4294 : out:
1225 2248 : destroy_replay_list(c);
1226 2248 : destroy_bud_list(c);
1227 2248 : c->replaying = 0;
1228 2248 : return err;
1229 : }
|