[PATCH] [MTD] UBI: implement atomic LEB change
Artem Bityutskiy
dedekind at infradead.org
Thu Feb 1 12:42:52 EST 2007
>From 1abfdcbb9426d9c47709bb70e3a4a8974535be2c Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <Artem.Bityutskiy at nokia.com>
Date: Thu, 1 Feb 2007 19:25:31 +0200
Subject: [PATCH] [MTD] UBI: implement atomic LEB change
This patch implements the atomic LEB change feature, which makes it
possible to change the contents of a logical eraseblock atomically.
This patch makes UBI reserve one more PEB which is needed to
implement this feature.
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy at nokia.com>
---
drivers/mtd/ubi/eba.c | 192 ++++++++++++++++++++++++++++++++++++++++++++++-
drivers/mtd/ubi/eba.h | 47 +++++++++--
drivers/mtd/ubi/uif.c | 62 +++++++++++++++
include/linux/mtd/ubi.h | 54 +++++++++++++-
4 files changed, 342 insertions(+), 13 deletions(-)
Index: ubi-2.6.git/drivers/mtd/ubi/eba.c
===================================================================
--- ubi-2.6.git.orig/drivers/mtd/ubi/eba.c
+++ ubi-2.6.git/drivers/mtd/ubi/eba.c
@@ -22,7 +22,7 @@
#include <linux/sched.h>
#include <linux/crc32.h>
#include <linux/spinlock.h>
-#include <linux/rwsem.h>
+#include <linux/mutex.h>
#include <linux/err.h>
#include <linux/types.h>
#include <mtd/ubi-header.h>
@@ -41,6 +41,12 @@
#include "debug.h"
/*
+ * The EBA unit reserves one PEB to implement the atomic eraseblock movement
+ * feature.
+ */
+#define EBA_RESERVED_PEBS 1
+
+/*
* The highest bit in logical-to-physical eraseblock mappings is used to
* indicate that the logical eraseblock is not mapped.
*/
@@ -164,6 +170,34 @@ static inline void leb_unmap(const struc
}
/**
+ * leb_remap - point a logical eraseblock at a different physical eraseblock.
+ *
+ * @ubi: the UBI device description object
+ * @vol_id: the volume ID
+ * @lnum: the logical eraseblock number
+ * @new_pnum: new physical eraseblock
+ *
+ * This function replaces the physical eraseblock recorded for logical
+ * eraseblock @lnum of volume @vol_id with @new_pnum and increments the
+ * recorded version of the logical eraseblock by one.
+ * The logical eraseblock has to be locked.
+ */
+static inline void leb_remap(const struct ubi_info *ubi, int vol_id, int lnum,
+			     int new_pnum)
+{
+	struct ubi_eba_info *eba_info = ubi->eba;
+	const int tbl_idx = vol_id2idx(ubi, vol_id);
+
+	/* The mapping and the version are updated together under the lock */
+	spin_lock(&eba_info->eba_tbl_lock);
+	ubi_assert(eba_info->eba_tbl[tbl_idx].recs);
+	ubi_assert(eba_info->eba_tbl[tbl_idx].recs[lnum].pnum >= 0);
+	eba_info->eba_tbl[tbl_idx].recs[lnum].pnum = new_pnum;
+	eba_info->eba_tbl[tbl_idx].recs[lnum].leb_ver++;
+	spin_unlock(&eba_info->eba_tbl_lock);
+}
+
+/**
* leb2peb - get physical eraseblock number the logical eraseblock is mapped
* to.
*
@@ -413,6 +447,7 @@ int ubi_eba_read_leb(const struct ubi_in
*read = len;
return 0;
}
+
dbg_eba("read %zd bytes from offset %d of LEB %d:%d, PEB %d",
len, offset, vol_id, lnum, pnum);
@@ -576,8 +611,10 @@ retry:
pnum = ubi_wl_get_peb(ubi, dtype);
if (unlikely(pnum < 0)) {
err = pnum;
+ dbg_err("cannot get free PEB - error %d", err);
goto out_vid_hdr;
}
+
dbg_eba("write VID hdr and %zd bytes at offset %d of LEB %d:%d, PEB %d",
len, offset, vol_id, lnum, pnum);
@@ -725,8 +762,10 @@ retry:
pnum = ubi_wl_get_peb(ubi, dtype);
if (unlikely(pnum < 0)) {
err = pnum;
+ dbg_err("cannot get free PEB - error %d", err);
goto out_vid_hdr;
}
+
dbg_eba("write VID hdr and %zd bytes at of LEB %d:%d, PEB %d",
len, vol_id, lnum, pnum);
@@ -757,7 +796,6 @@ out_unlock:
/* Write failure */
write_error:
ubi_free_vid_hdr(ubi, vid_hdr);
- ubi_free_vid_hdr(ubi, vid_hdr);
if (err != -EIO || !io->bad_allowed)
goto no_bad_eraseblocks;
@@ -783,6 +821,151 @@ no_bad_eraseblocks:
return err;
}
+/*
+ * Atomically replace the contents of logical eraseblock @vol_id:@lnum with
+ * @len bytes from @buf. The VID header and the data are written to a newly
+ * obtained physical eraseblock, the logical eraseblock is then re-mapped to
+ * it and the old physical eraseblock is put back with 'ubi_wl_put_peb()'.
+ * The whole operation runs under @eba->change_mutex and the LEB write lock.
+ *
+ * Returns zero in case of success and a negative error code in case of
+ * failure. A write which fails with %-EIO on a device where bad eraseblocks
+ * are allowed is retried on another physical eraseblock; when the retries are
+ * exhausted the write error itself is returned.
+ */
+int ubi_eba_atomic_leb_change(const struct ubi_info *ubi, int vol_id, int lnum,
+			      const void *buf, size_t len,
+			      enum ubi_data_type dtype)
+{
+	int err, err1, old_pnum, new_pnum, tries = 0;
+	uint32_t leb_ver;
+	struct ubi_vid_hdr *vid_hdr;
+	const struct ubi_vtbl_vtr *vtr;
+	struct ubi_eba_info *eba = ubi->eba;
+	const struct ubi_io_info *io = ubi->io;
+
+retry:
+	/*
+	 * Input arguments sanity check. @len is a 'size_t', so there is no
+	 * point in asserting that it is non-negative.
+	 */
+	ubi_assert(vol_id >= 0);
+	ubi_assert(vol_id < ubi->acc->max_volumes || ubi_is_ivol(vol_id));
+	ubi_assert(lnum >= 0);
+	ubi_assert(dtype == UBI_DATA_LONGTERM || dtype == UBI_DATA_SHORTTERM ||
+		   dtype == UBI_DATA_UNKNOWN);
+
+	vtr = ubi_vtbl_get_vtr(ubi, vol_id);
+	ubi_assert(!IS_ERR(vtr));
+	ubi_assert(len <= io->leb_size - vtr->data_pad);
+	ubi_assert(lnum < ubi->eba->eba_tbl[vol_id2idx(ubi, vol_id)].leb_count);
+	ubi_assert(len % io->min_io_size == 0);
+	ubi_assert(vtr->vol_type == UBI_DYNAMIC_VOLUME);
+
+	if (unlikely(ubi->io->ro_mode)) {
+		dbg_err("read-only mode");
+		return -EROFS;
+	}
+
+	mutex_lock(&eba->change_mutex);
+
+	new_pnum = ubi_wl_get_peb(ubi, dtype);
+	if (unlikely(new_pnum < 0)) {
+		err = new_pnum;
+		dbg_err("cannot get free PEB - error %d", err);
+		goto out_unlock_mutex;
+	}
+
+	err = ubi_eba_leb_write_lock(ubi, vol_id, lnum);
+	if (unlikely(err))
+		goto out_put_unlock_mutex;
+
+	/*
+	 * NOTE(review): the code below puts @old_pnum unconditionally and
+	 * 'leb_remap()' asserts that the LEB is mapped - confirm that callers
+	 * never pass an unmapped logical eraseblock here.
+	 */
+	old_pnum = leb2peb(ubi, vol_id, lnum);
+	leb_ver = leb_get_ver(ubi, vol_id, lnum);
+
+	dbg_eba("change LEB %d:%d with %zd bytes of data, old PEB %d, "
+		"new PEB is %d", vol_id, lnum, len, old_pnum, new_pnum);
+
+	/*
+	 * We are ready to write new data to the new physical eraseblock. Write
+	 * the VID header first, then data.
+	 */
+	vid_hdr = ubi_zalloc_vid_hdr(ubi);
+	if (unlikely(!vid_hdr)) {
+		err = -ENOMEM;
+		goto out_unlock_leb_put;
+	}
+
+	leb_ver += 1; /* we have to increase the version */
+	vid_hdr->leb_ver = cpu_to_ubi32(leb_ver);
+	vid_hdr->vol_type = UBI_VID_DYNAMIC;
+	vid_hdr->vol_id = cpu_to_ubi32(vol_id);
+	vid_hdr->lnum = cpu_to_ubi32(lnum);
+	vid_hdr->compat = ubi_vtbl_get_compat(ubi, vol_id);
+	vid_hdr->data_pad = cpu_to_ubi32(vtr->data_pad);
+
+	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
+	if (unlikely(err))
+		goto write_error;
+
+	if (len != 0) {
+		size_t written;
+
+		err = ubi_io_write_data(ubi, buf, new_pnum, 0, len, &written);
+		if (unlikely(err))
+			goto write_error;
+	}
+
+	/*
+	 * The data is there, we need to re-map our LEB from @old_pnum to
+	 * @new_pnum.
+	 */
+	leb_remap(ubi, vol_id, lnum, new_pnum);
+
+	/* And drop the old physical eraseblock */
+	err = ubi_wl_put_peb(ubi, old_pnum, 0);
+	if (unlikely(err)) {
+		/*
+		 * No idea what is this. But the best we can do is to switch to
+		 * R/O mode.
+		 */
+		ubi_eba_ro_mode(ubi);
+	}
+
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+	mutex_unlock(&eba->change_mutex);
+	return err;
+
+out_unlock_leb_put:
+	ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+out_put_unlock_mutex:
+	err1 = ubi_wl_put_peb(ubi, new_pnum, 0);
+	if (err1) {
+		/*
+		 * Something really bad is going on, switch to R/O mode just in
+		 * case.
+		 */
+		ubi_eba_ro_mode(ubi);
+	}
+out_unlock_mutex:
+	mutex_unlock(&eba->change_mutex);
+	return err;
+
+write_error:
+	ubi_free_vid_hdr(ubi, vid_hdr);
+	if (err != -EIO || !io->bad_allowed)
+		goto no_bad_eraseblocks;
+
+	/*
+	 * We assume that if this physical eraseblock went bad - the erase code
+	 * will handle that.
+	 */
+	ubi_msg("try to recover form the error");
+
+	/*
+	 * Keep the write error in @err: if the put succeeds but we are out of
+	 * retries, the caller must still see the failure, not zero.
+	 */
+	err1 = ubi_wl_put_peb(ubi, new_pnum, 1);
+	ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+	mutex_unlock(&eba->change_mutex);
+	if (err1)
+		return err1;
+	if (++tries > 5)
+		return err;
+	goto retry;
+
+	/*
+	 * This flash device does not admit of bad eraseblocks or something
+	 * nasty and unexpected happened. Switch to read-only mode just in
+	 * case.
+	 */
+no_bad_eraseblocks:
+	ubi_eba_ro_mode(ubi);
+	ubi_eba_leb_write_unlock(ubi, vol_id, lnum);
+	mutex_unlock(&eba->change_mutex);
+	return err;
+}
+
int ubi_eba_leb_is_mapped(const struct ubi_info *ubi, int vol_id, int lnum)
{
dbg_eba("check LEB %d:%d PEBs", vol_id, lnum);
@@ -942,6 +1125,7 @@ int ubi_eba_init_scan(struct ubi_info *u
spin_lock_init(&eba->eba_tbl_lock);
spin_lock_init(&eba->ltree_lock);
+ mutex_init(&eba->change_mutex);
eba->ltree = RB_ROOT;
eba->num_volumes = acc->max_volumes + acc->ivol_count;
@@ -952,6 +1136,10 @@ int ubi_eba_init_scan(struct ubi_info *u
goto out;
}
+ err = ubi_acc_reserve(ubi, EBA_RESERVED_PEBS);
+ if (err)
+ goto out;
+
err = build_eba_tbl(ubi, si);
if (err)
goto out;
Index: ubi-2.6.git/drivers/mtd/ubi/eba.h
===================================================================
--- ubi-2.6.git.orig/drivers/mtd/ubi/eba.h
+++ ubi-2.6.git/drivers/mtd/ubi/eba.h
@@ -41,6 +41,7 @@
#include <linux/rwsem.h>
#include <linux/rbtree.h>
#include <linux/mtd/ubi.h>
+#include <linux/mutex.h>
struct ubi_info;
struct ubi_scan_info;
@@ -78,8 +79,8 @@ int ubi_eba_rmvol(const struct ubi_info
* @reserved_pebs: new count of physical eraseblocks in this volume
*
* This function changes the EBA table accordingly to the volume re-size
- * operation. If the volume is actually shrinked, the dropped logical
- * eraseblocs are got unmapped an thus, the corresponding physical eraseblocs
+ * operation. If the volume is actually shrunk, the dropped logical
+ * eraseblocks get unmapped and thus the corresponding physical eraseblocks
* are scheduled for erasure. This function returns zero in case of success and
* a negative error code in case of failure.
*/
@@ -101,7 +102,7 @@ int ubi_eba_erase_leb(const struct ubi_i
* ubi_eba_read_leb - read data from a logical eraseblock.
*
* @ubi: the UBI device description object
- * @vol_id: the volume ID from where to read
+ * @vol_id: ID of the volume to read
* @lnum: the logical eraseblock number to read from
* @buf: the buffer to store the read data
* @offset: the offset within the logical eraseblock from where to read
@@ -131,7 +132,7 @@ int ubi_eba_read_leb(const struct ubi_in
* ubi_eba_write_leb - write data to a logical eraseblock of a dynamic volume.
*
* @ubi: the UBI device description object
- * @vol_id: the volume ID where to write
+ * @vol_id: ID of the volume to write
* @lnum: the logical eraseblock number to write
* @buf: the data to write
* @offset: the offset within the logical eraseblock where to write
@@ -139,10 +140,11 @@ int ubi_eba_read_leb(const struct ubi_in
* @dtype: data type
* @written: how many bytes were actually written
*
- * This function writes data to a logical eraseblock of a dynamic volume.
- * Returns zero in case of success and a negative error code in case of
- * failure. The @written field contains the number of successfully written
- * bytes.
+ * This function writes data to a logical eraseblock of a dynamic volume. The
+ * @len and @offset arguments have to be aligned to the minimal I/O unit size.
+ * This function returns zero in case of success and a negative error code in
+ * case of failure. The @written field contains the number of successfully
+ * written bytes.
*/
int ubi_eba_write_leb(const struct ubi_info *ubi, int vol_id, int lnum,
const void *buf, int offset, size_t len,
@@ -152,7 +154,7 @@ int ubi_eba_write_leb(const struct ubi_i
* ubi_eba_write_leb_st - write data to a logical eraseblock of a static volume.
*
* @ubi: the UBI device description object
- * @vol_id: the volume ID where to write
+ * @vol_id: ID of the volume to write
* @lnum: the logical eraseblock number to write
* @buf: the data to write
* @len: how many bytes to write
@@ -182,6 +184,31 @@ int ubi_eba_write_leb_st(const struct ub
size_t *written, int used_ebs);
/**
+ * ubi_eba_atomic_leb_change - change the contents of an eraseblock atomically.
+ *
+ * @ubi: the UBI device description object
+ * @vol_id: ID of the volume to change
+ * @lnum: the logical eraseblock number to change
+ * @buf: new logical eraseblock contents
+ * @len: the amount of new data to write
+ * @dtype: data type
+ *
+ * This function changes the contents of the logical eraseblock @lnum of volume
+ * @vol_id atomically. This means, the function puts new data (from @buf) to
+ * the logical eraseblock and guarantees that in case of an interruption (like
+ * an unclean reboot) the old contents will be preserved and won't be damaged.
+ *
+ * The @len argument has to be aligned to the minimal I/O unit size. This
+ * function returns zero in case of success and a negative error code in case
+ * of failure. If the function fails - either the old contents of the logical
+ * eraseblock are fully preserved or the new contents are fully written to
+ * flash.
+ */
+int ubi_eba_atomic_leb_change(const struct ubi_info *ubi, int vol_id, int lnum,
+ const void *buf, size_t len,
+ enum ubi_data_type dtype);
+
+/**
* ubi_eba_leb_is_mapped - check if a logical eraseblock is mapped.
*
* @ubi: the UBI device description object
@@ -339,6 +366,7 @@ struct ubi_eba_ltree_entry {
* @ltree: the lock tree
* @ltree_lock: protects the lock tree
* @num_volumes: number of volumes mapped by the EBA table
+ * @change_mutex: serializes the atomic eraseblock change operation
*
* The EBA unit implements per-logical eraseblock locking. Before accessing a
* logical eraseblock it is locked for reading or writing. The per-logical
@@ -354,6 +382,7 @@ struct ubi_eba_info {
struct rb_root ltree; /* private */
spinlock_t ltree_lock; /* private */
size_t num_volumes; /* private */
+ struct mutex change_mutex; /* private */
};
#endif /* !__UBI_EBA_H__ */
Index: ubi-2.6.git/drivers/mtd/ubi/uif.c
===================================================================
--- ubi-2.6.git.orig/drivers/mtd/ubi/uif.c
+++ ubi-2.6.git/drivers/mtd/ubi/uif.c
@@ -466,6 +466,68 @@ int ubi_eraseblock_write(struct ubi_vol_
}
EXPORT_SYMBOL_GPL(ubi_eraseblock_write);
+/*
+ * Validate the request and pass it on to 'ubi_eba_atomic_leb_change()'.
+ *
+ * Returns %-EINVAL for a bad volume ID, logical eraseblock number, length
+ * (too large or not a multiple of the minimal I/O unit size) or data type,
+ * %-EROFS if the descriptor is read-only or the volume is static, and %-EBADF
+ * if the volume has the update marker set. Otherwise returns whatever
+ * 'ubi_eba_atomic_leb_change()' returns.
+ */
+int ubi_atomic_eraseblock_change(struct ubi_vol_desc *udesc, int lnum,
+				 const void *buf, size_t len,
+				 enum ubi_data_type dtype)
+{
+	const struct ubi_vtbl_vtr *vtr;
+	struct ubi_vol_desc *desc = udesc;
+	const struct ubi_info *ubi = desc->vol->ubi;
+	const struct ubi_io_info *io = ubi->io;
+	int vol_id = desc->vol->vol_id;
+
+	dbg_uif("atomically write %zd bytes to LEB %d:%d",
+		len, vol_id, lnum);
+
+	if (unlikely(vol_id < 0 || vol_id >= ubi->acc->max_volumes)) {
+		dbg_err("bad vol_id %d", vol_id);
+		return -EINVAL;
+	}
+
+	vtr = ubi_vtbl_get_vtr(ubi, vol_id);
+	ubi_assert(!IS_ERR(vtr));
+
+	if (unlikely(desc->mode == UBI_READONLY)) {
+		dbg_err("read-only mode");
+		return -EROFS;
+	}
+
+	if (unlikely(vtr->vol_type == UBI_STATIC_VOLUME)) {
+		dbg_err("static volume");
+		return -EROFS;
+	}
+
+	if (unlikely(lnum < 0 || lnum >= vtr->reserved_pebs)) {
+		dbg_err("bad lnum %d", lnum);
+		return -EINVAL;
+	}
+
+	/* @len is a 'size_t' and cannot be negative, only the upper bound matters */
+	if (unlikely(len > vtr->usable_leb_size)) {
+		dbg_err("bad len %zd", len);
+		return -EINVAL;
+	}
+
+	if (unlikely(len % io->min_io_size)) {
+		dbg_err("unaligned len %zd", len);
+		return -EINVAL;
+	}
+
+	if (unlikely(dtype != UBI_DATA_LONGTERM &&
+		     dtype != UBI_DATA_SHORTTERM &&
+		     dtype != UBI_DATA_UNKNOWN)) {
+		dbg_err("bad dtype %d", dtype);
+		return -EINVAL;
+	}
+
+	if (unlikely(vtr->upd_marker)) {
+		dbg_err("writing update-interrupted volume");
+		return -EBADF;
+	}
+
+	return ubi_eba_atomic_leb_change(ubi, vol_id, lnum, buf, len, dtype);
+}
+EXPORT_SYMBOL_GPL(ubi_atomic_eraseblock_change);
+
int ubi_eraseblock_erase(struct ubi_vol_desc *udesc, int lnum)
{
const struct ubi_vtbl_vtr *vtr;
Index: ubi-2.6.git/include/linux/mtd/ubi.h
===================================================================
--- ubi-2.6.git.orig/include/linux/mtd/ubi.h
+++ ubi-2.6.git/include/linux/mtd/ubi.h
@@ -258,7 +258,7 @@ void ubi_close_volume(struct ubi_vol_des
* corrupted. But the read data is actually OK.
*
* Note, if a volume is damaged because of an interrupted update (the
- * @upd_marker flag is set) this function just returns immidiately with %-EBADF
+ * @upd_marker flag is set) this function just returns immediately with %-EBADF
* error code. In other words, volumes like that cannot be read before re-doing
* the update operation.
*/
@@ -335,6 +335,56 @@ static inline int ubi_write(struct ubi_v
}
/**
+ * ubi_atomic_eraseblock_change - change the contents of a logical eraseblock
+ * atomically.
+ *
+ * @udesc: volume descriptor
+ * @lnum: the logical eraseblock number to change
+ * @buf: new logical eraseblock contents
+ * @len: the amount of new data to write
+ * @dtype: data type
+ *
+ * This function changes the contents of the logical eraseblock @lnum
+ * atomically. This means, the function puts new data (from @buf) to the
+ * logical eraseblock and guarantees that in case of an interruption (like an
+ * unclean reboot) the old contents will be preserved and won't be damaged.
+ *
+ * The idea of how this function is implemented is that it writes new data
+ * (@buf) to some new physical eraseblock, then just re-maps this logical
+ * eraseblock to it. The old physical eraseblock is then scheduled for erasure.
+ *
+ * The @len argument has to be aligned to the minimal I/O unit size. This
+ * function returns zero in case of success and a negative error code in case
+ * of failure. If the function fails - either the old contents of the logical
+ * eraseblock are fully preserved or the new contents are fully written to
+ * flash.
+ *
+ */
+int ubi_atomic_eraseblock_change(struct ubi_vol_desc *udesc, int lnum,
+ const void *buf, size_t len,
+ enum ubi_data_type dtype);
+
+/**
+ * ubi_atomic_change - atomically change a logical eraseblock (simplified).
+ *
+ * @udesc: volume descriptor
+ * @lnum: the logical eraseblock number to change
+ * @buf: new logical eraseblock contents
+ * @len: the amount of new data to write
+ *
+ * This function is the same as the 'ubi_atomic_eraseblock_change()' function,
+ * but it has no data type argument and always passes %UBI_DATA_UNKNOWN.
+ */
+static inline int ubi_atomic_change(struct ubi_vol_desc *udesc, int lnum,
+				    const void *buf, size_t len)
+{
+	const enum ubi_data_type dtype = UBI_DATA_UNKNOWN;
+
+	return ubi_atomic_eraseblock_change(udesc, lnum, buf, len, dtype);
+}
+
+
+/**
* ubi_eraseblock_erase - erase a logical eraseblock.
*
* @udesc: volume descriptor
@@ -344,7 +394,7 @@ static inline int ubi_write(struct ubi_v
* case of failure.
*
* Note, UBI erases eraseblocks asynchronously. This means that this function
- * will basically unmap this logical eraseblock from its physical eraseblock,
+ * will basically un-map this logical eraseblock from its physical eraseblock,
* schedule the physical eraseblock for erasure and return.
*/
int ubi_eraseblock_erase(struct ubi_vol_desc *udesc, int lnum);
More information about the linux-mtd
mailing list