UBI: torture after scrub

Tue Jul 9 10:02:50 EDT 2013

Hi,

There is something a bit odd in UBI's behaviour related to scrubbing.
Throughout the power-cut test logs I see lots of sequences like this:

bitflips detected in PEB 30
PEB 30 gets scrubbed
PEB 30 gets erased and later reused
bitflips detected in PEB 30
....

Shouldn't UBI do a torture test of the source EB after scrubbing? Otherwise
it seems pointless to scrub this EB at all.

The change below is based on Linux 3.3, but it applies almost without changes
to the 3.10 tree (however, I'm not sure whether the sync erasure in
wear_leveling_worker adds to write amplification).

Does it look reasonable?


Index: linux/drivers/mtd/ubi/eba.c
===================================================================

--- linux.orig/drivers/mtd/ubi/eba.c
+++ linux/drivers/mtd/ubi/eba.c
@@ -986,6 +986,7 @@ int ubi_eba_copy_leb(struct ubi_device *
 		     struct ubi_vid_hdr *vid_hdr)
 {
 	int err, vol_id, lnum, data_size, aldata_size, idx;
+	int err_bitflips = 0;
 	struct ubi_volume *vol;
 	uint32_t crc;
 
@@ -1060,7 +1061,9 @@ int ubi_eba_copy_leb(struct ubi_device *
 	mutex_lock(&ubi->buf_mutex);
 	dbg_wl("read %d bytes of data", aldata_size);
 	err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size);
-	if (err && err != UBI_IO_BITFLIPS) {
+	if (err == UBI_IO_BITFLIPS)
+		err_bitflips = MOVE_SOURCE_BITFLIPS;
+	else if (err) {
 		ubi_warn("error %d while reading data from PEB %d",
 			 err, from);
 		err = MOVE_SOURCE_RD_ERR;
@@ -1164,7 +1167,7 @@ out_unlock_buf:
 	mutex_unlock(&ubi->buf_mutex);
 out_unlock_leb:
 	leb_write_unlock(ubi, vol_id, lnum);
-	return err;
+	return err ? err : err_bitflips;
 }
 
 /**
Index: linux/drivers/mtd/ubi/ubi.h
===================================================================
--- linux.orig/drivers/mtd/ubi/ubi.h
+++ linux/drivers/mtd/ubi/ubi.h
@@ -112,6 +112,7 @@ enum {
  *
  * MOVE_CANCEL_RACE: canceled because the volume is being deleted, the source
  *                   PEB was put meanwhile, or there is I/O on the source PEB
+ * MOVE_SOURCE_BITFLIPS: PEB moved, but there were bitflips in the source PEB
  * MOVE_SOURCE_RD_ERR: canceled because there was a read error from the source
  *                     PEB
  * MOVE_TARGET_RD_ERR: canceled because there was a read error from the target
@@ -124,6 +125,7 @@ enum {
  */
 enum {
 	MOVE_CANCEL_RACE = 1,
+	MOVE_SOURCE_BITFLIPS,
 	MOVE_SOURCE_RD_ERR,
 	MOVE_TARGET_RD_ERR,
 	MOVE_TARGET_WR_ERR,
Index: linux/drivers/mtd/ubi/wl.c
===================================================================
--- linux.orig/drivers/mtd/ubi/wl.c
+++ linux/drivers/mtd/ubi/wl.c
@@ -797,6 +797,10 @@ static int wear_leveling_worker(struct u
 			scrubbing = 1;
 			goto out_not_moved;
 		}
+		if (err == MOVE_SOURCE_BITFLIPS) {
+			scrubbing = 1;
+			goto out_moved;
+		}
 		if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
 		    err == MOVE_TARGET_RD_ERR) {
 			/*
@@ -830,6 +834,7 @@ static int wear_leveling_worker(struct u
 		ubi_assert(0);
 	}
 
+out_moved:
 	/* The PEB has been successfully moved */
 	if (scrubbing)
 		ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
@@ -845,7 +850,7 @@ static int wear_leveling_worker(struct u
 	ubi->move_to_put = ubi->wl_scheduled = 0;
 	spin_unlock(&ubi->wl_lock);
 
-	err = schedule_erase(ubi, e1, 0);
+	err = schedule_erase(ubi, e1, scrubbing);
 	if (err) {
 		kmem_cache_free(ubi_wl_entry_slab, e1);
 		if (e2)