mtd/drivers/mtd/chips cfi_cmdset_0002.c,1.64,1.65

Fri Mar 28 14:47:45 EST 2003

Update of /home/cvs/mtd/drivers/mtd/chips
In directory phoenix.infradead.org:/tmp/cvs-serv19782

Modified Files:
	cfi_cmdset_0002.c 
Log Message:
Improve status polling during erase/write operations.
Previous warning about DQ6 still toggling but operation completed
successfully is a valid state and should not be reported.
New polling algorithm deals with "spurious rejection" caused by
the asynchronous nature of internal state machine of the device
and external bus reads.

Index: cfi_cmdset_0002.c
===================================================================
RCS file: /home/cvs/mtd/drivers/mtd/chips/cfi_cmdset_0002.c,v
retrieving revision 1.64
retrieving revision 1.65
diff -u -r1.64 -r1.65

--- cfi_cmdset_0002.c	17 Feb 2003 20:47:11 -0000	1.64
+++ cfi_cmdset_0002.c	28 Mar 2003 19:47:42 -0000	1.65
@@ -509,26 +509,51 @@
 	        cfi_send_gen_cmd(0xA0, cfi->addr_unlock1, chip->start, map, cfi, CFI_DEVICETYPE_X8, NULL);
 	}
 
+	DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): WRITE 0x%.2x\n", __func__, datum );
 	cfi_write(map, datum, adr);
 
 	cfi_spin_unlock(chip->mutex);
 	cfi_udelay(chip->word_write_time);
 	cfi_spin_lock(chip->mutex);
 
-	/* Polling toggle bits instead of reading back many times
-	   This ensures that write operation is really completed,
-	   or tells us why it failed. */        
+	/*
+	 * Polling toggle bits instead of reading back many times
+	 * This ensures that write operation is really completed,
+	 * or tells us why it failed.
+	 *
+	 * The polling and decoding of error state appears that it could
+	 * be simplified.  Don't do it unless you really know what you
+	 * are doing.  You must remember that JESD21-C 3.5.3 states that
+	 * the status must be read back an _additional_ two times before
+	 * a failure is determined.  This is because these devices have
+	 * internal state machines that are asynchronous to the external
+	 * LPC bus.  During an erase or write the read-back status of the
+	 * polling bits might be internaling transitioning when the external
+	 * read-back occurs.  This means that the bits aren't in the final
+	 * state and they might appear to report an error as they transition
+	 * and are in a weird state.  This will produce infrequent errors
+	 * that will usually disappear then next time an erase or write
+	 * happens (Try tracking those errors down!).  To ensure that
+	 * the bits are not in transition the location must be read-back
+	 * two more times and compared against what was written - BOTH reads
+	 * MUST match what was written - don't think this can be simplified
+	 * to only the last read matching.  If the comparison fails error
+	 * state can then be decoded with certainty.
+	 *
+	 * - Thayne Harbaugh
+	 */
 	dq6 = CMD(1<<6);
 	dq5 = CMD(1<<5);
-    /* See comment above for timeout value. */
-    timeo = jiffies + uWriteTimeout; 
+	/* See comment above for timeout value. */
+	timeo = jiffies + uWriteTimeout; 
 		
 	oldstatus = cfi_read(map, adr);
 	status = cfi_read(map, adr);
+	DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.2x 0x%.2x\n", __func__, oldstatus, status );
 
-	while( (status & dq6) != (oldstatus & dq6) && 
-	       (status & dq5) != dq5 &&
-	       !time_after(jiffies, timeo) ) {
+	while( ( ( status ^ oldstatus ) & dq6 )
+	       && ! ( status & dq5 )
+	       && ! time_after(jiffies, timeo) ) {
 
 		if (need_resched()) {
 			cfi_spin_unlock(chip->mutex);
@@ -539,28 +564,60 @@
 
 		oldstatus = cfi_read( map, adr );
 		status = cfi_read( map, adr );
+		DEBUG( MTD_DEBUG_LEVEL3, "MTD %s(): Check 0x%.2x 0x%.2x\n", __func__, oldstatus, status );
 	}
-	
-	if( (status & dq6) != (oldstatus & dq6) ) {
-		/* The erasing didn't stop?? */
-		if( (status & dq5) == dq5 ) {
-			/* When DQ5 raises, we must check once again if DQ6 is toggling.
-               If not, the erase has been completed OK.  Then, we must reset
-               the chip. */
-			oldstatus = cfi_read(map, adr);
-			status = cfi_read(map, adr);
-		    
-		    if( (status & dq6) == (oldstatus & dq6) ) {
-				printk(KERN_WARNING "Warning: DQ5 raised while program operation was in progress, however operation completed OK\n" );
-			} else {
-				printk(KERN_WARNING "Internal flash device timeout occured or write operation was performed while flash was programming.\n" );
-			}
 
-	        /* When DQ5 raises, we must do a reset. */
-	        cfi_write( map, CMD(0xF0), chip->start );
+	if ( ( ( status ^ oldstatus ) & dq6 )
+	       && ! ( status & dq5 ) ) {
+		/* dq6 still toggling and dq5 still low - must be timeout */
+		printk( KERN_WARNING
+			"MTD %s(): Timed out while waiting for write to complete.\n",
+			__func__ );        
+		/* reset */
+		cfi_write( map, CMD(0xF0), chip->start );
+		ret = -EIO;
+	} else {
+		/*
+		 * Either dq5 went high or dq6 finish toggling.  For
+		 * the correct completion of certain types of writes
+		 * dq5 might _appear_ to go high while dq6 might
+		 * _appear_ to still be toggling - it depends on the
+		 * last state of the toggle was in and what the final
+		 * state will be with true data.  We alread know that
+		 * something changed state with the last set of reads.
+		 * We will read two more times and use this to either
+		 * verify that the write completed successfully or
+		 * that something really went wrong.  BOTH reads
+		 * must match what was written - this certifies that
+		 * the dq6 is not still toggling and have the status
+		 * bits erroneously match the datum that was written.
+		 */
+		/*
+		 * Be optimistic and check success first - then decode any
+		 * failures.
+		 */
+		oldstatus = cfi_read(map, adr);
+		status = cfi_read(map, adr);
+
+		if ( oldstatus == datum && status == datum ) {
+			/* success - do nothing */
 		} else {
-		    printk( "Waiting for write to complete timed out in do_write_oneword.");        
+			/* something has failed */
+			if ( ( ( status ^ oldstatus ) & dq6 )
+			     && ( status & dq5 ) ) {
+				printk(KERN_WARNING "MTD %s(): Internal flash device timeout occured.\n", __func__ );
+			} else {
+				/*
+				 * This state means that the write did not succeed
+				 * and that the usual error state of toggling dq6
+				 * while dq5 goes high did not happen.  Something
+				 * is seriously wacky!
+				 */
+				printk(KERN_WARNING "MTD %s(): Wacky!  Unable to decode failure status\n", __func__ );
+			}
 			ret = -EIO;
+			/* reset on all failures. */
+			cfi_write( map, CMD(0xF0), chip->start );
 		}
 	}
 
@@ -609,7 +666,7 @@
 		}
 
 		ret = do_write_oneword(map, &cfi->chips[chipnum], 
-				bus_ofs, datum, 0);
+				       bus_ofs, datum, 0);
 		if (ret) 
 			return ret;
 		
@@ -724,6 +781,7 @@
 	unsigned long timeo = jiffies + HZ;
 	unsigned int adr;
 	struct cfi_private *cfi = map->fldrv_priv;
+	int ret = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
  retry:
@@ -762,22 +820,23 @@
 	adr = cfi->addr_unlock1;
 
 	/* Wait for the end of programing/erasure by using the toggle method.
-	 * As long as there is a programming procedure going on, bit 6 of the last
-	 * written byte is toggling it's state with each consectuve read.
+	 * As long as there is a programming procedure going on, bit 6
+	 * is toggling it's state with each consecutive read.
 	 * The toggling stops as soon as the procedure is completed.
 	 *
 	 * If the process has gone on for too long on the chip bit 5 gets.
 	 * After bit5 is set you can kill the operation by sending a reset
 	 * command to the chip.
 	 */
+	/* see comments in do_write_oneword */
 	dq6 = CMD(1<<6);
 	dq5 = CMD(1<<5);
 
 	oldstatus = cfi_read(map, adr);
 	status = cfi_read(map, adr);
-	while( ((status & dq6) != (oldstatus & dq6)) && 
-		((status & dq5) != dq5) &&
-		!time_after(jiffies, timeo)) {
+	while( ( ( status ^ oldstatus ) & dq6 )
+	       && ! ( status & dq5 )
+	       && ! time_after(jiffies, timeo) ) {
 		int wait_reps;
 
 		/* an initial short sleep */
@@ -806,16 +865,16 @@
 
 		/* Busy wait for 1/10 of a milisecond */
 		for(wait_reps = 0;
-		    	(wait_reps < 100) &&
-			((status & dq6) != (oldstatus & dq6)) && 
-			((status & dq5) != dq5);
-			wait_reps++) {
+		    (wait_reps < 100)
+			    && ( ( status ^ oldstatus ) & dq6 )
+			    && ! ( status & dq5 );
+		    wait_reps++) {
 			
 			/* Latency issues. Drop the lock, wait a while and retry */
 			cfi_spin_unlock(chip->mutex);
-			
+
 			cfi_udelay(1);
-		
+
 			cfi_spin_lock(chip->mutex);
 			oldstatus = cfi_read(map, adr);
 			status = cfi_read(map, adr);
@@ -823,25 +882,64 @@
 		oldstatus = cfi_read(map, adr);
 		status = cfi_read(map, adr);
 	}
-	if ((status & dq6) != (oldstatus & dq6)) {
-		/* The erasing didn't stop?? */
-		if ((status & dq5) == dq5) {
-			/* dq5 is active so we can do a reset and stop the erase */
-			cfi_write(map, CMD(0xF0), chip->start);
+
+	if ( ( ( status ^ oldstatus ) & dq6 )
+	       && ! ( status & dq5 ) ) {
+		/* dq6 still toggling and dq5 still low - must be timeout */
+		printk( KERN_WARNING
+			"MTD %s(): Timed out while waiting for erase to complete.\n",
+			__func__ );        
+		/* reset */
+		cfi_write( map, CMD(0xF0), chip->start );
+		ret = -EIO;
+	} else {
+		__u32 ones = 0;
+/*
+		 * Be optimistic and check success first - then decode any
+		 * failures.
+		 */
+		oldstatus = cfi_read(map, adr);
+		status = cfi_read(map, adr);
+		
+		switch( CFIDEV_BUSWIDTH ) {
+		case 1:
+			ones = (__u8)~0;
+			break;
+		case 2:
+			ones = (__u16)~0;
+			break;
+		case 4:
+			ones = (__u32)~0;
+			break;
+		}
+	
+		if ( oldstatus == ones && status == ones ) {
+			/* success - do nothing */
+		} else {
+			/* something has failed */
+			if ( ( ( status ^ oldstatus ) & dq6 )
+			     && ( status & dq5 ) ) {
+				printk(KERN_WARNING "MTD %s(): Internal flash device timeout occured.\n", __func__ );
+			} else {
+				/*
+				 * This state means that the erase did not succeed
+				 * and that the usual error state of toggling dq6
+				 * while dq5 goes high did not happen.  Something
+				 * is seriously wacky!
+				 */
+				printk(KERN_WARNING "MTD %s(): Wacky!  Unable to decode failure status\n", __func__ );
+			}
+			ret = -EIO;
+			/* reset on all failures. */
+			cfi_write( map, CMD(0xF0), chip->start );
 		}
-		chip->state = FL_READY;
-		wake_up(&chip->wq);
-		cfi_spin_unlock(chip->mutex);
-		printk("waiting for erase to complete timed out.");
-		DISABLE_VPP(map);
-		return -EIO;
 	}
+
 	DISABLE_VPP(map);
 	chip->state = FL_READY;
 	wake_up(&chip->wq);
 	cfi_spin_unlock(chip->mutex);
-
-	return 0;
+	return ret;
 }
 
 static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr)
@@ -850,6 +948,7 @@
 	unsigned int dq6, dq5;
 	unsigned long timeo = jiffies + HZ;
 	struct cfi_private *cfi = map->fldrv_priv;
+	int ret = 0;
 	DECLARE_WAITQUEUE(wait, current);
 
  retry:
@@ -886,22 +985,23 @@
 	timeo = jiffies + (HZ*20);
 
 	/* Wait for the end of programing/erasure by using the toggle method.
-	 * As long as there is a programming procedure going on, bit 6 of the last
-	 * written byte is toggling it's state with each consectuve read.
+	 * As long as there is a programming procedure going on, bit 6
+	 * is toggling it's state with each consecutive read.
 	 * The toggling stops as soon as the procedure is completed.
 	 *
 	 * If the process has gone on for too long on the chip bit 5 gets.
 	 * After bit5 is set you can kill the operation by sending a reset
 	 * command to the chip.
 	 */
+	/* see comments in do_write_oneword */
 	dq6 = CMD(1<<6);
 	dq5 = CMD(1<<5);
 
 	oldstatus = cfi_read(map, adr);
 	status = cfi_read(map, adr);
-	while( ((status & dq6) != (oldstatus & dq6)) && 
-		((status & dq5) != dq5) &&
-		!time_after(jiffies, timeo)) {
+	while( ( ( status ^ oldstatus ) & dq6 )
+	       && ! ( status & dq5 )
+	       && ! time_after(jiffies, timeo) ) {
 		int wait_reps;
 
 		/* an initial short sleep */
@@ -930,10 +1030,10 @@
 
 		/* Busy wait for 1/10 of a milisecond */
 		for(wait_reps = 0;
-		    	(wait_reps < 100) &&
-			((status & dq6) != (oldstatus & dq6)) && 
-			((status & dq5) != dq5);
-			wait_reps++) {
+		    (wait_reps < 100)
+			    && ( ( status ^ oldstatus ) & dq6 )
+			    && ! ( status & dq5 );
+		    wait_reps++) {
 			
 			/* Latency issues. Drop the lock, wait a while and retry */
 			cfi_spin_unlock(chip->mutex);
@@ -947,44 +1047,65 @@
 		oldstatus = cfi_read(map, adr);
 		status = cfi_read(map, adr);
 	}
-	if( (status & dq6) != (oldstatus & dq6) ) 
-	{                                       
-		/* The erasing didn't stop?? */
-		if( ( status & dq5 ) == dq5 ) 
-		{   			
-			/* When DQ5 raises, we must check once again if DQ6 is toggling.
-               If not, the erase has been completed OK.  If not, reset chip. */
-		    oldstatus   = cfi_read( map, adr );
-		    status      = cfi_read( map, adr );
-		    
-		    if( ( oldstatus & 0x00FF ) == ( status & 0x00FF ) )
-		    {
-                printk( "Warning: DQ5 raised while erase operation was in progress, but erase completed OK\n" ); 		    
-		    } 			
-			else
-            {
-			    /* DQ5 is active so we can do a reset and stop the erase */
-				cfi_write(map, CMD(0xF0), chip->start);
-                printk( KERN_WARNING "Internal flash device timeout occured or write operation was performed while flash was erasing\n" );
+
+	if ( ( ( status ^ oldstatus ) & dq6 )
+	       && ! ( status & dq5 ) ) {
+		/* dq6 still toggling and dq5 still low - must be timeout */
+		printk( KERN_WARNING
+			"MTD %s(): Timed out while waiting for erase to complete.\n",
+			__func__ );        
+		/* reset */
+		cfi_write( map, CMD(0xF0), chip->start );
+		ret = -EIO;
+	} else {
+		__u32 ones = 0;
+
+		/*
+		 * Be optimistic and check success first - then decode any
+		 * failures.
+		 */
+		oldstatus = cfi_read(map, adr);
+		status = cfi_read(map, adr);
+
+		switch( CFIDEV_BUSWIDTH ) {
+		case 1:
+			ones = (__u8)~0;
+			break;
+		case 2:
+			ones = (__u16)~0;
+			break;
+		case 4:
+			ones = (__u32)~0;
+			break;
+		}
+	
+		if ( oldstatus == ones && status == ones ) {
+			/* success - do nothing */
+		} else {
+			/* something has failed */
+			if ( ( ( status ^ oldstatus ) & dq6 )
+			     && ( status & dq5 ) ) {
+				printk(KERN_WARNING "MTD %s(): Internal flash device timeout occured.\n", __func__ );
+			} else {
+				/*
+				 * This state means that the erase did not succeed
+				 * and that the usual error state of toggling dq6
+				 * while dq5 goes high did not happen.  Something
+				 * is seriously wacky!
+				 */
+				printk(KERN_WARNING "MTD %s(): Wacky!  Unable to decode failure status\n", __func__ );
 			}
+			ret = -EIO;
+			/* reset on all failures. */
+			cfi_write( map, CMD(0xF0), chip->start );
 		}
-        else
-        {
-		    printk( "Waiting for erase to complete timed out in do_erase_oneblock.");        
-		    
-		chip->state = FL_READY;
-		wake_up(&chip->wq);
-		cfi_spin_unlock(chip->mutex);
-		DISABLE_VPP(map);
-		return -EIO;
-	}
 	}
 
 	DISABLE_VPP(map);
 	chip->state = FL_READY;
 	wake_up(&chip->wq);
 	cfi_spin_unlock(chip->mutex);
-	return 0;
+	return ret;
 }
 
 static int cfi_amdstd_erase_varsize(struct mtd_info *mtd, struct erase_info *instr)