Recursion in CFI driver. Is it really necessary to have.
Alexey Korolev
akorolev at infradead.org
Mon Oct 15 10:57:16 EDT 2007
Hi,
>
> Wow. That's certainly bad.
>
> It's been a while that I wrote that code, but looking at it now I just
> can't see how that could happen. It is like if it was recursing on
> itself, but the code explicitly guards against that. See:
>
> if (contender && contender != chip) {
> [...]
> ret = get_chip(map, contender, contender->start, mode);
>
> So from that point, 'chip' and 'contender' must be equal when get_chip()
> is called again, and therefore recursion should stop there.
>
I think it has a hole here. The issue takes place if we have three or more processes wishing to own chip.
Assume we have three processes Process 1, Process 2, and Process 3 (a looser process).
Process 1: get_chip CHIP1
contender = CHIP1;
Own chip.
Process 2: get_chip CHIP2
contender == CHIP1;
get_chip CHIP1 to suspend it.
Own chip CHIP1.
Suspend
Own CHIP2.
contender = CHIP2;
Process 3: get_chip CHIP3
contender == CHIP2;
get_chip CHIP2 to suspend it.
Can't suspend -> wait
Process 1:
Process 2:
Process 1: do operations and finish with "contender = CHIP1"
Process 3: get_chip CHIP2
contender == CHIP1;
get_chip CHIP1 to suspend it.
Can't suspend -> wait
Process 2:
Process 1:
Process 2: do operations and finish with "contender = CHIP2"
Process 3: get_chip CHIP1
contender == CHIP2;
get_chip CHIP2 to suspend it.
Can't suspend -> wait
Etc.
Process 3 is asking for contender chip which is changing. It causes growing of recursion level.
The most important here, that from point of silicon state machine design view there is no need to have recursion deeper than "level 1".
I've made the patch which heals the deadlock problem. Could you please look at this?
Signed-off-by: Alexey Korolev <akorolev at infradead.org>
=========================================================
diff -aur a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
--- a/drivers/mtd/chips/cfi_cmdset_0001.c 2007-09-25 14:51:06.000000000 +0400
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c 2007-10-15 18:42:42.000000000 +0400
@@ -86,6 +86,7 @@
size_t len);
static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode);
+static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long adr, int mode);
static void put_chip(struct map_info *map, struct flchip *chip, unsigned long adr);
#include "fwh_lock.h"
@@ -644,14 +645,9 @@
static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
{
- DECLARE_WAITQUEUE(wait, current);
+ int ret;
struct cfi_private *cfi = map->fldrv_priv;
- map_word status, status_OK = CMD(0x80), status_PWS = CMD(0x01);
- unsigned long timeo;
- struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
- resettime:
- timeo = jiffies + HZ;
retry:
if (chip->priv && (mode == FL_WRITING || mode == FL_ERASING || mode == FL_OTP_WRITE)) {
/*
@@ -686,18 +682,22 @@
* it'll happily send us to sleep. In any case, when
* get_chip returns success we're clear to go ahead.
*/
- int ret = spin_trylock(contender->mutex);
+ ret = spin_trylock(contender->mutex);
spin_unlock(&shared->lock);
if (!ret)
goto retry;
spin_unlock(chip->mutex);
- ret = get_chip(map, contender, contender->start, mode);
+ ret = chip_ready(map, contender, contender->start, mode);
spin_lock(chip->mutex);
+
+ if (ret == -EAGAIN) {
+ spin_unlock(contender->mutex);
+ goto retry;
+ }
if (ret) {
spin_unlock(contender->mutex);
return ret;
}
- timeo = jiffies + HZ;
spin_lock(&shared->lock);
spin_unlock(contender->mutex);
}
@@ -709,6 +709,23 @@
spin_unlock(&shared->lock);
}
+ ret = chip_ready(map, chip, adr, mode);
+ if (ret == -EAGAIN)
+ goto retry;
+
+
+ return ret;
+}
+
+static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long adr, int mode)
+{
+
+ DECLARE_WAITQUEUE(wait, current);
+ struct cfi_private *cfi = map->fldrv_priv;
+ map_word status, status_OK = CMD(0x80), status_PWS = CMD(0x01);
+ struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
+ unsigned long timeo = jiffies + HZ;
+
switch (chip->state) {
case FL_STATUS:
@@ -731,7 +748,7 @@
cfi_udelay(1);
spin_lock(chip->mutex);
/* Someone else might have been playing with it. */
- goto retry;
+ return -EAGAIN;
}
case FL_READY:
@@ -806,7 +823,7 @@
schedule();
remove_wait_queue(&chip->wq, &wait);
spin_lock(chip->mutex);
- goto resettime;
+ return -EAGAIN;
}
}
=========================================================
More information about the linux-mtd
mailing list