kthread, sched/wait: Fix kthread_parkme() completion issue



Gitweb:     http://git.infradead.org/?p=mtd-2.6.git;a=commit;h=85f1abe0019fcb3ea10df7029056cf42702283a8
Commit:     85f1abe0019fcb3ea10df7029056cf42702283a8
Parent:     741a76b350897604c48fb12beff1c9b77724dc96
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue May 1 18:14:45 2018 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu May 3 07:38:05 2018 +0200

    kthread, sched/wait: Fix kthread_parkme() completion issue
    
    Even with the wait-loop fixed, there is a further issue with
    kthread_parkme(). Upon hotplug, when we do takedown_cpu(),
    smpboot_park_threads() can return before all those threads are in fact
    blocked, due to the placement of the complete() in __kthread_parkme().
    
    When that happens, sched_cpu_dying() -> migrate_tasks() can end up
    migrating such a still-runnable task onto another CPU.
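
    For reference, the pre-patch __kthread_parkme() (reconstructed from
    the kernel/kthread.c hunk below) issues the complete() while the
    thread is still runnable:

	static void __kthread_parkme(struct kthread *self)
	{
		for (;;) {
			set_current_state(TASK_PARKED);
			if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
				break;
			if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
				complete(&self->parked);  /* kthread_park() returns here... */
			schedule();                       /* ...but we only block here */
		}
		clear_bit(KTHREAD_IS_PARKED, &self->flags);
		__set_current_state(TASK_RUNNING);
	}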
    
    Normally the task will have hit schedule() and gone to sleep by the
    time we do kthread_unpark(), which will then do __kthread_bind() to
    re-bind the task to the correct CPU.
    
    However, when we lose the initial TASK_PARKED store to the concurrent
    wakeup issue described previously, do the complete() and get migrated,
    it is possible to either:
    
     - observe kthread_unpark()'s clearing of SHOULD_PARK, terminate the
       park and set TASK_RUNNING, or
    
     - have __kthread_bind()'s wait_task_inactive() observe the competing
       TASK_RUNNING store.
    
    Either way the WARN() in __kthread_bind() will trigger and we will
    fail to correctly set the CPU affinity.
    
    Fix this by only issuing the complete() when the kthread has scheduled
    out. This does away with all the icky 'still running' nonsense.
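
    Concretely, the completion is now issued from finish_task_switch(),
    which runs after the context switch to the next task has completed,
    so the parked kthread must have scheduled out (a sketch of the
    resulting call path):

	schedule()
	  __schedule()
	    context_switch()
	      finish_task_switch(prev)
		/* prev is off the CPU here; prev_state == TASK_PARKED */
		kthread_park_complete(prev)
		  complete(&to_kthread(prev)->parked)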
    
    The alternative is to promote TASK_PARKED to a special state; this
    guarantees wait_task_inactive() cannot observe a 'stale' TASK_RUNNING
    and we'll end up doing the right thing, but it preserves the whole
    icky business of potentially migrating the still-runnable thing.
    
    Reported-by: Gaurav Kohli <gkohli@codeaurora.org>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
    Cc: Linus Torvalds <torvalds@linux-foundation.org>
    Cc: Oleg Nesterov <oleg@redhat.com>
    Cc: Peter Zijlstra <peterz@infradead.org>
    Cc: Thomas Gleixner <tglx@linutronix.de>
    Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/kthread.h |  1 +
 kernel/kthread.c        | 43 +++++++++++++++++++------------------------
 kernel/sched/core.c     | 32 +++++++++++++++++++++-----------
 3 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index c1961761311d..2803264c512f 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k);
 int kthread_park(struct task_struct *k);
 void kthread_unpark(struct task_struct *k);
 void kthread_parkme(void);
+void kthread_park_complete(struct task_struct *k);
 
 int kthreadd(void *unused);
 extern struct task_struct *kthreadd_task;
diff --git a/kernel/kthread.c b/kernel/kthread.c
index cbee858e5815..2017a39ab490 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
 	KTHREAD_IS_PER_CPU = 0,
 	KTHREAD_SHOULD_STOP,
 	KTHREAD_SHOULD_PARK,
-	KTHREAD_IS_PARKED,
 };
 
 static inline void set_kthread_struct(void *kthread)
@@ -181,11 +180,8 @@ static void __kthread_parkme(struct kthread *self)
 		set_current_state(TASK_PARKED);
 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
 			break;
-		if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
-			complete(&self->parked);
 		schedule();
 	}
-	clear_bit(KTHREAD_IS_PARKED, &self->flags);
 	__set_current_state(TASK_RUNNING);
 }
 
@@ -195,6 +191,11 @@ void kthread_parkme(void)
 }
 EXPORT_SYMBOL_GPL(kthread_parkme);
 
+void kthread_park_complete(struct task_struct *k)
+{
+	complete(&to_kthread(k)->parked);
+}
+
 static int kthread(void *_create)
 {
 	/* Copy data: it's on kthread's stack */
@@ -451,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
 {
 	struct kthread *kthread = to_kthread(k);
 
-	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
 	/*
-	 * We clear the IS_PARKED bit here as we don't wait
-	 * until the task has left the park code. So if we'd
-	 * park before that happens we'd see the IS_PARKED bit
-	 * which might be about to be cleared.
+	 * Newly created kthread was parked when the CPU was offline.
+	 * The binding was lost and we need to set it again.
 	 */
-	if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-		/*
-		 * Newly created kthread was parked when the CPU was offline.
-		 * The binding was lost and we need to set it again.
-		 */
-		if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-			__kthread_bind(k, kthread->cpu, TASK_PARKED);
-		wake_up_state(k, TASK_PARKED);
-	}
+	if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+		__kthread_bind(k, kthread->cpu, TASK_PARKED);
+
+	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	wake_up_state(k, TASK_PARKED);
 }
 EXPORT_SYMBOL_GPL(kthread_unpark);
 
@@ -489,12 +483,13 @@ int kthread_park(struct task_struct *k)
 	if (WARN_ON(k->flags & PF_EXITING))
 		return -ENOSYS;
 
-	if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-		set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-		if (k != current) {
-			wake_up_process(k);
-			wait_for_completion(&kthread->parked);
-		}
+	if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
+		return -EBUSY;
+
+	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	if (k != current) {
+		wake_up_process(k);
+		wait_for_completion(&kthread->parked);
 	}
 
 	return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5e10aaeebfcc..7ad60e00a6a8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7,6 +7,8 @@
  */
 #include "sched.h"
 
+#include <linux/kthread.h>
+
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
 
@@ -2718,20 +2720,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 		membarrier_mm_sync_core_before_usermode(mm);
 		mmdrop(mm);
 	}
-	if (unlikely(prev_state == TASK_DEAD)) {
-		if (prev->sched_class->task_dead)
-			prev->sched_class->task_dead(prev);
+	if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
+		switch (prev_state) {
+		case TASK_DEAD:
+			if (prev->sched_class->task_dead)
+				prev->sched_class->task_dead(prev);
 
-		/*
-		 * Remove function-return probe instances associated with this
-		 * task and put them back on the free list.
-		 */
-		kprobe_flush_task(prev);
+			/*
+			 * Remove function-return probe instances associated with this
+			 * task and put them back on the free list.
+			 */
+			kprobe_flush_task(prev);
 
-		/* Task is done with its stack. */
-		put_task_stack(prev);
+			/* Task is done with its stack. */
+			put_task_stack(prev);
 
-		put_task_struct(prev);
+			put_task_struct(prev);
+			break;
+
+		case TASK_PARKED:
+			kthread_park_complete(prev);
+			break;
+		}
 	}
 
 	tick_nohz_task_switch();
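

A minimal usage sketch of the API this patch hardens (not part of the
commit; the "example" names are made up): a kthread parks itself when
asked, and a controller relies on kthread_park() only returning once
the thread has actually blocked:

	#include <linux/kthread.h>
	#include <linux/delay.h>

	static int example_thread_fn(void *data)
	{
		while (!kthread_should_stop()) {
			if (kthread_should_park()) {
				/*
				 * Blocks in TASK_PARKED; after this fix the
				 * kthread_park() caller is only released once
				 * we have scheduled out.
				 */
				kthread_parkme();
				continue;
			}
			/* ... do a unit of work ... */
			msleep(100);
		}
		return 0;
	}

	/*
	 * Controller side:
	 *
	 *	struct task_struct *t = kthread_run(example_thread_fn,
	 *					    NULL, "example");
	 *	...
	 *	kthread_park(t);	// returns once t is blocked in TASK_PARKED
	 *	kthread_unpark(t);	// re-binds (if per-CPU) and wakes t
	 *	kthread_stop(t);
	 */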
