[bug report] PM / reboot: Eliminate race between reboot and suspend

Dan Carpenter dan.carpenter at oracle.com
Wed Nov 18 13:59:17 EST 2020


Hello Pingfan Liu,

The patch 55f2503c3b69: "PM / reboot: Eliminate race between reboot
and suspend" from Jul 31, 2018, leads to the following static checker
warning:

	kernel/power/main.c:27 lock_system_sleep()
	warn: called with lock held.  '&system_transition_mutex'

kernel/reboot.c
   345  
   346          mutex_lock(&system_transition_mutex);
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
The patch changed the code to take this lock.

   347          switch (cmd) {
   348          case LINUX_REBOOT_CMD_RESTART:
   349                  kernel_restart(NULL);
   350                  break;
   351  
   352          case LINUX_REBOOT_CMD_CAD_ON:
   353                  C_A_D = 1;
   354                  break;
   355  
   356          case LINUX_REBOOT_CMD_CAD_OFF:
   357                  C_A_D = 0;
   358                  break;
   359  
   360          case LINUX_REBOOT_CMD_HALT:
   361                  kernel_halt();
   362                  do_exit(0);
   363                  panic("cannot halt");
   364  
   365          case LINUX_REBOOT_CMD_POWER_OFF:
   366                  kernel_power_off();
   367                  do_exit(0);
   368                  break;
   369  
   370          case LINUX_REBOOT_CMD_RESTART2:
   371                  ret = strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1);
   372                  if (ret < 0) {
   373                          ret = -EFAULT;
   374                          break;
   375                  }
   376                  buffer[sizeof(buffer) - 1] = '\0';
   377  
   378                  kernel_restart(buffer);
   379                  break;
   380  
   381  #ifdef CONFIG_KEXEC_CORE
   382          case LINUX_REBOOT_CMD_KEXEC:
   383                  ret = kernel_kexec();
                        ^^^^^^^^^^^^^^^^^^^^
Called with lock held.

   384                  break;
   385  #endif

But kernel_kexec() also tries to take &system_transition_mutex (via
lock_system_sleep(), when CONFIG_KEXEC_JUMP is enabled and
kexec_image->preserve_context is set), so it will deadlock.

kernel/kexec_core.c
  1125  int kernel_kexec(void)
  1126  {
  1127          int error = 0;
  1128  
  1129          if (!mutex_trylock(&kexec_mutex))
  1130                  return -EBUSY;
  1131          if (!kexec_image) {
  1132                  error = -EINVAL;
  1133                  goto Unlock;
  1134          }
  1135  
  1136  #ifdef CONFIG_KEXEC_JUMP
  1137          if (kexec_image->preserve_context) {
  1138                  lock_system_sleep();
                        ^^^^^^^^^^^^^^^^^^^
Here: lock_system_sleep() tries to take &system_transition_mutex again.

  1139                  pm_prepare_console();
  1140                  error = freeze_processes();
  1141                  if (error) {
  1142                          error = -EBUSY;
  1143                          goto Restore_console;
  1144                  }
  1145                  suspend_console();
  1146                  error = dpm_suspend_start(PMSG_FREEZE);
  1147                  if (error)
  1148                          goto Resume_console;
  1149                  /* At this point, dpm_suspend_start() has been called,
  1150                   * but *not* dpm_suspend_end(). We *must* call
  1151                   * dpm_suspend_end() now.  Otherwise, drivers for
  1152                   * some devices (e.g. interrupt controllers) become
  1153                   * desynchronized with the actual state of the
  1154                   * hardware at resume time, and evil weirdness ensues.
  1155                   */
  1156                  error = dpm_suspend_end(PMSG_FREEZE);
  1157                  if (error)
  1158                          goto Resume_devices;
  1159                  error = suspend_disable_secondary_cpus();
  1160                  if (error)

regards,
dan carpenter



More information about the kexec mailing list