[PATCH 7/8] pwm: Add more locking

Marek Szyprowski m.szyprowski at samsung.com
Thu Apr 4 05:09:32 PDT 2024


Hi Uwe,

On 17.03.2024 11:40, Uwe Kleine-König wrote:
> This ensures that a pwm_chip that has no corresponding driver isn't used
> and that a driver doesn't go away while a callback is still running.
>
> In the presence of device links this isn't necessary yet (so this is no
> fix) but for pwm character device support this is needed.
>
> To not serialize all pwm_apply_state() calls, this introduces a per chip
> lock. An additional complication is that for atomic chips a mutex cannot
> be used (as pwm_apply_atomic() must not sleem) and a spinlock cannot be
> held while calling an operation for a sleeping chip. So depending on the
> chip being atomic or not a spinlock or a mutex is used.
>
> Signed-off-by: Uwe Kleine-König <u.kleine-koenig at pengutronix.de>


This patch landed some time ago in linux-next as commit a740f7879609 
("pwm: Add more locking"). Recently I've finally found that $subject 
patch is responsible for the following lock dep splat observed for some 
time during day-to-day linux-next testing:

======================================================
WARNING: possible circular locking dependency detected
6.9.0-rc1+ #14790 Tainted: G         C
------------------------------------------------------
kworker/u24:4/59 is trying to acquire lock:
ffff00003c65b510 (&chip->nonatomic_lock){+.+.}-{3:3}, at: 
pwm_apply_might_sleep+0x90/0xd8

but task is already holding lock:
ffff80008310fa40 (prepare_lock){+.+.}-{3:3}, at: clk_prepare_lock+0x4c/0xa8

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (prepare_lock){+.+.}-{3:3}:
        lock_acquire+0x68/0x84
        __mutex_lock+0xa0/0x840
        mutex_lock_nested+0x24/0x30
        clk_prepare_lock+0x4c/0xa8
        clk_round_rate+0x38/0xd0
        meson_pwm_apply+0x128/0x220 [pwm_meson]
        __pwm_apply+0x64/0x1f8
        pwm_apply_might_sleep+0x58/0xd8
        pwm_regulator_set_voltage+0xbc/0x12c
        _regulator_do_set_voltage+0x110/0x688
        regulator_set_voltage_rdev+0x64/0x25c
        regulator_do_balance_voltage+0x20c/0x47c
        regulator_balance_voltage+0x50/0x9c
        regulator_set_voltage_unlocked+0xa4/0x128
        regulator_set_voltage+0x50/0xec
        _opp_config_regulator_single+0x4c/0x130
        _set_opp+0xfc/0x544
        dev_pm_opp_set_rate+0x190/0x284
        set_target+0x34/0x40
        __cpufreq_driver_target+0x170/0x290
        cpufreq_online+0x814/0xa3c
        cpufreq_add_dev+0x80/0x98
        subsys_interface_register+0xfc/0x118
        cpufreq_register_driver+0x150/0x234
        dt_cpufreq_probe+0x150/0x480
        platform_probe+0x68/0xd8
        really_probe+0xbc/0x2a0
        __driver_probe_device+0x78/0x12c
        driver_probe_device+0xdc/0x164
        __device_attach_driver+0xb8/0x138
        bus_for_each_drv+0x84/0xe0
        __device_attach+0xa8/0x1b0
        device_initial_probe+0x14/0x20
        bus_probe_device+0xb0/0xb4
        deferred_probe_work_func+0x8c/0xc8
        process_one_work+0x220/0x634
        worker_thread+0x268/0x3a8
        kthread+0x124/0x128
        ret_from_fork+0x10/0x20

-> #0 (&chip->nonatomic_lock){+.+.}-{3:3}:
        __lock_acquire+0x1318/0x20c4
        lock_acquire.part.0+0xc8/0x20c
        lock_acquire+0x68/0x84
        __mutex_lock+0xa0/0x840
        mutex_lock_nested+0x24/0x30
        pwm_apply_might_sleep+0x90/0xd8
        clk_pwm_prepare+0x54/0x84
        clk_core_prepare+0xc8/0x2f8
        clk_prepare+0x28/0x44
        mmc_pwrseq_simple_pre_power_on+0x4c/0x8c
        mmc_pwrseq_pre_power_on+0x24/0x34
        mmc_power_up.part.0+0x20/0x16c
        mmc_start_host+0xa0/0xac
        mmc_add_host+0x84/0xf0
        meson_mmc_probe+0x318/0x3e8
        platform_probe+0x68/0xd8
        really_probe+0xbc/0x2a0
        __driver_probe_device+0x78/0x12c
        driver_probe_device+0xdc/0x164
        __device_attach_driver+0xb8/0x138
        bus_for_each_drv+0x84/0xe0
        __device_attach_async_helper+0xb0/0xd4
        async_run_entry_fn+0x34/0xe0
        process_one_work+0x220/0x634
        worker_thread+0x268/0x3a8
        kthread+0x124/0x128
        ret_from_fork+0x10/0x20

other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(prepare_lock);
                                lock(&chip->nonatomic_lock);
                                lock(prepare_lock);
   lock(&chip->nonatomic_lock);

  *** DEADLOCK ***

4 locks held by kworker/u24:4/59:
  #0: ffff000000220d48 ((wq_completion)async){+.+.}-{0:0}, at: 
process_one_work+0x1a0/0x634
  #1: ffff80008461bde0 ((work_completion)(&entry->work)){+.+.}-{0:0}, 
at: process_one_work+0x1c8/0x634
  #2: ffff0000015bf0f8 (&dev->mutex){....}-{3:3}, at: 
__device_attach_async_helper+0x3c/0xd4
  #3: ffff80008310fa40 (prepare_lock){+.+.}-{3:3}, at: 
clk_prepare_lock+0x4c/0xa8

stack backtrace:
CPU: 1 PID: 59 Comm: kworker/u24:4 Tainted: G         C 6.9.0-rc1+ #14790
Hardware name: Khadas VIM3 (DT)
Workqueue: async async_run_entry_fn
Call trace:
  dump_backtrace+0x98/0xf0
  show_stack+0x18/0x24
  dump_stack_lvl+0x90/0xd0
  dump_stack+0x18/0x24
  print_circular_bug+0x290/0x370
  check_noncircular+0x15c/0x170
  __lock_acquire+0x1318/0x20c4
  lock_acquire.part.0+0xc8/0x20c
  lock_acquire+0x68/0x84
  __mutex_lock+0xa0/0x840
  mutex_lock_nested+0x24/0x30
  pwm_apply_might_sleep+0x90/0xd8
  clk_pwm_prepare+0x54/0x84
  clk_core_prepare+0xc8/0x2f8
  clk_prepare+0x28/0x44
  mmc_pwrseq_simple_pre_power_on+0x4c/0x8c
  mmc_pwrseq_pre_power_on+0x24/0x34
  mmc_power_up.part.0+0x20/0x16c
  mmc_start_host+0xa0/0xac
  mmc_add_host+0x84/0xf0
  meson_mmc_probe+0x318/0x3e8
  platform_probe+0x68/0xd8
  really_probe+0xbc/0x2a0
  __driver_probe_device+0x78/0x12c
  driver_probe_device+0xdc/0x164
  __device_attach_driver+0xb8/0x138
  bus_for_each_drv+0x84/0xe0
  __device_attach_async_helper+0xb0/0xd4
  async_run_entry_fn+0x34/0xe0
  process_one_work+0x220/0x634
  worker_thread+0x268/0x3a8
  kthread+0x124/0x128
  ret_from_fork+0x10/0x20


I'm not really sure if this warning shows a real problem or just some 
functions are missing lock dep annotations. Quite a lots of subsystems 
are involved in it (clocks, regulators and pwms) and this issue is fully 
reproducible here during system boot on Khadas VIM3 ARM64-based board.


> ---
>   drivers/pwm/core.c  | 98 +++++++++++++++++++++++++++++++++++++++++----
>   include/linux/pwm.h | 13 ++++++
>   2 files changed, 103 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
> index 09ff6ef0b634..2e08fcfbe138 100644
> --- a/drivers/pwm/core.c
> +++ b/drivers/pwm/core.c
> @@ -29,6 +29,22 @@ static DEFINE_MUTEX(pwm_lock);
>   
>   static DEFINE_IDR(pwm_chips);
>   
> +static void pwmchip_lock(struct pwm_chip *chip)
> +{
> +	if (chip->atomic)
> +		spin_lock(&chip->atomic_lock);
> +	else
> +		mutex_lock(&chip->nonatomic_lock);
> +}
> +
> +static void pwmchip_unlock(struct pwm_chip *chip)
> +{
> +	if (chip->atomic)
> +		spin_unlock(&chip->atomic_lock);
> +	else
> +		mutex_unlock(&chip->nonatomic_lock);
> +}
> +
>   static void pwm_apply_debug(struct pwm_device *pwm,
>   			    const struct pwm_state *state)
>   {
> @@ -183,6 +199,7 @@ static int __pwm_apply(struct pwm_device *pwm, const struct pwm_state *state)
>   int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state)
>   {
>   	int err;
> +	struct pwm_chip *chip = pwm->chip;
>   
>   	/*
>   	 * Some lowlevel driver's implementations of .apply() make use of
> @@ -193,7 +210,13 @@ int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state)
>   	 */
>   	might_sleep();
>   
> -	if (IS_ENABLED(CONFIG_PWM_DEBUG) && pwm->chip->atomic) {
> +	pwmchip_lock(chip);
> +	if (!chip->operational) {
> +		pwmchip_unlock(chip);
> +		return -ENODEV;
> +	}
> +
> +	if (IS_ENABLED(CONFIG_PWM_DEBUG) && chip->atomic) {
>   		/*
>   		 * Catch any drivers that have been marked as atomic but
>   		 * that will sleep anyway.
> @@ -205,6 +228,8 @@ int pwm_apply_might_sleep(struct pwm_device *pwm, const struct pwm_state *state)
>   		err = __pwm_apply(pwm, state);
>   	}
>   
> +	pwmchip_unlock(chip);
> +
>   	return err;
>   }
>   EXPORT_SYMBOL_GPL(pwm_apply_might_sleep);
> @@ -291,16 +316,26 @@ EXPORT_SYMBOL_GPL(pwm_adjust_config);
>   int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result,
>   		unsigned long timeout)
>   {
> +	struct pwm_chip *chip;
>   	int err;
>   
> -	if (!pwm || !pwm->chip->ops)
> +	if (!pwm || !(chip = pwm->chip)->ops)
>   		return -EINVAL;
>   
> -	if (!pwm->chip->ops->capture)
> +	if (!chip->ops->capture)
>   		return -ENOSYS;
>   
>   	mutex_lock(&pwm_lock);
> -	err = pwm->chip->ops->capture(pwm->chip, pwm, result, timeout);
> +
> +	pwmchip_lock(chip);
> +
> +	if (chip->operational)
> +		err = chip->ops->capture(pwm->chip, pwm, result, timeout);
> +	else
> +		err = -ENODEV;
> +
> +	pwmchip_unlock(chip);
> +
>   	mutex_unlock(&pwm_lock);
>   
>   	return err;
> @@ -340,6 +375,14 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
>   	if (test_bit(PWMF_REQUESTED, &pwm->flags))
>   		return -EBUSY;
>   
> +	/*
> +	 * This function is called while holding pwm_lock. As .operational only
> +	 * changes while holding this lock, checking it here without holding the
> +	 * chip lock is fine.
> +	 */
> +	if (!chip->operational)
> +		return -ENODEV;
> +
>   	if (!try_module_get(chip->owner))
>   		return -ENODEV;
>   
> @@ -368,7 +411,12 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
>   		 */
>   		struct pwm_state state = { 0, };
>   
> +		pwmchip_lock(chip);
> +
>   		err = ops->get_state(chip, pwm, &state);
> +
> +		pwmchip_unlock(chip);
> +
>   		trace_pwm_get(pwm, &state, err);
>   
>   		if (!err)
> @@ -997,6 +1045,7 @@ struct pwm_chip *pwmchip_alloc(struct device *parent, unsigned int npwm, size_t
>   
>   	chip->npwm = npwm;
>   	chip->uses_pwmchip_alloc = true;
> +	chip->operational = false;
>   
>   	pwmchip_dev = &chip->dev;
>   	device_initialize(pwmchip_dev);
> @@ -1102,6 +1151,11 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner)
>   
>   	chip->owner = owner;
>   
> +	if (chip->atomic)
> +		spin_lock_init(&chip->atomic_lock);
> +	else
> +		mutex_init(&chip->nonatomic_lock);
> +
>   	mutex_lock(&pwm_lock);
>   
>   	ret = idr_alloc(&pwm_chips, chip, 0, 0, GFP_KERNEL);
> @@ -1115,6 +1169,10 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner)
>   	if (IS_ENABLED(CONFIG_OF))
>   		of_pwmchip_add(chip);
>   
> +	pwmchip_lock(chip);
> +	chip->operational = true;
> +	pwmchip_unlock(chip);
> +
>   	ret = device_add(&chip->dev);
>   	if (ret)
>   		goto err_device_add;
> @@ -1124,6 +1182,10 @@ int __pwmchip_add(struct pwm_chip *chip, struct module *owner)
>   	return 0;
>   
>   err_device_add:
> +	pwmchip_lock(chip);
> +	chip->operational = false;
> +	pwmchip_unlock(chip);
> +
>   	if (IS_ENABLED(CONFIG_OF))
>   		of_pwmchip_remove(chip);
>   
> @@ -1145,12 +1207,27 @@ EXPORT_SYMBOL_GPL(__pwmchip_add);
>   void pwmchip_remove(struct pwm_chip *chip)
>   {
>   	pwmchip_sysfs_unexport(chip);
> +	unsigned int i;
> +
> +	mutex_lock(&pwm_lock);
> +
> +	pwmchip_lock(chip);
> +	chip->operational = false;
> +	pwmchip_unlock(chip);
> +
> +	for (i = 0; i < chip->npwm; ++i) {
> +		struct pwm_device *pwm = &chip->pwms[i];
> +
> +		if (test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) {
> +			dev_alert(&chip->dev, "Freeing requested PWM #%u\n", i);
> +			if (pwm->chip->ops->free)
> +				pwm->chip->ops->free(pwm->chip, pwm);
> +		}
> +	}
>   
>   	if (IS_ENABLED(CONFIG_OF))
>   		of_pwmchip_remove(chip);
>   
> -	mutex_lock(&pwm_lock);
> -
>   	idr_remove(&pwm_chips, chip->id);
>   
>   	mutex_unlock(&pwm_lock);
> @@ -1532,12 +1609,17 @@ void pwm_put(struct pwm_device *pwm)
>   
>   	mutex_lock(&pwm_lock);
>   
> -	if (!test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) {
> +	/*
> +	 * If the chip isn't operational, PWMF_REQUESTED was already cleared. So
> +	 * don't warn in this case. This can only happen if a consumer called
> +	 * pwm_put() twice.
> +	 */
> +	if (chip->operational && !test_and_clear_bit(PWMF_REQUESTED, &pwm->flags)) {
>   		pr_warn("PWM device already freed\n");
>   		goto out;
>   	}
>   
> -	if (chip->ops->free)
> +	if (chip->operational && chip->ops->free)
>   		pwm->chip->ops->free(pwm->chip, pwm);
>   
>   	pwm->label = NULL;
> diff --git a/include/linux/pwm.h b/include/linux/pwm.h
> index 60b92c2c75ef..9c84e0ba81a4 100644
> --- a/include/linux/pwm.h
> +++ b/include/linux/pwm.h
> @@ -274,6 +274,9 @@ struct pwm_ops {
>    * @of_xlate: request a PWM device given a device tree PWM specifier
>    * @atomic: can the driver's ->apply() be called in atomic context
>    * @uses_pwmchip_alloc: signals if pwmchip_allow was used to allocate this chip
> + * @operational: signals if the chip can be used (or is already deregistered)
> + * @nonatomic_lock: mutex for nonatomic chips
> + * @atomic_lock: mutex for atomic chips
>    * @pwms: array of PWM devices allocated by the framework
>    */
>   struct pwm_chip {
> @@ -289,6 +292,16 @@ struct pwm_chip {
>   
>   	/* only used internally by the PWM framework */
>   	bool uses_pwmchip_alloc;
> +	bool operational;
> +	union {
> +		/*
> +		 * depending on the chip being atomic or not either the mutex or
> +		 * the spinlock is used. It protects .operational and
> +		 * synchronizes calls to the .ops->apply and .ops->get_state()
> +		 */
> +		struct mutex nonatomic_lock;
> +		struct spinlock atomic_lock;
> +	};
>   	struct pwm_device pwms[] __counted_by(npwm);
>   };
>   

Best regards
-- 
Marek Szyprowski, PhD
Samsung R&D Institute Poland




More information about the linux-amlogic mailing list