[PATCH v4 3/8] clk: samsung: add infrastructure to register cpu clocks

Thomas Abraham ta.omasab at gmail.com
Fri May 23 07:41:41 PDT 2014


Hi Tomasz,

On Fri, May 16, 2014 at 10:47 PM, Tomasz Figa <t.figa at samsung.com> wrote:
> Hi Thomas,
>
> On 14.05.2014 03:11, Thomas Abraham wrote:
>> From: Thomas Abraham <thomas.ab at samsung.com>
>>
>> The CPU clock provider supplies the clock to the CPU clock domain. The
>> composition and organization of the CPU clock provider could vary among
>> Exynos SoCs. A CPU clock provider can be composed of clock mux, dividers
>> and gates. This patch defines a new clock type for CPU clock provider and
>> adds infrastructure to register the CPU clock providers for Samsung
>> platforms.
>>
>> Cc: Tomasz Figa <t.figa at samsung.com>
>> Signed-off-by: Thomas Abraham <thomas.ab at samsung.com>
>> ---
>>  drivers/clk/samsung/Makefile  |    2 +-
>>  drivers/clk/samsung/clk-cpu.c |  458 +++++++++++++++++++++++++++++++++++++++++
>>  drivers/clk/samsung/clk.h     |    5 +
>>  3 files changed, 464 insertions(+), 1 deletions(-)
>>  create mode 100644 drivers/clk/samsung/clk-cpu.c
>>
>> diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile
>> index 8eb4799..e2b453f 100644
>> --- a/drivers/clk/samsung/Makefile
>> +++ b/drivers/clk/samsung/Makefile
>> @@ -2,7 +2,7 @@
>>  # Samsung Clock specific Makefile
>>  #
>>
>> -obj-$(CONFIG_COMMON_CLK)     += clk.o clk-pll.o
>> +obj-$(CONFIG_COMMON_CLK)     += clk.o clk-pll.o clk-cpu.o
>>  obj-$(CONFIG_ARCH_EXYNOS4)   += clk-exynos4.o
>>  obj-$(CONFIG_SOC_EXYNOS5250) += clk-exynos5250.o
>>  obj-$(CONFIG_SOC_EXYNOS5420) += clk-exynos5420.o
>> diff --git a/drivers/clk/samsung/clk-cpu.c b/drivers/clk/samsung/clk-cpu.c
>> new file mode 100644
>> index 0000000..6a40862
>> --- /dev/null
>> +++ b/drivers/clk/samsung/clk-cpu.c
>> @@ -0,0 +1,458 @@
>> +/*
>> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
>> + * Author: Thomas Abraham <thomas.ab at samsung.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This file contains the utility functions to register the cpu clocks
>> + * for samsung platforms.
>
> s/cpu/CPU/
> s/samsung/Samsung/
>
>> +*/
>> +
>> +#include <linux/errno.h>
>> +#include "clk.h"
>> +
>> +#define SRC_CPU                      0x0
>> +#define STAT_CPU             0x200
>> +#define DIV_CPU0             0x300
>> +#define DIV_CPU1             0x304
>> +#define DIV_STAT_CPU0                0x400
>> +#define DIV_STAT_CPU1                0x404
>> +
>> +#define MAX_DIV                      8
>> +
>> +#define EXYNOS4210_ARM_DIV1(div) ((div & 0x7) + 1)
>> +#define EXYNOS4210_ARM_DIV2(div) (((div >> 28) & 0x7) + 1)
>> +
>> +#define EXYNOS4210_DIV_CPU0(d5, d4, d3, d2, d1, d0)                  \
>> +             ((d5 << 24) | (d4 << 20) | (d3 << 16) | (d2 << 12) |    \
>> +              (d1 << 8) | (d0 <<  4))
>> +#define EXYNOS4210_DIV_CPU1(d2, d1, d0)                                      \
>> +             ((d2 << 8) | (d1 << 4) | (d0 << 0))
>
> Macro arguments should be put into parentheses to make sure that the whole
> argument is subject to further arithmetic operations.
>
>> +
>> +#define EXYNOS4210_DIV1_HPM_MASK     ((0x7 << 0) | (0x7 << 4))
>> +#define EXYNOS4210_MUX_HPM_MASK              (1 << 20)
>> +
>> +/**
>> + * struct exynos4210_armclk_data: config data to setup exynos4210 cpu clocks.
>> + * @prate:   frequency of the parent clock.
>> + * @div0:    value to be programmed in the div_cpu0 register.
>> + * @div1:    value to be programmed in the div_cpu1 register.
>> + *
>> + * This structure holds the divider configuration data for divider clocks
>> + * belonging to the CMU_CPU clock domain. The parent frequency at which these
>> + * divider values are vaild is specified in @prate.
>
> s/vaild/valid/
>
>> + */
>> +struct exynos4210_armclk_data {
>> +     unsigned long   prate;
>> +     unsigned int    div0;
>> +     unsigned int    div1;
>> +};
>> +
>> +/**
>> + * struct exynos_cpuclk: information about clock supplied to a CPU core.
>> + * @hw:              handle between ccf and cpu clock.
>
> s/ccf/CCF/
> s/cpu/CPU/
>
>> + * @alt_parent:      alternate parent clock to use when switching the speed
>> + *           of the primary parent clock.
>> + * @ctrl_base:       base address of the clock controller.
>> + * @offset:  offset from the ctrl_base address where the cpu clock div/mux
>
> s/cpu/CPU/
>
>> + *           registers can be accessed.
>> + * @clk_nb:  clock notifier registered for changes in clock speed of the
>> + *           primary parent clock.
>> + * @lock:    register access lock.
>> + * @data:    optional data which the acutal instantiation of this clock
>> + *           can use.
>
> s/acutal/actual/
>
>> + */
>> +struct exynos_cpuclk {
>> +     struct clk_hw           hw;
>> +     struct clk              *alt_parent;
>> +     void __iomem            *ctrl_base;
>> +     unsigned long           offset;
>> +     struct notifier_block   clk_nb;
>> +     spinlock_t              *lock;
>> +     void                    *data;
>> +};
>> +
>> +#define to_exynos_cpuclk_hw(hw) container_of(hw, struct exynos_cpuclk, hw)
>> +#define to_exynos_cpuclk_nb(nb) container_of(nb, struct exynos_cpuclk, clk_nb)
>> +
>> +/**
>> + * struct exynos_cpuclk_soc_data: soc specific data for cpu clocks.
>> + * @parser:  pointer to a function that can parse SoC specific data.
>> + * @ops:     clock operations to be used for this clock.
>> + * @offset:  optional offset from base of clock controller register base, to
>> + *           be used when accessing clock controller registers related to the
>> + *           cpu clock.
>
> s/cpu/CPU/
>
>> + * @clk_cb:  the clock notifier callback to be called for changes in the
>> + *           clock rate of the primary parent clock.
>> + *
>> + * This structure provides SoC specific data for ARM clocks. Based on
>> + * the compatible value of the clock controller node, the value of the
>> + * fields in this structure can be populated.
>> + */
>> +struct exynos_cpuclk_soc_data {
>> +     int (*parser)(struct device_node *, void **);
>
> Here you don't have argument names, but...
>
>> +     const struct clk_ops *ops;
>> +     unsigned int offset;
>> +     int (*clk_cb)(struct notifier_block *nb, unsigned long evt, void *data);
>
> ...here you do. Please keep this consistent.
>
>> +};
>> +
>> +/* common round rate callback useable for all types of cpu clocks */
>
> s/cpu/CPU/
>
>> +static long exynos_cpuclk_round_rate(struct clk_hw *hw,
>> +                     unsigned long drate, unsigned long *prate)
>
> Hmm, the long return type will overflow with *prate > INT_MAX and
> best_div == 1, I wonder why it is defined so in CCF, even though it
> shouldn't return error codes...
>
>> +{
>> +     struct clk *parent = __clk_get_parent(hw->clk);
>> +     unsigned long max_prate = __clk_round_rate(parent, UINT_MAX);
>> +     unsigned long t_prate, div = 1, best_div = 1;
>> +     unsigned long delta, min_delta = UINT_MAX;
>
> By the way, shouldn't this function take into account the list of
> available CPU rates and round drate to a supported rate that is less than
> or equal to it? Otherwise, later code might hit cases where an unsupported
> rate is requested, which is against the CCF semantics when a .round_rate()
> operation is provided.
>
>> +
>> +     do {
>> +             t_prate = __clk_round_rate(parent, drate * div);
>> +             delta = drate - (t_prate / div);
>> +             if (delta < min_delta) {
>> +                     *prate = t_prate;
>> +                     best_div = div;
>> +                     min_delta = delta;
>> +             }
>> +             if (!delta)
>> +                     break;
>> +             div++;
>> +     } while ((drate * div) < max_prate && div <= MAX_DIV);
>> +
>> +     return *prate / best_div;
>> +}
>> +
>> +static unsigned long _calc_div(unsigned long prate, unsigned long drate)
>> +{
>> +     unsigned long div = prate / drate;
>> +
>> +     WARN_ON(div >= MAX_DIV);
>> +     return (!(prate % drate)) ? div-- : div;
>
> Could you explain what is the purpose of this check and adjustment?
>
> If my assumption that this is essentially DIV_ROUND_UP(prate, drate) - 1
> is true, then this is probably used to obtain a divisor value that gives
> a rate less than or equal to drate. Is that right?
>
>> +}
>> +
>> +/* helper function to register a cpu clock */
>> +static int __init exynos_cpuclk_register(unsigned int lookup_id,
>> +             const char *name, const char **parents,
>> +             unsigned int num_parents, void __iomem *base,
>
> The num_parents argument doesn't seem to be used in the code. Maybe you
> should simply replace it and the parents argument with (const char *parent)
> and (const char *alt_parent)?
>
>> +             const struct exynos_cpuclk_soc_data *soc_data,
>> +             struct device_node *np, const struct clk_ops *ops,
>> +             spinlock_t *lock)
>> +{
>> +     struct exynos_cpuclk *cpuclk;
>> +     struct clk_init_data init;
>> +     struct clk *clk;
>> +     int ret;
>> +
>> +     cpuclk = kzalloc(sizeof(*cpuclk), GFP_KERNEL);
>> +     if (!cpuclk) {
>> +             pr_err("%s: could not allocate memory for %s clock\n",
>> +                                     __func__, name);
>> +             return -ENOMEM;
>> +     }
>> +
>> +     init.name = name;
>> +     init.flags = CLK_SET_RATE_PARENT;
>> +     init.parent_names = parents;
>> +     init.num_parents = 1;
>> +     init.ops = ops;
>> +
>> +     cpuclk->hw.init = &init;
>> +     cpuclk->ctrl_base = base;
>> +     cpuclk->lock = lock;
>> +
>> +     ret = soc_data->parser(np, &cpuclk->data);
>> +     if (ret) {
>> +             pr_err("%s: error %d in parsing %s clock data",
>> +                             __func__, ret, name);
>> +             ret = -EINVAL;
>> +             goto free_cpuclk;
>> +     }
>> +     cpuclk->offset = soc_data->offset;
>> +     init.ops = soc_data->ops;
>> +
>> +     cpuclk->clk_nb.notifier_call = soc_data->clk_cb;
>> +     if (clk_notifier_register(__clk_lookup(parents[0]), &cpuclk->clk_nb)) {
>> +             pr_err("%s: failed to register clock notifier for %s\n",
>> +                             __func__, name);
>> +             goto free_cpuclk_data;
>> +     }
>> +
>> +     cpuclk->alt_parent = __clk_lookup(parents[1]);
>> +     if (!cpuclk->alt_parent) {
>> +             pr_err("%s: could not lookup alternate parent %s\n",
>> +                             __func__, parents[1]);
>> +             ret = -EINVAL;
>> +             goto free_cpuclk_data;
>
> Shouldn't it be goto unregister_notifier and the notifier unregistered
> there?
>
>> +     }
>> +
>> +     clk = clk_register(NULL, &cpuclk->hw);
>> +     if (IS_ERR(clk)) {
>> +             pr_err("%s: could not register cpuclk %s\n", __func__,  name);
>> +             ret = PTR_ERR(clk);
>> +             goto free_cpuclk_data;
>
> Ditto.
>
>> +     }
>> +
>> +     samsung_clk_add_lookup(clk, lookup_id);
>> +     return 0;
>> +
>> +free_cpuclk_data:
>> +     kfree(cpuclk->data);
>> +free_cpuclk:
>> +     kfree(cpuclk);
>> +     return ret;
>> +}
>> +
>> +static void _exynos4210_set_armclk_div(void __iomem *base, unsigned long div)
>> +{
>> +     unsigned long timeout = jiffies + msecs_to_jiffies(10);
>> +
>> +     writel((readl(base + DIV_CPU0) & ~0x7) | div, base + DIV_CPU0);
>
> The 0x7 could be defined as a preprocessor macro. Also for increased
> readability, this could be split into separate read, modify and write.
>
>> +     while (time_before(jiffies, timeout))
>> +             if (!readl(base + DIV_STAT_CPU0))
>> +                     return;
>> +     pr_err("%s: timeout in divider stablization\n", __func__);
>> +}
>> +
>> +static unsigned long exynos4210_armclk_recalc_rate(struct clk_hw *hw,
>> +                             unsigned long parent_rate)
>> +{
>> +     struct exynos_cpuclk *armclk = to_exynos_cpuclk_hw(hw);
>> +     void __iomem *base = armclk->ctrl_base + armclk->offset;
>> +     unsigned long div0 = readl(base + DIV_CPU0);
>> +
>> +     return parent_rate / EXYNOS4210_ARM_DIV1(div0) /
>> +                             EXYNOS4210_ARM_DIV2(div0);
>> +}
>> +
>> +static int exynos4210_armclk_pre_rate_change(struct clk_notifier_data *ndata,
>> +                     struct exynos_cpuclk *armclk, void __iomem *base)
>> +{
>> +     struct exynos4210_armclk_data *armclk_data = armclk->data;
>> +     unsigned long alt_prate = clk_get_rate(armclk->alt_parent);
>> +     unsigned long alt_div, div0, div1, tdiv0, mux_reg;
>> +     unsigned long cur_armclk_rate, timeout;
>> +     unsigned long flags;
>> +
>> +     /* find out the divider values to use for clock data */
>> +     while (armclk_data->prate != ndata->new_rate) {
>
> I assume this code relies on the assumption that target DIV_CORE and
> DIV_CORE2 are always 0 (divide by 1)? Otherwise it should compare
> armclk_data->prate with new parent rate, not new target armclk rate,
> which would be parent rate divided by DIV_CORE and DIV_CORE2.
>
>> +             if (armclk_data->prate == 0)
>> +                     return -EINVAL;
>> +             armclk_data++;
>> +     }
>> +
>> +     div0 = armclk_data->div0;
>> +     div1 = armclk_data->div1;
>
> A comment about the following if would be nice.
>
>> +     if (readl(base + SRC_CPU) & EXYNOS4210_MUX_HPM_MASK) {
>> +             div1 = readl(base + DIV_CPU1) & EXYNOS4210_DIV1_HPM_MASK;
>> +             div1 |= ((armclk_data->div1) & ~EXYNOS4210_DIV1_HPM_MASK);
>> +     }
>> +
>> +     /*
>> +      * if the new and old parent clock speed is less than the clock speed
>> +      * of the alternate parent, then it should be ensured that at no point
>> +      * the armclk speed is more than the old_prate until the dividers are
>> +      * set.
>> +      */
>> +     tdiv0 = readl(base + DIV_CPU0);
>> +     cur_armclk_rate = ndata->old_rate / EXYNOS4210_ARM_DIV1(tdiv0) /
>> +                             EXYNOS4210_ARM_DIV2(tdiv0);
>> +     if (alt_prate > cur_armclk_rate) {
>
> Shouldn't you compare two parent rates here, not alt parent rate with
> current armclk rate?
>
> Also, this condition compares only alt rate with current rate. Let's see:
>
> 1) old >= alt && new >= alt => alt <= old X new
>
> The voltage will be always enough to handle the switch, so no division
> is needed.
>
> 2) old < alt && new >= alt => old < alt <= new
>
> The voltage will be switched to higher or equal necessary one for alt
> rate, so no division is needed.
>
> 3) old < alt && new < alt => old X new < alt
>
> The voltage won't be enough for alt rate so division is needed.
>
> 4) old >= alt && new < alt => new < alt <= old
>
> Current voltage is enough for alt rate and it will be lowered only after
> the switching finishes, so division is not needed.
>
> This means that division is necessary only if both new and old rates are
> lower than alt and this is what the comment above says, but not what the
> code does, which is slightly inefficient.
>
>> +             alt_div = _calc_div(alt_prate, cur_armclk_rate);
>> +             _exynos4210_set_armclk_div(base, alt_div);
>> +             div0 |= alt_div;
>
> Hmm, this code is barely readable. It is not clear whether _calc_div()
> is returning a value ready to be written to the register or the real
> divisor value. I'd make _calc_div() simply return the raw divisor value
> and then use a macro that calculates the required bitfield value.
>
> Another thing is whether 8 is a big enough maximum divisor. If not, both
> DIV_CORE and DIV_CORE2 should be used together to form a 6-bit divisor,
> which lets you divide by up to 64.
>
>> +     }
>> +
>> +     /* select sclk_mpll as the alternate parent */
>> +     spin_lock_irqsave(armclk->lock, flags);
>
> Hmm, is the start of critical section really here? The big
> read-modify-write section seems to begin at
>
>         if (readl(base + SRC_CPU) & EXYNOS4210_MUX_HPM_MASK) {
>                 div1 = readl(base + DIV_CPU1) & EXYNOS4210_DIV1_HPM_MASK;
>
>> +     mux_reg = readl(base + SRC_CPU);
>> +     writel(mux_reg | (1 << 16), base + SRC_CPU);
>> +
>> +     timeout = jiffies + msecs_to_jiffies(10);
>> +     while (time_before(jiffies, timeout))
>> +             if (((readl(base + STAT_CPU) >> 16) & 0x7) == 2)
>> +                     break;
>> +     spin_unlock_irqrestore(armclk->lock, flags);
>
> Is this really the end of the critical section? More writes to registers
> are happening below. Keep in mind that the APLL_RATIO field of the
> CLK_DIV_CPU0 register is used by a generic divider clock - "sclk_apll".
>
>> +
>> +     if (((readl(base + STAT_CPU) >> 16) & 0x7) != 2)
>> +             pr_err("%s: re-parenting to sclk_mpll failed\n", __func__);
>> +
>> +     /* alternate parent is active now. set the dividers */
>> +     writel(div0, base + DIV_CPU0);
>> +     timeout = jiffies + msecs_to_jiffies(10);
>> +     while (time_before(jiffies, timeout))
>> +             if (!readl(base + DIV_STAT_CPU0))
>> +                     break;
>> +
>> +     if (readl(base + DIV_STAT_CPU0))
>> +             pr_err("%s: timeout in divider0 stablization\n", __func__);
>> +
>> +     writel(div1, base + DIV_CPU1);
>> +     timeout = jiffies + msecs_to_jiffies(10);
>> +     while (time_before(jiffies, timeout))
>> +             if (!readl(base + DIV_STAT_CPU1))
>> +                     break;
>> +     if (readl(base + DIV_STAT_CPU1))
>> +             pr_err("%s: timeout in divider1 stablization\n", __func__);
>
> IMHO to be safe, the spin_unlock_irqrestore() should be called here.
>
>> +
>> +     return 0;
>> +}
>> +
>> +static int exynos4210_armclk_post_rate_change(struct exynos_cpuclk *armclk,
>> +                     void __iomem *base)
>> +{
>> +     unsigned long mux_reg, flags;
>> +     unsigned long timeout = jiffies + msecs_to_jiffies(10);
>> +
>> +     spin_lock_irqsave(armclk->lock, flags);
>> +     mux_reg = readl(base + SRC_CPU);
>> +     writel(mux_reg & ~(1 << 16), base + SRC_CPU);
>
> Please replace (1 << 16) with appropriate macro.
>
>> +     while (time_before(jiffies, timeout))
>> +             if (((readl(base + STAT_CPU) >> 16) & 0x7) == 1)
>> +                     break;
>> +     spin_unlock_irqrestore(armclk->lock, flags);
>> +
>> +     if (((readl(base + STAT_CPU) >> 16) & 0x7) != 1)
>> +             pr_err("%s: re-parenting to mout_apll failed\n", __func__);
>> +
>> +     return 0;
>> +}
>> +
>> +/*
>> + * This clock notifier is called when the frequency of the parent clock
>> + * of armclk is to be changed. This notifier handles the setting up all
>> + * the divider clocks, remux to temporary parent and handling the safe
>> + * frequency levels when using temporary parent.
>> + */
>> +static int exynos4210_armclk_notifier_cb(struct notifier_block *nb,
>> +                             unsigned long event, void *data)
>> +{
>> +     struct clk_notifier_data *ndata = data;
>> +     struct exynos_cpuclk *armclk = to_exynos_cpuclk_nb(nb);
>> +     void __iomem *base =  armclk->ctrl_base + armclk->offset;
>> +     int err = 0;
>> +
>> +     if (event == PRE_RATE_CHANGE)
>> +             err = exynos4210_armclk_pre_rate_change(ndata, armclk, base);
>> +     else if (event == POST_RATE_CHANGE)
>> +             err = exynos4210_armclk_post_rate_change(armclk, base);
>> +
>> +     return notifier_from_errno(err);
>> +}
>> +
>> +static int exynos4210_armclk_set_rate(struct clk_hw *hw, unsigned long drate,
>> +                                     unsigned long prate)
>> +{
>> +     struct exynos_cpuclk *armclk = to_exynos_cpuclk_hw(hw);
>> +     void __iomem *base = armclk->ctrl_base + armclk->offset;
>> +     unsigned long div;
>> +
>> +     div = drate < prate ? _calc_div(prate, drate) : 0;
>> +     _exynos4210_set_armclk_div(base, div);
>
> Hmm, the code above in pre_rate_change() assumed that both DIV_CORE and
> DIV_CORE2 are 0, but here it sets DIV_CORE to a potentially non-zero
> value. It doesn't look correct.
>
>> +     return 0;
>> +}
>> +
>> +static const struct clk_ops exynos4210_armclk_clk_ops = {
>> +     .recalc_rate = exynos4210_armclk_recalc_rate,
>> +     .round_rate = exynos_cpuclk_round_rate,
>> +     .set_rate = exynos4210_armclk_set_rate,
>> +};
>> +
>> +/*
>> + * parse divider configuration data from dt for all the cpu clock domain
>> + * clocks in exynos4210 and compatible SoC's.
>> + */
>> +static int __init exynos4210_armclk_parser(struct device_node *np, void **data)
>> +{
>> +     struct exynos4210_armclk_data *tdata;
>> +     u32 cfg[10], num_rows, row, col;
>> +     struct property *prop;
>> +     const __be32 *ptr = NULL;
>> +     u32 cells;
>> +     int ret;
>> +
>> +     if (of_property_read_u32(np, "samsung,armclk-cells", &cells))
>> +             return -EINVAL;
>> +     prop = of_find_property(np, "samsung,armclk-divider-table", NULL);
>
> You should rather use the *lenp argument of of_find_property(), instead
> of dereferencing the struct.
>
>> +     if (!prop)
>> +             return -EINVAL;
>> +     if (!prop->value)
>> +             return -EINVAL;
>
> You can skip the check above, as the calculation below will give you
> num_rows equal to 0 in this case.
>
>> +     if ((prop->length / sizeof(u32)) % cells)
>> +             return -EINVAL;
>> +     num_rows = (prop->length / sizeof(u32)) / cells;
>> +
>> +     /* allocate a zero terminated table */
>> +     *data = kzalloc(sizeof(*tdata) * (num_rows + 1), GFP_KERNEL);
>> +     if (!*data)
>> +             ret = -ENOMEM;
>
> Shouldn't you just return -ENOMEM here?
>
> Best regards,
> Tomasz

Thanks for your detailed review. I have made all the changes that you
have suggested.

Regards,
Thomas.



More information about the linux-arm-kernel mailing list