[PATCH v4 3/8] clk: samsung: add infrastructure to register cpu clocks
Thomas Abraham
ta.omasab at gmail.com
Fri May 23 07:41:41 PDT 2014
Hi Tomasz,
On Fri, May 16, 2014 at 10:47 PM, Tomasz Figa <t.figa at samsung.com> wrote:
> Hi Thomas,
>
> On 14.05.2014 03:11, Thomas Abraham wrote:
>> From: Thomas Abraham <thomas.ab at samsung.com>
>>
>> The CPU clock provider supplies the clock to the CPU clock domain. The
>> composition and organization of the CPU clock provider could vary among
>> Exynos SoCs. A CPU clock provider can be composed of clock mux, dividers
>> and gates. This patch defines a new clock type for CPU clock provider and
>> adds infrastructure to register the CPU clock providers for Samsung
>> platforms.
>>
>> Cc: Tomasz Figa <t.figa at samsung.com>
>> Signed-off-by: Thomas Abraham <thomas.ab at samsung.com>
>> ---
>> drivers/clk/samsung/Makefile | 2 +-
>> drivers/clk/samsung/clk-cpu.c | 458 +++++++++++++++++++++++++++++++++++++++++
>> drivers/clk/samsung/clk.h | 5 +
>> 3 files changed, 464 insertions(+), 1 deletions(-)
>> create mode 100644 drivers/clk/samsung/clk-cpu.c
>>
>> diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile
>> index 8eb4799..e2b453f 100644
>> --- a/drivers/clk/samsung/Makefile
>> +++ b/drivers/clk/samsung/Makefile
>> @@ -2,7 +2,7 @@
>> # Samsung Clock specific Makefile
>> #
>>
>> -obj-$(CONFIG_COMMON_CLK) += clk.o clk-pll.o
>> +obj-$(CONFIG_COMMON_CLK) += clk.o clk-pll.o clk-cpu.o
>> obj-$(CONFIG_ARCH_EXYNOS4) += clk-exynos4.o
>> obj-$(CONFIG_SOC_EXYNOS5250) += clk-exynos5250.o
>> obj-$(CONFIG_SOC_EXYNOS5420) += clk-exynos5420.o
>> diff --git a/drivers/clk/samsung/clk-cpu.c b/drivers/clk/samsung/clk-cpu.c
>> new file mode 100644
>> index 0000000..6a40862
>> --- /dev/null
>> +++ b/drivers/clk/samsung/clk-cpu.c
>> @@ -0,0 +1,458 @@
>> +/*
>> + * Copyright (c) 2014 Samsung Electronics Co., Ltd.
>> + * Author: Thomas Abraham <thomas.ab at samsung.com>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + *
>> + * This file contains the utility functions to register the cpu clocks
>> + * for samsung platforms.
>
> s/cpu/CPU/
> s/samsung/Samsung/
>
>> +*/
>> +
>> +#include <linux/errno.h>
>> +#include "clk.h"
>> +
>> +#define SRC_CPU 0x0
>> +#define STAT_CPU 0x200
>> +#define DIV_CPU0 0x300
>> +#define DIV_CPU1 0x304
>> +#define DIV_STAT_CPU0 0x400
>> +#define DIV_STAT_CPU1 0x404
>> +
>> +#define MAX_DIV 8
>> +
>> +#define EXYNOS4210_ARM_DIV1(div) ((div & 0x7) + 1)
>> +#define EXYNOS4210_ARM_DIV2(div) (((div >> 28) & 0x7) + 1)
>> +
>> +#define EXYNOS4210_DIV_CPU0(d5, d4, d3, d2, d1, d0) \
>> + ((d5 << 24) | (d4 << 20) | (d3 << 16) | (d2 << 12) | \
>> + (d1 << 8) | (d0 << 4))
>> +#define EXYNOS4210_DIV_CPU1(d2, d1, d0) \
>> + ((d2 << 8) | (d1 << 4) | (d0 << 0))
>
> Macro arguments should be put into parentheses to make sure that the whole
> argument is subject to further arithmetic operations.
>
>> +
>> +#define EXYNOS4210_DIV1_HPM_MASK ((0x7 << 0) | (0x7 << 4))
>> +#define EXYNOS4210_MUX_HPM_MASK (1 << 20)
>> +
>> +/**
>> + * struct exynos4210_armclk_data: config data to setup exynos4210 cpu clocks.
>> + * @prate: frequency of the parent clock.
>> + * @div0: value to be programmed in the div_cpu0 register.
>> + * @div1: value to be programmed in the div_cpu1 register.
>> + *
>> + * This structure holds the divider configuration data for divider clocks
>> + * belonging to the CMU_CPU clock domain. The parent frequency at which these
>> + * divider values are vaild is specified in @prate.
>
> s/vaild/valid/
>
>> + */
>> +struct exynos4210_armclk_data {
>> + unsigned long prate;
>> + unsigned int div0;
>> + unsigned int div1;
>> +};
>> +
>> +/**
>> + * struct exynos_cpuclk: information about clock supplied to a CPU core.
>> + * @hw: handle between ccf and cpu clock.
>
> s/ccf/CCF/
> s/cpu/CPU/
>
>> + * @alt_parent: alternate parent clock to use when switching the speed
>> + * of the primary parent clock.
>> + * @ctrl_base: base address of the clock controller.
>> + * @offset: offset from the ctrl_base address where the cpu clock div/mux
>
> s/cpu/CPU/
>
>> + * registers can be accessed.
>> + * @clk_nb: clock notifier registered for changes in clock speed of the
>> + * primary parent clock.
>> + * @lock: register access lock.
>> + * @data: optional data which the acutal instantiation of this clock
>> + * can use.
>
> s/acutal/actual/
>
>> + */
>> +struct exynos_cpuclk {
>> + struct clk_hw hw;
>> + struct clk *alt_parent;
>> + void __iomem *ctrl_base;
>> + unsigned long offset;
>> + struct notifier_block clk_nb;
>> + spinlock_t *lock;
>> + void *data;
>> +};
>> +
>> +#define to_exynos_cpuclk_hw(hw) container_of(hw, struct exynos_cpuclk, hw)
>> +#define to_exynos_cpuclk_nb(nb) container_of(nb, struct exynos_cpuclk, clk_nb)
>> +
>> +/**
>> + * struct exynos_cpuclk_soc_data: soc specific data for cpu clocks.
>> + * @parser: pointer to a function that can parse SoC specific data.
>> + * @ops: clock operations to be used for this clock.
>> + * @offset: optional offset from base of clock controller register base, to
>> + * be used when accessing clock controller registers related to the
>> + * cpu clock.
>
> s/cpu/CPU/
>
>> + * @clk_cb: the clock notifier callback to be called for changes in the
>> + * clock rate of the primary parent clock.
>> + *
>> + * This structure provides SoC specific data for ARM clocks. Based on
>> + * the compatible value of the clock controller node, the value of the
>> + * fields in this structure can be populated.
>> + */
>> +struct exynos_cpuclk_soc_data {
>> + int (*parser)(struct device_node *, void **);
>
> Here you don't have argument names, but...
>
>> + const struct clk_ops *ops;
>> + unsigned int offset;
>> + int (*clk_cb)(struct notifier_block *nb, unsigned long evt, void *data);
>
> ...here you have. Please keep some consistency.
>
>> +};
>> +
>> +/* common round rate callback useable for all types of cpu clocks */
>
> s/cpu/CPU/
>
>> +static long exynos_cpuclk_round_rate(struct clk_hw *hw,
>> + unsigned long drate, unsigned long *prate)
>
> Hmm, the long return type will overflow with *prate > INT_MAX and
> best_div == 1, I wonder why it is defined so in CCF, even though it
> shouldn't return error codes...
>
>> +{
>> + struct clk *parent = __clk_get_parent(hw->clk);
>> + unsigned long max_prate = __clk_round_rate(parent, UINT_MAX);
>> + unsigned long t_prate, div = 1, best_div = 1;
>> + unsigned long delta, min_delta = UINT_MAX;
>
> By the way, shouldn't this function take into account the list of
> available CPU rates and round drate down to the nearest supported rate
> that is less than or equal to it? Otherwise, in further code you might
> hit cases where an unsupported rate is requested, which is against the
> CCF semantics if the .round_rate() operation is provided.
>
>> +
>> + do {
>> + t_prate = __clk_round_rate(parent, drate * div);
>> + delta = drate - (t_prate / div);
>> + if (delta < min_delta) {
>> + *prate = t_prate;
>> + best_div = div;
>> + min_delta = delta;
>> + }
>> + if (!delta)
>> + break;
>> + div++;
>> + } while ((drate * div) < max_prate && div <= MAX_DIV);
>> +
>> + return *prate / best_div;
>> +}
>> +
>> +static unsigned long _calc_div(unsigned long prate, unsigned long drate)
>> +{
>> + unsigned long div = prate / drate;
>> +
>> + WARN_ON(div >= MAX_DIV);
>> + return (!(prate % drate)) ? div-- : div;
>
> Could you explain what is the purpose of this check and adjustment?
>
> If my assumption that this is essentially DIV_ROUND_UP(prate, drate) - 1
> is true, then this is probably used to obtain a divisor value that gives
> a rate less than or equal to drate. Is that right?
>
>> +}
>> +
>> +/* helper function to register a cpu clock */
>> +static int __init exynos_cpuclk_register(unsigned int lookup_id,
>> + const char *name, const char **parents,
>> + unsigned int num_parents, void __iomem *base,
>
> The num_parents argument doesn't seem to be used in the code. Maybe
> instead you should simply replace it and parents arguments with (const
> char *parent) and (const char *alt_parent)?
>
>> + const struct exynos_cpuclk_soc_data *soc_data,
>> + struct device_node *np, const struct clk_ops *ops,
>> + spinlock_t *lock)
>> +{
>> + struct exynos_cpuclk *cpuclk;
>> + struct clk_init_data init;
>> + struct clk *clk;
>> + int ret;
>> +
>> + cpuclk = kzalloc(sizeof(*cpuclk), GFP_KERNEL);
>> + if (!cpuclk) {
>> + pr_err("%s: could not allocate memory for %s clock\n",
>> + __func__, name);
>> + return -ENOMEM;
>> + }
>> +
>> + init.name = name;
>> + init.flags = CLK_SET_RATE_PARENT;
>> + init.parent_names = parents;
>> + init.num_parents = 1;
>> + init.ops = ops;
>> +
>> + cpuclk->hw.init = &init;
>> + cpuclk->ctrl_base = base;
>> + cpuclk->lock = lock;
>> +
>> + ret = soc_data->parser(np, &cpuclk->data);
>> + if (ret) {
>> + pr_err("%s: error %d in parsing %s clock data",
>> + __func__, ret, name);
>> + ret = -EINVAL;
>> + goto free_cpuclk;
>> + }
>> + cpuclk->offset = soc_data->offset;
>> + init.ops = soc_data->ops;
>> +
>> + cpuclk->clk_nb.notifier_call = soc_data->clk_cb;
>> + if (clk_notifier_register(__clk_lookup(parents[0]), &cpuclk->clk_nb)) {
>> + pr_err("%s: failed to register clock notifier for %s\n",
>> + __func__, name);
>> + goto free_cpuclk_data;
>> + }
>> +
>> + cpuclk->alt_parent = __clk_lookup(parents[1]);
>> + if (!cpuclk->alt_parent) {
>> + pr_err("%s: could not lookup alternate parent %s\n",
>> + __func__, parents[1]);
>> + ret = -EINVAL;
>> + goto free_cpuclk_data;
>
> Shouldn't it be goto unregister_notifier and the notifier unregistered
> there?
>
>> + }
>> +
>> + clk = clk_register(NULL, &cpuclk->hw);
>> + if (IS_ERR(clk)) {
>> + pr_err("%s: could not register cpuclk %s\n", __func__, name);
>> + ret = PTR_ERR(clk);
>> + goto free_cpuclk_data;
>
> Ditto.
>
>> + }
>> +
>> + samsung_clk_add_lookup(clk, lookup_id);
>> + return 0;
>> +
>> +free_cpuclk_data:
>> + kfree(cpuclk->data);
>> +free_cpuclk:
>> + kfree(cpuclk);
>> + return ret;
>> +}
>> +
>> +static void _exynos4210_set_armclk_div(void __iomem *base, unsigned long div)
>> +{
>> + unsigned long timeout = jiffies + msecs_to_jiffies(10);
>> +
>> + writel((readl(base + DIV_CPU0) & ~0x7) | div, base + DIV_CPU0);
>
> The 0x7 could be defined as a preprocessor macro. Also for increased
> readability, this could be split into separate read, modify and write.
>
>> + while (time_before(jiffies, timeout))
>> + if (!readl(base + DIV_STAT_CPU0))
>> + return;
>> + pr_err("%s: timeout in divider stablization\n", __func__);
>> +}
>> +
>> +static unsigned long exynos4210_armclk_recalc_rate(struct clk_hw *hw,
>> + unsigned long parent_rate)
>> +{
>> + struct exynos_cpuclk *armclk = to_exynos_cpuclk_hw(hw);
>> + void __iomem *base = armclk->ctrl_base + armclk->offset;
>> + unsigned long div0 = readl(base + DIV_CPU0);
>> +
>> + return parent_rate / EXYNOS4210_ARM_DIV1(div0) /
>> + EXYNOS4210_ARM_DIV2(div0);
>> +}
>> +
>> +static int exynos4210_armclk_pre_rate_change(struct clk_notifier_data *ndata,
>> + struct exynos_cpuclk *armclk, void __iomem *base)
>> +{
>> + struct exynos4210_armclk_data *armclk_data = armclk->data;
>> + unsigned long alt_prate = clk_get_rate(armclk->alt_parent);
>> + unsigned long alt_div, div0, div1, tdiv0, mux_reg;
>> + unsigned long cur_armclk_rate, timeout;
>> + unsigned long flags;
>> +
>> + /* find out the divider values to use for clock data */
>> + while (armclk_data->prate != ndata->new_rate) {
>
> I assume this code relies on the assumption that target DIV_CORE and
> DIV_CORE2 are always 0 (divide by 1)? Otherwise it should compare
> armclk_data->prate with new parent rate, not new target armclk rate,
> which would be parent rate divided by DIV_CORE and DIV_CORE2.
>
>> + if (armclk_data->prate == 0)
>> + return -EINVAL;
>> + armclk_data++;
>> + }
>> +
>> + div0 = armclk_data->div0;
>> + div1 = armclk_data->div1;
>
> A comment about the following if would be nice.
>
>> + if (readl(base + SRC_CPU) & EXYNOS4210_MUX_HPM_MASK) {
>> + div1 = readl(base + DIV_CPU1) & EXYNOS4210_DIV1_HPM_MASK;
>> + div1 |= ((armclk_data->div1) & ~EXYNOS4210_DIV1_HPM_MASK);
>> + }
>> +
>> + /*
>> + * if the new and old parent clock speed is less than the clock speed
>> + * of the alternate parent, then it should be ensured that at no point
>> + * the armclk speed is more than the old_prate until the dividers are
>> + * set.
>> + */
>> + tdiv0 = readl(base + DIV_CPU0);
>> + cur_armclk_rate = ndata->old_rate / EXYNOS4210_ARM_DIV1(tdiv0) /
>> + EXYNOS4210_ARM_DIV2(tdiv0);
>> + if (alt_prate > cur_armclk_rate) {
>
> Shouldn't you compare two parent rates here, not alt parent rate with
> current armclk rate?
>
> Also, this condition compares only alt rate with current rate. Let's see:
>
> 1) old >= alt && new >= alt => alt <= old X new
>
> The voltage will be always enough to handle the switch, so no division
> is needed.
>
> 2) old < alt && new >= alt => old < alt <= new
>
> The voltage will be switched to higher or equal necessary one for alt
> rate, so no division is needed.
>
> 3) old < alt && new < alt => old X new < alt
>
> The voltage won't be enough for alt rate so division is needed.
>
> 4) old >= alt && new < alt => new < alt <= old
>
> Current voltage is enough for alt rate and it will be lowered only after
> the switching finishes, so division is not needed.
>
> This means that division is necessary only if both new and old rates are
> lower than alt and this is what the comment above says, but not what the
> code does, which is slightly inefficient.
>
>> + alt_div = _calc_div(alt_prate, cur_armclk_rate);
>> + _exynos4210_set_armclk_div(base, alt_div);
>> + div0 |= alt_div;
>
> Hmm, this code is barely readable. It is not clear whether _calc_div()
> is returning a value ready to be written to the register or the real
> divisor value. I'd make _calc_div() simply return the raw divisor value
> and then use a macro that calculates the required bitfield value.
>
> Another thing is whether 8 is big enough maximum divisor. If not, both
> DIV_CORE and DIV_CORE2 should be used together to form a 6-bit divisor,
> which lets you divide by up to 64.
>
>> + }
>> +
>> + /* select sclk_mpll as the alternate parent */
>> + spin_lock_irqsave(armclk->lock, flags);
>
> Hmm, is the start of critical section really here? The big
> read-modify-write section seems to begin at
>
> if (readl(base + SRC_CPU) & EXYNOS4210_MUX_HPM_MASK) {
> div1 = readl(base + DIV_CPU1) & EXYNOS4210_DIV1_HPM_MASK;
>
>> + mux_reg = readl(base + SRC_CPU);
>> + writel(mux_reg | (1 << 16), base + SRC_CPU);
>> +
>> + timeout = jiffies + msecs_to_jiffies(10);
>> + while (time_before(jiffies, timeout))
>> + if (((readl(base + STAT_CPU) >> 16) & 0x7) == 2)
>> + break;
>> + spin_unlock_irqrestore(armclk->lock, flags);
>
> Is this really the end of the critical section? More writes to registers
> are happening below. Keep in mind that the APLL_RATIO field of the
> CLK_DIV_CPU0 register is used by a generic divider clock - "sclk_apll".
>
>> +
>> + if (((readl(base + STAT_CPU) >> 16) & 0x7) != 2)
>> + pr_err("%s: re-parenting to sclk_mpll failed\n", __func__);
>> +
>> + /* alternate parent is active now. set the dividers */
>> + writel(div0, base + DIV_CPU0);
>> + timeout = jiffies + msecs_to_jiffies(10);
>> + while (time_before(jiffies, timeout))
>> + if (!readl(base + DIV_STAT_CPU0))
>> + break;
>> +
>> + if (readl(base + DIV_STAT_CPU0))
>> + pr_err("%s: timeout in divider0 stablization\n", __func__);
>> +
>> + writel(div1, base + DIV_CPU1);
>> + timeout = jiffies + msecs_to_jiffies(10);
>> + while (time_before(jiffies, timeout))
>> + if (!readl(base + DIV_STAT_CPU1))
>> + break;
>> + if (readl(base + DIV_STAT_CPU1))
>> + pr_err("%s: timeout in divider1 stablization\n", __func__);
>
> IMHO to be safe, the spin_unlock_irqrestore() should be called here.
>
>> +
>> + return 0;
>> +}
>> +
>> +static int exynos4210_armclk_post_rate_change(struct exynos_cpuclk *armclk,
>> + void __iomem *base)
>> +{
>> + unsigned long mux_reg, flags;
>> + unsigned long timeout = jiffies + msecs_to_jiffies(10);
>> +
>> + spin_lock_irqsave(armclk->lock, flags);
>> + mux_reg = readl(base + SRC_CPU);
>> + writel(mux_reg & ~(1 << 16), base + SRC_CPU);
>
> Please replace (1 << 16) with appropriate macro.
>
>> + while (time_before(jiffies, timeout))
>> + if (((readl(base + STAT_CPU) >> 16) & 0x7) == 1)
>> + break;
>> + spin_unlock_irqrestore(armclk->lock, flags);
>> +
>> + if (((readl(base + STAT_CPU) >> 16) & 0x7) != 1)
>> + pr_err("%s: re-parenting to mout_apll failed\n", __func__);
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * This clock notifier is called when the frequency of the parent clock
>> + * of armclk is to be changed. This notifier handles the setting up all
>> + * the divider clocks, remux to temporary parent and handling the safe
>> + * frequency levels when using temporary parent.
>> + */
>> +static int exynos4210_armclk_notifier_cb(struct notifier_block *nb,
>> + unsigned long event, void *data)
>> +{
>> + struct clk_notifier_data *ndata = data;
>> + struct exynos_cpuclk *armclk = to_exynos_cpuclk_nb(nb);
>> + void __iomem *base = armclk->ctrl_base + armclk->offset;
>> + int err = 0;
>> +
>> + if (event == PRE_RATE_CHANGE)
>> + err = exynos4210_armclk_pre_rate_change(ndata, armclk, base);
>> + else if (event == POST_RATE_CHANGE)
>> + err = exynos4210_armclk_post_rate_change(armclk, base);
>> +
>> + return notifier_from_errno(err);
>> +}
>> +
>> +static int exynos4210_armclk_set_rate(struct clk_hw *hw, unsigned long drate,
>> + unsigned long prate)
>> +{
>> + struct exynos_cpuclk *armclk = to_exynos_cpuclk_hw(hw);
>> + void __iomem *base = armclk->ctrl_base + armclk->offset;
>> + unsigned long div;
>> +
>> + div = drate < prate ? _calc_div(prate, drate) : 0;
>> + _exynos4210_set_armclk_div(base, div);
>
> Hmm, the code above in pre_rate_change() assumed that both DIV_CORE and
> DIV_CORE2 are 0, but here it sets DIV_CORE to a potentially non-zero
> value. It doesn't look correct.
>
>> + return 0;
>> +}
>> +
>> +static const struct clk_ops exynos4210_armclk_clk_ops = {
>> + .recalc_rate = exynos4210_armclk_recalc_rate,
>> + .round_rate = exynos_cpuclk_round_rate,
>> + .set_rate = exynos4210_armclk_set_rate,
>> +};
>> +
>> +/*
>> + * parse divider configuration data from dt for all the cpu clock domain
>> + * clocks in exynos4210 and compatible SoC's.
>> + */
>> +static int __init exynos4210_armclk_parser(struct device_node *np, void **data)
>> +{
>> + struct exynos4210_armclk_data *tdata;
>> + u32 cfg[10], num_rows, row, col;
>> + struct property *prop;
>> + const __be32 *ptr = NULL;
>> + u32 cells;
>> + int ret;
>> +
>> + if (of_property_read_u32(np, "samsung,armclk-cells", &cells))
>> + return -EINVAL;
>> + prop = of_find_property(np, "samsung,armclk-divider-table", NULL);
>
> You should rather use the *lenp argument of of_find_property(), instead
> of dereferencing the struct.
>
>> + if (!prop)
>> + return -EINVAL;
>> + if (!prop->value)
>> + return -EINVAL;
>
> You can skip the check above, as the calculation below will give you
> num_rows equal to 0 in this case.
>
>> + if ((prop->length / sizeof(u32)) % cells)
>> + return -EINVAL;
>> + num_rows = (prop->length / sizeof(u32)) / cells;
>> +
>> + /* allocate a zero terminated table */
>> + *data = kzalloc(sizeof(*tdata) * (num_rows + 1), GFP_KERNEL);
>> + if (!*data)
>> + ret = -ENOMEM;
>
> Shouldn't you just return -ENOMEM here?
>
> Best regards,
> Tomasz
Thanks for your detailed review. I have made all the changes that you
have suggested.
Regards,
Thomas.
More information about the linux-arm-kernel
mailing list