[PATCH v2 2/2] riscv: dts: spacemit: Add cpu scaling for K1 SoC

Shuwei Wu shuwei.wu at mailbox.org
Wed Apr 15 22:59:05 PDT 2026


On Tue Apr 14, 2026 at 9:25 PM CST, Anand Moon wrote:
> Hi Shuwei,
>
> On Fri, 10 Apr 2026 at 13:30, Shuwei Wu <shuwei.wu at mailbox.org> wrote:
>>
>> Add Operating Performance Points (OPP) tables and CPU clock properties
>> for the two clusters in the SpacemiT K1 SoC.
>>
>> Also assign the CPU power supply (cpu-supply) for the Banana Pi BPI-F3
>> board to fully enable CPU DVFS.
>>
>> Signed-off-by: Shuwei Wu <shuwei.wu at mailbox.org>
>>
>> ---
>> Changes in v2:
>> - Add k1-opp.dtsi with OPP tables for both CPU clusters
>> - Assign CPU supplies and include OPP table for Banana Pi BPI-F3
>> ---
>>  arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts |  35 +++++++-
>>  arch/riscv/boot/dts/spacemit/k1-opp.dtsi        | 105 ++++++++++++++++++++++++
>>  arch/riscv/boot/dts/spacemit/k1.dtsi            |   8 ++
>>  3 files changed, 147 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts b/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts
>> index 444c3b1e6f44..3780593f610d 100644
>> --- a/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts
>> +++ b/arch/riscv/boot/dts/spacemit/k1-bananapi-f3.dts
>> @@ -5,6 +5,7 @@
>>
>>  #include "k1.dtsi"
>>  #include "k1-pinctrl.dtsi"
>> +#include "k1-opp.dtsi"
>>
>>  / {
>>         model = "Banana Pi BPI-F3";
>> @@ -86,6 +87,38 @@ &combo_phy {
>>         status = "okay";
>>  };
>>
>> +&cpu_0 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_1 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_2 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_3 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_4 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_5 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_6 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>> +&cpu_7 {
>> +       cpu-supply = <&buck1_3v45>;
>> +};
>> +
>>  &emmc {
>>         bus-width = <8>;
>>         mmc-hs400-1_8v;
>> @@ -201,7 +234,7 @@ pmic@41 {
>>                 dldoin2-supply = <&buck5>;
>>
>>                 regulators {
>> -                       buck1 {
>> +                       buck1_3v45: buck1 {
>>                                 regulator-min-microvolt = <500000>;
>>                                 regulator-max-microvolt = <3450000>;
>>                                 regulator-ramp-delay = <5000>;
>> diff --git a/arch/riscv/boot/dts/spacemit/k1-opp.dtsi b/arch/riscv/boot/dts/spacemit/k1-opp.dtsi
>> new file mode 100644
>> index 000000000000..768ae390686d
>> --- /dev/null
>> +++ b/arch/riscv/boot/dts/spacemit/k1-opp.dtsi
>> @@ -0,0 +1,105 @@
>> +// SPDX-License-Identifier: (GPL-2.0+ OR MIT)
>> +
>> +/ {
>> +       cluster0_opp_table: opp-table-cluster0 {
>> +               compatible = "operating-points-v2";
>> +               opp-shared;
>> +
>> +               opp-614400000 {
>> +                       opp-hz = /bits/ 64 <614400000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-819000000 {
>> +                       opp-hz = /bits/ 64 <819000000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-1000000000 {
>> +                       opp-hz = /bits/ 64 <1000000000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-1228800000 {
>> +                       opp-hz = /bits/ 64 <1228800000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-1600000000 {
>> +                       opp-hz = /bits/ 64 <1600000000>;
>> +                       opp-microvolt = <1050000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +       };
>> +
>> +       cluster1_opp_table: opp-table-cluster1 {
>> +               compatible = "operating-points-v2";
>> +               opp-shared;
>> +
>> +               opp-614400000 {
>> +                       opp-hz = /bits/ 64 <614400000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-819000000 {
>> +                       opp-hz = /bits/ 64 <819000000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-1000000000 {
>> +                       opp-hz = /bits/ 64 <1000000000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-1228800000 {
>> +                       opp-hz = /bits/ 64 <1228800000>;
>> +                       opp-microvolt = <950000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +
>> +               opp-1600000000 {
>> +                       opp-hz = /bits/ 64 <1600000000>;
>> +                       opp-microvolt = <1050000>;
>> +                       clock-latency-ns = <200000>;
>> +               };
>> +       };
>> +};
>> +
>> +&cpu_0 {
>> +       operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_1 {
>> +       operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_2 {
>> +       operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_3 {
>> +       operating-points-v2 = <&cluster0_opp_table>;
>> +};
>> +
>> +&cpu_4 {
>> +       operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> +
>> +&cpu_5 {
>> +       operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> +
>> +&cpu_6 {
>> +       operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> +
>> +&cpu_7 {
>> +       operating-points-v2 = <&cluster1_opp_table>;
>> +};
>> diff --git a/arch/riscv/boot/dts/spacemit/k1.dtsi b/arch/riscv/boot/dts/spacemit/k1.dtsi
>> index 529ec68e9c23..bdd109b81730 100644
>> --- a/arch/riscv/boot/dts/spacemit/k1.dtsi
>> +++ b/arch/riscv/boot/dts/spacemit/k1.dtsi
>> @@ -54,6 +54,7 @@ cpu_0: cpu@0 {
>>                         compatible = "spacemit,x60", "riscv";
>>                         device_type = "cpu";
>>                         reg = <0>;
>> +                       clocks = <&syscon_apmu CLK_CPU_C0_CORE>;
>>                         riscv,isa = "rv64imafdcbv_zicbom_zicbop_zicboz_zicntr_zicond_zicsr_zifencei_zihintpause_zihpm_zfh_zba_zbb_zbc_zbs_zkt_zvfh_zvkt_sscofpmf_sstc_svinval_svnapot_svpbmt";
>>                         riscv,isa-base = "rv64i";
>>                         riscv,isa-extensions = "i", "m", "a", "f", "d", "c", "b", "v", "zicbom",
>> @@ -84,6 +85,7 @@ cpu_1: cpu@1 {
>>                         compatible = "spacemit,x60", "riscv";
>>                         device_type = "cpu";
>>                         reg = <1>;
>> +                       clocks = <&syscon_apmu CLK_CPU_C0_CORE>;
>
> Based on the Spacemit kernel source, the k1-x_opp_table.dtsi file
> defines several additional clocks for the Operating Performance Points
> (OPP) table:
>
>  clocks = <&ccu CLK_CPU_C0_ACE>, <&ccu CLK_CPU_C1_ACE>, <&ccu CLK_CPU_C0_TCM>,
>           <&ccu CLK_CCI550>, <&ccu CLK_PLL3>, <&ccu CLK_CPU_C0_HI>,
>           <&ccu CLK_CPU_C1_HI>;
>  clock-names = "ace0","ace1","tcm","cci","pll3", "c0hi", "c1hi";
>
> These hardware clocks are also explicitly registered in the APMU clock driver
> via the k1_ccu_apmu_hws array, confirming their availability for frequency
> and voltage scaling on the K1-X SoC.
>
> static struct clk_hw *k1_ccu_apmu_hws[] = {
>         [CLK_CCI550]            = &cci550_clk.common.hw,
>         [CLK_CPU_C0_HI]         = &cpu_c0_hi_clk.common.hw,
>         [CLK_CPU_C0_CORE]       = &cpu_c0_core_clk.common.hw,
>         [CLK_CPU_C0_ACE]        = &cpu_c0_ace_clk.common.hw,
>         [CLK_CPU_C0_TCM]        = &cpu_c0_tcm_clk.common.hw,
>         [CLK_CPU_C1_HI]         = &cpu_c1_hi_clk.common.hw,
>         [CLK_CPU_C1_CORE]       = &cpu_c1_core_clk.common.hw,
>         [CLK_CPU_C1_ACE]        = &cpu_c1_ace_clk.common.hw,
>
> Yes, it is possible to add these clocks for DVFS to work correctly,
> provided they are managed by the appropriate driver and declared in
> the Device Tree (DT).
>
> Thanks
> -Anand

Thanks for your review and for pointing this out.

Regarding the clocks you mentioned, I'd like to clarify their roles based on
the K1 datasheet. Taking Cluster 0 as an example, c0_core_clk is the primary
clock for the cluster. c0_ace_clk and c0_tcm_clk are children derived from it,
defaulting to half the frequency of their parent core clock, while c0_hi_clk
represents the high-speed path selection.
Cluster 1 follows the same structure.

Based on the official SpacemiT Bianbu OS source, the spacemit-cpufreq.c driver
mainly performs the following tasks:
1. Sets the CCI550 clock frequency to 614.4MHz.
2. Sets the clock frequencies of c0_ace_clk, c1_ace_clk, and c0_tcm_clk to half
the frequency of their parent clock.
3. For the 1.6GHz OPP, it sets the PLL3 frequency to 3.2GHz and the
c0_hi_clk/c1_hi_clk frequencies to 1.6GHz.

I booted with the manufacturer's OpenWRT image and used debugfs to confirm that
the clock states are exactly as described above.

At 1.6GHz:
Clock Source & Tree           Rate (Hz)      HW Enable  Consumer
---------------------------------------------------------------------------
pll3                          3,200,000,000      Y      deviceless
 └─ pll3_d2                   1,600,000,000      Y      deviceless
     ├─ cpu_c1_hi_clk         1,600,000,000      Y      deviceless
     │   └─ cpu_c1_pclk       1,600,000,000      Y      cpu0
     │       └─ cpu_c1_ace_clk  800,000,000      Y      deviceless
     └─ cpu_c0_hi_clk         1,600,000,000      Y      deviceless
         └─ cpu_c0_core_clk   1,600,000,000      Y      cpu0
             ├─ cpu_c0_tcm_clk  800,000,000      Y      deviceless
             └─ cpu_c0_ace_clk  800,000,000      Y      deviceless

pll1_2457p6_vco               2,457,600,000      Y      deviceless
 └─ pll1_d4                     614,400,000      Y      deviceless
     └─ pll1_d4_614p4           614,400,000      Y      deviceless
         └─ cci550_clk          614,400,000      Y      deviceless

At 1.2288GHz:
Clock Source & Tree           Rate (Hz)      HW Enable  Consumer
---------------------------------------------------------------------------
pll1_2457p6_vco               2,457,600,000      Y      deviceless
 ├─ pll1_d2                   1,228,800,000      Y      deviceless
 │   └─ pll1_d2_1228p8        1,228,800,000      Y      deviceless
 │       ├─ cpu_c0_core_clk   1,228,800,000      Y      cpu0
 │       │   ├─ cpu_c0_tcm_clk  614,400,000      Y      deviceless
 │       │   └─ cpu_c0_ace_clk  614,400,000      Y      deviceless
 │       └─ cpu_c1_pclk       1,228,800,000      Y      cpu0
 │           └─ cpu_c1_ace_clk  614,400,000      Y      deviceless
 └─ pll1_d4                     614,400,000      Y      deviceless
     └─ pll1_d4_614p4           614,400,000      Y      deviceless
         └─ cci550_clk          614,400,000      Y      deviceless

pll3                          3,200,000,000      Y      deviceless
 ├─ pll3_d2                   1,600,000,000      Y      deviceless
 │   ├─ cpu_c1_hi_clk         1,600,000,000      Y      deviceless
 │   └─ cpu_c0_hi_clk         1,600,000,000      Y      deviceless
 └─ pll3_d3                   1,066,666,666      Y      deviceless

Regarding the necessity of listing these clocks in the DT, my analysis is as follows:
1. For CCI550, I did not find a clear definition of this clock's specific role
in the SoC datasheet. Although the vendor kernel increases its frequency,
my benchmarks show that maintaining the mainline default (245.76MHz) has a
negligible impact on CPU performance.
2. For ACE and TCM clocks, they function as synchronous children of the core
clock with a default divide-by-2 ratio. Since they scale automatically relative
to c0_core_clk/c1_core_clk and no other peripherals depend on them, they do not
require manual management in the OPP table.
3. For the high-speed path, the underlying clock controller logic already handles
the parent MUX switching and PLL3 scaling automatically when clk_set_rate()
is called on the core clock.

I have verified this by checking the hardware state in the mainline kernel.
The clock tree matches the vendor kernel's configuration:

At 1.6GHz:
Clock Source & Tree           Rate (Hz)      HW Enable  Consumer
---------------------------------------------------------------------------
pll3                          3,200,000,000      Y      deviceless
 └─ pll3_d2                   1,600,000,000      Y      deviceless
     ├─ cpu_c1_hi_clk         1,600,000,000      Y      deviceless
     │   └─ cpu_c1_core_clk   1,600,000,000      Y      cpu4
     │       └─ cpu_c1_ace_clk  800,000,000      Y      deviceless
     └─ cpu_c0_hi_clk         1,600,000,000      Y      deviceless
         └─ cpu_c0_core_clk   1,600,000,000      Y      cpu0
             ├─ cpu_c0_tcm_clk  800,000,000      Y      deviceless
             └─ cpu_c0_ace_clk  800,000,000      Y      deviceless

pll1                          2,457,600,000      Y      deviceless
 └─ pll1_d5                     491,520,000      Y      deviceless
     └─ pll1_d5_491p52          491,520,000      Y      deviceless
         └─ cci550_clk          245,760,000      Y      deviceless

At 1.2288GHz:
Clock Source & Tree           Rate (Hz)      HW Enable  Consumer
---------------------------------------------------------------------------
pll1                          2,457,600,000      Y      deviceless
 ├─ pll1_d5                     491,520,000      Y      deviceless
 │   └─ pll1_d5_491p52          491,520,000      Y      deviceless
 │       └─ cci550_clk          245,760,000      Y      deviceless
 └─ pll1_d2                   1,228,800,000      Y      deviceless
     └─ pll1_d2_1228p8        1,228,800,000      Y      deviceless
         └─ cpu_c0_core_clk   1,228,800,000      Y      cpu0
             ├─ cpu_c0_tcm_clk  614,400,000      Y      deviceless
             └─ cpu_c0_ace_clk  614,400,000      Y      deviceless

pll3                          3,200,000,000      Y      deviceless
 └─ pll3_d2                   1,600,000,000      Y      deviceless
     └─ cpu_c1_hi_clk         1,600,000,000      Y      deviceless
         └─ cpu_c1_core_clk   1,600,000,000      Y      cpu4
             └─ cpu_c1_ace_clk  800,000,000      Y      deviceless

Performance benchmarks also confirm that the current configuration is sufficient:
Benchmark (AWK computation): time awk 'BEGIN{for(i=0;i<10000000;i++) sum+=i}'
----------------------------------------------------------------------------
Frequency    |      Mainline Linux (s)       |        OpenWrt (s)          
(kHz)        |  Real (Total) |  User (CPU)   |  Real (Total) |  User (CPU)
-------------+---------------+---------------+---------------+--------------
1,600,000    |     1.82s     |     1.81s     |     1.73s     |    1.73s    
1,228,800    |     2.34s     |     2.33s     |     2.26s     |    2.26s    
1,000,000    |     2.94s     |     2.86s     |     2.78s     |    2.78s    
  819,000    |     3.54s     |     3.53s     |     3.39s     |    3.39s    
  614,400    |     4.73s     |     4.71s     |     4.51s     |    4.51s    
----------------------------------------------------------------------------

In summary, because the clock controller correctly handles the internal dividers
and parent switching, declaring only the primary core clock for each CPU node is
sufficient for functional DVFS.

-- 
Best regards,
Shuwei Wu



More information about the linux-riscv mailing list