Linux Sched - ARM64 PSCI 核启动方式

• 14 分钟阅读 • schedule

About PSCI

关于PSCI的介绍,我在其它博客上找到一段这样的描述:

The Power State Coordination Interface (PSCI) is an ARM standard that describes a software interface for power management between an operating system (not just Linux) and its supervisory firmware. The interface allows an operating system to perform power management tasks such as putting CPUs into an idle (low-power) state, bringing CPUs online/offline and turning the system off.

也就是说,PSCI是操作系统和supervisory firmware之间的一套接口。在ARM64上,supervisory firmware即为ATF(ARM Trusted Firmware)。引入PSCI是为了方便CPU核心电源相关的管理,如核的启动、休眠、热拔插等。

这篇文章主要是跟踪记录下ARM64在核启动相关的流程,其它如休眠、热拔插相关功能后续再议。

Core boot

对了解过系统启动初期流程的同学来说,这个函数kernel_init_freeable的调用时间点应该比较熟悉,而我们这篇记录核启动过程所涉及的重要流程就是在这个函数中调用的。注意,我们这里说的核都为“从核”,均由Boot Core启动,而Boot Core由硬件上电后自动启动,所以无需操心软件启动问题。

在kernel_init_freeable中会先后调用smp_prepare_cpus和smp_init。其中smp_prepare_cpus函数主要是解析CPU的拓扑图,像cluster、core、thread等参数。在ARM64下这些参数主要由设备树传递;还有另一种高级的传递方式叫ACPI(Advanced Configuration and Power Interface),主要应用在x86平台。拓扑的构建我们在这里不多介绍,后续另记录。

接下来主要看smp_init的实现过程,

/*
 * smp_init - bring up the secondary (non-boot) CPUs during early boot.
 *
 * Excerpt as quoted in the article; the "( N )" markers are the article's
 * callouts that the numbered notes below the listing refer to.
 */
void __init smp_init(void)
{
	int num_nodes, num_cpus;

	/* Per the article: forks one idle task per core and stores it in the
	 * per-CPU variable idle_threads for later use. */
	idle_threads_init();												( 1 )
	/* Presumably sets up the CPU hotplug threads; body not shown here. */
	cpuhp_threads_init();												( 2 )

	pr_info("Bringing up secondary CPUs ...\n");

	/* The actual core bring-up; its call chain ends in the hotplug
	 * state machine (cpuhp_invoke_callback_range). */
	bringup_nonboot_cpus(setup_max_cpus);								( 3 )

	/* Report how many nodes and CPUs are online after bring-up. */
	num_nodes = num_online_nodes();
	num_cpus  = num_online_cpus();
	pr_info("Brought up %d node%s, %d CPU%s\n",
		num_nodes, (num_nodes > 1 ? "s" : ""),
		num_cpus,  (num_cpus  > 1 ? "s" : ""));

	/* Any cleanup work */
	smp_cpus_done(setup_max_cpus);										( 4 )
}

1、为每一个core fork出一个idle进程,并将进程描述结构存储在percpu变量idle_threads中,以备后用;

2、为每个CPU注册热插拔线程(cpuhp/N),并唤醒(unpark)Boot CPU上的该线程,后续状态机中部分状态的回调会在该线程中执行;

3、这就是主要的启核操作,我们直接来挖出它的深层次调用栈,

bringup_nonboot_cpus(setup_max_cpus);
    cpu_up(cpu, CPUHP_ONLINE);
        cpuhp_up_callbacks(cpu, st, target);
            cpuhp_invoke_callback_range(true, cpu, st, target);

关于cpuhp_invoke_callback_range,单拿出来看一下,这里会涉及到一个状态机,用于核在up、down状态之间的转换:

static int cpuhp_invoke_callback_range(bool bringup,
				       unsigned int cpu,
				       struct cpuhp_cpu_state *st,
				       enum cpuhp_state target)
{
	enum cpuhp_state state;
	int err = 0;

	while (cpuhp_next_state(bringup, &state, st, target)) {
		err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
		if (err)
			break;
	}

	return err;
}

根据cpuhp_next_state这个函数的名字就能看出来个差不多,意思就是将当前的核状态一步步地转换到目标状态上,每转换一个状态就会调用一次cpuhp_invoke_callback执行对应的回调函数,前提是对应状态的相关回调已经实现,否则会跳过对应的状态,一旦达到了目标状态就会停止。

状态机中的所有状态可以在include/linux/cpuhotplug.h中查到,

enum cpuhp_state

所有状态对应的回调函数定义在kernel/cpu.c中,

static struct cpuhp_step cpuhp_hp_states[]

我们只关注核启动过程中的关键步骤,直接来看状态切换过程中经过的CPUHP_BRINGUP_CPU状态,它对应的回调是bringup_cpu

bringup_cpu
    __cpu_up(cpu, idle);
        boot_secondary(cpu, idle);

关于boot_secondary函数,它是这样实现的,

/*
 * Ask the platform's CPU operations to start secondary CPU @cpu.
 *
 * @cpu:  logical number of the CPU to boot.
 * @idle: idle task for that CPU (not used in this function).
 *
 * Returns whatever the cpu_boot hook returns, or -EOPNOTSUPP when the
 * selected cpu_operations do not provide a cpu_boot hook.
 */
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
	const struct cpu_operations *ops = get_cpu_ops(cpu);

	/* Without a boot hook there is no way to start this CPU. */
	if (!ops->cpu_boot)
		return -EOPNOTSUPP;

	return ops->cpu_boot(cpu);
}

接口get_cpu_ops返回的ops取决于cpu_ops的值,在ARM64平台上共支持两种cpu_operations,分别是spin_table和psci,而选择使用哪种方法是在设备树中决定的(CPU节点的enable-method属性),具体的解析和设置流程在setup_arch中实现。

函数psci_dt_init将会根据设备树中指定的psci版本去调用具体的.data方法实现。我们以RK3399的设备树内容为例(设备树内容在文末),

/*
 * Device-tree match table for the PSCI node: each entry pairs a
 * "compatible" string with the init function stored in .data.
 * Note that "arm,psci-1.0" reuses the 0.2 init routine.
 */
static const struct of_device_id psci_of_match[] __initconst = {
	{ .compatible = "arm,psci",	.data = psci_0_1_init},
	{ .compatible = "arm,psci-0.2",	.data = psci_0_2_init},
	{ .compatible = "arm,psci-1.0",	.data = psci_0_2_init},
	/* Empty entry; presumably the conventional table terminator. */
	{},
};

设备树中指定的版本是1.0,那么接下来会调用到psci_0_2_init

/*
 * Initialise PSCI for firmware advertising v0.2 or later.
 *
 * @np: device-tree node of the PSCI firmware interface.
 *
 * First selects the conduit used to call into firmware, then probes the
 * firmware. Returns 0 on success or the first error encountered.
 */
static int __init psci_0_2_init(struct device_node *np)
{
	int err;

	err = get_set_conduit_method(np);
	/*
	 * Without a valid conduit, invoke_psci_fn is not set up, so probing
	 * would call into firmware through an unconfigured path. Bail out
	 * instead of letting psci_probe() overwrite the error.
	 */
	if (err)
		return err;

	return psci_probe();
}

get_set_conduit_method根据设备树中指定的ATF交互接口(method属性)来决定invoke_psci_fn的具体回调,这里RK3399配置为smc,所以回调将指定为__invoke_psci_fn_smc。

再来看psci_probe

/*
 * psci_probe - probe the PSCI firmware and install the PSCI operations.
 *
 * Abridged excerpt as quoted in the article; the "( N )" markers are the
 * article's callouts. Always returns 0 in this abridged form.
 */
static int __init psci_probe(void)
{
	/* Firmware-reported PSCI version. Per the article, version checks
	 * follow here; they are omitted from this excerpt, so ver is unused. */
	u32 ver = psci_get_version();										( 1 )

	/* Per the article: fills in the psci_ops callbacks such as cpu_on
	 * and cpu_suspend. */
	psci_0_2_set_functions();											( 2 )

	/* Body not shown here; presumably handles Trusted OS migration. */
	psci_init_migrate();												( 3 )

	return 0;
}

1、获取PSCI对应的版本号,针对版本做出一些检查;

2、主要是配置psci_ops方法集的各个回调,例如cpu_on、cpu_suspend等,在操作对应CPU时最终将会调用到这里。以cpu_on对应的psci_cpu_on为例:

/*
 * Issue the PSCI CPU_ON call for one CPU.
 *
 * @cpuid:       identifier of the CPU to power on, passed to firmware as-is.
 * @entry_point: address passed to firmware as the CPU's entry point.
 *
 * Returns the firmware result translated by psci_to_linux_errno().
 */
static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point)
{
	/* Function ID registered for CPU_ON in the psci_function_id table. */
	const u32 cpu_on_id = psci_function_id[PSCI_FN_CPU_ON];
	int fw_ret = invoke_psci_fn(cpu_on_id, cpuid, entry_point, 0);

	return psci_to_linux_errno(fw_ret);
}

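3、psci_init_migrate:查询Trusted OS的迁移信息(MIGRATE_INFO_TYPE),若Trusted OS驻留在某个固定的CPU上且不支持迁移,则记录该CPU,避免其被下线。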

RK3399 示例设备树

/*
 * RK3399 "cpus" node (decompiled form: phandles and values are raw hex).
 * Every CPU uses enable-method = "psci". "reg" takes two cells because
 * #address-cells is 2.
 */
cpus {
    #address-cells = <0x2>;
    #size-cells = <0x0>;

    /* Topology: each coreN's "cpu" property is the phandle of a cpu@ node below. */
    cpu-map {

        /* Phandles 0x2-0x5 are cpu@0..cpu@3 (arm,cortex-a53). */
        cluster0 {

            core0 {
                cpu = <0x2>;
            };

            core1 {
                cpu = <0x3>;
            };

            core2 {
                cpu = <0x4>;
            };

            core3 {
                cpu = <0x5>;
            };
        };

        /* Phandles 0x6-0x7 are cpu@100 and cpu@101 (arm,cortex-a72). */
        cluster1 {

            core0 {
                cpu = <0x6>;
            };

            core1 {
                cpu = <0x7>;
            };
        };
    };

    /*
     * Cortex-A53 cores. capacity-dmips-mhz 0x1e5 = 485.
     * cpu-idle-states <0x9 0xa> are the phandles of cpu-sleep and
     * cluster-sleep under idle-states.
     */
    cpu@0 {
        device_type = "cpu";
        compatible = "arm,cortex-a53";
        reg = <0x0 0x0>;
        enable-method = "psci";
        capacity-dmips-mhz = <0x1e5>;
        clocks = <0x8 0x8>;
        #cooling-cells = <0x2>;
        dynamic-power-coefficient = <0x64>;
        cpu-idle-states = <0x9 0xa>;
        operating-points-v2 = <0xb>;
        cpu-supply = <0xc>;
        phandle = <0x2>;
    };

    cpu@1 {
        device_type = "cpu";
        compatible = "arm,cortex-a53";
        reg = <0x0 0x1>;
        enable-method = "psci";
        capacity-dmips-mhz = <0x1e5>;
        clocks = <0x8 0x8>;
        #cooling-cells = <0x2>;
        dynamic-power-coefficient = <0x64>;
        cpu-idle-states = <0x9 0xa>;
        operating-points-v2 = <0xb>;
        cpu-supply = <0xc>;
        phandle = <0x3>;
    };

    cpu@2 {
        device_type = "cpu";
        compatible = "arm,cortex-a53";
        reg = <0x0 0x2>;
        enable-method = "psci";
        capacity-dmips-mhz = <0x1e5>;
        clocks = <0x8 0x8>;
        #cooling-cells = <0x2>;
        dynamic-power-coefficient = <0x64>;
        cpu-idle-states = <0x9 0xa>;
        operating-points-v2 = <0xb>;
        cpu-supply = <0xc>;
        phandle = <0x4>;
    };

    cpu@3 {
        device_type = "cpu";
        compatible = "arm,cortex-a53";
        reg = <0x0 0x3>;
        enable-method = "psci";
        capacity-dmips-mhz = <0x1e5>;
        clocks = <0x8 0x8>;
        #cooling-cells = <0x2>;
        dynamic-power-coefficient = <0x64>;
        cpu-idle-states = <0x9 0xa>;
        operating-points-v2 = <0xb>;
        cpu-supply = <0xc>;
        phandle = <0x5>;
    };

    /* Cortex-A72 cores. capacity-dmips-mhz 0x400 = 1024. */
    cpu@100 {
        device_type = "cpu";
        compatible = "arm,cortex-a72";
        reg = <0x0 0x100>;
        enable-method = "psci";
        capacity-dmips-mhz = <0x400>;
        clocks = <0x8 0x9>;
        #cooling-cells = <0x2>;
        dynamic-power-coefficient = <0x1b4>;
        cpu-idle-states = <0x9 0xa>;
        operating-points-v2 = <0xd>;
        cpu-supply = <0xe>;
        phandle = <0x6>;
    };

    cpu@101 {
        device_type = "cpu";
        compatible = "arm,cortex-a72";
        reg = <0x0 0x101>;
        enable-method = "psci";
        capacity-dmips-mhz = <0x400>;
        clocks = <0x8 0x9>;
        #cooling-cells = <0x2>;
        dynamic-power-coefficient = <0x1b4>;
        cpu-idle-states = <0x9 0xa>;
        operating-points-v2 = <0xd>;
        cpu-supply = <0xe>;
        phandle = <0x7>;
    };

    /* Idle states referenced by every cpu@ node; entered through PSCI. */
    idle-states {
        entry-method = "psci";

        /* phandle 0x9. Latencies: entry 120 us, exit 250 us, min residency 900 us. */
        cpu-sleep {
            compatible = "arm,idle-state";
            local-timer-stop;
            arm,psci-suspend-param = <0x10000>;
            entry-latency-us = <0x78>;
            exit-latency-us = <0xfa>;
            min-residency-us = <0x384>;
            phandle = <0x9>;
        };

        /* phandle 0xa. Latencies: entry 400 us, exit 500 us, min residency 2000 us. */
        cluster-sleep {
            compatible = "arm,idle-state";
            local-timer-stop;
            arm,psci-suspend-param = <0x1010000>;
            entry-latency-us = <0x190>;
            exit-latency-us = <0x1f4>;
            min-residency-us = <0x7d0>;
            phandle = <0xa>;
        };
    };
};
文章标签: schedule

上一篇 : 【 SCHED 】聊聊 Schedule Domain 的构建
下一篇 : 【 SCHED 】聊聊 CPU 核拓扑解析
阅读进度 0%