cole3's blog


  • Startseite

  • Über

Linux kernel parses cmdline

Veröffentlicht am 2017-05-29

Linux kernel利用静态数组存储cmdline,arm默认1024 Bytes,arm64默认2048 Bytes。

./arch/arm/include/uapi/asm/setup.h:19:#define COMMAND_LINE_SIZE 1024
./arch/arm64/include/uapi/asm/setup.h:24:#define COMMAND_LINE_SIZE 2048

restore cmdline

kernel启动后获取bootargs的flow如下:

+start_kernel
|--+setup_arch
|----+setup_machine_fdt
|------+of_scan_flat_dt
|--------+early_init_dt_scan_chosen
|----------+of_get_flat_dt_prop

从dtb里拿到bootargs字串后,存储到boot_command_line全局变量中:

[init/main.c]

/* Untouched command line saved by arch-specific code. */
char __initdata boot_command_line[COMMAND_LINE_SIZE];

在架构相关的setup_arch函数中,会将boot_command_line copy 到cmd_line变量中,后者定义在架构相关目录下:

[arch/arm/kernel/setup.c]

static char __initdata cmd_line[COMMAND_LINE_SIZE];

不过这两个存储位置都是__initdata类型,意味着这部分内存会被回收,所以要保存cmdline以供上层调用,需要另存:

+start_kernel
|--+setup_command_line

上述调用,会将cmdline转存到saved_command_line中,后者为/proc/cmdline提供数据支持。

kernel参数分成三种类型:early_param,module_param_named,__setup。cmdline会被逐层进行解析。

early_param parse

early_param注册的参数是最早被解析的:

+start_kernel
|--+setup_arch
|----+parse_early_param
|------+parse_early_options
|--------+parse_args    
|----------+handle_unknown (do_early_param)

+start_kernel
|--+parse_early_param
|----+parse_early_options
|------+parse_args    
|--------+handle_unknown (do_early_param)

[init/main.c]

/*
 * Check for early params.
 *
 * Scans every obs_kernel_param entry between __setup_start and
 * __setup_end and calls the entry's setup_func(val) when either
 *   - the entry is flagged early and its name matches param, or
 *   - param is "console" and the entry's name is "earlycon".
 *
 * @param:  parameter name to look up
 * @val:    value string handed unchanged to the matching handler(s)
 * @unused: not referenced by this function
 *
 * Always returns 0; a failing handler is only reported via pr_warn().
 */
static int __init do_early_param(char *param, char *val, const char *unused)
{
    const struct obs_kernel_param *p;

    /* No early exit: every matching entry in the table gets called. */
    for (p = __setup_start; p < __setup_end; p++) {
        /*
         * The "console" -> "earlycon" special case below does not
         * test p->early itself.
         */
        if ((p->early && parameq(param, p->str)) ||
            (strcmp(param, "console") == 0 &&
             strcmp(p->str, "earlycon") == 0)
        ) {
            /* Non-zero from the handler is logged, not propagated. */
            if (p->setup_func(val) != 0)
                pr_warn("Malformed early option '%s'\n", param);
        }
    }
    /* We accept everything at this stage. */
    return 0;
}

[include/asm-generic/vmlinux.lds.h]

#define INIT_SETUP(initsetup_align)                    \
        . = ALIGN(initsetup_align);                \
        VMLINUX_SYMBOL(__setup_start) = .;            \
        *(.init.setup)                        \
        VMLINUX_SYMBOL(__setup_end) = .;

do_early_param利用.init.setup段的结构进行解析cmdline,而且要求p->early置位。
来看下early_param的实现:

/*
 * Only for really core code.  See moduleparam.h for the normal way.
 *
 * Force the alignment so the compiler doesn't space elements of the
 * obs_kernel_param "array" too far apart in .init.setup.
 */
#define __setup_param(str, unique_id, fn, early)            \
    static const char __setup_str_##unique_id[] __initconst    \
        __aligned(1) = str; \
    static struct obs_kernel_param __setup_##unique_id    \
        __used __section(.init.setup)            \
        __attribute__((aligned((sizeof(long)))))    \
        = { __setup_str_##unique_id, fn, early }

#define __setup(str, fn)                    \
    __setup_param(str, fn, fn, 0)

/* NOTE: fn is as per module_param, not __setup!  Emits warning if fn
 * returns non-zero. */
#define early_param(str, fn)                    \
    __setup_param(str, fn, fn, 1)

从上面的code,可以看出early_param和__setup都会构建结构体在.init.setup中,不同的是early_param会置位early flag,所以在do_early_param中,会对这部分进行解析。

module_param_named

[include/linux/moduleparam.h]

/**
 * module_param_named - typesafe helper for a renamed module/cmdline parameter
 * @name: a valid C identifier which is the parameter name.
 * @value: the actual lvalue to alter.
 * @type: the type of the parameter
 * @perm: visibility in sysfs.
 *
 * Usually it's a good idea to have variable names and user-exposed names the
 * same, but that's harder if the variable must be non-static or is inside a
 * structure.  This allows exposure under a different name.
 */
#define module_param_named(name, value, type, perm)               \
    param_check_##type(name, &(value));                   \
    module_param_cb(name, &param_ops_##type, &value, perm);           \
    __MODULE_PARM_TYPE(name, #type)

/**
 * module_param_cb - general callback for a module/cmdline parameter
 * @name: a valid C identifier which is the parameter name.
 * @ops: the set & get operations for this parameter.
 * @perm: visibility in sysfs.
 *
 * The ops can have NULL set or get functions.
 */
#define module_param_cb(name, ops, arg, perm)                      \
    __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1)

/* This is the fundamental function for registering boot/module
   parameters. */
#define __module_param_call(prefix, name, ops, arg, perm, level)    \
    /* Default value instead of permissions? */            \
    static int __param_perm_check_##name __attribute__((unused)) =    \
    BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2))    \
    + BUILD_BUG_ON_ZERO(sizeof(""prefix) > MAX_PARAM_PREFIX_LEN);    \
    static const char __param_str_##name[] = prefix #name;        \
    static struct kernel_param __moduleparam_const __param_##name    \
    __used                                \
    __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \
    = { __param_str_##name, ops, perm, level, { arg } }

从上面看出,module_param_named注册结构体到__param section,看下kernel parse的过程:

+start_kernel
|--+parse_early_param
|--+parse_args("Booting kernel", static_command_line, __start___param,
|       __stop___param - __start___param,
|       -1, -1, &unknown_bootoption);
|----+parse_one
|------+params[i].ops->set

[include/asm-generic/vmlinux.lds.h]

/* Built-in module parameters. */                \
__param : AT(ADDR(__param) - LOAD_OFFSET) {            \
    VMLINUX_SYMBOL(__start___param) = .;            \
    *(__param)                        \
    VMLINUX_SYMBOL(__stop___param) = .;            \
}                                \

parse_args的传入参数为__param section,也就是module_param_named注册的参数;各类型参数对应的set/get函数定义为:

[kernel/params.c]

/* Lazy bastard, eh? */
#define STANDARD_PARAM_DEF(name, type, format, tmptype, strtolfn)          \
    int param_set_##name(const char *val, const struct kernel_param *kp) \
    {                                \
        tmptype l;                        \
        int ret;                        \
                                    \
        ret = strtolfn(val, 0, &l);                \
        if (ret < 0 || ((type)l != l))                \
            return ret < 0 ? ret : -EINVAL;            \
        *((type *)kp->arg) = l;                    \
        return 0;                        \
    }                                \
    int param_get_##name(char *buffer, const struct kernel_param *kp) \
    {                                \
        return sprintf(buffer, format, *((type *)kp->arg));    \
    }                                \
    struct kernel_param_ops param_ops_##name = {            \
        .set = param_set_##name,                \
        .get = param_get_##name,                \
    };                                \
    EXPORT_SYMBOL(param_set_##name);                \
    EXPORT_SYMBOL(param_get_##name);                \
    EXPORT_SYMBOL(param_ops_##name)


STANDARD_PARAM_DEF(byte, unsigned char, "%c", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol);
STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol);
STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul);
STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol);
STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul);

其他的:

./kernel/params.c:297:struct kernel_param_ops param_ops_charp = {
./kernel/params.c:322:struct kernel_param_ops param_ops_bool = {
./kernel/params.c:349:struct kernel_param_ops param_ops_invbool = {
./kernel/params.c:372:struct kernel_param_ops param_ops_bint = {
./kernel/params.c:496:struct kernel_param_ops param_ops_string = {

__setup

+start_kernel
|--+parse_early_param
|--+parse_args("Booting kernel", static_command_line, __start___param,
|       __stop___param - __start___param,
|       -1, -1, &unknown_bootoption);
|----+unknown_bootoption
|------+obsolete_checksetup

[init/main.c]

/*
 * Try to handle one command-line item with the obs_kernel_param table
 * that spans __setup_start .. __setup_end.
 *
 * @line: the command-line item; each table entry's name is compared
 *        against its first strlen(name) characters.
 *
 * Returns 1 as soon as a non-early entry consumes the item (either it is
 * reported as obsolete because it has no handler, or its handler returns
 * non-zero). Otherwise returns had_early_param, which is 1 only if an
 * early entry matched the parameter name exactly.
 */
static int __init obsolete_checksetup(char *line)
{
    const struct obs_kernel_param *p;
    int had_early_param = 0;

    p = __setup_start;
    /*
     * do/while: the first entry is dereferenced before the bound is
     * checked, so the table is expected to be non-empty.
     */
    do {
        int n = strlen(p->str);
        /* Prefix comparison over the n characters of the entry name. */
        if (parameqn(line, p->str, n)) {
            if (p->early) {
                /* Already done in parse_early_param?
                 * (Needs exact match on param part).
                 * Keep iterating, as we can have early
                 * params and __setups of same names 8( */
                if (line[n] == '\0' || line[n] == '=')
                    had_early_param = 1;
            } else if (!p->setup_func) {
                /* Entry registered without a handler. */
                pr_warn("Parameter %s is obsolete, ignored\n",
                    p->str);
                return 1;
            } else if (p->setup_func(line + n))
                /* Handler receives the text following the name. */
                return 1;
        }
        p++;
    } while (p < __setup_end);

    return had_early_param;
}

[include/asm-generic/vmlinux.lds.h]

#define INIT_SETUP(initsetup_align)                    \
        . = ALIGN(initsetup_align);                \
        VMLINUX_SYMBOL(__setup_start) = .;            \
        *(.init.setup)                        \
        VMLINUX_SYMBOL(__setup_end) = .;

__setup函数之前有提过,在.init.setup中构建解析结构体。

Linux kernel linear mapping area

Veröffentlicht am 2017-05-20

(32 bit case)

ARM page table

ARM的两级映射为12 + 8 + 12 = 32

一级页表(也可以称为页目录)总共有4096个表项,每个表项对应1MB的虚拟地址空间(4096 × 1MB = 4GB)

CPU首先获取页目录基地址(TTB),加上待转换虚拟地址的高12位,就获得了该虚拟地址的页目录项所在位置。ARM的一级页表项包含几种类型,如下图ARMv7 translation table的结构所示:

linux利用了section和small page来实现内存映射,其中线性映射区是利用section。

利用表项的bits[1:0]区别了下级页表的类型:

0b01, Page table
The descriptor gives the address of a second-level translation table, that specifies the mapping of the associated 1MByte VA range.

0b10, Section or Supersection
The descriptor gives the base address of the Section or Supersection. Bit[18] determines whether the entry describes a Section or a Supersection.

section-mapping

section address translation

MMU从CP15的c2寄存器(TTBR)中得到页目录基址,加上虚拟地址的高12位,得到了页目录项,MMU发现低2位为10,确定是section-mapping,就会取该页目录项的高12位与虚拟地址的低20位拼接,便获取到了物理地址。

page-mapping

small page address translation

4KB page-mapping 是二级页表方式:

  1. MMU利用TTB(页目录)基址,与虚拟地址的高12位相加,得到页目录项值
  2. MMU获取页目录项最低2bit是01,说明本次映射的1MB数据为4KB小页的page-mapping
  3. MMU获取页目录项的高22位(页表是256X4=1K,所以页表基址是1K对齐的)是页表基地址,与虚拟地址的中间8位相加,即该虚拟地址的对应页表项地址,从而获取虚拟地址对应的页表项值(page table entry)
  4. MMU获取页表项值的高20位,这就是该4K页对应的物理地址了,与虚拟地址低12位相加(也就是4KB页内的偏移),得到虚拟地址对应的物理地址

Linux kernel 线性映射区

在kernel启动时,mmu从关闭状态到打开状态,需要为mmu准备page table。下面是详细描述建表过程:

建立临时映射表

首先提到一个重要的文件head.S,这个文件包含了Kernel startup entry point:

    __HEAD
ENTRY(stext)

此时的mmu处于关闭状态,不过因为C函数的地址和变量地址都是虚拟地址,所以在进入C world之前,要建立映射表,然后开启mmu,在stext中有体现:

    bl    __create_page_tables
//...
1:    b    __enable_mmu

__create_page_tables

这个函数的作用是建立mmu的page table,当然在这段汇编里,只建立能刚好满足kernel运行的page table,剩下的交给强大的C语言完成,所以这个页表被叫做“临时映射表”。

这个函数分成三大部分:

  1. Create identity mapping to cater for __enable_mmu
  2. Map our RAM from the start to the end of the kernel
  3. Then map boot params address in r2 if specified

1. Create identity mapping to cater for __enable_mmu

    /*
     * Create identity mapping to cater for __enable_mmu.
     * This identity mapping will be removed by paging_init().
     */
    adr    r0, __turn_mmu_on_loc
    ldmia    r0, {r3, r5, r6}
    sub    r0, r0, r3            @ virt->phys offset
    add    r5, r5, r0            @ phys __turn_mmu_on
    add    r6, r6, r0            @ phys __turn_mmu_on_end
    mov    r5, r5, lsr #SECTION_SHIFT
    mov    r6, r6, lsr #SECTION_SHIFT

1:    orr    r3, r7, r5, lsl #SECTION_SHIFT    @ flags + kernel base //制作pgd表项
    str    r3, [r4, r5, lsl #PMD_ORDER]    @ identity mapping //存储pgd表项
    cmp    r5, r6
    addlo    r5, r5, #1            @ next section
    blo    1b

创建同一映射,也被称为平映射,该映射的特点是:虚拟地址和物理地址相同。

映射区间为 __turn_mmu_on 到 __turn_mmu_on_end,按section方式进行映射1MB空间(如果该函数恰好跨越1MB边界,则映射2MB)。__turn_mmu_on函数实现:

/*
 * Enable the MMU.  This completely changes the structure of the visible
 * memory space.  You will not be able to trace execution through this.
 * If you have an enquiry about this, *please* check the linux-arm-kernel
 * mailing list archives BEFORE sending another post to the list.
 *
 *  r0  = cp#15 control register
 *  r1  = machine ID
 *  r2  = atags or dtb pointer
 *  r9  = processor ID
 *  r13 = *virtual* address to jump to upon completion
 *
 * other registers depend on the function called upon completion
 */
    .align    5
    .pushsection    .idmap.text, "ax"
ENTRY(__turn_mmu_on)
    mov    r0, r0
    instr_sync
    mcr    p15, 0, r0, c1, c0, 0        @ write control reg
    mrc    p15, 0, r3, c0, c0, 0        @ read id reg
    instr_sync
    mov    r3, r3
    mov    r3, r13
    mov    pc, r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
    .popsection

在write control reg之后,mmu就被打开了,下条指令的PC值,指向的位置仍然是之前的物理地址+4。当cpu执行新的PC值指向的指令时,就要利用MMU访问内存,由于之前制作了虚拟地址和物理地址的同一映射,所以新的物理地址和虚拟地址相同,所以平滑过渡了打开mmu造成的地址映射问题。

2. Map our RAM from the start to the end of the kernel

    /*
     * Map our RAM from the start to the end of the kernel .bss section.
     */
    add    r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
    ldr    r6, =(_end - 1)
    orr    r3, r8, r7 //制作pgd表项内容
    add    r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1:    str    r3, [r0], #1 << PMD_ORDER //存储pgd表项
    add    r3, r3, #1 << SECTION_SHIFT
    cmp    r0, r6
    bls    1b

利用section方式,完成kernel线性区的映射,线性区的范围是PAGE_OFFSET到kernel .bss段的结束。

3. Then map boot params address in r2 if specified

/*
 * Then map boot params address in r2 if specified.
 * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
 */
mov    r0, r2, lsr #SECTION_SHIFT
movs    r0, r0, lsl #SECTION_SHIFT
subne    r3, r0, r8
addne    r3, r3, #PAGE_OFFSET
addne    r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
orrne    r6, r7, r0
strne    r6, [r3], #1 << PMD_ORDER
addne    r6, r6, #1 << SECTION_SHIFT
strne    r6, [r3]

利用section方式,映射了dtb空间,映射大小为两个sections(2MB),原因是避免dtb跨section的问题。

至此,临时映射表已经建完,之后call __enable_mmu 函数,调用 __turn_mmu_on 打开MMU,终于进入C world。

建立最终线性页表

| start_kernel  
\--+ setup_arch  
   \--+ paging_init
      \--+ map_lowmem
         \--+ create_mapping
            \--+ alloc_init_pud
               \--+ alloc_init_pmd
                  \--+ __map_init_section  

针对kernel线性区,重新做了一遍section-mapping

/*
 * Fill consecutive pmd entries with section descriptors for the virtual
 * range [addr, end).
 *
 * @pmd:  first pmd entry to write
 * @addr: start of the virtual range
 * @end:  end of the virtual range (exclusive)
 * @phys: physical address the first section maps to
 * @type: memory type; its prot_sect bits are OR'ed into every entry
 *
 * The loop only terminates when addr becomes exactly equal to end, so
 * end - addr must be a non-zero multiple of SECTION_SIZE.
 */
static void __init __map_init_section(pmd_t *pmd, unsigned long addr,
            unsigned long end, phys_addr_t phys,
            const struct mem_type *type)
{
    /* Remember the first entry; pmd itself is advanced by the loop. */
    pmd_t *p = pmd;

    do {
        /* One entry per section: physical base | section attribute bits. */
        *pmd = __pmd(phys | type->prot_sect);
        phys += SECTION_SIZE;
    } while (pmd++, addr += SECTION_SIZE, addr != end);

    /* Called once, on the first entry that was written. */
    flush_pmd_entry(p);
}

Welcome cole3's blog

Veröffentlicht am 2017-04-23

你好,欢迎来到cole3的个人技术博客

Hello World

Veröffentlicht am 2017-04-23

Welcome to Hexo! This is your very first post. Check documentation for more info. If you get any problems when using Hexo, you can find the answer in troubleshooting or you can ask me on GitHub.

Quick Start

Create a new post

1
$ hexo new "My New Post"

More info: Writing

Run server

1
$ hexo server

More info: Server

Generate static files

1
$ hexo generate

More info: Generating

Deploy to remote sites

1
$ hexo deploy

More info: Deployment

cole3

cole3

Keep going

4 Artikel
© 2017 cole3
Erstellt mit Hexo
Theme - NexT.Muse