进程使用标准库例程,库例程接下来调用内核函数,最终,由内核负责在各个请求进程之间公平而且流畅地共享资源和服务
#include
int main()
{
FILE *fp = NULL;
// w 打开一个文本文件,允许写入文件。如果文件不存在,则会创建一个新文件
fp = fopen("test.txt", "w");
fprintf(fp, "test\n");
fclose(fp);
}
编译,追踪
uname -a
Linux 5.11.0-27-generic #29~20.04.1-Ubuntu SMP Wed Aug 11 15:58:17 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux
gcc write_test.c -o write_test
ldd write_test # ldd 用于打印程序或者库文件所依赖的共享库列表
# 待补
用户层进入内核态执行系统调用函数,通过异常方式(库函数完成),将当前系统调用函数的调用号放入x8
寄存器,然后使用svc
指令,发起同步异常。参考[1]
Supervisor Call causes an exception to be taken to EL1.On executing an SVC instruction, the PE records the exception as a Supervisor Call exception in ESR_ELx, using the EC value 0x15
, and the value of the immediate argument.
以下源码参考:linux-5.10
// arch/arm64/kernel/entry.S
// 中断向量入口:
SYM_CODE_START(vectors)
kernel_ventry 1, sync_invalid // Synchronous EL1t
kernel_ventry 1, irq_invalid // IRQ EL1t
kernel_ventry 1, fiq_invalid // FIQ EL1t
kernel_ventry 1, error_invalid // Error EL1t
kernel_ventry 1, sync // Synchronous EL1h
kernel_ventry 1, irq // IRQ EL1h
kernel_ventry 1, fiq_invalid // FIQ EL1h
kernel_ventry 1, error // Error EL1h
kernel_ventry 0, sync // Synchronous 64-bit EL0 ,同步异常处理入口: 包括系统调用
kernel_ventry 0, irq // IRQ 64-bit EL0
kernel_ventry 0, fiq_invalid // FIQ 64-bit EL0
kernel_ventry 0, error // Error 64-bit EL0
// kernel_ventry 宏处理过程
.macro kernel_ventry, el, label, regsize = 64
.align 7
sub sp, sp, #S_FRAME_SIZE
b el\()\el\()_\label // 展开为: b el0_sync // 跳转到el0_sync
.endm
SYM_CODE_START_LOCAL_NOALIGN(el0_sync)
kernel_entry 0 // 保存用户态在寄存器数据
mov x0, sp
bl el0_sync_handler // el0_sync 处理函数
b ret_to_user
SYM_CODE_END(el0_sync)
// arch/arm64/kernel/entry-common.c
asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs) {
unsigned long esr = read_sysreg(esr_el1);
switch (ESR_ELx_EC(esr)) {
// arch/arm64/include/asm :
// #define ESR_ELx_EC_SVC64 (0x15)
case ESR_ELx_EC_SVC64:
el0_svc(regs);
break;
... // 其他异常
}
}
static void noinstr el0_svc(struct pt_regs *regs) {
...
do_el0_svc(regs);
}
// arch/arm64/kernel/syscall.c
void do_el0_svc(struct pt_regs *regs){
sve_user_discard();
// __NR_syscalls 系统调用总数
// sys_call_table 系统调用表 它每个系统调用的size是.long,即4byte
el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
}
static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
const syscall_fn_t syscall_table[]) {
unsigned long flags = current_thread_info()->flags;
regs->orig_x0 = regs->regs[0];
regs->syscallno = scno;
....
invoke_syscall(regs, scno, sc_nr, syscall_table);
....
}
static void invoke_syscall(struct pt_regs *regs, unsigned int scno,
unsigned int sc_nr,
const syscall_fn_t syscall_table[])
{
long ret;
if (scno < sc_nr) {
syscall_fn_t syscall_fn;
syscall_fn = syscall_table[array_index_nospec(scno, sc_nr)];
ret = __invoke_syscall(regs, syscall_fn);
} else {
// 未定义的系统调用,返回—ENOSYS
ret = do_ni_syscall(regs, scno);
}
if (is_compat_task())
ret = lower_32_bits(ret);
regs->regs[0] = ret;
}
static long __invoke_syscall(struct pt_regs *regs, syscall_fn_t syscall_fn){
return syscall_fn(regs);
}
// arch/arm64/include/asm/ptrace.h
struct pt_regs {
union {
struct user_pt_regs user_regs;
struct {
u64 regs[31];
u64 sp;
u64 pc;
u64 pstate;
};
};
u64 orig_x0;
#ifdef __AARCH64EB__
u32 unused2;
s32 syscallno;
#else
s32 syscallno;
u32 unused2;
#endif
u64 orig_addr_limit;
/* Only valid when ARM64_HAS_IRQ_PRIO_MASKING is enabled. */
u64 pmr_save;
u64 stackframe[2];
/* Only valid for some EL1 exceptions. */
u64 lockdep_hardirqs;
u64 exit_rcu;
};
// arch/arm64/include/asm/syscall.h
typedef long (*syscall_fn_t)(const struct pt_regs *regs);
// arch/arm64/kernel/sys.c
#undef __SYSCALL
#define __SYSCALL(nr, sym) asmlinkage long __arm64_##sym(const struct pt_regs *);
#include
// 对于ARM64架构,头文件“asm/unistd.h”是“arch/arm64/include/asm/unistd.h”。
#undef __SYSCALL
#define __SYSCALL(nr, sym) [nr] = __arm64_##sym,
const syscall_fn_t sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall,
#include
};
// arch/arm64/include/asm/unistd.h
#include
#define NR_syscalls (__NR_syscalls)
#define __ARCH_WANT_RENAMEAT
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
#define __ARCH_WANT_TIME32_SYSCALLS
#define __ARCH_WANT_SYS_CLONE3
#include
// include/uapi/asm-generic/unistd.h
#define __NR_io_setup 0
__SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup)
#define __NR_io_destroy 1
__SYSCALL(__NR_io_destroy, sys_io_destroy)
....
#define __NR_syscalls 441
通过上面文件的展开, 系统调用表为:
const syscall_fn_t sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = __arm64_sys_ni_syscall,__arm64_compat_sys_io_setup,__arm64_sys_io_destroy,......
};
系统调用宏
// arch/arm64/include/asm/syscall_wrapper.h
#define __SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long __arm64_sys##name(const struct pt_regs *regs); \
ALLOW_ERROR_INJECTION(__arm64_sys##name, ERRNO); \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
asmlinkage long __arm64_sys##name(const struct pt_regs *regs) \
{ \
return __se_sys##name(SC_ARM64_REGS_TO_ARGS(x,__VA_ARGS__)); \
} \
static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \
ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \
asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused)
// include/linux/syscalls.h
#ifndef SYSCALL_DEFINE0
#define SYSCALL_DEFINE0(sname) \
SYSCALL_METADATA(_##sname, 0); \
asmlinkage long sys_##sname(void); \
ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \
asmlinkage long sys_##sname(void)
#endif /* SYSCALL_DEFINE0 */
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE_MAXARGS 6
#define SYSCALL_DEFINEx(x, sname, ...) \
SYSCALL_METADATA(sname, x, __VA_ARGS__) \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
对于 write
系统调用宏展开
#define __NR_write 64 __SYSCALL(__NR_write, sys_write)
asmlinkage long __arm64_sys_write(const struct pt_regs *);
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) {
return ksys_write(fd, buf, count);
}