用户态:ioctl 最终通过 int 0x80 陷入内核。
系统调用号定义在内核头文件 arch/x86/include/asm/unistd_32.h 中:
/* System call number of ioctl in the 32-bit x86 table (unistd_32.h). */
#define __NR_ioctl 54
glibc 中的调用链:
ioctl
==> __ioctl
==> INLINE_SYSCALL (ioctl, 3, fd, request, arg); // 每种 CPU 架构都有自己的定义,下面是 x86 的定义:
/*
 * INLINE_SYSCALL - issue system call `name` with `nr` arguments and apply
 * the errno convention to the result.
 *
 * The raw result comes from INTERNAL_SYSCALL.  If INTERNAL_SYSCALL_ERROR_P
 * reports it as an error (the branch is hinted as unlikely through
 * __builtin_expect), errno is set with __set_errno from
 * INTERNAL_SYSCALL_ERRNO and the result is replaced by 0xffffffff, which
 * the final (int) cast yields as -1 for a 32-bit int.  Otherwise the raw
 * result is the value of the whole expression.
 */
#define INLINE_SYSCALL(name, nr, args...) \
({ \
unsigned int resultvar = INTERNAL_SYSCALL (name, , nr, args); \
if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (resultvar, ), 0)) \
{ \
__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar, )); \
resultvar = 0xffffffff; \
} \
(int) resultvar; })
/* Define a macro which expands inline into the wrapper code for a system
call. This use is for internal calls that do not need to handle errors
normally. It will never touch errno. This returns just what the kernel
gave back.
The _NCS variant allows non-constant syscall numbers but it is not
possible to use more than four parameters.

How the expansion below works:
- "movl %1, %%eax" loads operand 1, the immediate __NR_##name (constraint
  "i"), into eax;
- "int $0x80" raises the software interrupt that enters the kernel;
- the "=a" output constraint reads the result back from eax into resultvar;
- "memory" and "cc" are listed as clobbers.
EXTRAVAR_##nr, LOADARGS_##nr, RESTOREARGS_##nr and ASMFMT_##nr are selected
by the argument count nr; their definitions are not part of this excerpt
(presumably they move the arguments into registers -- TODO confirm against
the i386 sysdep.h). The err parameter is not referenced in this expansion. */
# define INTERNAL_SYSCALL(name, err, nr, args...) \
({ \
register unsigned int resultvar; \
EXTRAVAR_##nr \
asm volatile ( \
LOADARGS_##nr \
"movl %1, %%eax\n\t" \
"int $0x80\n\t" \
RESTOREARGS_##nr \
: "=a" (resultvar) \
: "i" (__NR_##name) ASMFMT_##nr(args) : "memory", "cc"); \
(int) resultvar; })
传入的 __NR_ioctl 即系统调用号,它被装入 eax 寄存器,随后通过 int 0x80 陷入内核。
内核实现:
1、int 0x80 对应的中断处理函数是 system_call:
/* Install system_call as the handler for vector SYSCALL_VECTOR (0x80). */
set_system_trap_gate(SYSCALL_VECTOR, &system_call); //SYSCALL_VECTOR=0x80
/*
 * system_call - handler entered through the int $0x80 gate (excerpt).
 *
 * Entry: eax holds the system call number.  It is pushed as orig_eax,
 * SAVE_ALL stores the remaining registers, and GET_THREAD_INFO leaves the
 * thread_info pointer in ebp.
 *
 * Dispatch: if any _TIF_WORK_SYSCALL_ENTRY flag is set, control goes to
 * syscall_trace_entry.  A number at or above nr_syscalls goes to
 * syscall_badsys.  Otherwise the handler is fetched from sys_call_table
 * using eax as the index (4 bytes per entry) and called; its return value
 * is written to the saved-eax slot PT_EAX(%esp).
 *
 * Exit: with interrupts disabled, the thread flags are re-read and any bit
 * in _TIF_ALLWORK_MASK diverts to syscall_exit_work.  The excerpt stops
 * here; the code that follows the final branch is not shown.
 */
ENTRY(system_call)
	RING0_INT_FRAME			# can't unwind into user space anyway
	pushl %eax			# save orig_eax
	CFI_ADJUST_CFA_OFFSET 4
	SAVE_ALL
	GET_THREAD_INFO(%ebp)
					# system call tracing in operation / emulation
	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
	jnz syscall_trace_entry
	cmpl $(nr_syscalls), %eax
	jae syscall_badsys
syscall_call:
	call *sys_call_table(,%eax,4)
	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
	LOCKDEP_SYS_EXIT
	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
					# setting need_resched or sigpending
					# between sampling and the iret
	TRACE_IRQS_OFF
	movl TI_flags(%ebp), %ecx
	testl $_TIF_ALLWORK_MASK, %ecx	# current->work
	jne syscall_exit_work
/*
 * Excerpt of the system call table, starting at entry 50.  Entry N holds
 * the handler for system call number N; the dispatcher indexes this table
 * with eax (call *sys_call_table(,%eax,4)).
 */
ENTRY(sys_call_table)
.long sys_getegid16/* 50 */
.long sys_acct /* 51 */
.long sys_umount/* 52: recycled never used phys() */
.long sys_ni_syscall/* 53: old lock syscall holder */
.long sys_ioctl /* 54: matches __NR_ioctl */
.long sys_fcntl/* 55 */
ioctl 为 54 号系统调用,因此 sys_call_table 的第 54 项就是 sys_ioctl。
2、sys_ioctl 的定义:
/*
 * ioctl system call entry.
 *
 * Resolves fd with fdget(); a descriptor without a file yields -EBADF.
 * The request is first passed to security_file_ioctl(); only when that
 * returns 0 is it forwarded to do_vfs_ioctl().  The reference taken by
 * fdget() is released with fdput() before returning the resulting code.
 */
SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
{
	struct fd f = fdget(fd);
	int ret;

	if (!f.file)
		return -EBADF;

	ret = security_file_ioctl(f.file, cmd, arg);
	if (ret)
		goto out_put;

	ret = do_vfs_ioctl(f.file, fd, cmd, arg);

out_put:
	fdput(f);
	return ret;
}
/*
 * SYSCALL_DEFINE3(name, ...) forwards to SYSCALL_DEFINEx with an argument
 * count of 3 and the name prefixed with '_' (ioctl becomes _ioctl).
 */
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
/*
 * __SYSCALL_DEFINEx(x, name, ...) expands to, in order:
 *   - a prototype for sys##name using the declared parameter types
 *     (__SC_DECL);
 *   - a prototype for the static inline SYSC##name, same parameter types;
 *   - the prototype and the definition of SyS##name, whose parameter list
 *     is built with __SC_LONG.  SyS##name calls SYSC##name with each
 *     argument passed through __SC_CAST, then expands __SC_TEST and
 *     __PROTECT, and returns the value SYSC##name produced.
 * With name = _ioctl the three symbols are sys_ioctl, SYSC_ioctl and
 * SyS_ioctl.
 *
 * NOTE(review): SYSCALL_DEFINEx (the step between the two macros here) and
 * the __MAP / __SC_* / __PROTECT helpers are not part of this excerpt.
 * Upstream the expansion presumably ends with the header of SYSC##name so
 * that the braces written after SYSCALL_DEFINE3(...) become its body --
 * confirm against include/linux/syscalls.h.
 */
#define __SYSCALL_DEFINEx(x, name, ...) \
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
{ \
long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
__MAP(x,__SC_TEST,__VA_ARGS__);\
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__));\
return ret; \
}
3、vfs_ioctl(sys_ioctl 调用 do_vfs_ioctl,后者对非通用命令再调用 vfs_ioctl,最终进入驱动注册的 unlocked_ioctl/ioctl 回调)
/*
 * vfs_ioctl - pass an ioctl request to the file's own handler.
 * @filp: open file the request is issued on
 * @cmd:  ioctl command number
 * @arg:  command argument, forwarded untouched
 *
 * Returns -ENOTTY when the file has no f_op table or neither handler is
 * set.  When unlocked_ioctl exists it is preferred and its result is
 * returned, except that -ENOIOCTLCMD is reported as -EINVAL.  Otherwise
 * the legacy ioctl handler is called between lock_kernel() and
 * unlock_kernel() and its result is returned as is.
 */
static long vfs_ioctl(struct file *filp, unsigned int cmd,
		      unsigned long arg)
{
	int ret;

	if (!filp->f_op)
		return -ENOTTY;

	if (filp->f_op->unlocked_ioctl) {
		ret = filp->f_op->unlocked_ioctl(filp, cmd, arg);
		return (ret == -ENOIOCTLCMD) ? -EINVAL : ret;
	}

	if (!filp->f_op->ioctl)
		return -ENOTTY;

	/* Legacy handler: called with the inode and under the kernel lock. */
	lock_kernel();
	ret = filp->f_op->ioctl(filp->f_path.dentry->d_inode, filp, cmd, arg);
	unlock_kernel();

	return ret;
}
其他常见系统调用的入口及其调用的内核函数:
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
==> vfs_read
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count)
==> vfs_write
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
==> do_sys_open
SYSCALL_DEFINE1(close, unsigned int, fd)
==> filp_close
参考:http://bbs.byr.cn/#!article/Linux/97022