linux ebpf - kprobe_do_sys_open

一、__x64_sys_open调用栈跟踪

为什么kprobe可以钩住__x64_sys_open,却不会执行kprobe注册的pre_handler/post_handler?

难道是因为系统调用open内核路径不调用__x64_sys_open()函数 ??

用ftrace跟踪下内核函数调用栈??有点麻烦,要不写个kprobe程序打印下调用栈,在哪里打点合适??文件打开操作会调用vfs层的vfs_open()函数。

获取的调用栈如下:

[90818.393752] RIP: 0010:vfs_open+0x1/0x40
[90818.393757] Code: 8b 57 60 48 8b 77 58 48 8b 7f 28 48 89 e5 e8 36 ff ff ff 5d 48 98 c3 cc cc cc cc 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 e8 <2b> 21 84 2e 48 89 f8 55 48 89 f7 31 d2 4c 8b 00 4c 8b 48 08 4c 89
[90818.393761] RSP: 0018:ffffa6e780047c18 EFLAGS: 00000246 ORIG_RAX: 0000000000000000
[90818.393766] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000008000
[90818.393768] RDX: 0000000000008124 RSI: ffff9833433ecc00 RDI: ffffa6e780047d30
[90818.393771] RBP: ffffa6e780047c58 R08: 0000000000008000 R09: 61c8864680b583eb
[90818.393773] R10: 000070756f726763 R11: 0000000000000006 R12: ffffffff944814c0
[90818.393776] R13: ffffa6e780047e5c R14: ffffa6e780047d30 R15: ffff9833433ecc00
[90818.393780]  ? vfs_open+0x5/0x40
[90818.393785]  ? do_open.isra.0+0x20d/0x480
[90818.393790]  ? vfs_open+0x5/0x40
[90818.393794]  ? do_open.isra.0+0x20d/0x480
[90818.393799]  path_openat+0x18e/0xe50
[90818.393803]  ? import_iovec+0x31/0x40
[90818.393808]  ? sock_poll+0x84/0x110
[90818.393813]  do_filp_open+0xb2/0x120
[90818.393818]  ? __check_object_size+0x14f/0x160
[90818.393823]  do_sys_openat2+0x249/0x330
[90818.393828]  do_sys_open+0x46/0x80
[90818.393833]  __x64_sys_openat+0x20/0x30
[90818.393837]  do_syscall_64+0x5c/0xc0
[90818.393842]  ? syscall_exit_to_user_mode+0x27/0x50
[90818.393847]  ? __x64_sys_read+0x1a/0x20
[90818.393850]  ? do_syscall_64+0x69/0xc0
[90818.393854]  ? do_syscall_64+0x69/0xc0
[90818.393857]  ? do_syscall_64+0x69/0xc0
[90818.393860]  ? do_syscall_64+0x69/0xc0
[90818.393862]  entry_SYSCALL_64_after_hwframe+0x61/0xcb

从调用栈的结果来看,用户态open()陷入内核后,调用的函数是__x64_sys_openat,为什么不是__x64_sys_open?从以下内核符号表来看,open操作的入口存在两个类似的函数:

ffffffff929803c0 T __x64_sys_open
ffffffff92980420 T __x64_sys_openat

__x64_sys_openat 和 __x64_sys_open 是 Linux 内核中 openat 与 open 两个系统调用在 x86_64 上的入口函数。它们的区别在于:__x64_sys_openat 解析相对路径时以参数 dfd 指定的目录为起点,而 __x64_sys_open 解析相对路径时始终以当前工作目录为起点。

是不是可以理解为一个是全路径,一个是相对路径??

__x64_sys_openat 是一种以绝对路径或相对路径打开文件的方式,需要指定打开文件的完整路径和用于查找文件的基础目录(该目录由另一个文件描述符指定)。

  • 第一个参数是文件描述符,表示要在其下打开文件的目录(通常是当前目录,用 AT_FDCWD)
  • 第二个参数是文件路径名
  • 第三个参数是标志(例如标志指定 O_RDWR、O_CREAT等)。

__x64_sys_open 在相对于当前工作目录中打开给定路径的文件,可以接受相对路径

  • 第一个参数是文件路径名
  • 第二个参数是标志(例如标志指定 O_RDWR、O_CREAT等)。

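因此,open(path, flags, mode) 在效果上等价于 openat(AT_FDCWD, path, flags, mode),两者的差别只在于相对路径的起点能否由调用者指定。较新的 glibc 中 open() 库函数本身就是通过 openat 系统调用实现的,这也解释了为什么上面的调用栈里出现的是 __x64_sys_openat,而挂在 __x64_sys_open 上的 kprobe 不会被触发。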

二、do_sys_open函数原型以及参数说明

/*
 * Thin wrapper: packs flags and mode into a struct open_how and delegates
 * the actual open to do_sys_openat2().
 *
 * dfd:      directory file descriptor, forwarded unchanged
 * filename: user-space pointer to the path, forwarded unchanged
 * flags:    open flags, folded into the open_how by build_open_how()
 * mode:     mode bits, folded into the open_how by build_open_how()
 *
 * Returns whatever do_sys_openat2() returns.
 */
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
	struct open_how how = build_open_how(flags, mode);
	return do_sys_openat2(dfd, filename, &how);
}

do_sys_open 是 Linux 内核中负责处理 open 系统调用的函数。该函数的参数包括以下几个:

  • int dfd:解析相对路径时作为起点的目录文件描述符。传入 AT_FDCWD 表示以当前工作目录为起点;如果 filename 是绝对路径,该参数会被忽略。
  • const char __user *filename:要打开的文件的路径名。用户程序调用 open 系统调用时传递的参数。
  • int flags:打开文件时的标志。这些标志是文件访问模式和打开选项。
  • umode_t mode:当创建新文件时,指定文件权限。

flags 字段包含以下标志:

  • O_RDONLY:只读方式打开文件。
  • O_WRONLY:只写方式打开文件。
  • O_RDWR:读写方式打开文件。
  • O_CREAT:如果文件不存在,则创建文件。
  • O_EXCL:与 O_CREAT 一起使用,如果文件已经存在,则返回错误。
  • O_NOCTTY:如果路径名指向终端设备,则不将该设备与进程关联。
  • O_TRUNC:如果文件已经存在,将文件长度截断为 0。
  • O_APPEND:在写入文件时,总是将数据附加到文件末尾。
  • O_NONBLOCK:以非阻塞方式打开文件。
  • O_DIRECTORY:要求路径必须指向一个目录,否则打开失败(返回 ENOTDIR)。

mode 字段包含了创建新文件时的权限位。mode 参数只有在使用 O_CREAT 标志时才有意义;如果文件已经存在,将忽略此参数。新文件的实际权限为 mode & ~umask,即 mode 与进程 umask 按位取反后再按位与的结果。

do_sys_open 本身只是把 flags 和 mode 封装成 struct open_how,然后调用 do_sys_openat2。后者负责分配一个新的文件描述符,并经 do_filp_open → path_openat → do_open → vfs_open 打开文件(参见第一节的调用栈);如果文件不存在且指定了 O_CREAT,则在路径查找过程中创建文件。最后,内核把文件描述符与打开的文件关联起来,并将 fd 返回给用户程序。

三、编写ebpf代码

kprobe_do_sys_open.c

//go:build ignore

// #include "../headers/common.h"
// #include "../headers/arm64_vmlinux.h"
#include "../headers/vmlinux.h"
#include "../headers/bpf_tracing.h"
#include "../headers/bpf_helpers.h"

#define FILE_NMAE_LEN 256
#define COMM_NAME_LEN 16

char __license[] SEC("license") = "Dual MIT/GPL";

/* One observed open; this is the value type stored in info_map. */
struct open_info {
	u32 pid;                     /* upper 32 bits of bpf_get_current_pid_tgid() */
	int fd;                      /* never assigned by the probe in this file */
	u8 comm[COMM_NAME_LEN];      /* filled by bpf_get_current_comm() */
	u8 filename[FILE_NMAE_LEN];  /* bytes copied from the probed function's 2nd argument */
};

/*
 * Array map with u32 keys and struct open_info values, 256 slots.
 * The probe in this file only ever uses key 0.
 */
struct bpf_map_def SEC("maps") info_map = {
	.type        = BPF_MAP_TYPE_ARRAY,
	.key_size    = sizeof(u32),
	.value_size  = sizeof(struct open_info),
	.max_entries = 256,
};

// Force emitting struct open_info into the ELF.
const struct open_info *unused __attribute__((unused));

/*
 * kprobe on do_sys_open(): stores the caller's tgid, command name and the
 * path being opened into slot 0 of info_map, where user space polls it.
 *
 * ctx: register snapshot at function entry; the 2nd argument of the probed
 *      function is the user-space filename pointer.
 *
 * Always returns 0.
 */
SEC("kprobe/do_sys_open")
int kprobe_do_sys_open(struct pt_regs *ctx) {
	u32 key = 0;
	struct open_info *info;
	int ret;
	char fmt[] = "ret error %d\n";

	info = bpf_map_lookup_elem(&info_map, &key);
	if (!info) {
		return 0;
	}

	/*
	 * filename is a __user pointer to a NUL-terminated string. Read it with
	 * the user-string helper so the copy stops at the terminator instead of
	 * always pulling FILE_NMAE_LEN bytes, which can fault past the end of
	 * the string. Bytes after the terminator keep their previous contents;
	 * readers must stop at the first NUL.
	 */
	ret = bpf_probe_read_user_str(info->filename, FILE_NMAE_LEN, (void *)PT_REGS_PARM2(ctx));
	if (ret < 0) {
		bpf_trace_printk(fmt, sizeof(fmt), ret);
		return 0;
	}

	/* The tgid (user-visible process id) lives in the upper 32 bits. */
	info->pid = bpf_get_current_pid_tgid() >> 32;
	bpf_get_current_comm(&info->comm, COMM_NAME_LEN);

	/*
	 * info points straight at the map value, so the stores above are already
	 * in the map; no bpf_map_update_elem() call is needed.
	 */
	return 0;
}

main.go

package main

import (
	"flag"
	"log"
	"os"
	"time"

	"github.com/cilium/ebpf/link"
	"github.com/cilium/ebpf/rlimit"
	"golang.org/x/sys/unix"
)

// $BPF_CLANG and $BPF_CFLAGS are set by the Makefile.
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -cflags "-D__TARGET_ARCH_x86 -g" -no-strip -type open_info bpf kprobe_do_sys_open.c -- -I../headers

const mapKey uint32 = 0

// main loads the eBPF objects, attaches the program as a kprobe on
// do_sys_open and then polls slot 0 of the array map, logging whatever
// record is stored there at each tick.
func main() {
	var (
		filename string
		pid      uint
		help     bool
	)

	flag.StringVar(&filename, "filename", "", "设置需要跟踪的文件名")
	flag.UintVar(&pid, "pid", 0, "设置需要跟踪的pid号")
	flag.BoolVar(&help, "help", false, "显示使用帮助信息")
	flag.Parse()

	// Print usage and exit when -help is given or when neither -filename
	// nor -pid was supplied.
	noTarget := filename == "" && pid == 0
	if help || noTarget {
		flag.Usage()
		os.Exit(0)
	}

	// Kernel function the kprobe is attached to.
	const fn = "do_sys_open"

	// Lift the memlock limit so eBPF resources can be created.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal(err)
	}

	// Load the pre-compiled programs and maps into the kernel.
	var objs bpfObjects
	if err := loadBpfObjects(&objs, nil); err != nil {
		log.Fatalf("loading objects: %v", err)
	}
	defer objs.Close()

	// Attach the program at the entry of fn; from here on every call of the
	// kernel function overwrites the record in map slot 0.
	kp, err := link.Kprobe(fn, objs.KprobeDoSysOpen, nil)
	if err != nil {
		log.Fatalf("opening kprobe: %s", err)
	}
	defer kp.Close()

	// Interval at which the map is read back from the kernel. The map is
	// polled, so the same record can be seen on several consecutive ticks.
	poll := time.NewTicker(1 * time.Microsecond)
	defer poll.Stop()

	log.Println("Waiting for events..")

	for range poll.C {
		var rec bpfOpenInfo
		if err := objs.InfoMap.Lookup(mapKey, &rec); err != nil {
			log.Fatalf("reading map: %v", err)
		}

		// NOTE(review): -filename and -pid are parsed but not used for
		// filtering; the only filter is the hard-coded "node" command name.
		comm := unix.ByteSliceToString(rec.Comm[:])
		if comm == "node" {
			continue
		}

		path := unix.ByteSliceToString(rec.Filename[:])
		log.Printf("%s pid %d comm %s filename %s\n", fn, rec.Pid, comm, path)
	}
}

上述代码存在一个问题,设置读取map的时间间隔比较短,读取的map信息可能是上一次更新的数据。

# 同一条数据被读取了多次,也就是说取回的数据是上一次存放在map中的数据。
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat
2023/05/06 07:31:01 do_sys_open pid 11609 comm cat filename /proc/11448/stat

四、代码优化

以上代码采用的是BPF_MAP_TYPE_ARRAY类型的map,可以尝试使用BPF_MAP_TYPE_PERF_EVENT_ARRAY以事件的形式读取监控数据。

c部分代码实现如下,运行时居然报错loading objects: field KprobeDoSysOpen: program kprobe_do_sys_open: load program: permission denied: invalid indirect read from stack R4 off -280+4 size 280 (52 line(s) omitted)?从代码上看也看不出明显的问题?

/*
 * Perf-event variant of the probe. This is the version the loader rejects
 * with "invalid indirect read from stack R4 off -280+4 size 280": info is
 * declared without an initializer and info.fd is never written, yet the
 * whole struct is handed to bpf_perf_event_output().
 */
SEC("kprobe/do_sys_open")
int kprobe_do_sys_open(struct pt_regs *ctx) {
	u64 id   = bpf_get_current_pid_tgid();
	u32 tgid = id >> 32;
	/* Not initialized: only filename, pid and comm are written below. */
	struct open_info info;
	int ret = 0;
	/* Contains a single %d conversion. */
	char fmt[] = "ret error %d\n";

	ret = bpf_probe_read_user_str(info.filename, FILE_NMAE_LEN, (void *)PT_REGS_PARM2(ctx));
	if (ret < 0) {
		/* NOTE(review): two values are passed for the single %d in fmt. */
		bpf_trace_printk(fmt, sizeof(fmt), "bpf_probe_read", ret);
		return 0;
	}

	info.pid = tgid;
	bpf_get_current_comm(&info.comm, COMM_NAME_LEN);

	/* Sends all sizeof(info) bytes, including the unwritten fd field. */
	ret = bpf_perf_event_output(ctx, &info_map, 0, &info, sizeof(info));
	if (ret < 0) {
		/* NOTE(review): two values are passed for the single %d in fmt. */
		bpf_trace_printk(fmt, sizeof(fmt), "bpf_perf_event_output", ret);
		return 0;
	}
	
	return 0;
}

这不知道如何下手,那就先反汇编看看有什么异常!从汇编代码来看,一条eBPF指令的长度是8字节(加载64位立即数的指令占16字节)。

root@curtis-Aspire-E5-471G:/home/curtis/write_code/cilium-ebpf/do_sys_open_kprobe# llvm-objdump -S -d ./bpf_bpfel.o 

./bpf_bpfel.o:  file format elf64-bpf

Disassembly of section kprobe/do_sys_open:

0000000000000000 <kprobe_do_sys_open>:
# 计算重定位失败,解析文件时遇到无效数据....
warning: failed to compute relocation: R_BPF_64_32, Invalid data was encountered while parsing the file
warning: failed to compute relocation: R_BPF_64_32, Invalid data was encountered while parsing the file
warning: failed to compute relocation: R_BPF_64_32, Invalid data was encountered while parsing the file
warning: failed to compute relocation: R_BPF_64_32, Invalid data was encountered while parsing the file
...............
warning: failed to compute relocation: R_BPF_64_32, Invalid data was encountered while parsing the file
warning: failed to compute relocation: R_BPF_64_64, Invalid data was encountered while parsing the file
warning: failed to compute relocation: R_BPF_64_32, Invalid data was encountered while parsing the file
warning: failed to compute relocation: R_BPF_64_64, Invalid data was encountered while parsing the file
; int kprobe_do_sys_open(struct pt_regs *ctx) {
       0:       bf 16 00 00 00 00 00 00 r6 = r1
;       u64 id   = bpf_get_current_pid_tgid();
       1:       85 00 00 00 0e 00 00 00 call 14
       2:       bf 07 00 00 00 00 00 00 r7 = r0
       3:       b7 01 00 00 0a 00 00 00 r1 = 10
;       char fmt[] = "ret error %d\n";
       4:       6b 1a e4 fe 00 00 00 00 *(u16 *)(r10 - 284) = r1
       5:       b7 01 00 00 72 20 25 64 r1 = 1680154738
       6:       63 1a e0 fe 00 00 00 00 *(u32 *)(r10 - 288) = r1
       7:       18 01 00 00 72 65 74 20 00 00 00 00 65 72 72 6f r1 = 8030606864216778098 ll
       9:       7b 1a d8 fe 00 00 00 00 *(u64 *)(r10 - 296) = r1
;       ret = bpf_probe_read_user_str(info.filename, FILE_NMAE_LEN, (void *)PT_REGS_PARM2(ctx));
      10:       79 63 68 00 00 00 00 00 r3 = *(u64 *)(r6 + 104)
      11:       bf a1 00 00 00 00 00 00 r1 = r10
      12:       07 01 00 00 00 ff ff ff r1 += -256
      13:       b7 02 00 00 00 01 00 00 r2 = 256
      14:       85 00 00 00 72 00 00 00 call 114
      15:       bf 01 00 00 00 00 00 00 r1 = r0
      16:       67 01 00 00 20 00 00 00 r1 <<= 32
      17:       c7 01 00 00 20 00 00 00 r1 s>>= 32
;       if (ret < 0) {
      18:       65 01 06 00 ff ff ff ff if r1 s> -1 goto +6 <LBB0_2>
      19:       bf a1 00 00 00 00 00 00 r1 = r10
;               bpf_trace_printk(fmt, sizeof(fmt), "bpf_probe_read", ret);
      20:       07 01 00 00 d8 fe ff ff r1 += -296
      21:       b7 02 00 00 0e 00 00 00 r2 = 14
      22:       18 03 00 00 0e 00 00 00 00 00 00 00 00 00 00 00 r3 = 14 ll
      24:       05 00 17 00 00 00 00 00 goto +23 <LBB0_4>

00000000000000c8 <LBB0_2>:
;       u32 tgid = id >> 32;
      25:       77 07 00 00 20 00 00 00 r7 >>= 32
;       info.pid = tgid;
      26:       63 7a e8 fe 00 00 00 00 *(u32 *)(r10 - 280) = r7
;       bpf_get_current_comm(&info.comm, COMM_NAME_LEN);
      27:       bf a1 00 00 00 00 00 00 r1 = r10
      28:       07 01 00 00 f0 fe ff ff r1 += -272
      29:       b7 02 00 00 10 00 00 00 r2 = 16
      30:       85 00 00 00 10 00 00 00 call 16
      31:       bf a4 00 00 00 00 00 00 r4 = r10
;       u32 tgid = id >> 32;
      32:       07 04 00 00 e8 fe ff ff r4 += -280
;       ret = bpf_perf_event_output(ctx, &info_map, 0, &info, sizeof(info));
      33:       bf 61 00 00 00 00 00 00 r1 = r6
      34:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
      36:       b7 03 00 00 00 00 00 00 r3 = 0
      37:       b7 05 00 00 18 01 00 00 r5 = 280
      38:       85 00 00 00 19 00 00 00 call 25
      39:       bf 01 00 00 00 00 00 00 r1 = r0
      40:       67 01 00 00 20 00 00 00 r1 <<= 32
      41:       c7 01 00 00 20 00 00 00 r1 s>>= 32
;       if (ret < 0) {
      42:       65 01 07 00 ff ff ff ff if r1 s> -1 goto +7 <LBB0_5>
      43:       bf a1 00 00 00 00 00 00 r1 = r10
;               bpf_trace_printk(fmt, sizeof(fmt), "bpf_perf_event_output", ret);
      44:       07 01 00 00 d8 fe ff ff r1 += -296
      45:       b7 02 00 00 0e 00 00 00 r2 = 14
      46:       18 03 00 00 1d 00 00 00 00 00 00 00 00 00 00 00 r3 = 29 ll

0000000000000180 <LBB0_4>:
      48:       bf 04 00 00 00 00 00 00 r4 = r0
      49:       85 00 00 00 06 00 00 00 call 6

0000000000000190 <LBB0_5>:
; }
      50:       b7 00 00 00 00 00 00 00 r0 = 0
      51:       95 00 00 00 00 00 00 00 exit

为什么二进制文件中存在无效数据?仔细对比汇编代码与源码会发现,找不到 int ret、struct open_info info 的声明部分,**看来还是得学一学bpf的汇编**。从报错信息来看,是非法地间接读取了栈上的数据?

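最后发现问题出在 struct open_info info 只声明、没有初始化。从上面的汇编可以看到,栈空间其实已经分配(从 r10-280 开始的 280 字节),但 fd 字段(偏移 +4)从未被写入。bpf_perf_event_output 要读取整个结构体,而校验器不允许 helper 读取未初始化的栈内存,于是报 invalid indirect read from stack R4 off -280+4 size 280。根本原因是没有进行结构体的初始化!!!!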

修改代码kprobe_do_sys_open

//go:build ignore

// #include "../headers/common.h"
// #include "../headers/arm64_vmlinux.h"
#include "../headers/vmlinux.h"
#include "../headers/bpf_tracing.h"
#include "../headers/bpf_helpers.h"

#define FILE_NMAE_LEN 256
#define COMM_NAME_LEN 16

char __license[] SEC("license") = "Dual MIT/GPL";

/* One observed open; sent to user space as a single perf event record. */
struct open_info {
	u32 pid;                     /* upper 32 bits of bpf_get_current_pid_tgid() */
	int fd;                      /* never assigned by the probe in this file */
	u8 comm[COMM_NAME_LEN];      /* filled by bpf_get_current_comm() */
	u8 filename[FILE_NMAE_LEN];  /* string copied from the probed function's 2nd argument */
};

/*
 * Perf event array used to push struct open_info records to user space.
 * Declared in the BTF-style ".maps" section (this replaces the earlier
 * struct bpf_map_def declaration); only the map type is specified here.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
} info_map SEC(".maps");

// Force emitting struct open_info into the ELF.
const struct open_info *unused __attribute__((unused));

/*
 * kprobe on do_sys_open(): emits one struct open_info record (tgid, command
 * name, path being opened) to user space through the info_map perf event
 * array.
 *
 * ctx: register snapshot at function entry; the 2nd argument of the probed
 *      function is the user-space filename pointer.
 *
 * Always returns 0; helper failures are reported via bpf_trace_printk().
 */
SEC("kprobe/do_sys_open")
int kprobe_do_sys_open(struct pt_regs *ctx) {
	/*
	 * Zero-initialize the whole record: every byte handed to
	 * bpf_perf_event_output() must be initialized, including the fd field
	 * that this probe never sets.
	 */
	struct open_info info = {};
	int ret;
	char fmt[] = "ret error %d\n";

	ret = bpf_probe_read_user_str(info.filename, FILE_NMAE_LEN, (void *)PT_REGS_PARM2(ctx));
	if (ret < 0) {
		/* fmt has exactly one %d, so pass exactly one value. */
		bpf_trace_printk(fmt, sizeof(fmt), ret);
		return 0;
	}

	/* The tgid (user-visible process id) lives in the upper 32 bits. */
	info.pid = bpf_get_current_pid_tgid() >> 32;
	bpf_get_current_comm(&info.comm, COMM_NAME_LEN);

	/*
	 * BPF_F_CURRENT_CPU selects the perf event of the CPU we are running on.
	 * A literal 0 would select index 0 only, so calls made on any other CPU
	 * would fail.
	 */
	ret = bpf_perf_event_output(ctx, &info_map, BPF_F_CURRENT_CPU, &info, sizeof(info));
	if (ret < 0) {
		bpf_trace_printk(fmt, sizeof(fmt), ret);
	}

	return 0;
}

修改代码main.go

package main

import (
	"bytes"
	"encoding/binary"
	"errors"
	"flag"
	"log"
	"os"
	"os/signal"
	"syscall"

	"github.com/cilium/ebpf/link"
	"github.com/cilium/ebpf/perf"
	"github.com/cilium/ebpf/rlimit"
	"golang.org/x/sys/unix"
)

// $BPF_CLANG and $BPF_CFLAGS are set by the Makefile.
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -cflags "-D__TARGET_ARCH_x86 -g" -no-strip -type open_info bpf kprobe_do_sys_open.c -- -I../headers

const mapKey uint32 = 0

// main loads the eBPF objects, attaches the program as a kprobe on
// do_sys_open and logs every struct open_info record that arrives through
// the perf event array, until SIGINT/SIGTERM closes the reader.
func main() {
	var (
		filename string
		pid      uint
		help     bool
	)

	flag.StringVar(&filename, "filename", "", "设置需要跟踪的文件名")
	flag.UintVar(&pid, "pid", 0, "设置需要跟踪的pid号")
	flag.BoolVar(&help, "help", false, "显示使用帮助信息")
	flag.Parse()

	// Print usage and exit when -help is given or when neither -filename
	// nor -pid was supplied.
	noTarget := filename == "" && pid == 0
	if help || noTarget {
		flag.Usage()
		os.Exit(0)
	}

	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM)

	// Kernel function the kprobe is attached to.
	const fn = "do_sys_open"

	// Lift the memlock limit so eBPF resources can be created.
	if err := rlimit.RemoveMemlock(); err != nil {
		log.Fatal(err)
	}

	// Load the pre-compiled programs and maps into the kernel.
	var objs bpfObjects
	if err := loadBpfObjects(&objs, nil); err != nil {
		log.Fatalf("loading objects: %v", err)
	}
	defer objs.Close()

	// Attach the program at the entry of fn; each call of the kernel
	// function then emits one record into the perf event array.
	kp, err := link.Kprobe(fn, objs.KprobeDoSysOpen, nil)
	if err != nil {
		log.Fatalf("opening kprobe: %s", err)
	}
	defer kp.Close()

	// User-space reader on the PERF_EVENT_ARRAY map declared in the C program.
	rd, err := perf.NewReader(objs.InfoMap, os.Getpagesize())
	if err != nil {
		log.Fatalf("creating perf event reader: %s", err)
	}
	defer rd.Close()

	// On a signal, close the reader; that unblocks rd.Read() below with
	// perf.ErrClosed and lets main return.
	go func() {
		<-sigCh
		log.Println("Received signal, exiting program..")

		if err := rd.Close(); err != nil {
			log.Fatalf("closing perf event reader: %s", err)
		}
	}()

	log.Printf("Listening for events..")

	// bpfOpenInfo is generated by bpf2go from struct open_info.
	var info bpfOpenInfo
	for {
		record, err := rd.Read()
		switch {
		case errors.Is(err, perf.ErrClosed):
			return
		case err != nil:
			log.Printf("reading from perf event reader: %s", err)
			continue
		}

		if lost := record.LostSamples; lost != 0 {
			log.Printf("perf event ring buffer full, dropped %d samples", lost)
			continue
		}

		// Decode the raw sample into the generated struct.
		raw := bytes.NewBuffer(record.RawSample)
		if err := binary.Read(raw, binary.LittleEndian, &info); err != nil {
			log.Printf("parsing perf event: %s", err)
			continue
		}

		// NOTE(review): -filename and -pid are parsed but not used for
		// filtering; the only filter is the hard-coded "node" command name.
		comm := unix.ByteSliceToString(info.Comm[:])
		if comm == "node" {
			continue
		}

		path := unix.ByteSliceToString(info.Filename[:])
		log.Printf("%s pid %d comm %s filename %s\n", fn, info.Pid, comm, path)
	}
}

运行试试看看,ebpf程序加载成功并成功读取打开文件的全路径。

root@curtis-Aspire-E5-471G:/home/curtis/write_code/cilium-ebpf/do_sys_open_kprobe# ./vfs_write_moitor -pid=10
2023/05/06 22:46:52 Listening for events..
2023/05/06 22:46:53 do_sys_open pid 23702 comm sh filename /etc/ld.so.cache
2023/05/06 22:46:53 do_sys_open pid 23702 comm sh filename /lib/x86_64-linux-gnu/libc.so.6
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /etc/ld.so.cache
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/libprocps.so.8
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/libdl.so.2
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/libc.so.6
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/libsystemd.so.0
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/librt.so.1
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/liblzma.so.5
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/liblz4.so.1
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/libgcrypt.so.20
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/libpthread.so.0
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /lib/x86_64-linux-gnu/libgpg-error.so.0
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /proc/self/auxv
2023/05/06 22:46:53 do_sys_open pid 23705 comm ps filename /proc/sys/kernel/osrelease

五、尝试在ARM架构运行

运行环境:

root@curtis:/home/curtis# uname -a
Linux curtis 5.15.0-70-generic #77-Ubuntu SMP Tue Mar 21 15:58:51 UTC 2023 aarch64 aarch64 aarch64 GNU/Linux

如何在x86架构上编译arm架构bpf程序

  • 修改go generate命令

    //go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang -cflags "-D__TARGET_ARCH_arm64 -g" -no-strip -type open_info bpf kprobe_do_sys_open.c -- -I../headers
    
  • 修改go build命令

    GOARCH=arm64 go build
    

编译后加载程序发现没有抓到打开文件的相关信息??

# 没有打印任何的出错信息?
root@curtis:/home/curtis# cat /sys/kernel/debug/tracing/trace_pipe

# kprobe钩子已经被挂上
root@curtis:/home/curtis# cat /sys/kernel/debug/kprobes/list
ffff8000084d8c00  k  do_sys_open+0x0

root@curtis:/home/curtis# cat /proc/kallsyms | grep do_sys_open
ffff8000084d84d0 t do_sys_openat2
ffff8000084d8c00 T do_sys_open

那可能的原因是ARM架构文件打开流程没有经过do_sys_open这个内核函数(例如它在调用处被编译器内联了),arm架构和x86架构存在区别??无论怎样,都应该经过vfs层吧,写个kprobe驱动打印调用栈看看。。

直接将kprobe的函数换成do_sys_openat2,成功抓取到数据。

root@curtis:/home/curtis# ./vfs_write_moitor -pid=10
2023/05/06 23:59:24 Listening for events..
2023/05/07 00:00:11 do_sys_openat2 pid 3749 comm vfs_write_moito filename /sys/bus/event_source/devices/kprobe/type
2023/05/07 00:00:11 do_sys_openat2 pid 3749 comm vfs_write_moito filename /etc/localtime
2023/05/07 00:00:33 do_sys_openat2 pid 3794 comm basename filename /etc/ld.so.cache
2023/05/07 00:00:33 do_sys_openat2 pid 3794 comm basename filename /lib/aarch64-linux-gnu/libc.so.6
2023/05/07 00:00:33 do_sys_openat2 pid 3794 comm basename filename /usr/lib/locale/locale-archive
2023/05/07 00:00:33 do_sys_openat2 pid 3796 comm dirname filename /etc/ld.so.cache
2023/05/07 00:00:33 do_sys_openat2 pid 3796 comm dirname filename /lib/aarch64-linux-gnu/libc.so.6
2023/05/07 00:00:33 do_sys_openat2 pid 3796 comm dirname filename /usr/lib/locale/locale-archive
2023/05/07 00:00:33 do_sys_openat2 pid 3797 comm basename filename /etc/ld.so.cache
2023/05/07 00:00:33 do_sys_openat2 pid 3797 comm basename filename /lib/aarch64-linux-gnu/libc.so.6
2023/05/07 00:00:33 do_sys_openat2 pid 3797 comm basename filename /usr/lib/locale/locale-archive
2023/05/07 00:00:33 do_sys_openat2 pid 3798 comm rm filename /etc/ld.so.cache
2023/05/07 00:00:35 Received signal, exiting program..

但是,为什么我自己vim kk.c没有抓到??抓一下bpf程序日志cat /sys/kernel/debug/tracing/trace_pipe发现有报错。(注意:下面打印的数值并不是真正的错误码——当时的代码向只含一个 %d 的格式串多传了一个字符串参数,%d 打印出来的是该字符串的地址。)

             vim-3841    [003] d...1  2864.120835: bpf_trace_printk: ret error -657933523
             vim-3841    [003] d...1  2864.122074: bpf_trace_printk: ret error -657933523
             vim-3841    [003] d...1  2864.124141: bpf_trace_printk: ret error -657933523
             vim-3841    [003] d...1  2864.129378: bpf_trace_printk: ret error -657933523

bpf_perf_event_output() 是 eBPF 提供的 helper 函数之一,用于在 eBPF 程序中通过 perf 缓冲区向用户空间发送数据。该函数的参数解析如下:

  1. ctx参数:程序的上下文指针,对 kprobe 程序来说就是传入的 struct pt_regs *。
  2. map参数:指向 BPF_MAP_TYPE_PERF_EVENT_ARRAY 类型 map 的指针。在 eBPF 程序里直接写 &info_map,而不是文件描述符。
  3. flags参数:低32位(BPF_F_INDEX_MASK)用来选择 map 中的下标,即数据写入哪一个 perf 事件:
    • BPF_F_CURRENT_CPU:使用当前 CPU 编号作为下标,这是最常见的用法。
    • 传入具体数值(例如 0):固定使用该下标对应的 perf 事件。用户态通常为每个 CPU 各打开一个 perf 事件并按 CPU 编号存入 map,因此传 0 时只有运行在 CPU 0 上的调用能成功,其他 CPU 上的调用会返回错误——这正是上面 vim(运行在 CPU 3 上,见日志中的 [003])抓不到的原因。
  4. data参数:指向要发送给用户空间的数据的指针,类型为 void *。
  5. size参数:要发送的数据大小,以字节为单位。

函数成功时返回 0,失败时返回负的错误码。

/* Flag constants accepted by bpf_perf_event_output(), as listed here. */
enum {
	BPF_F_INDEX_MASK = 4294967295,   /* 0xffffffff */
	BPF_F_CURRENT_CPU = 4294967295,  /* same value as BPF_F_INDEX_MASK */
	BPF_F_CTXLEN_MASK = 0,           /* NOTE(review): listed as 0 here; verify against the kernel UAPI header */
};

改下函数参数,成功抓到。

/* BPF_F_CURRENT_CPU: write to the perf event of the CPU we are running on. */
ret = bpf_perf_event_output(ctx, &info_map, BPF_F_CURRENT_CPU, &info, sizeof(info));
if (ret < 0) {
    /* fmt ("ret error %d\n") has a single %d, so pass only ret. */
    bpf_trace_printk(fmt, sizeof(fmt), ret);
    return 0;
}
root@curtis:/home/curtis# ./vfs_write_moitor -pid=10
2023/05/07 00:10:15 Listening for events..
2023/05/07 00:10:15 do_sys_openat2 pid 3906 comm vfs_write_moito filename /etc/localtime
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/interrupts
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/stat
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/76/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/80/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/77/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/56/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/57/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/10/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/12/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/49/smp_affinity
2023/05/07 00:10:24 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/50/smp_affinity
2023/05/07 00:10:31 do_sys_openat2 pid 1 comm systemd filename /proc/469/cgroup
2023/05/07 00:10:31 do_sys_openat2 pid 1 comm systemd filename /proc/710/cgroup
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/interrupts
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/stat
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/76/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/80/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/77/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/56/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/57/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/10/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/12/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/49/smp_affinity
2023/05/07 00:10:34 do_sys_openat2 pid 730 comm irqbalance filename /proc/irq/50/smp_affinity
2023/05/07 00:10:40 do_sys_openat2 pid 3919 comm cat filename /etc/ld.so.cache
2023/05/07 00:10:40 do_sys_openat2 pid 3919 comm cat filename /lib/aarch64-linux-gnu/libc.so.6
2023/05/07 00:10:40 do_sys_openat2 pid 3919 comm cat filename /usr/lib/locale/locale-archive
2023/05/07 00:10:40 do_sys_openat2 pid 3919 comm cat filename kk.c <-----

你可能感兴趣的:(linux,ebpf,linux,chrome,运维)