限制:
1) 被监控线程需要绑定在某个cpu
2) 调度历史最多1024个
3) 被监控线程的调度延时,粒度是以tick为单位
4) 此代码未经严格测试
用法:
1) 修改sched.c中TASK_MONITOR_NAME和TASK_MONITOR_LEN为相应(被监控)进程的名字和名字长度
2) echo 20 > /proc/sys/kernel/default_target_timeout (指定监控延时>=20个ticks)
3) echo 0 > /proc/sys/kernel/cpu_check (指定监控cpu0上的切换历史)
4) echo 1 > /proc/sys/kernel/monitor_thread (开始监控)
修改记录:
kernel/sched.c:
#if 1
/* One entry of the switch history kept by the schedule() hook. */
typedef struct task_preempt_pid{
/* pid of the task that was switched out (prev->pid). */
pid_t pid;
/* rq->clock - prev->se.exec_start, sampled when the task was switched out. */
u64 exc_time_per;
}task_preempt_pid;
/* jiffies_64 at the moment the monitored task was switched out; cleared after a dump. */
u64 sched_target_out_jiffies;
#define TASK_NUM 1024
/* NOTE(review): not referenced anywhere in the visible code. */
static int index_task = 0;
task_preempt_pid t_p_p[TASK_NUM]; // switch-history ring buffer, 1024 entries
/* NOTE(review): not referenced anywhere in the visible code. */
int notify_dump = 0;
/* Ring indices: index_start is the first recorded entry, index_end the next slot to write. */
int index_start=0;
int index_end=0;
/* Task comm is compared against this name over TASK_MONITOR_LEN characters (prefix match). */
#define TASK_MONITOR_NAME "TEST"
#define TASK_MONITOR_LEN 4
/* Exposed as sysctl "monitor_thread"; the hooks only act while it is 1. */
int sysctl_monitor_thread = 0;
/* 1 while the history of the current switch-out is being recorded. */
int recording_task=0;
/* Exposed as sysctl "cpu_check"; only schedule() calls on this CPU are recorded. */
int sysctl_cpu_check = 0;
/*
 * Print the recorded switch history, newest entry first, then reset the
 * ring (index_start = index_end) and stop recording.
 *
 * Called when the timer hook detects that the monitored task has been
 * switched out for longer than the configured timeout.
 *
 * index_end is the NEXT slot to be written, so the newest valid entry is
 * the one just before it. Starting the walk at index_end itself (as the
 * previous version did) printed one stale slot first and, when the ring
 * was exactly full (index_end == index_start), printed a single entry
 * instead of all TASK_NUM.
 *
 * Limitation: if more than TASK_NUM entries were recorded, the entry at
 * index_start has been overwritten and only the entries between
 * index_start and index_end are printed.
 */
void dump_all_preempt_task(void)
{
	int idx;

	/*
	 * Entries are only added while recording_task is set, and every reset
	 * clears it, so recording_task == 0 means there is nothing to print.
	 */
	if (recording_task) {
		idx = (index_end + TASK_NUM - 1) % TASK_NUM;
		for (;;) {
			printk("[pid:%d,time:0x%llx]\n",
			       t_p_p[idx].pid,
			       (unsigned long long)t_p_p[idx].exc_time_per);
			if (idx == index_start)
				break;
			idx = (idx + TASK_NUM - 1) % TASK_NUM;
		}
	}

	index_start = index_end;
	recording_task = 0;
}
#endif
asmlinkage void __sched schedule(void)
{
struct task_struct *prev, *next;
long *switch_count;
struct rq *rq;
int cpu;
spin_lock_irq(&rq->lock);
clear_tsk_need_resched(prev);
__update_rq_clock(rq);
#if 1
u64 prev_exc_time = rq->clock - prev->se.exec_start;
#endif
...
#if 1
/*
 * Switch-history hook: active only while monitoring is enabled and this
 * schedule() call runs on the CPU selected through sysctl_cpu_check.
 */
if((sysctl_monitor_thread==1)&&(cpu==sysctl_cpu_check)){
if(recording_task){
/* comm is compared over TASK_MONITOR_LEN characters only (prefix match). */
if(!strncmp(next->comm,TASK_MONITOR_NAME,TASK_MONITOR_LEN)){
/* The monitored task is about to run again: discard the history. */
index_start = index_end;
recording_task=0;
}else{/* still waiting: record the task that is being switched out */
t_p_p[index_end].exc_time_per = prev_exc_time;
t_p_p[index_end].pid = prev->pid;
index_end=(index_end+1)%TASK_NUM;
}
}else if((!strncmp(prev->comm,TASK_MONITOR_NAME,TASK_MONITOR_LEN))){
/*
 * The monitored task is being switched out: stamp the time, store the
 * task itself as the first history entry and start recording.
 * NOTE(review): nothing here checks prev->state, so a voluntary sleep
 * starts the timer exactly like a preemption - confirm this is intended.
 */
sched_target_out_jiffies = jiffies_64;
t_p_p[index_end].exc_time_per = prev_exc_time;
t_p_p[index_end].pid = prev->pid;
index_end=(index_end+1)%TASK_NUM;
recording_task=1;
}
}
#endif
context_switch(rq, prev, next); /* unlocks the rq */
kernel/timer.c:
#if 1
/* Timeout, in ticks, after which the switch history is dumped (sysctl "default_target_timeout"). */
int sysctl_default_target_timeout = 20;
/* Monitor state defined in kernel/sched.c. */
extern int sysctl_monitor_thread;
extern u64 sched_target_out_jiffies;
extern int recording_task;
/*
 * Defined in kernel/sched.c and called from update_process_times();
 * declared here so the call does not rely on an implicit declaration.
 */
extern void dump_all_preempt_task(void);
#endif
void update_process_times(int user_tick)
{
struct task_struct *p = current;
int cpu = smp_processor_id();
#if 1
/*
 * Timeout check for the monitored task; evaluated on CPU 0 only.
 * NOTE(review): the CPU is hard-coded to 0 here while the schedule() hook
 * records on sysctl_cpu_check - confirm the two are meant to differ.
 */
if((cpu==0)&&(sysctl_monitor_thread == 1)){
if(recording_task&&sched_target_out_jiffies){ // a switch-out of the monitored thread is being timed
/* Fires once more than sysctl_default_target_timeout ticks have passed since the switch-out. */
if(time_after64(jiffies_64,
sched_target_out_jiffies
+sysctl_default_target_timeout)){
printk("\n\n<Back trace>:\n\n");
dump_all_preempt_task();
printk("\n\n");
/* One-shot: clear the stamp and turn monitoring off after the dump. */
sched_target_out_jiffies = 0;
sysctl_monitor_thread = 0;
}
}
}
#endif
kernel/sysctl.c:
#if 1
extern int sysctl_monitor_thread;
extern int sysctl_default_target_timeout;
extern int sysctl_cpu_check;
#endif
static ctl_table kern_table[] = {
#if 1
/* /proc/sys/kernel/monitor_thread: backed by sysctl_monitor_thread (the hooks act while it is 1). */
{
.ctl_name = CTL_UNNUMBERED,
.procname = "monitor_thread",
.data = &sysctl_monitor_thread,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
/* /proc/sys/kernel/default_target_timeout: backed by sysctl_default_target_timeout (ticks). */
{
.ctl_name = CTL_UNNUMBERED,
.procname = "default_target_timeout",
.data = &sysctl_default_target_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
/* /proc/sys/kernel/cpu_check: backed by sysctl_cpu_check (CPU whose switches are recorded). */
{
.ctl_name = CTL_UNNUMBERED,
.procname = "cpu_check",
.data = &sysctl_cpu_check,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#endif
监控所有cpu上的进程切换记录,通过用户态系统调用来通知开始和结束。
例如powerpc
内核修改处:
1) kernel/sched.c 添加if 1控制的代码
#if 1
/* One recorded context switch. */
typedef struct task_preempt_pid{
/* pid of the task that was switched out (prev->pid). */
pid_t pid;
/* rq->clock - prev->se.exec_start, sampled when the task was switched out. */
u64 exc_time_per;
}task_preempt_pid;
/* Number of per-CPU history rows; the schedule() hook indexes rows by cpu. */
#define CPU_NUM 2
/* Entries per CPU row; the per-CPU write index wraps at this value. */
#define TASK_ARRAY_SIZE 10000
/* result_array[cpu][n]: allocated by init_result_array(). */
task_preempt_pid **result_array;
/* Next write position in each CPU's row; reset to 0 after the row is written to the log. */
int index_cpu[CPU_NUM];
/* Set by sys_notify_sched_syscall(): 0 = record, 1 = dump requested; -1 once the dump was triggered. */
int record_schedule = -1;
/* Work item that runs write_to_file_handler() when the dump is requested from interrupt context. */
static struct work_struct write_to_file_task;
#define FILE_RECORD_NAME "/sched_record.log"
/*
 * Append the recorded per-CPU switch history to FILE_RECORD_NAME and reset
 * every per-CPU write index to 0.
 *
 * data: unused; present to match the work-handler signature used by
 *       init_result_array().
 *
 * Fix: the entry index is now restarted at 0 for every CPU. It used to be
 * initialised once before the outer loop, so every CPU after the first
 * started at the previous CPU's count and its entries were skipped.
 *
 * NOTE(review): sys_write() is handed a kernel buffer; depending on the
 * calling context this may need set_fs(KERNEL_DS) around the calls -
 * verify on the target kernel.
 */
void write_to_file_handler(void* data)
{
	/* Longest formatted line is well under 64 bytes; keep the on-stack buffer small. */
	char line[128];
	int cpu_idx;
	int entry;
	int len;
	int fd;

	fd = sys_open(FILE_RECORD_NAME, O_RDWR|O_CREAT|O_APPEND, S_IRWXU|S_IRWXO|S_IRWXG);
	if (fd < 0) {
		printk("open file %s errno %d\n", FILE_RECORD_NAME, fd);
		return;
	}

	for (cpu_idx = 0; cpu_idx < CPU_NUM; ++cpu_idx) {
		len = sprintf(line, "\ncpu:%d\n", cpu_idx);
		sys_write(fd, line, len);
		len = sprintf(line, "there are %d process\n", index_cpu[cpu_idx]);
		sys_write(fd, line, len);

		for (entry = 0; entry < index_cpu[cpu_idx]; ++entry) {
			len = sprintf(line, "([%d]pid:%d,time:0x%llx)\n", entry,
				      result_array[cpu_idx][entry].pid,
				      (unsigned long long)result_array[cpu_idx][entry].exc_time_per);
			sys_write(fd, line, len);
		}

		/* This CPU's row has been flushed; the next recording starts from slot 0. */
		index_cpu[cpu_idx] = 0;
	}

	sys_close(fd);
}
void init_result_array(void)
{
int i=0;
if(result_array==NULL){
result_array = (task_preempt_pid**)vmalloc(CPU_NUM*sizeof(task_preempt_pid*));
for(;i<CPU_NUM;++i)
result_array[i]= (task_preempt_pid*)vmalloc(TASK_ARRAY_SIZE*sizeof(struct task_preempt_pid));
}
INIT_WORK(&write_to_file_task,
(void (*)(void *))&write_to_file_handler);
}
/*
 * Write the recorded schedule history to the log file.
 * Outside interrupt context the handler is called directly; inside it the
 * work is handed to write_to_file_task via schedule_work().
 */
void dump_all_schedule_histoty(void)
{
	if (!in_interrupt()) {
		write_to_file_handler(0);
		return;
	}

	schedule_work(&write_to_file_task);
}
#endif
在asmlinkage void __sched schedule(void)函数里添加
clear_tsk_need_resched(prev);
__update_rq_clock(rq);
#if 1
u64 prev_exc_time = rq->clock - prev->se.exec_start;
#endif
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
#if 1
/*
 * Record the task being switched out while recording is enabled
 * (record_schedule == 0).
 * The row table has only CPU_NUM rows and is allocated by
 * init_result_array(), so skip the write when the table or this CPU's row
 * is missing, or when cpu is outside [0, CPU_NUM).
 */
if (record_schedule == 0 && result_array != NULL &&
    cpu >= 0 && cpu < CPU_NUM && result_array[cpu] != NULL) {
	task_preempt_pid *slot = &result_array[cpu][index_cpu[cpu]];

	slot->exc_time_per = prev_exc_time;
	slot->pid = prev->pid;
	/* The write index wraps at TASK_ARRAY_SIZE; older entries are then overwritten. */
	index_cpu[cpu] = (index_cpu[cpu] + 1) % TASK_ARRAY_SIZE;
}
#endif
context_switch(rq, prev, next); /* unlocks the rq */
2) kernel/timer.c添加
#if 1
/* Recorder state defined in kernel/sched.c. */
extern int record_schedule;
/*
 * Defined in kernel/sched.c and called from update_process_times();
 * declared here so the call does not rely on an implicit declaration.
 */
extern void dump_all_schedule_histoty(void);
/*
 * System call used by user space to start and stop schedule recording.
 *
 * args: the request value itself, passed in the pointer argument:
 *       0 starts recording, 1 requests the dump. Any other value is
 *       stored unchanged and matches neither hook condition.
 *
 * Always returns 0.
 */
asmlinkage
long sys_notify_sched_syscall(void* args)
{
	/* Go through long first: a direct pointer-to-int cast narrows on 64-bit. */
	int request = (int)(long)args;

	record_schedule = request;

	switch (request) {
	case 0:
		printk("begin record\n");
		break;
	case 1:
		printk("end record\n");
		break;
	default:
		break;
	}
	return 0;
}
#endif
void update_process_times(int user_tick)
{
struct task_struct *p = current;
int cpu = smp_processor_id();
#if 1
/*
 * A dump was requested (record_schedule == 1): on CPU 0 only, move the
 * state to -1 first so the dump is triggered once, then write the history.
 */
if((cpu==0)&&(record_schedule==1)){
record_schedule = -1;
dump_all_schedule_histoty();
}
#endif
3) linux/include/asm-powerpc/unistd.h中添加系统调用号,同时修改最大系统调用号 (根据实际情况修改)
#define __NR_write_watchregs 310
#define __NR_unify_syscall 311
#if 1
#define __NR_notify_sched_syscall 312
#endif
//#define __NR_syscalls 312
#define __NR_syscalls 313
4)修改系统调用表
linux/include/asm-powerpc/systbl.h
SYSCALL(unify_syscall)
#if 1
SYSCALL(notify_sched_syscall)
#endif
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
#define __USE_GNU
#include <sched.h>
#include <pthread.h>
/*
 * Switch the calling thread to SCHED_FIFO at priority `prio` and set its
 * thread name to `name`.
 *
 * Failures are reported on stderr and otherwise ignored, so the caller
 * keeps running with whatever attributes could be applied (SCHED_FIFO
 * normally needs elevated privileges).
 */
void set_attribute(int prio,char* name)
{
	struct sched_param param = { .sched_priority = prio };

	/* pid 0 addresses the calling thread; no raw gettid syscall is needed. */
	if (sched_setscheduler(0, SCHED_FIFO, &param) != 0)
		perror("sched_setscheduler");

	if (prctl(PR_SET_NAME, (unsigned long)name) != 0)
		perror("prctl(PR_SET_NAME)");
}
#define MAX_LOOP 1000000
/*
 * Busy-wait for MAX_LOOP * delay loop iterations. A delay <= 0 returns
 * immediately. The wall-clock duration depends on the CPU; this is not a
 * calibrated millisecond delay.
 *
 * The bound is computed in long long because MAX_LOOP * delay overflows
 * int for delay > 2147. The counter is volatile so the compiler cannot
 * remove the otherwise empty loop.
 */
void mdelay(int delay)
{
	const long long limit = (long long)MAX_LOOP * delay;
	volatile long long spin;

	for (spin = 0; spin < limit; ++spin)
		;
}
/*
 * Worker thread "SCHE1_1": SCHED_FIFO priority 65, pinned to CPU 0.
 * Loops forever: busy-waits with mdelay(2), then sleeps one second.
 *
 * data: unused.
 *
 * NOTE(review): the kernel monitor described earlier in this file matches
 * TASK_MONITOR_NAME ("TEST"); this thread is only monitored if that macro
 * is changed to match its name.
 */
void* ProducerThread(void* data)
{
	cpu_set_t cpuset;

	(void)data;
	set_attribute(65,"SCHE1_1");

	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	/* pid 0 addresses the calling thread. */
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
		perror("sched_setaffinity");

	for (;;) {
		mdelay(2);
		sleep(1);
	}
	return NULL; /* not reached */
}
/*
 * Worker thread "SCHE1_2": SCHED_FIFO priority 70, pinned to CPU 0.
 * Loops forever: busy-waits with mdelay(2), then sleeps one second.
 *
 * data: unused.
 */
void* ConsumerThread(void* data)
{
	cpu_set_t cpuset;

	(void)data;
	set_attribute(70,"SCHE1_2");

	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	/* pid 0 addresses the calling thread. */
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
		perror("sched_setaffinity");

	for (;;) {
		mdelay(2);
		sleep(1);
	}
	return NULL; /* not reached */
}
/*
 * Start the two CPU-0 worker threads and wait for them.
 *
 * Fixes:
 *  - pthread_join() stores a void *; passing the address of an int let it
 *    write past the variable on 64-bit targets. The workers' return
 *    values are not used, so NULL is passed instead.
 *  - pthread_create() failures are reported; joining a thread that was
 *    never created is undefined.
 *  - The worker loops never end, so the joins block for the life of the
 *    process; the old trailing spin loop and the exit() after it could
 *    never do useful work and are gone.
 */
int main(int argc, char* argv[])
{
	pthread_t producer, consumer;
	int rc;

	(void)argc;
	(void)argv;

	rc = pthread_create(&producer, NULL, &ProducerThread, NULL);
	if (rc != 0) {
		fprintf(stderr, "pthread_create(producer): %s\n", strerror(rc));
		exit(EXIT_FAILURE);
	}

	rc = pthread_create(&consumer, NULL, &ConsumerThread, NULL);
	if (rc != 0) {
		fprintf(stderr, "pthread_create(consumer): %s\n", strerror(rc));
		exit(EXIT_FAILURE);
	}

	pthread_join(producer, NULL);
	pthread_join(consumer, NULL);
	return EXIT_SUCCESS;
}