【openbmc1】rsyslog,logrotate,post_code,journalctl,log

文章目录

  • 1.rsyslog:rsyslogd是一个进程 ,syslog函数将log写给rsyslogd进程,rsyslogd -v
    • 1.1 本地:如下rsyslog.conf中log_rotation变量在local1这行定义
    • 1.2 远程:如下两个星号中间是点号
  • 2.logrotate:logrotate /etc/logrotate.rsyslog(bb中重命名)
  • 3.post_code:ipmid.c(ipmi_handle_sensor函数)调用sel.c和sdr.c(sensor.c提供接口)解析返回response
  • 4.journalctl:linux内存中日志是二进制无法查看,用journalctl查看
  • 5.导出log:ls -tr 按修改时间排序


1.rsyslog:rsyslogd是一个进程 ,syslog函数将log写给rsyslogd进程,rsyslogd -v

【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第1张图片
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第2张图片
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第3张图片
ident将是一个标记,省略的话即打印出进程的名字如下。
在这里插入图片描述

grep -v "^#" /etc/rsyslog.conf | grep -v "^$"   #排除文件中以#开头和去除空行, -v:输出不匹配的行

【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第4张图片
如下shell中logger默认日志级别notice,-t指定tag标记。
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第5张图片

// logger命令打印显示: 年...
$template LogUtilFileFormat,"%$year% %timegenerated%.%timegenerated:::date-subseconds% %HOSTNAME% %syslogseverity-text:::uppercase% bmc#%syslogtag%%msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"

1.1 本地:如下rsyslog.conf中log_rotation变量在local1这行定义

如下logfile.log是新文件,console.log不是新文件。omfile输出文件,与imfile相反。
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第6张图片

1.2 远程:如下两个星号中间是点号

rsyslog守护进程来自于当前的linux发布版本的预装模块,但是默认并没有启动。为了让rsyslog守护进程能够接收外部的消息,需要编辑/etc/rsyslog.conf,使其能够在UDP和TCP端口接收日志消息。
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第7张图片
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第8张图片

# On the client (the ".37" host): in /etc/rsyslog.conf, forward messages of severity info and above (can be changed to another level such as crit) to the target server
*.info action(type="omfwd" name="remoteLog" template="LogUtilFileFormat" target="10.75.159.146" port="514" protocol="udp"
           queue.type="LinkedList" action.resumeRetryCount="1" action.resumeInterval="60"
           queue.size="500" queue.workerthreadminimummessages="101" queue.discardseverity="0"
           queue.timeoutenqueue="0" queue.timeoutshutdown="10" queue.timeoutactioncompletion="100"
           action.reportSuspension="off" action.reportSuspensionContinuation="off")

$IncludeConfig /etc/rsyslog.d/*.conf
# On the server (the ".146" host): enable the following in /etc/rsyslog.conf; for TCP, enable the corresponding TCP settings instead
module(load="imudp")
input(type="imudp" port="514")
$IncludeConfig /etc/rsyslog.d/*.conf
# Contents of /etc/rsyslog.d/remote.conf (restart the rsyslog process after editing); SONiC writes this rule directly into rsyslog.conf.
# NOTE(review): the filter compares the "fromhost" property against an IP address; "fromhost-ip" may be the intended property — confirm.
:fromhost,isequal, "10.75.159.37"  /var/log/remote_37.log

2.logrotate:logrotate /etc/logrotate.rsyslog(bb中重命名)

【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第9张图片
在这里插入图片描述

# rsyslog.logrotate
     postrotate  # start of the script block (original note: "the script runs only once" — NOTE(review): presumably relies on sharedscripts, which is not visible here; confirm)
         /bin/kill -HUP `cat /var/run/rsyslogd.pid 2> /dev/null` 2> /dev/null || true
        # HUP/USR1/USR2 (original note: hang-up signal, makes the daemon reload its configuration file)
     endscript   # end of the script block

在这里插入图片描述
logrotate有两种方式做定时任务。如果使用Crontab模式,则把/etc/cron.daily/logrotate这个文件里检测/run/systemd/system目录的这3行注释掉,并停掉logrotate.timer(systemctl stop logrotate.timer)。如下20 * 1024 * 1024 = 20971520。只有通过logger或syslog函数写入的日志才受rsyslog进程管控。
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第10张图片
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第11张图片
在这里插入图片描述
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第12张图片
如下不换行输出。
在这里插入图片描述
如下处理特殊字符。
在这里插入图片描述
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第13张图片

3.post_code:ipmid.c(ipmi_handle_sensor函数)调用sel.c和sdr.c(sensor.c提供接口)解析返回response

cpu下 ipmitool raw 0x0a 0x44 0x1F 0x00 0x02 0xE0 0x63 0x89 0x64 0x21 0x00 0x04 0x13(sensor type) 0x0a(sensor num) 0x6F 0xaa 0x11 0x11 回车显示00 00 ,作为ipmid进程request。

1.support dimm sensor (sensor type:0x0c)
2.support post_code sensor (sensor type:0x0f)
3.support PCIE sensor (sensor type:0x13) ###########
4.support process sensor (sensor type:0x07)
5.support system boot sensor (sensor type:0x0f)
6.support system event sensor (sensor type:0x0f)
NOTE: The sensor number needs to be adjusted according to the BIOS configuration in the future
/*
 * parse_sel - top-level SEL record decoder in sel.c.
 *
 * Excerpt only: the parameter list and the local declarations are elided in
 * this listing. The visible part reads the sensor number from the record,
 * resolves a sensor name, decodes the event data into error_log, and emits
 * one LOG_CRIT syslog line describing the entry.
 */
static void parse_sel(...) {   // main entry point in sel.c
    /* Sensor num (Byte 11) */  // i.e. the 12th byte when counting from 1
    sensor_num = (uint8_t) sel[11]; // see the table below
    pal_get_event_sensor_name(fru, sel, sensor_name);  // provided by obmc-pal.c; sensor_name is only needed for the syslog() call below — the event data itself is decoded by matching snr_num

    /* Event Data (Byte 13:15) */   // i.e. bytes 14-16 when counting from 1
    ret = pal_parse_sel(fru, sel, error_log);  // provided by obmc-pal.c; it calls pal_parse_sel_helper
    sprintf(event_data, "%02X%02X%02X", sel[13], sel[14], sel[15]);  // raw event data as 6 hex digits

    syslog(LOG_CRIT, "SEL Entry: FRU: %d, Record: %s (0x%02X), Time: %s, "
        "Sensor: %s (0x%02X), Event Data: (%s) %s", fru, error_type, record_type, time,
        sensor_name, sensor_num,
        event_data, error_log);
}
// obmc-pal.c中
/*
 * pal_get_event_sensor_name - resolve the sensor name for a SEL record.
 *
 * fru:  FRU id, passed through to the x86 lookup.
 * sel:  raw SEL record; sel[10] is read as the sensor type and sel[11] as the
 *       sensor number (0-based indices; both values are platform-defined).
 * name: output buffer that receives the NUL-terminated sensor name.
 *
 * Returns 0 when the sensor type is OS_BOOT (name is set to "OS"); otherwise
 * returns whatever pal_get_x86_event_sensor_name() returns for the sensor
 * number.
 *
 * Declared weak so that a same-named definition elsewhere overrides it.
 */
int __attribute__((weak))
pal_get_event_sensor_name(uint8_t fru, uint8_t *sel, char *name) {
  const uint8_t sensor_type = sel[10];
  const uint8_t sensor_number = sel[11];

  /* OS_BOOT events are raised by the OS; they are named by type alone. */
  if (sensor_type == OS_BOOT) {
    sprintf(name, "OS");
    return 0;
  }

  /* Every other type is named by its sensor number. */
  return pal_get_x86_event_sensor_name(fru, sensor_number, name);
}

int __attribute__((weak))
pal_get_x86_event_sensor_name(uint8_t fru, uint8_t snr_num, char *name)
{
  if (pal_is_fru_x86(fru))
  {
    switch (snr_num) {  // sensor number
      case CPU_ERROR:
        sprintf(name, "CPU_ERROR");
        break;
      case CPU_MISSMATCH:
        sprintf(name, "CPU_MISSMATCH");
        break;
      case SYSTEM_BOOT:
        sprintf(name, "SYSTEM_BOOT");
        break;
      case CRITICAL_INTERRUPT00:
        sprintf(name, "PCIE_CRIT00");  // sensor name
        break;

// obmc-pal.h中
enum { // 自定义的sensor number
  CRITICAL_INTERRUPT00 = 0x00,
  CRITICAL_INTERRUPT01 = 0x01,

如下ipmi-second-gen-interface-spec-v2-rev手册615页。
在这里插入图片描述
点击跳转31.6如下。
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第14张图片
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第15张图片
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第16张图片
如下Sensor-specific offset一列对应event data 1低四位,Event一列对应event data 2。
【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第17张图片

// obmc-pal.c
/*
 * One row of a decode table: a byte value and its human-readable description.
 * NOTE(review): the field is called EventData1, but pal_parse_sel_helper
 * indexes these tables with ed[1] (event data 2) — confirm the naming.
 */
struct system_fw_progress {
  uint8_t EventData1;   // 8 bits, 1 byte
  char DecodeString[128];
};
/* Descriptions used for the "System Firmware Error (POST Error)" case. */
struct system_fw_progress system_fw_error[] = {
  {0x00, "Unspecified"},   // each {} initializes one struct system_fw_progress (129 bytes)
  {0x01, "No system memory is physically installed in the system"},
  {0x02, "No usable system memory, all installed memory has experienced an unrecoverable failure"}, // 88 bytes once the caller's ", " prefix is added
  ...
  {0x0D, "CPU speed matching failure"},
};
/* Descriptions shared by the "System Firmware Hang" and "System Firmware Progress" cases. */
struct system_fw_progress system_fw_hang_or_progress[] = {
  {0x00, "Unspecified"},
  {0x01, "Memory initialization"},
  ...
  {0x19, "Primary processor initialization"},
};

 // The following is an excerpt from inside pal_parse_sel_helper: it decodes a
 // POST_ERROR event into error_log and mirrors the text to the SOL log via logger.
  uint8_t *ed = &event_data[3];  // ed[0] is used below as event data 1, ed[1] as event data 2
  char temp_log[512] = {0};
  char add_sol_log_com[512] = "logger -p local3.info ";  // shell command prefix; the quoted message is appended below
  
  case POST_ERROR: // one case of: switch (snr_num)
    if (((ed[0] >> 6) & 0x03) == 0x3) {  // Table 29: bits 7:6 of event data 1 are both set (11b)
      switch (ed[0] & 0xF) { // Table 29: the low nibble of event data 1 is matched against the "Sensor-specific offset" column
        case 0x00:
          strcat(error_log, "System Firmware Error (POST Error), IPMI Post Code"); // 50 bytes
          if (ed[1] <  (sizeof(system_fw_error) / sizeof(system_fw_error[0]))) { // bound is the element count of the table; indices start at 0, so '<' rather than '<='
            sprintf(temp_log, ", %s", system_fw_error[ed[1]].DecodeString); // up to 88 bytes per the author's estimate; ed[1] is event data 2
          } else {
            sprintf(temp_log, ", reserved");
          }
          break;
        case 0x01:
          strcat(error_log, "System Firmware Hang, IPMI Post Code");
          /* fallthrough: the hang case shares the table lookup below; the
           * strcmp() guard keeps the "Hang" prefix from being replaced */
        case 0x02:
          if (strcmp(error_log, "") == 0) {
            strcat(error_log, "System Firmware Progress, IPMI Post Code");
          }
          if (ed[1] <  (sizeof(system_fw_hang_or_progress) / sizeof(system_fw_hang_or_progress[0]))) {
            sprintf(temp_log, ", %s", system_fw_hang_or_progress[ed[1]].DecodeString);
          } else {
            sprintf(temp_log, ", reserved");
          }
          break;
        default:
          sprintf(temp_log, "Unknown");
          break;
      }
      strcat(error_log, temp_log); // author's size estimate for the combined text: 138 bytes (50 + 88)

      // send post code sel to sol log
      // NOTE(review): sprintf/strcat here are unbounded; this relies on error_log
      // fitting in the 512-byte buffers — confirm the size of error_log.
      // NOTE(review): error_log is placed inside single quotes in a shell command;
      // a quote character in the text would break out of the quoting — confirm
      // that none of the decode strings can contain one.
      sprintf(temp_log, " '%s' ", error_log);
      strcat(add_sol_log_com, temp_log);
      system(add_sol_log_com);
      break;

4.journalctl:linux内存中日志是二进制无法查看,用journalctl查看

【openbmc1】rsyslog,logrotate,post_code,journalctl,log_第18张图片

5.导出log:ls -tr 按修改时间排序

# auto_dump.sh
#!/bin/bash
# auto_dump: for every "<category>/<file>" entry listed in auto_dump.cfg (read
# from the current directory), copy all files with that name found under
# /var/log into <dst_dir>/tmp/<category>/, and report entries that produced
# no <dst_dir>/tmp/<category>/<file>.
#
# Usage: auto_dump.sh <dst_dir>
work_path=$(pwd)
conf_file_path="${work_path}/auto_dump.cfg"
dst_dir_path=""
search_path="/var/log"

if [ $# -ne 1 ]; then
    program=$(basename -- "$0")
    echo "Usage: $program <dst_dir>" >&2
    echo "Examples:$program /root/os_log/" >&2
    # A usage error must not look like success to the caller.
    exit 1
fi
dst_dir_path=$1

if [ ! -r "$conf_file_path" ]; then
    echo "$conf_file_path not found" >&2
    exit 1
fi

# `read` without IFS= still trims surrounding whitespace from each entry;
# the `|| [ -n ... ]` keeps a final line that lacks a trailing newline.
while read -r conf_path_line || [ -n "$conf_path_line" ]
do
    # Skip blank lines and comment lines.
    case "$conf_path_line" in
        ''|'#'*) continue ;;
    esac

    last_file_path=$(basename -- "$conf_path_line")   # e.g. netstate.log
    tmp=${conf_path_line%/*}
    last_dir_path=${tmp##*/}                           # e.g. network
    dst_file_path="$dst_dir_path/tmp/$last_dir_path/"

    find "$search_path" -name "$last_file_path" | while IFS= read -r search_path_line
    do
        mkdir -p "$dst_file_path"
        cp -- "$search_path_line" "$dst_file_path"
    done

    # Check the copied file by full path instead of cd-ing into the scratch
    # tree: a cd inside the loop would break a relative <dst_dir> on later
    # iterations and would test the wrong directory if it failed.
    if [ ! -f "$dst_dir_path/tmp/$conf_path_line" ]; then
        echo "$conf_path_line not found"
    fi
done < "$conf_file_path"
# auto_dump.cfg
network/netstate.log
network/services.log
hardware/dmidecode.log
hardware/cmdline.log
message/alert.log
message/audit.log
#!/bin/bash 
# Base directory: the log*.tar.gz archives and the tmp/ scratch tree live here.
dst_dir_path="/home/os_log"
# Rotation depth used by mov_log (its shifting loop starts at this value minus 2).
dst_dir_path_filenum=10

# Create the scratch tree (network/, hardware/, message/var/log/) under
# $dst_dir_path/tmp. Paths are quoted so a base directory containing spaces
# or glob characters is handled correctly.
mkdir_tmp(){
    local sub
    for sub in network hardware message/var/log
    do
        mkdir -p "$dst_dir_path/tmp/$sub"
    done
}

# Snapshot network state and the systemd unit list into
# $dst_dir_path/tmp/network/. Redirect targets are quoted so a base directory
# containing spaces does not cause an "ambiguous redirect".
write_network(){
    local out="$dst_dir_path/tmp/network"

    ifconfig > "$out/ip.log"
    netstat > "$out/netstate.log"
    route > "$out/route.log"
    arp > "$out/arp.log"
    # The original "-all" is the bundled short options -a -l -l, i.e. --all --full.
    systemctl --all --full > "$out/services.log"
}

# Dump hardware and kernel information into $dst_dir_path/tmp/hardware/.
# SMART data for every /dev/sd* disk and for the first NVMe device is appended
# to smartctl.log. Device lists are piped straight into the loops, so no
# temporary file is created in the current directory.
write_hardware(){
    local out="$dst_dir_path/tmp/hardware"
    local dev

    dmidecode > "$out/dmidecode.log"
    cat /proc/cmdline > "$out/cmdline.log"
    cat /proc/cpuinfo > "$out/cpuinfo.log"
    cat /proc/meminfo > "$out/meminfo.log"
    lsblk > "$out/lsblk.log"
    mount > "$out/mount.log"
    cat /proc/interrupts > "$out/interrupts.log"
    lspci > "$out/lspci.log"

    # Start from an empty SMART report; the loops below append to it.
    rm -f "$out/smartctl.log"

    # Use only fdisk's "Disk /dev/sdX: ..." header lines. Partition rows also
    # contain "/dev/sd", but their second column is a boot flag or a sector
    # number, not a device path.
    fdisk -l | awk '/^Disk \/dev\/sd/ { sub(/:$/, "", $2); print $2 }' | while read -r dev
    do
        smartctl -a "$dev" >> "$out/smartctl.log"
    done

    # NVMe: only the first matching line is used.
    fdisk -l | grep "/dev/nvme" | head -1 | awk '{print $2}' | awk -F ':' '{print $1}' | while read -r dev
    do
        smartctl -a "$dev" >> "$out/smartctl.log"
    done

    cat /proc/modules > "$out/modules.log"
    cat /proc/version > "$out/version.log"
}

# Copy /var/log into the scratch tree, pack the whole tree into
# $dst_dir_path/log.tar.gz, then delete the scratch tree.
# Returns non-zero without touching anything else if the scratch directory
# cannot be entered.
tar_log(){
    cp -rf /var/log/* "$dst_dir_path/tmp/message/var/log"
    # Without this check a failed cd would make the rm/tar below operate on
    # whatever the current directory happens to be.
    cd "$dst_dir_path/tmp" || return 1
    # -f: a leftover archive is optional, so its absence is not an error.
    rm -f log.tar.gz
    tar -czf log.tar.gz -- *
    mv log.tar.gz "$dst_dir_path/log.tar.gz"
    rm -rf "$dst_dir_path/tmp"
}

# Rotate the existing archives in $dst_dir_path: log.tar.gz becomes
# log.1.tar.gz, log.N.tar.gz becomes log.(N+1).tar.gz, and the oldest slot,
# log.(dst_dir_path_filenum-1).tar.gz, is dropped.
# Does nothing when there is no log.tar.gz to rotate, so the oldest archive
# is only discarded when a newer one actually takes its place.
mov_log(){
    # Highest numbered archive to keep; derived from the configured depth
    # instead of a hard-coded 9 (identical for the default of 10).
    local oldest=$((dst_dir_path_filenum - 1))
    local i

    if [ -f "$dst_dir_path/log.tar.gz" ]; then
        rm -f "$dst_dir_path/log.$oldest.tar.gz"
        # Shift from the oldest downwards so nothing is overwritten.
        for ((i = oldest - 1; i >= 1; i--))
        do
            if [ -f "$dst_dir_path/log.$i.tar.gz" ]; then
                mv "$dst_dir_path/log.$i.tar.gz" "$dst_dir_path/log.$((i + 1)).tar.gz"
            fi
        done
        mv "$dst_dir_path/log.tar.gz" "$dst_dir_path/log.1.tar.gz"
    fi
}

# Collect everything into the scratch tree first. mov_log runs before tar_log
# so that the previous log.tar.gz is renamed to log.1.tar.gz before tar_log
# writes the new one.
mkdir_tmp
write_network
write_hardware
mov_log
tar_log

你可能感兴趣的:(openbmc,linux)