linux kernel 报错:FATAL: kernel too old

kernel 报错:
FATAL: kernel too old
Kernel panic - not syncing: Attempted to kill init! exitcode=0x00007f00

原因:

根本原因是由于 kernel 和 其它软件 版本不匹配造成的。
报错如上
这里的 “其它软件” 就是 glibc ,因为这行错误是 glibc 报的。

报错代码位置:

第一句报错是glibc的报错:
glibc\sysdeps\unix\sysv\linux\dl-osinfo.h

/* Check that the running kernel is new enough for this library.

   Calls _dl_discover_osversion ().  When that succeeds (result >= 0) the
   result is stored in GLRO(dl_osversion) if that field is still 0 or holds
   a larger value, and FATAL is invoked with "FATAL: kernel too old\n" when
   the result is below a positive __LINUX_KERNEL_VERSION.  When discovery
   fails and __LINUX_KERNEL_VERSION is positive, FATAL is invoked with
   "FATAL: cannot determine kernel version\n".

   FATAL is supplied by the user of the macro and is called with the
   message string as its only argument.  */
#define DL_SYSDEP_OSCHECK(FATAL)					      \
  do {									      \
    /* Test whether the kernel is new enough.  This test is only performed    \
       if the library is not compiled to run on all kernels.  */	      \
									      \
    int version = _dl_discover_osversion ();				      \
    if (__glibc_likely (version >= 0))					      \
      {									      \
	if (__builtin_expect (GLRO(dl_osversion) == 0, 1)		      \
	    || GLRO(dl_osversion) > version)				      \
	  GLRO(dl_osversion) = version;					      \
									      \
	/* Now we can test with the required version.  */		      \
	if (__LINUX_KERNEL_VERSION > 0 && version < __LINUX_KERNEL_VERSION)   \
	  /* Not sufficient.  */					      \
	  FATAL ("FATAL: kernel too old\n");				      \
      }									      \
    else if (__LINUX_KERNEL_VERSION > 0)				      \
      FATAL ("FATAL: cannot determine kernel version\n");		      \
  } while (0)

也就是说,报错的是 /lib/libc-2.xx.so 库。

第二句是kernel自己报的错:

/*
 * Excerpt of panic(); the "..." lines mark code left out of this excerpt and
 * the function body is not shown to its end.
 *
 * The visible part expands the printf-style message (fmt plus variadic
 * arguments) into a static 1024-byte buffer and logs it with the
 * "Kernel panic - not syncing: " prefix.
 */
void panic(const char *fmt, ...)
{
	static char buf[1024];
	va_list args;
	long i, i_next = 0;
	int state = 0;
	int old_cpu, this_cpu;
	bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;
...
...

	console_verbose();
	bust_spinlocks(1);
	/* Format the caller's message into buf, bounded by sizeof(buf). */
	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	/* Emit the panic banner followed by the formatted message. */
	pr_emerg("Kernel panic - not syncing: %s\n", buf);

分析:

kernel版本过低导致:一般都是因为 glibc 升级了,而 kernel 还保留旧版本。
kernel 的小版本号过高导致:glibc 把版本号的每一段都按一个字节(0~255)打包,小版本号达到或超过 256 时就会溢出到相邻的字节,导致计算错误。比如 4.5.256 会被算成 4.5.0,从而可能被 glibc 报错。这是 glibc 的代码 bug,不是版本不匹配的问题。
问题代码在:
glibc\sysdeps\unix\sysv\linux\dl-sysdep.c:

/* Determine the version of the running kernel and pack it into one integer.

   The release string is taken from uname (); if that call fails it is read
   from /proc/sys/kernel/osrelease instead.  Up to three leading
   dot-separated numeric parts are packed 8 bits each
   (major << 16 | minor << 8 | patch).

   Returns the packed version, or -1 if no release string can be obtained.

   Known defect (the subject of this article): each part is OR-ed in without
   being limited to 8 bits, so a part above 255 spills into the next byte.  */
int
attribute_hidden
_dl_discover_osversion (void)
{
  /* ... (code elided in this excerpt) ...  */
  char bufmem[64];
  char *buf = bufmem;
  unsigned int version;
  int parts;
  char *cp;
  struct utsname uts;

  /* Try the uname system call.  */
  if (__uname (&uts))
    {
      /* This was not successful.  Now try reading the /proc filesystem.  */
      int fd = __open64_nocancel ("/proc/sys/kernel/osrelease", O_RDONLY);
      if (fd < 0)
	return -1;
      ssize_t reslen = __read_nocancel (fd, bufmem, sizeof (bufmem));
      __close_nocancel (fd);
      if (reslen <= 0)
	/* This also didn't work.  We give up since we cannot
	   make sure the library can actually work.  */
	return -1;
      /* NUL-terminate what was read, giving up the last byte if the
	 buffer was filled completely.  */
      buf[MIN (reslen, (ssize_t) sizeof (bufmem) - 1)] = '\0';
    }
  else
    buf = uts.release;

  /* Now convert it into a number.  The string consists of at most
     three parts.  */
  version = 0;
  parts = 0;
  cp = buf;
  while ((*cp >= '0') && (*cp <= '9'))
    {
      unsigned int here = *cp++ - '0';

      while ((*cp >= '0') && (*cp <= '9'))
	{
	  here *= 10;
	  here += *cp++ - '0';
	}

      ++parts;
      version <<= 8;
      /* Location of the bug.  Example: OR-ing in 256 (0x100) only touches
	 bit 8, which is the lowest bit of the previous part.  If that part
	 is odd (e.g. 5) the bit is already set and the OR changes nothing,
	 so 4.5.256 is computed as 4.5.0.  */
      version |= here;

      if (*cp++ != '.' || parts == 3)
	/* Another part following?  */
	break;
    }

  /* Fewer than three parts: shift so the major number still ends up in
     bits 16-23.  */
  if (parts < 3)
    version <<= 8 * (3 - parts);

  return version;
}

可以拷贝上面的代码,自己写一个粗略的测试程序(先把要测试的版本号字符串,例如 4.5.256,写入当前目录下的 osrelease 文件):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/utsname.h>


/* Stand-alone copy of the kernel-version parser, for experimenting.

   Reads a release string such as "4.5.256" from the file ./osrelease in the
   current directory and packs up to three leading dot-separated numeric
   fields into one integer, 8 bits per field
   (major << 16 | minor << 8 | patch).  Fields are OR-ed in without being
   limited to 8 bits, so a field above 255 spills into the neighbouring
   byte.

   Returns the packed version, or -1 if the file cannot be opened or no
   data can be read from it.  */
int
_dl_discover_osversion (void)
{
  char release[64];

  int fd = open ("./osrelease", O_RDONLY);
  if (fd < 0)
    return -1;

  ssize_t nread = read (fd, release, sizeof (release));
  close (fd);
  if (nread <= 0)
    return -1;

  /* NUL-terminate what was read; if the buffer was filled completely the
     last byte is overwritten by the terminator.  */
  size_t end = sizeof (release) - 1;
  if ((size_t) nread < end)
    end = (size_t) nread;
  release[end] = '\0';

  /* Pack at most three numeric fields, most significant first.  */
  unsigned int packed = 0;
  int fields = 0;
  const char *p = release;

  while (fields < 3 && *p >= '0' && *p <= '9')
    {
      unsigned int value = 0;

      do
	value = value * 10 + (unsigned int) (*p++ - '0');
      while (*p >= '0' && *p <= '9');

      packed = (packed << 8) | value;
      ++fields;

      /* Only a '.' introduces another field.  */
      if (*p != '.')
	break;
      ++p;
    }

  /* Fewer than three fields: shift so the first field still lands in
     bits 16-23.  */
  if (fields < 3)
    packed <<= 8 * (3 - fields);

  return (int) packed;
}

/* Print the packed version number computed by _dl_discover_osversion(). */
int main(void)
{
    int packed = _dl_discover_osversion();

    printf("version=%d\n\n", packed);
    return 0;
}

结果:
(此处原为程序运行结果的截图,图片未能保留。)

version |= here;

改为:

version += here;

计算就正常,但我还没测试是否会引起其它问题,暂时不这样做。

怎么解决:

  1. 如果是因为 kernel 版本太低导致不匹配,无法匹配高版本的glibc库,则提高 kernel 版本号或者降低 glibc库 的版本号

    解决办法:编译高版本的 kernel,或者替换到更低版本的 libc-2.xx.so 即可。

  2. 如果是因为 kernel 小版本号太高导致溢出,可以降低 kernel 版本号。

    解决办法:kernel 版本号太高,glibc又不方便改动,有一个治标不治本的办法:恢复 kernel 的小版本号到旧版

如何修改呢?
只需要把 linux 源码顶层目录下 Makefile 文件中的小版本号(SUBLEVEL)改掉,然后重新编译即可,比如将 4.5.270 改为 4.5.255:
Makefile:
VERSION = 4
PATCHLEVEL = 5
SUBLEVEL = 255
因为小版本号一般不影响功能,所以修改了也没关系。当然这是治标不治本的办法。

正规的途径是在编译 glibc 的时候,在 configure 参数中加上 --enable-kernel=4.5.0
这样 glibc 要求的最低内核版本(__LINUX_KERNEL_VERSION)就是 4.5.0;小版本号即使溢出,算出来的结果也不会小于 4.5.0,检查仍然会执行,但不会再触发这个报错。

你可能感兴趣的:(linux,linux)