Linux命令·split

split 命令用于将一个文件按照不同维度 (如行数、字节数) 分割成多个小文件

Usage: split [OPTION]... [INPUT [PREFIX]]
Output fixed-size pieces of INPUT to PREFIXaa, PREFIXab, ...; default
size is 1000 lines, and default PREFIX is `x'.  With no INPUT, or when INPUT
is -, read standard input.

Mandatory arguments to long options are mandatory for short options too.
  -a, --suffix-length=N   use suffixes of length N (default 2)
  -b, --bytes=SIZE        put SIZE bytes per output file
  -C, --line-bytes=SIZE   put at most SIZE bytes of lines per output file
  -d, --numeric-suffixes  use numeric suffixes instead of alphabetic
  -l, --lines=NUMBER      put NUMBER lines per output file
      --verbose           print a diagnostic just before each
                            output file is opened
      --help     display this help and exit
      --version  output version information and exit

SIZE may be (or may be an integer optionally followed by) one of following:
KB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.

其源码位于 ./coreutils/src/split.c 文件中, 大致流程为
初始化系统配置 > 解析命令行参数 > 初始化逻辑参数 > 逻辑实现


下面主要关注按指定行数分割 (-l) 的逻辑, 即 cwrite 与 lines_split 两个函数

/* Descriptor on which output file is open.  */
static int output_desc = -1;    // output file descriptor; file-scope variable, starts at -1

/* Emit BYTES bytes starting at BP to an output file.
   When NEW_FILE_FLAG is set, the next output file is opened first;
   otherwise the data is appended to the output file already in use.  */

static void
cwrite (bool new_file_flag, const char *bp, size_t bytes)
{
  if (new_file_flag)
    {
      /* A new file was requested but there is no data at all: return
         without opening anything when empty files are being elided.  */
      bool const no_data = (bp == NULL && bytes == 0);
      if (no_data && elide_empty_files)
        return;

      /* Hand the previous descriptor to closeout (), then switch to
         the next output file and open it.  */
      closeout (NULL, output_desc, filter_pid, outfile);
      next_file_name ();
      output_desc = create (outfile);
      if (output_desc < 0)
        error (EXIT_FAILURE, errno, "%s", outfile);
    }

  /* A complete write needs no further checks.  */
  if (full_write (output_desc, bp, bytes) == bytes)
    return;

  /* Short write: fatal unless ignorable () accepts the errno value.  */
  if (! ignorable (errno))
    error (EXIT_FAILURE, errno, "%s", outfile);
}

/* Split the input into pieces of exactly N_LINES lines each.
   BUF is the work buffer and BUFSIZE the number of bytes read into it
   per pass.  A sentinel newline is stored just past the data read, so
   BUF must be writable at index BUFSIZE as well.  */

static void
lines_split (uintmax_t n_lines, char *buf, size_t bufsize)
{
  bool start_new_file = true;   /* passed to cwrite as NEW_FILE_FLAG */
  uintmax_t lines_pending = 0;  /* lines counted since the last full piece */
  size_t got;

  do
    {
      char *chunk_end;          /* one past the last byte read */
      char *unwritten;          /* start of data not yet handed to cwrite */
      char *scan;               /* where the next newline search begins */

      got = full_read (STDIN_FILENO, buf, bufsize);
      if (got < bufsize && errno)
        error (EXIT_FAILURE, errno, "%s", infile);

      chunk_end = buf + got;
      /* Sentinel: guarantees memchr always finds a newline, so hitting
         CHUNK_END itself means the real data is exhausted.  */
      *chunk_end = '\n';
      unwritten = buf;
      scan = buf;

      for (;;)
        {
          char *nl = memchr (scan, '\n', chunk_end - scan + 1);
          if (nl == chunk_end)
            break;

          /* A real newline: step past it and count the line.  */
          scan = nl + 1;
          lines_pending++;
          if (lines_pending < n_lines)
            continue;

          /* The piece is complete: write it out and make the next
             write start a new output file.  */
          cwrite (start_new_file, unwritten, scan - unwritten);
          unwritten = scan;
          start_new_file = true;
          lines_pending = 0;
        }

      /* Write whatever is left in this chunk; never write 0 bytes.
         Later data then continues in the same output file.  */
      if (unwritten != chunk_end)
        {
          cwrite (start_new_file, unwritten, chunk_end - unwritten);
          start_new_file = false;
        }
    }
  while (got == bufsize);
}

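其逻辑实现为: 每次从标准输入读取最多 bufsize 字节的数据, 在缓冲区中查找换行符 '\n' 并累计行数; 达到目标行数时调用 cwrite 将这些行写出, 之后的下一次写入会以 new_file_flag 为 true 调用 cwrite, 由它通过 next_file_name 和 create 切换到新的输出文件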

你可能感兴趣的:(Linux命令·split)