阅读代码时经常见到 mSplit 的使用，因此阅读了该部分代码，并在此做一记录。
/* Forward declaration; the definition follows mSplit(). */
static char *mSplitAddTok(const char *str, const int len,
                          const char *sep_chars, const char meta_char);

/* Returns nonzero if c is one of the sep_len characters in sep_chars. */
static int mSplitIsSep(const char *sep_chars, size_t sep_len, char c)
{
    return memchr(sep_chars, c, sep_len) != NULL;
}

/* Returns the first index >= pos whose character is neither a separator
 * nor whitespace, or str_len if there is no such character. */
static size_t mSplitSkipSeps(const char *str, size_t str_len, size_t pos,
                             const char *sep_chars, size_t sep_len)
{
    while ((pos < str_len) &&
           (mSplitIsSep(sep_chars, sep_len, str[pos]) ||
            isspace((unsigned char)str[pos])))
    {
        pos++;
    }

    return pos;
}

/****************************************************************
 *
 * Function: mSplit()
 *
 * Purpose: Splits a string into tokens non-destructively.
 *
 * Parameters:
 *  str       => the string to be split
 *  sep_chars => the set of separator characters; NULL selects " \t"
 *  max_toks  => the maximum number of tokens to return.  0 means
 *               "return them all"; negative values are treated
 *               like 0.
 *  num_toks  => receives the number of tokens returned (set to 0
 *               whenever NULL is returned)
 *  meta_char => the "escape metacharacter": the character following
 *               it is taken literally, which allows a separator to
 *               be escaped
 *
 * Notes:
 *  - Leading separators/whitespace are skipped, and whitespace on
 *    either side of a separator is trimmed.
 *  - If max_toks is reached, the last token is the rest of the
 *    string: it may contain separator characters and is copied
 *    without un-escaping.  With max_toks == 1 the remainder is
 *    copied verbatim (trailing whitespace is kept).
 *
 * Returns:
 *  A heap-allocated array of heap-allocated strings, one token per
 *  entry.  The caller owns it and must release it (presumably with
 *  mSplitFree(), which this function uses on its own error path).
 *  NULL is returned when num_toks is NULL, str is NULL or empty,
 *  sep_chars is an empty string, meta_char is also a separator, the
 *  string holds nothing but separators/whitespace, or the string
 *  ends with an unmatched escape character.
 *
 ****************************************************************/
char **mSplit(const char *str, const char *sep_chars, const int max_toks,
              int *num_toks, const char meta_char)
{
    size_t cur_tok = 0;     /* current token index into array of strings */
    size_t tok_start;       /* index to start of token */
    size_t i, j;
    size_t str_len, sep_len;
    /* Token limit as an unsigned count; 0 means unlimited.  A negative
     * max_toks must not be cast to a huge size_t and used as a size. */
    const size_t tok_limit = (max_toks > 0) ? (size_t)max_toks : 0;
    int escaped = 0;

    /* It's rare we'll need more than this even if max_toks is set really
     * high.  Store toks here until finished, then allocate.  If more than
     * this is necessary, a heap array is used instead. */
    char *toks_buf[TOKS_BUF_SIZE];
    size_t toks_buf_size = TOKS_BUF_SIZE;
    const size_t toks_buf_size_increment = 10;
    char **toks_alloc = NULL;   /* Used if the stack buffer isn't enough */
    char **toks = toks_buf;     /* Points to one of the two above */
    char **retstr;
    const char *whitespace = " \t";

    if (num_toks == NULL)
        return NULL;

    *num_toks = 0;

    /* Nothing to do for a missing/empty input string or for an explicitly
     * empty separator set. */
    if ((str == NULL) || (str[0] == '\0') ||
        ((sep_chars != NULL) && (sep_chars[0] == '\0')))
    {
        return NULL;
    }

    /* No separator set given: fall back to splitting on blanks and tabs */
    if (sep_chars == NULL)
        sep_chars = whitespace;

    /* Compute the lengths once instead of in every loop condition */
    str_len = strlen(str);
    sep_len = strlen(sep_chars);

    /* Meta char cannot also be a separator char */
    for (i = 0; i < sep_len; i++)
    {
        if (sep_chars[i] == meta_char)
            return NULL;
    }

    /* Move past initial separator characters and whitespace */
    i = mSplitSkipSeps(str, str_len, 0, sep_chars, sep_len);

    /* Nothing but separator characters or whitespace in string */
    if (i == str_len)
        return NULL;

    /* User only wanted one tok so return the rest of the string in
     * one tok */
    if ((cur_tok + 1) == tok_limit)
    {
        retstr = (char **)SnortAlloc(sizeof(char *));
        retstr[cur_tok] = SnortStrndup(&str[i], str_len - i);
        if (retstr[cur_tok] == NULL)
        {
            mSplitFree(&retstr, cur_tok + 1);
            return NULL;
        }

        *num_toks = (int)(cur_tok + 1);
        return retstr;
    }

    /* Mark the beginning of the next tok */
    tok_start = i;

    while (i < str_len)
    {
        if (escaped)
        {
            /* This character is escaped with the meta char */
            escaped = 0;
            i++;
            continue;
        }

        /* Got an escape character.  Don't include it now, but there
         * must be a character after it. */
        if (str[i] == meta_char)
        {
            escaped = 1;
            i++;
            continue;
        }

        /* It's a normal character */
        if (!mSplitIsSep(sep_chars, sep_len, str[i]))
        {
            i++;
            continue;
        }

        /* Current character matched a separator character.  Trim off
         * whitespace previous to the separator: j ends up one past the
         * last non-whitespace character of the token. */
        for (j = i; j > tok_start; j--)
        {
            if (!isspace((unsigned char)str[j - 1]))
                break;
        }

        /* Copy the token out.  Its length will not include the meta
         * char of escaped separators. */
        toks[cur_tok] = mSplitAddTok(&str[tok_start], (int)(j - tok_start),
                                     sep_chars, meta_char);
        cur_tok++;

        /* Move past any more separator characters or whitespace */
        i = mSplitSkipSeps(str, str_len, i, sep_chars, sep_len);

        /* Nothing but separator characters or whitespace left in the
         * string, so the result is complete. */
        if (i == str_len)
        {
            if (toks != toks_alloc)
            {
                retstr = (char **)SnortAlloc(sizeof(char *) * cur_tok);
                memcpy(retstr, toks, sizeof(char *) * cur_tok);
            }
            else
            {
                retstr = toks;
            }

            *num_toks = (int)cur_tok;
            return retstr;
        }

        /* Reached the size of our current token array and need something
         * bigger.  With a token limit this happens only once because the
         * limit itself is allocated. */
        if (cur_tok == toks_buf_size)
        {
            char **tmp = (toks_alloc != NULL) ? toks_alloc : toks_buf;

            if (tok_limit != 0)
                toks_buf_size = tok_limit;
            else
                toks_buf_size = cur_tok + toks_buf_size_increment;

            toks_alloc = (char **)SnortAlloc(sizeof(char *) * toks_buf_size);
            memcpy(toks_alloc, tmp, sizeof(char *) * cur_tok);
            toks = toks_alloc;

            if (tmp != toks_buf)
                free(tmp);
        }

        if ((tok_limit != 0) && ((cur_tok + 1) == tok_limit))
        {
            /* Return rest of string as last tok */
            if (toks != toks_alloc)
            {
                retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
                /* Only the first cur_tok slots hold tokens; the last slot
                 * is filled in below. */
                memcpy(retstr, toks, sizeof(char *) * cur_tok);
            }
            else
            {
                /* Already got a ret string */
                retstr = toks;
            }

            /* Trim whitespace at end of last tok.  str[i] is not
             * whitespace, so j stops at i + 1 at the latest. */
            for (j = str_len; j > i; j--)
            {
                if (!isspace((unsigned char)str[j - 1]))
                    break;
            }

            retstr[cur_tok] = SnortStrndup(&str[i], j - i);
            if (retstr[cur_tok] == NULL)
            {
                mSplitFree(&retstr, cur_tok + 1);
                return NULL;    /* *num_toks is still 0 */
            }

            *num_toks = (int)(cur_tok + 1);
            return retstr;
        }

        /* Start of the next token.  i is deliberately NOT advanced here:
         * str[i] must go through the escape check above on the next
         * iteration, otherwise a token that begins with the meta char
         * would not have its escape honoured. */
        tok_start = i;
    }

    /* Last character was an escape character */
    if (escaped)
    {
        for (i = 0; i < cur_tok; i++)
            free(toks[i]);

        /* NULL unless the heap array is in use */
        free(toks_alloc);

        return NULL;
    }

    /* Trim whitespace at end of last tok (i == str_len here) */
    for (j = i; j > tok_start; j--)
    {
        if (!isspace((unsigned char)str[j - 1]))
            break;
    }

    /* Last character was not a separator character so we've got
     * one more tok.  Unescape escaped separator characters. */
    if (toks != toks_alloc)
    {
        retstr = (char **)SnortAlloc(sizeof(char *) * (cur_tok + 1));
        memcpy(retstr, toks, sizeof(char *) * cur_tok);
    }
    else
    {
        retstr = toks;
    }

    retstr[cur_tok] = mSplitAddTok(&str[tok_start], (int)(j - tok_start),
                                   sep_chars, meta_char);

    /* Just add one to cur_tok index instead of incrementing
     * since we're done */
    *num_toks = (int)(cur_tok + 1);

    return retstr;
}

/*
 * Copies the first len characters of str into a newly allocated,
 * NUL-terminated token, un-escaping separators on the way:
 *  - meta_char followed by a separator yields just the separator;
 *  - meta_char followed by any other character keeps both characters;
 *  - a meta_char that is the very last character is dropped.
 *
 * The result comes from SnortAlloc() and is not checked for NULL here;
 * the original code notes that SnortAlloc is fatal on failure.
 */
static char *mSplitAddTok(const char *str, const int len,
                          const char *sep_chars, const char meta_char)
{
    size_t i, k;
    const size_t sep_len = strlen(sep_chars);
    char *tok;
    int tok_len = 0;
    int got_meta = 0;

    /* First pass: get the length of the returned tok.
     * Could have a maximum token length and use a fixed sized array and
     * fill it in as we go but don't want to put on that constraint. */
    for (i = 0; (int)i < len; i++)
    {
        if (!got_meta)
        {
            if (str[i] == meta_char)
            {
                got_meta = 1;
                continue;
            }
        }
        else
        {
            /* It's a non-separator character, so the meta character
             * is kept in the returned tok as well. */
            if (!mSplitIsSep(sep_chars, sep_len, str[i]))
                tok_len++;

            got_meta = 0;
        }

        tok_len++;
    }

    /* Allocate it and fill it in */
    tok = (char *)SnortAlloc(tok_len + 1);

    /* The first pass leaves got_meta set when the token ends with a
     * dangling meta char.  Reset it so this pass makes exactly the same
     * decisions and never writes more than tok_len characters. */
    got_meta = 0;

    for (i = 0, k = 0; (int)i < len; i++)
    {
        if (!got_meta)
        {
            if (str[i] == meta_char)
            {
                got_meta = 1;
                continue;
            }
        }
        else
        {
            /* It's a non-separator character, so include
             * the meta character in the return tok */
            if (!mSplitIsSep(sep_chars, sep_len, str[i]))
                tok[k++] = meta_char;

            got_meta = 0;
        }

        tok[k++] = str[i];
    }

    /* Terminate explicitly rather than relying on zero-filled memory */
    tok[k] = '\0';

    return tok;
}
mSplit 用于以一个或多个字符作为分隔符，从原字符串中切分出各个子串。
调用该接口后，一定要对返回值进行检查（失败时返回 NULL）。
返回的内存空间在使用完毕后一定要释放。
在 C++ 中，使用 std::string 的 find 接口进行检索并提取子串，可以达到相同的效果。