re2 官方地址:
https://github.com/google/re2
cre2 官方地址:
https://github.com/marcomaggi/cre2
不透明类型:cre2_regexp_t
Struct Typedef:cre2_string_t
'const char * data'
Pointer to the first byte in the referenced substring.
'int length'
The number of bytes in the referenced substring.
Enumeration Typedef: cre2_error_code_t
函数:cre2_regexp_t * cre2_new(const char * PATTERN,int
PATTERN_LEN,const cre2_options_t * OPT)
void cre2_delete(cre2_regexp_t * REX)
const char * cre2_pattern(const cre2_regexp_t * REX)
int cre2_num_capturing_groups(const cre2_regexp_t * REX)
int cre2_find_named_capturing_groups(const cre2_regexp_t
* REX,const char * NAME)
const char * pattern = "from (?P<S>.*) to (?P<D>.*)";
cre2_options_t * opt = cre2_opt_new();
cre2_regexp_t * rex = cre2_new(pattern, strlen(pattern),
opt);
{
if (cre2_error_code(rex))
{ /* handle the error */ }
int nmatch = cre2_num_capturing_groups(rex) + 1;
cre2_string_t strings[nmatch];
int e, SIndex, DIndex;
const char * text = \
"from Montreal, Canada to Lausanne, Switzerland";
int text_len = strlen(text);
e = cre2_match(rex, text, text_len, 0, text_len,
CRE2_UNANCHORED, strings, nmatch);
if (0 == e)
{ /* handle the error */ }
SIndex = cre2_find_named_capturing_groups(rex, "S");
if (0 != strncmp("Montreal, Canada",
strings[SIndex].data, strings[SIndex].length))
{ /* handle the error */ }
DIndex = cre2_find_named_capturing_groups(rex, "D");
if (0 != strncmp("Lausanne, Switzerland",
strings[DIndex].data, strings[DIndex].length))
{ /* handle the error */ }
}
cre2_delete(rex);
cre2_opt_delete(opt);
int cre2_program_size(const cre2_regexp_t * REX)
int cre2_error_code(const cre2_regexp_t * REX)
const char * cre2_error_string(const cre2_regexp_t * REX)
If REX is a successfully built regular expression object: return a
pointer to an empty string.
The following code:
cre2_regexp_t * rex;
rex = cre2_new("ci(ao", 5, NULL);
{
printf("error: code=%d, msg=\"%s\"\n",
cre2_error_code(rex),
cre2_error_string(rex));
}
cre2_delete(rex);
prints:
error: code=6, msg="missing ): ci(ao"
void cre2_error_arg(const cre2_regexp_t * REX,
cre2_string_t * ARG)
If REX is a successfully built regular expression object: ARG
references an empty string.
The following code:
cre2_regexp_t * rex;
cre2_string_t S;
rex = cre2_new("ci(ao", 5, NULL);
{
cre2_error_arg(rex, &S);
printf("arg: len=%d, data=\"%s\"\n", S.length, S.data);
}
cre2_delete(rex);
prints:
arg: len=5, data="ci(ao"
cre2_options_t * opt;
opt = cre2_opt_new();
cre2_opt_set_log_errors(opt, 0);
Opaque Typedef:cre2_options_t
Enumeration Typedef:cre2_encoding_t
CRE2_UNKNOWN
CRE2_UTF8
CRE2_Latin1
The value 'CRE2_UNKNOWN' should never be used: it exists only in
case there is a mismatch between the definitions of RE2 and CRE2.
cre2_options_t * cre2_opt_new(void)
Function:void cre2_opt_delete(cre2_options_t * OPT)
以下所有函数都是正则表达式选项的 getter 和 setter;
setter 的 FLAG 参数为 false 时禁用该选项,为 true 时启用该选项;
除非另行说明,getter 的 "int" 返回值在选项启用时为 true,
在选项禁用时为 false。
void cre2_opt_set_encoding(cre2_options_t * OPT,
cre2_encoding_t ENC)
int cre2_opt_posix_syntax(cre2_options_t * OPT)
void cre2_opt_set_posix_syntax(cre2_options_t * OPT,int
FLAG)
- 函数:int cre2_opt_longest_match(cre2_options_t * OPT)
- 函数:void cre2_opt_set_longest_match(cre2_options_t * OPT,int
FLAG)
- 函数:int cre2_opt_log_errors(cre2_options_t * OPT)
- 函数:void cre2_opt_set_log_errors(cre2_options_t * OPT,int
FLAG)
— 将语法错误和执行错误记录到 'stderr'。该选项默认启用。
Function:int cre2_opt_literal(cre2_options_t * OPT)
void cre2_opt_set_literal(cre2_options_t * OPT,int FLAG)
Setting this option is equivalent to quoting all the special
characters defining a regular expression pattern:
cre2_regexp_t * rex;
cre2_options_t * opt;
const char * pattern = "(ciao) (hello)";
const char * text = pattern;
int len = strlen(pattern);
opt = cre2_opt_new();
cre2_opt_set_literal(opt, 1);
rex = cre2_new(pattern, len, opt);
{
/* successful match */
cre2_match(rex, text, len, 0, len,
CRE2_UNANCHORED, NULL, 0);
}
cre2_delete(rex);
cre2_opt_delete(opt);
Function:int cre2_opt_never_nl(cre2_options_t * OPT)
void cre2_opt_set_never_nl(cre2_options_t * OPT,int
FLAG)
int cre2_opt_dot_nl(cre2_options_t * OPT)
void cre2_opt_set_dot_nl(cre2_options_t * OPT,int FLAG)
Function:int cre2_opt_never_capture(cre2_options_t * OPT)
void cre2_opt_set_never_capture(cre2_options_t * OPT,int
FLAG)
Function:int cre2_opt_case_sensitive(cre2_options_t * OPT)
void cre2_opt_set_case_sensitive(cre2_options_t * OPT,
int FLAG)
Function:int cre2_opt_max_mem(cre2_options_t * OPT)
void cre2_opt_set_max_mem(cre2_options_t * OPT,int M)
以下选项仅在启用 POSIX 语法时才会被查询;禁用 POSIX 语法时,
这些功能始终处于启用状态且无法关闭。
Function:int cre2_opt_perl_classes(cre2_options_t * OPT)
void cre2_opt_set_perl_classes(cre2_options_t * OPT,int
FLAG)
int cre2_opt_word_boundary(cre2_options_t * OPT)
void cre2_opt_set_word_boundary(cre2_options_t * OPT,int
FLAG)
int cre2_opt_one_line (cre2_options_t * OPT)
void cre2_opt_set_one_line (cre2_options_t * OPT, int
FLAG)
基本模式匹配如下:
cre2_regexp_t * rex;
cre2_options_t * opt;
const char * pattern = "(ciao) (hello)";
opt = cre2_opt_new();
cre2_opt_set_posix_syntax(opt, 1);
rex = cre2_new(pattern, strlen(pattern), opt);
{
const char * text = "ciao hello";
int text_len = strlen(text);
int nmatch = 3;
cre2_string_t match[nmatch];
cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED,
match, nmatch);
/* prints: full match: ciao hello */
printf("full match: ");
fwrite(match[0].data, match[0].length, 1, stdout);
printf("\n");
/* prints: first group: ciao */
printf("first group: ");
fwrite(match[1].data, match[1].length, 1, stdout);
printf("\n");
/* prints: second group: hello */
printf("second group: ");
fwrite(match[2].data, match[2].length, 1, stdout);
printf("\n");
}
cre2_delete(rex);
cre2_opt_delete(opt);
- Enumeration Typedef:cre2_anchor_t
CRE2_UNANCHORED
CRE2_ANCHOR_START
CRE2_ANCHOR_BOTH
int cre2_match(const cre2_regexp_t * REX,const char *
TEXT,int TEXT_LEN,int START_POS,int END_POS,cre2_anchor_t
ANCHOR,cre2_string_t * MATCH,int NMATCH)
The zero-based indices START_POS (inclusive) and END_POS
(exclusive) select the substring of TEXT to be examined. ANCHOR
selects the anchor point for the matching operation.
Data about the matching groups is stored in the array MATCH, which
must have at least NMATCH entries; the referenced substrings are
portions of the TEXT buffer. If we are only interested in
verifying if the text matches or not (ignoring the matching
portions of text): we can use 'NULL' as MATCH argument and 0 as
NMATCH argument.
The first element of MATCH (index 0) references the full portion of
the substring of TEXT matching the pattern; the second element of
MATCH (index 1) references the portion of text matching the first
parenthetical subexpression, the third element of MATCH (index 2)
references the portion of text matching the second parenthetical
subexpression; and so on.
int cre2_easy_match(const char * PATTERN,int
PATTERN_LEN,const char * TEXT,int TEXT_LEN,cre2_string_t *
MATCH,int NMATCH)
Struct Typedef:cre2_range_t
'long start'
Inclusive start byte index.
'long past'
Exclusive end byte index.
void cre2_strings_to_ranges(const char * TEXT,
cre2_range_t * RANGES,cre2_string_t * STRINGS,int NMATCH)
cre2_regexp_t * rex;
cre2_options_t * opt;
const char * pattern;
pattern = "(ciao) (hello)";
opt = cre2_opt_new();
rex = cre2_new(pattern, strlen(pattern), opt);
{
if (cre2_error_code(rex))
printf("rex error \n");
int nmatch = 3;
cre2_string_t strings[nmatch];
cre2_range_t ranges[nmatch];
int e;
const char * text = "ciao hello";
int text_len = strlen(text);
e = cre2_match(rex, text, text_len, 0, text_len, CRE2_UNANCHORED, strings, nmatch);
if (1 != e)
printf("match error \n");
cre2_strings_to_ranges(text, ranges, strings, nmatch);
printf("full match: ");
printf("%.*s\n", ranges[0].past-ranges[0].start,text+ranges[0].start);
printf("\n");
printf("first group: ");
printf("%.*s\n", ranges[1].past-ranges[1].start,text+ranges[1].start);
printf("\n");
printf("second group: ");
printf("%.*s\n", ranges[2].past-ranges[2].start,text+ranges[2].start);
printf("\n");
}
cre2_delete(rex);
cre2_opt_delete(opt);
结果:
full match: ciao hello
first group: ciao
second group: hello
以下示例是成功匹配:
const char * pattern = "ci.*ut";
const char * text = "ciao salut";
cre2_string_t input = {
.data = text,
.length = strlen(text)
};
int result;
result = cre2_full_match(pattern, &input, NULL, 0);
result => 1
以下示例是成功匹配,其中
忽略括号子表达式:
const char * pattern = "(ciao) salut";
const char * text = "ciao salut";
cre2_string_t input = {
.data = text,
.length = strlen(text)
};
int result;
result = cre2_full_match(pattern, &input, NULL, 0);
result => 1
以下示例是成功匹配,其中
报告了与括号子表达式匹配的文本部分:
const char * pattern = "(ciao) salut";
const char * text = "ciao salut";
cre2_string_t input = {
.data = text,
.length = strlen(text)
};
int nmatch = 1;
cre2_string_t match[nmatch];
int result;
result = cre2_full_match(pattern, &input, match, nmatch);
result => 1
strncmp(text, input.data, input.length) => 0
strncmp("ciao", match[0].data, match[0].length) => 0
1. int cre2_full_match(const char * PATTERN,const
cre2_string_t * TEXT,cre2_string_t * MATCH,int NMATCH)
2. int cre2_full_match_re(cre2_regexp_t * REX,const
cre2_string_t * TEXT,cre2_string_t * MATCH,int NMATCH)
For example: the text 'abcdef' matches the pattern 'abcdef'
according to this function, but neither the pattern 'abc' nor the
pattern 'def' will match.
int cre2_partial_match(const char * PATTERN,const
cre2_string_t * TEXT,cre2_string_t * MATCH,int NMATCH)
int cre2_partial_match_re(cre2_regexp_t * REX,const
cre2_string_t * TEXT,cre2_string_t * MATCH,int NMATCH)
For example: the text 'abcDEFghi' matches the pattern 'DEF'
according to this function.
int cre2_consume(const char * PATTERN,cre2_string_t *
TEXT,cre2_string_t * MATCH,int NMATCH)
int cre2_consume_re(cre2_regexp_t * REX,cre2_string_t *
TEXT,cre2_string_t * MATCH,int NMATCH)
For example: the text 'abcDEF' matches the pattern 'abc' according
to this function; after the call TEXT will reference the text
'DEF'.
int cre2_find_and_consume(const char * PATTERN,
cre2_string_t * TEXT,cre2_string_t * MATCH,int NMATCH)
int cre2_find_and_consume_re(cre2_regexp_t * REX,
cre2_string_t * TEXT,cre2_string_t * MATCH,int NMATCH)
For example: the text 'abcDEFghi' matches the pattern 'DEF'
according to this function; the prefix 'abc' is skipped; after the
call TEXT will reference the text 'ghi'.
int cre2_replace_re(cre2_regexp_t * REX,
cre2_string_t * TEXT,cre2_string_t * replace)
cre2_regexp_t * rex;
const char * pattern = "hello";
const char * text = "ciao hello salut";
const char * replace = "ohayo";
cre2_string_t target = {
.data = text,
.length = strlen(text)
};
cre2_string_t rewrite = {
.data = replace,
.length = strlen(replace)
};
int result;
rex = cre2_new(pattern, strlen(pattern), NULL);
{
result = cre2_replace_re(rex, &target, &rewrite);
if (1 != result)
goto error;
if (0 != strncmp("ciao ohayo salut", target.data, target.length))
goto error;
if ('\0' != target.data[target.length])
goto error;
PRINTF("rewritten to: ");
FWRITE(target.data, target.length, 1);
PRINTF("\n");
}
cre2_delete(rex);
free((void *)target.data);
还可以支持全局替换
cre2_regexp_t * rex;
const char * pattern = "(壹|贰|叁|肆|伍|陆|柒|捌|玖|拾|佰|仟|萬|万|亿|零)";
const char * text = "ciao 伍 salut 肆陆壹叁";
const char * replace = "sty";
cre2_string_t target = {
.data = text,
.length = strlen(text)
};
cre2_string_t rewrite = {
.data = replace,
.length = strlen(replace)
};
int result;
rex = cre2_new(pattern, strlen(pattern), NULL);
{
result = cre2_global_replace_re(rex, &target, &rewrite);
printf("result is %d\n", result);
if (1 != result)
printf("replace error \n");
if (0 != strncmp("ciao sty salut sty", target.data, target.length))
printf("cmp error \n");
if ('\0' != target.data[target.length])
printf("target error \n");
printf("rewritten to: ");
printf("%.*s\n", target.length, target.data);
printf("\n");
}
cre2_delete(rex);
free((void *)target.data);