c regex
- 编译正则表达式
regcomp()
- 匹配正则表达式
regexec()
- 释放正则表达式
regfree()
- 错误处理
regerror()
#include <stdio.h>
#include <regex.h> /* POSIX header, not part of ISO C; normally shipped on Linux */

/*
 * Minimal POSIX regex walkthrough: compile an e-mail pattern, run it against
 * a sample string, print the matched text, and release the compiled regex.
 *
 * Returns 0 after a normal run (match or no match) and 1 when the pattern
 * cannot be compiled.
 */
int main(){
    regex_t reg; /* holds the compiled pattern */

    /* \w is a GNU extension to POSIX ERE. The dot in front of the last
     * domain label is escaped so that it only matches a literal '.'. */
    const char* pattern = "^\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*$";

    const int compile_rc = regcomp(&reg, pattern, REG_EXTENDED);
    if (compile_rc != 0){
        /* A failed regcomp leaves nothing to free; just report and stop. */
        char errbuf[128];
        regerror(compile_rc, &reg, errbuf, sizeof errbuf);
        fprintf(stderr, "regcomp failed: %s\n", errbuf);
        return 1;
    }

    const char* buf = "user@example.com"; /* sample subject string */
    const size_t nmatch = 1;              /* capacity of pmatch */
    regmatch_t pmatch[1];                 /* pmatch[0] receives the offsets of the whole match */

    const int status = regexec(&reg, buf, nmatch, pmatch, 0);
    if (status == REG_NOMATCH){
        printf("No Match\n");
    }
    else if (status == 0){
        printf("Match\n");
        /* rm_so/rm_eo delimit the half-open range [rm_so, rm_eo) inside buf. */
        printf("%.*s\n", (int)(pmatch[0].rm_eo - pmatch[0].rm_so), buf + pmatch[0].rm_so);
    }
    else{
        /* Any other code is a genuine matcher failure (e.g. out of memory). */
        char errbuf[128];
        regerror(status, &reg, errbuf, sizeof errbuf);
        fprintf(stderr, "regexec failed: %s\n", errbuf);
    }

    regfree(&reg); /* release the memory owned by the compiled pattern */
    return 0;
}
1.int regcomp (regex_t *compiled, const char *pattern, int cflags)
-
regex_t
是一个结构体数据类型,用来存放编译后的正则表达式-
regex_t
的成员re_nsub
用来存储正则表达式中的子正则表达式的个数,子正则表达式就是用圆括号包起来的部分表达式
-
-
pattern
是指向我们写好的正则表达式的指针 -
cflags
有如下4个值或者是它们或运算(|)后的值:- REG_EXTENDED 以功能更加强大的扩展正则表达式的方式进行匹配。
- REG_ICASE 匹配字母时忽略大小写。
- REG_NOSUB 不用存储匹配后的结果。
- REG_NEWLINE 识别换行符,这样'$'就可以从行尾开始匹配,'^'就可以从行的开头开始匹配
2.int regexec (regex_t *compiled, char *string, size_t nmatch, regmatch_t matchptr [], int eflags)
- 如果在编译正则表达式的时候没有指定cflags的参数为REG_NEWLINE,则默认情况下是忽略换行符的,也就是把整个文本串当作一个字符串处理。执行成功返回0
-
regmatch_t
是一个结构体数据类型,在regex.h中定义:typedef struct{regoff_t rm_so;regoff_t rm_eo;} regmatch_t;
- 成员
rm_so
存放匹配文本串在目标串中的开始位置,rm_eo
存放结束位置 - 通常我们以数组的形式定义一组这样的结构。因为往往我们的正则表达式中还包含子正则表达式。数组0单元存放主正则表达式位置,后边的单元依次存放子正则表达式位置
-
-
compiled
是已经用regcomp
函数编译好的正则表达式 -
string
是目标文本串 -
nmatch
是regmatch_t
结构体数组的长度 -
matchptr regmatch_t
类型的结构体数组,存放匹配文本串的位置信息 -
eflags
有两个值-
REG_NOTBOL
指定该值后,目标串的开头不再被视为行首,因此'^'不会在目标串的起始位置匹配(若编译时设置了REG_NEWLINE,换行符之后的'^'仍然可以匹配)。它适用于把一段长文本分多次传给regexec、而当前片段并不是真正行首的场景 -
REG_NOTEOL
与REG_NOTBOL相对应:指定该值后,目标串的结尾不再被视为行尾(end of line),因此'$'不会在目标串的末尾匹配(若编译时设置了REG_NEWLINE,换行符之前的'$'仍然可以匹配)
-
3.void regfree (regex_t *compiled)
- 清空compiled指向的regex_t结构体的内容
- 如果是重新编译的话,一定要先清空regex_t结构体
4.size_t regerror (int errcode, regex_t *compiled, char *buffer, size_t length)
-
当执行regcomp
或者regexec
产生错误的时候,就可以调用这个函数而返回一个包含错误信息的字符串 -
errcode
是由regcomp
和regexec
函数返回的错误代号 -
compiled
是已经用regcomp
函数编译好的正则表达式,这个值可以为NULL
-
buffer
指向用来存放错误信息的字符串的内存空间 -
length
指明buffer
的长度,如果这个错误信息的长度大于这个值,则regerror
函数会自动截断超出的字符串,但他仍然会返回完整的字符串的长度。所以我们可以用如下的方法先得到错误字符串的长度。
`size_t length = regerror (errcode, compiled, NULL, 0);`
转载:
c regex 用法
C++ regex
regex头文件
Regular Expressions (C++)
正则表达式 (C++)
正则表达式 (C++)
微软regex
regex typedefs
类
-
basic_regex
包装正则表达式。 -
match_results
包含一系列子匹配项。 -
regex_constants
包含各种类型的常量。 -
regex_error
报告错误的正则表达式。 -
regex_iterator
循环访问匹配结果。 -
regex_traits
描述用于匹配的元素的特征。 -
regex_traits<char>
描述用于匹配的char
的特征。 -
regex_traits<wchar_t>
描述用于匹配的wchar_t
的特征。 -
regex_token_iterator
循环访问子匹配项。 -
sub_match
介绍子匹配项。
类型定义
-
cmatch
char match_results
的类型定义。 -
cregex_iterator
char regex_iterator
的类型定义。 -
cregex_token_iterator
char regex_token_iterator
的类型定义。 -
csub_match
char sub_match
的类型定义。 -
regex
char basic_regex
的类型定义。 -
smatch
string match_results
的类型定义。 -
sregex_iterator
string regex_iterator
的类型定义。 -
sregex_token_iterator
string regex_token_iterator
的类型定义。 -
ssub_match
string sub_match
的类型定义。 -
wcmatch
wchar_t match_results
的类型定义。 -
wcregex_iterator
wchar_t regex_iterator
的类型定义。 -
wcregex_token_iterator
wchar_t regex_token_iterator
的类型定义。 -
wcsub_match
wchar_t sub_match
的类型定义。 -
wregex
wchar_t basic_regex
的类型定义。 -
wsmatch
wstring match_results
的类型定义。 -
wsregex_iterator
wstring regex_iterator
的类型定义。 -
wsregex_token_iterator
wstring regex_token_iterator
的类型定义。 -
wssub_match
wstring sub_match
的类型定义。
函数
-
regex_match
与正则表达式完全匹配。 -
regex_replace
替换匹配正则表达式。 -
regex_search
搜索正则表达式匹配项。 -
swap
交换basic_regex
或match_results
对象。
运算符
-
operator==
比较各种对象,相等。 -
operator!=
比较各种对象,不相等。 -
operator<
比较各种对象,小于。 -
operator<=
比较各种对象,小于或等于。 -
operator>
比较各种对象,大于。 -
operator>=
比较各种对象,大于或等于。 -
operator<<
将 sub_match 插入流中。
regex_match
#include "stdafx.h"
#include <cstring>
#include <iostream>
#include <regex>
using namespace std;

// Demonstrates the std::regex_match overloads for the four common subject
// types: const char* ranges, std::wstring iterators, std::string iterators,
// and null-terminated wchar_t strings. regex_match succeeds only when the
// pattern matches the ENTIRE subject.
int _tmain(int argc, _TCHAR* argv[])
{
    // (1) with char*
    // Note how const char* requires cmatch and regex
    const char *first = "abc";
    const char *last = first + strlen(first);
    cmatch narrowMatch;
    regex rx("a(b)c");
    bool found = regex_match(first, last, narrowMatch, rx);

    // (1) with std::wstring
    // Note how wstring requires wsmatch and wregex.
    // Note use of const iterators cbegin() and cend().
    wstring target(L"Hello");
    wsmatch wideMatch;
    wregex wrx(L"He(l+)o");
    if (regex_match(target.cbegin(), target.cend(), wideMatch, wrx))
        wcout << L"The matching text is:" << wideMatch.str() << endl;

    // (2) with std::string
    string target2("Drizzle");
    regex rx2(R"(D\w+e)"); // no double backslashes with raw string literal
    found = regex_match(target2.cbegin(), target2.cend(), rx2);

    // (3) with wchar_t*
    const wchar_t* target3 = L"2014-04-02";
    wcmatch wideMatch2;
    // LR"(...)" is a raw wide-string literal. Open and close parens
    // are delimiters, not string elements.
    wregex wrx2(LR"(\d{4}(-|/)\d{2}(-|/)\d{2})");
    if (regex_match(target3, wideMatch2, wrx2))
    {
        wcout << L"Matching text: " << wideMatch2.str() << endl;
    }

    (void)found; // results (1) and (2) are computed for illustration only
    return 0;
}
regex_replace
// std__regex__regex_replace.cpp
// compile with: /EHsc
#include <cstring>
#include <iostream>
#include <regex>
#include <string>

// Demonstrates std::regex_replace in its two forms: writing through an
// output iterator (here a raw char buffer) and returning a new std::string.
// Each form is shown replacing every match and, with format_first_only,
// replacing only the first match.
int main()
{
    char buf[20]; // large enough for the 6-character result plus terminator
    const char *first = "axayaz";
    const char *last = first + std::strlen(first);
    std::regex rx("a");
    std::string fmt("A");
    std::regex_constants::match_flag_type fonly =
        std::regex_constants::format_first_only;

    // The iterator overload returns one past the last character written,
    // which is exactly where the terminating NUL has to go.
    *std::regex_replace(&buf[0], first, last, rx, fmt) = '\0';
    std::cout << "replacement == " << &buf[0] << std::endl;

    *std::regex_replace(&buf[0], first, last, rx, fmt, fonly) = '\0';
    std::cout << "replacement == " << &buf[0] << std::endl;

    // String overloads: the result is returned by value.
    std::string str("adaeaf");
    std::cout << "replacement == "
              << std::regex_replace(str, rx, fmt) << std::endl;

    std::cout << "replacement == "
              << std::regex_replace(str, rx, fmt, fonly) << std::endl;

    return (0);
}
结果:
replacement == AxAyAz
replacement == Axayaz
replacement == AdAeAf
replacement == Adaeaf
regex_search
// std__regex__regex_search.cpp
// compile with: /EHsc
#include <cstring>
#include <iostream>
#include <regex>
#include <string>

// Exercises the std::regex_search overloads: pointer ranges, C strings and
// std::string subjects, each with and without a match_results object.
// Unlike regex_match, regex_search succeeds when any substring matches.
int main()
{
    const char *first = "abcd";
    const char *last = first + std::strlen(first);
    std::cmatch mr;
    std::regex rx("abc");
    std::regex_constants::match_flag_type fl =
        std::regex_constants::match_default;

    // Subject range is only "a", so the search fails.
    std::cout << "search(f, f+1, \"abc\") == " << std::boolalpha
              << regex_search(first, first + 1, rx, fl) << std::endl;

    std::cout << "search(f, l, \"abc\") == " << std::boolalpha
              << regex_search(first, last, mr, rx) << std::endl;
    std::cout << " matched: \"" << mr.str() << "\"" << std::endl;

    std::cout << "search(\"a\", \"abc\") == " << std::boolalpha
              << regex_search("a", rx) << std::endl;

    std::cout << "search(\"xabcd\", \"abc\") == " << std::boolalpha
              << regex_search("xabcd", mr, rx) << std::endl;
    std::cout << " matched: \"" << mr.str() << "\"" << std::endl;

    // A temporary std::string is fine here because no match_results object
    // keeps iterators into it.
    std::cout << "search(string, \"abc\") == " << std::boolalpha
              << regex_search(std::string("a"), rx) << std::endl;

    // match_results must be instantiated for the subject's iterator type.
    std::string str("abcabc");
    std::match_results<std::string::const_iterator> mr2;
    std::cout << "search(string, \"abc\") == " << std::boolalpha
              << regex_search(str, mr2, rx) << std::endl;
    std::cout << " matched: \"" << mr2.str() << "\"" << std::endl;

    return (0);
}
match_results
// std__regex__match_results_begin.cpp
// compile with: /EHsc
#include <cstddef>
#include <iostream>
#include <regex>
#include <string>

// Tours the std::match_results (std::cmatch) interface after one search:
// prefix/suffix, whole-match accessors, format(), indexed and iterator
// access to the sub-matches, copy/assign/swap, and the nested typedefs.
int main()
{
    std::regex rx("c(a*)|(b)");
    std::cmatch mr;

    std::regex_search("xcaaay", mr, rx);

    // Text before and after the overall match.
    std::cout << "prefix: matched == " << std::boolalpha
              << mr.prefix().matched
              << ", value == " << mr.prefix() << std::endl;
    std::cout << "whole match: " << mr.length() << " chars, value == "
              << mr.str() << std::endl;
    std::cout << "suffix: matched == " << std::boolalpha
              << mr.suffix().matched
              << ", value == " << mr.suffix() << std::endl;
    std::cout << std::endl;

    // $0 is the whole match, $1 and $2 are the capture groups.
    std::string fmt("\"c(a*)|(b)\" matched \"$0\"\n"
                    "\"(a*)\" matched \"$1\"\n"
                    "\"(b)\" matched \"$2\"\n");
    std::cout << mr.format(fmt) << std::endl;
    std::cout << std::endl;

    // index through submatches
    for (std::size_t n = 0; n < mr.size(); ++n)
    {
        std::cout << "submatch[" << n << "]: matched == " << std::boolalpha
                  << mr[n].matched <<
            " at position " << mr.position(n) << std::endl;
        std::cout << " " << mr.length(n)
                  << " chars, value == " << mr[n] << std::endl;
    }
    std::cout << std::endl;

    // iterate through submatches
    for (std::cmatch::iterator it = mr.begin(); it != mr.end(); ++it)
    {
        std::cout << "next submatch: matched == " << std::boolalpha
                  << it->matched << std::endl;
        std::cout << " " << it->length()
                  << " chars, value == " << *it << std::endl;
    }
    std::cout << std::endl;

    // other members
    std::cmatch mr1(mr);
    mr = mr1;
    mr.swap(mr1);

    // The iterator overload of format() returns one past the last character
    // written; "<caaa>" plus the terminator fits in 10 bytes.
    char buf[10];
    *mr.format(&buf[0], "<$0>") = '\0';
    std::cout << &buf[0] << std::endl;
    std::cout << "empty == " << std::boolalpha << mr.empty() << std::endl;

    // Nested typedefs, instantiated only to show that they exist.
    std::cmatch::allocator_type al = mr.get_allocator();
    std::cmatch::string_type str = std::string("x");
    std::cmatch::size_type maxsiz = mr.max_size();
    std::cmatch::char_type ch = 'x';
    std::cmatch::difference_type dif = mr.begin() - mr.end();
    std::cmatch::const_iterator cit = mr.begin();
    std::cmatch::value_type val = *cit;
    std::cmatch::const_reference cref = val;
    std::cmatch::reference ref = val;

    // Quiet "unused" warnings without self-assignment.
    (void)al;
    (void)str;
    (void)maxsiz;
    (void)(ref == cref);
    (void)ch;
    (void)dif;

    return (0);
}
sub_match
// std__regex__sub_match_compare.cpp
// compile with: /EHsc
#include <iostream>
#include <iterator>
#include <regex>
#include <string>

// Shows the std::sub_match (std::csub_match) interface for capture group 1
// of a search: matched flag, length, the first/second iterator pair, stream
// output, and compare() against a C string, a std::string and itself.
int main()
{
    std::regex rx("c(a*)|(b)");
    std::cmatch mr;

    std::regex_search("xcaaay", mr, rx);

    std::csub_match sub = mr[1];
    std::cout << "matched == " << std::boolalpha
              << sub.matched << std::endl;
    std::cout << "length == " << sub.length() << std::endl;

    std::csub_match::difference_type dif = std::distance(sub.first, sub.second);
    std::cout << "difference == " << dif << std::endl;

    std::csub_match::iterator first = sub.first;
    std::csub_match::iterator last = sub.second;
    std::cout << "range == " << std::string(first, last)
              << std::endl;
    std::cout << "string == " << sub << std::endl;

    // A string literal may only bind to a pointer-to-const.
    const std::csub_match::value_type *ptr = "aab";
    std::cout << "compare(\"aab\") == "
              << sub.compare(ptr) << std::endl;
    std::cout << "compare(string) == "
              << sub.compare(std::string("AAA")) << std::endl;
    std::cout << "compare(sub) == "
              << sub.compare(sub) << std::endl;

    return (0);
}
regex_error
// std__regex__regex_error_code.cpp
// compile with: /EHsc
#include <iostream>
#include <regex>

// Demonstrates std::regex_error: constructing std::regex from a malformed
// pattern throws, and code() identifies the kind of syntax error.
int main()
{
    // Reference error object, used only for its code() value.
    std::regex_error paren(std::regex_constants::error_paren);

    try
    {
        std::regex rx("(a"); // unbalanced parenthesis -> throws
    }
    catch (const std::regex_error& rerr)
    {
        std::cout << "regex error: "
                  << (rerr.code() == paren.code()
                          ? "unbalanced parentheses" : "")
                  << std::endl;
    }
    catch (...)
    {
        std::cout << "unknown exception" << std::endl;
    }

    return (0);
}
regex_iterator
// std__regex__regex_iterator_difference_type.cpp
// compile with: /EHsc
#include <cstring>
#include <iostream>
#include <iterator>
#include <regex>

// regex_iterator over a const char* range; each dereference yields a
// std::cmatch describing one successive match.
typedef std::regex_iterator<const char *> Myiter;

// Enumerates every match of "a" in a C string with std::regex_iterator and
// then instantiates the iterator's nested typedefs.
int main()
{
    const char *pat = "axayaz";
    Myiter::regex_type rx("a");
    Myiter next(pat, pat + std::strlen(pat), rx);
    Myiter end; // a default-constructed iterator is the end-of-sequence marker

    for (; next != end; ++next)
        std::cout << "match == " << next->str() << std::endl;

    // other members
    Myiter it1(pat, pat + std::strlen(pat), rx);
    Myiter it2(it1);
    next = it1;

    Myiter::iterator_category cat = std::forward_iterator_tag();
    Myiter::difference_type dif = -3;
    Myiter::value_type mr = *it1;
    Myiter::reference ref = mr;
    Myiter::pointer ptr = &ref;

    // Quiet "unused" warnings without self-assignment.
    (void)it2;
    (void)cat;
    (void)dif;
    (void)ptr;

    return (0);
}
regex_token_iterator
#include <cstring>
#include <iostream>
#include <iterator>
#include <regex>
#include <vector>

// regex_token_iterator over a const char* range; each dereference yields a
// sub_match selected by the sub-expression index (or indices) given to the
// constructor.
typedef std::regex_token_iterator<const char *> Myiter;

// Walks "aaxaayaaz" with std::regex_token_iterator using each selector form:
// default (whole match), -1 (text between matches), a single group index,
// a std::vector of indices, and a built-in array of indices.
int main()
{
    const char *pat = "aaxaayaaz";
    const char *pat_end = pat + std::strlen(pat);
    Myiter::regex_type rx("(a)a");
    Myiter next(pat, pat_end, rx);
    Myiter end; // a default-constructed iterator is the end-of-sequence marker

    // show whole match
    for (; next != end; ++next)
        std::cout << "match == " << next->str() << std::endl;
    std::cout << std::endl;

    // show prefix before match
    next = Myiter(pat, pat_end, rx, -1);
    for (; next != end; ++next)
        std::cout << "match == " << next->str() << std::endl;
    std::cout << std::endl;

    // show (a) submatch only
    next = Myiter(pat, pat_end, rx, 1);
    for (; next != end; ++next)
        std::cout << "match == " << next->str() << std::endl;
    std::cout << std::endl;

    // show prefixes and submatches
    std::vector<int> vec;
    vec.push_back(-1);
    vec.push_back(1);
    next = Myiter(pat, pat_end, rx, vec);
    for (; next != end; ++next)
        std::cout << "match == " << next->str() << std::endl;
    std::cout << std::endl;

    // show prefixes and whole matches
    int arr[] = {-1, 0};
    next = Myiter(pat, pat_end, rx, arr);
    for (; next != end; ++next)
        std::cout << "match == " << next->str() << std::endl;
    std::cout << std::endl;

    // other members
    Myiter it1(pat, pat_end, rx);
    Myiter it2(it1);
    next = it1;

    Myiter::iterator_category cat = std::forward_iterator_tag();
    Myiter::difference_type dif = -3;
    Myiter::value_type mr = *it1;
    Myiter::reference ref = mr;
    Myiter::pointer ptr = &ref;

    // Quiet "unused" warnings without self-assignment.
    (void)it2;
    (void)cat;
    (void)dif;
    (void)ptr;

    return (0);
}
regex_traits
// std__regex__regex_traits_char_class_type.cpp
// compile with: /EHsc
#include <cstddef>
#include <cstring>
#include <iostream>
#include <regex>

// Traits class describing how char elements are handled during matching.
typedef std::regex_traits<char> Mytr;

// Demonstrates a few std::regex_traits<char> members: translate(),
// translate_nocase() and length().
int main()
{
    Mytr tr;

    Mytr::char_type ch = tr.translate('a');
    std::cout << "translate('a') == 'a' == " << std::boolalpha
              << (ch == 'a') << std::endl;

    std::cout << "nocase 'a' == 'A' == " << std::boolalpha
              << (tr.translate_nocase('a') == tr.translate_nocase('A'))
              << std::endl;

    const char *lbegin = "abc";
    const char *lend = lbegin + std::strlen(lbegin);
    // std::size_t is used because the standard traits class declares no
    // size_type member.
    std::size_t size = tr.length(lbegin);
    std::cout << "length(\"abc\") == " << size << std::endl;

    // NOTE(review): the sample breaks off at this point in the article; the
    // statements that presumably went on to use lend are not available, so
    // the program is simply closed here.
    (void)lend;

    return (0);
}
C++正则表达式库
C++正则表达式库
#include <iostream>
#include <iterator>
#include <string>
#include <regex>

// Overview of the C++ regex library on one sentence: a case-insensitive
// search, counting and filtering words with std::sregex_iterator, and
// bracketing long words with std::regex_replace.
int main()
{
    std::string s = "Some people, when confronted with a problem, think "
                    "\"I know, I'll use regular expressions.\" "
                    "Now they have two problems.";

    // icase makes the upper-case pattern match the lower-case text.
    std::regex self_regex("REGULAR EXPRESSIONS",
                          std::regex_constants::ECMAScript | std::regex_constants::icase);
    if (std::regex_search(s, self_regex)) {
        std::cout << "Text contains the phrase 'regular expressions'\n";
    }

    // Every run of non-whitespace characters counts as one word.
    std::regex word_regex("(\\S+)");
    auto words_begin =
        std::sregex_iterator(s.begin(), s.end(), word_regex);
    auto words_end = std::sregex_iterator();

    std::cout << "Found "
              << std::distance(words_begin, words_end)
              << " words\n";

    // Unsigned so that the comparison with size() below has matching types.
    const std::string::size_type N = 6;
    std::cout << "Words longer than " << N << " characters:\n";
    for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
        std::smatch match = *i;
        std::string match_str = match.str();
        if (match_str.size() > N) {
            std::cout << " " << match_str << '\n';
        }
    }

    // "$&" in the format string stands for the whole matched text.
    std::regex long_word_regex("(\\w{7,})");
    std::string new_s = std::regex_replace(s, long_word_regex, "[$&]");
    std::cout << new_s << '\n';
}
ECMA与POSIX区别
#include <iostream>
#include <string>
#include <regex>

// Contrasts the two alternation rules on the same pattern and subject:
// the default ECMAScript grammar versus the POSIX extended grammar, printing
// the overall match each one reports.
int main()
{
    std::string str = "zzxayyzz";
    std::regex re1(".*(a|xayy)");                       // ECMA
    std::regex re2(".*(a|xayy)", std::regex::extended); // POSIX

    std::cout << "Searching for .*(a|xayy) in zzxayyzz:\n";
    std::smatch m;

    std::regex_search(str, m, re1);
    std::cout << " ECMA (depth first search) match: " << m[0] << '\n';

    std::regex_search(str, m, re2);
    std::cout << " POSIX (leftmost longest) match: " << m[0] << '\n';
}
boost regex
1 boost::regex_match
正则表达式匹配
2 boost::regex_replace
正则表达式替换
3 boost::regex_search
正则表达式检索
#include
#include
void main()
{
std::string str = "chinaen 8Glish";
boost::regex expr("(\\w+)\\s(\\w+)");
//+ 用来表示重复一或多次
//d-digit 任何0-9之间的数字
//s-space 任何空格字符
//u-upper A-Z之间的大写字母。如果设置了地域的话,可能包含其他字符
//w-word 任何单词字符-字母数字下划线
std::cout << boost::regex_match(str, expr) << std::endl;//匹配1,不匹配0
boost::smatch what;
if (boost::regex_search(str, what, expr))//正则表达式检索
{
std::cout << what[0] << std::endl;
std::cout << what[1] << std::endl;
std::cout << what[2] << std::endl;
std::cout << what[3] << std::endl;
}
else
{
std::cout << "检索失败" << std::endl;
}
}
boost::regex_replace
正则表达式替换
//s-space 任何空格字符
#include
#include
void main()
{
std::string str = "chinaen 8Glish";
boost::regex expr("\\s");//s-space 任何空格字符
std::string tihuan = "____";
std::cout << boost::regex_replace(str, expr, tihuan) << std::endl;//把expr替换
}
以上内容转载:https://www.cnblogs.com/denggelin/p/5769624.html
Boost学习之正则表达式--regex
#include
#include
using namespace std;
int main(int argc, char* argv[])
{
//( 1 ) (( 3 ) 2 )(( 5 )4)( 6 )
//(\w+)://((\w+\.)*\w+)((/\w*)*)(/\w+\.\w+)?
//^协议://网址(x.x...x)/路径(n个\字串)/网页文件(xxx.xxx)
const char *szReg = "(\\w+)://((\\w+\\.)*\\w+)((/\\w*)*)(/\\w+\\.\\w+)?";
const char *szStr = "http://www.cppprog.com/2009/0112/48.html";
{ //字符串匹配
boost::regex reg( szReg );
bool r=boost::regex_match( szStr , reg);
assert(r);
}
{ //提取子串
boost::cmatch mat;
boost::regex reg( szReg );
bool r=boost::regex_match( szStr, mat, reg);
if(r) //如果匹配成功
{
//显示所有子串
for(boost::cmatch::iterator itr=mat.begin(); itr!=mat.end(); ++itr)
{
// 指向子串对应首位置 指向子串对应尾位置 子串内容
cout << itr->first-szStr << ' ' << itr->second-szStr << ' ' << *itr << endl;
}
}
//也可直接取指定位置信息
if(mat[4].matched) cout << "Path is" << mat[4] << endl;
}
{ //查找
boost::cmatch mat;
boost::regex reg( "\\d+" ); //查找字符串里的数字
if(boost::regex_search(szStr, mat, reg))
{
cout << "searched:" << mat[0] << endl;
}
}
{ //替换
boost::regex reg( szReg );
string s = boost::regex_replace( string(szStr), reg, "ftp://$2$5");
cout << "ftp site:"<< s << endl;
}
{ //替换2,把<>&转换成网页字符
string s1 = "(<)|(>)|(&)";
string s2 = "(?1<)(?2>)(?3&)";
boost::regex reg( s1 );
string s = boost::regex_replace( string("cout << a&b << endl;"), reg, s2, boost::match_default | boost::format_all);
cout << "HTML:"<< s << endl;
}
{ //使用迭代器找出所有数字
boost::regex reg( "\\d+" ); //查找字符串里的数字
boost::cregex_iterator itrBegin = make_regex_iterator(szStr,reg); //(szStr, szStr+strlen(szStr), reg);
boost::cregex_iterator itrEnd;
for(boost::cregex_iterator itr=itrBegin; itr!=itrEnd; ++itr)
{
// 指向子串对应首位置 指向子串对应尾位置 子串内容
cout << (*itr)[0].first-szStr << ' ' << (*itr)[0].second-szStr << ' ' << *itr << endl;
}
}
{ //使用迭代器拆分字符串
boost::regex reg("/"); //按/符拆分字符串
boost::cregex_token_iterator itrBegin = make_regex_token_iterator(szStr,reg,-1); //使用-1参数时拆分,使用其它数字时表示取第几个子串,可使用数组取多个串
boost::cregex_token_iterator itrEnd;
for(boost::cregex_token_iterator itr=itrBegin; itr!=itrEnd; ++itr)
{
cout << *itr << endl;
}
}
{ //使用迭代器拆分字符串2
boost::regex reg("(.)/(.)"); //取/的前一字符和后一字符(这个字符串形象貌似有点邪恶-_-)
int subs[] = {1,2}; // 第一子串和第二子串
boost::cregex_token_iterator itrBegin = make_regex_token_iterator(szStr,reg,subs); //使用-1参数时拆分,使用其它数字时表示取第几个子串,可使用数组取多个串
boost::cregex_token_iterator itrEnd;
for(boost::cregex_token_iterator itr=itrBegin; itr!=itrEnd; ++itr)
{
cout << *itr << endl;
}
}
cin.get();
return 0;
}
正则表达式 boost Regex
.NET 正则表达式
https://docs.microsoft.com/zh-cn/dotnet/standard/base-types/regular-expressions
http://download.microsoft.com/download/D/2/4/D240EBF6-A9BA-4E4F-A63F-AEB6DA0B921C/Regular%20expressions%20quick%20reference.docx
正则表达式语言 - 快速参考
Regex 类
其他:
深入理解正则表达式环视的概念与用法
表达式全集
正则表达式30分钟入门教程
揭开正则表达式的神秘面纱
正则表达式 30 分钟入门教程
Python正则表达式指南