正则表达式可以为我们带来极大的方便,有了它,再也不用为字符串的匹配、查找和替换而烦恼。
头文件:
#include <boost/regex.hpp>
std::string str("abcd");boost::regex reg( "a\\w*d" );
if (regex_match(str, reg))
{std::cout << str << " is match" << std::endl;
}else
{std::cout << str << " is not match" << std::endl;
}
// Match a complete e-mail address and read back the captured parts.
// NOTE(review): the address literal in the original text had been replaced by
// an e-mail-obfuscation placeholder ("[email protected]") that contains no '@'
// and therefore can never match; a sample address is used here instead.
const char* mail = "user@example.com";
boost::cmatch res;
// Three sub-expressions: user name, domain name, top-level domain.
// The dot is escaped so that it matches a literal '.' rather than any character.
boost::regex reg("(\\w+)@(\\w+)\\.(\\w+)");
if (boost::regex_match(mail, res, reg))
{
    // The results can be walked with iterators ...
    for (boost::cmatch::iterator pos = res.begin(); pos != res.end(); ++pos)
    {
        std::cout << *pos << std::endl;
    }
    // ... or read by index; res[0] holds the complete matched string.
    std::cout << "name:" << res[1] << std::endl;
}
// Search for an e-mail address inside the input; unlike regex_match, the
// pattern only has to match a part of the text.
// NOTE(review): the address literal in the original text had been replaced by
// an e-mail-obfuscation placeholder ("[email protected]") that contains no '@'
// and therefore can never match; a sample address is used here instead.
const char* mail = "user@example.com";
boost::cmatch res;
// Three sub-expressions: user name, domain name, top-level domain.
// The dot is escaped so that it matches a literal '.' rather than any character.
boost::regex reg("(\\w+)@(\\w+)\\.(\\w+)");
if (boost::regex_search(mail, res, reg))
{
    std::cout << "**************************************" << std::endl;
    // The results can be walked with iterators ...
    for (boost::cmatch::iterator pos = res.begin(); pos != res.end(); ++pos)
    {
        std::cout << *pos << std::endl;
    }
    // ... or read by index; res[0] holds the complete matched string.
    std::cout << "match :" << res[0] << std::endl
              << "name:" << res[1] << std::endl;
}
替换匹配到的子字符串时,可以在格式串中通过 $N 引用第 N 个子表达式匹配到的值,通过 $& 引用整个匹配。
#include <boost/algorithm/string.hpp>
void TestReplace(){//将[email protected] 替换成[email protected]std::string mail("[email protected]");//建立3个子表达式boost::regex reg("(\\w+)@(\\w+).(\\w+)");
std::cout << boost::regex_replace(mail, reg, "$1@139.$3") << std::endl;
std::cout << boost::regex_replace(mail, reg, "my$1@$2.$3") << std::endl;
//自定义替换函数,regex_replace将匹配到的字符串数组传递给回调函数,由回调函数返回新的字符串std::cout << boost::regex_replace(mail, reg, [](const boost::smatch &m){
return boost::to_upper_copy(m[0].str());
});}
当需要从字符串中提取多个表达式时,可以采用迭代进行提取
// Extract every e-mail address from a string by iterating over all matches.
// NOTE(review): the address literals in the original text had been replaced by
// an e-mail-obfuscation placeholder ("[email protected]") that can never match
// the pattern; sample addresses are used here instead.
std::string str("alice@example.com, bob@example.org, carol@example.net");
// The dot is escaped so that it matches a literal '.' rather than any character.
boost::regex reg("(\\w+)@(\\w+)\\.(\\w+)");
boost::sregex_iterator pos(str.begin(), str.end(), reg);
boost::sregex_iterator end;  // a default-constructed iterator marks the end of the sequence
while (pos != end)
{
    // (*pos)[0] is the complete text of the current match.
    std::cout << "[" << (*pos)[0] << "]";
    ++pos;
}
#include <iostream>#include <boost/regex.hpp>void TestToken(){using namespace std;using namespace boost;string str("[email protected], [email protected], [email protected]");regex reg("\\w+");
sregex_token_iterator pos(str.begin(), str.end(), reg);
while(pos != sregex_token_iterator())
{cout << "[" << *pos << "]" ;++pos;}cout << endl;//如果最后一个参数args为-1,则把匹配到的字符串视为分隔符regex split_reg(",");
pos = sregex_token_iterator(str.begin(), str.end(), split_reg, -1);
while(pos != sregex_token_iterator())
{cout << "[" << *pos << "]" ;++pos;}cout << endl;//如果最后一个参数args为正数,则返回匹配结果的第args个子串regex split_sub_reg("(\\w*)@(\\w*).(\\w*)");
pos = sregex_token_iterator(str.begin(), str.end(), split_sub_reg, 1);
while(pos != sregex_token_iterator())
{cout << "[" << *pos << "]" ;++pos;}cout << endl;//匹配并指定输出顺序//从下面字符串中提取日期,并转换成 年月日 的顺序输出std::string input("01/02/2003 blahblah 04/23/1999 blahblah 11/13/1981");regex re("(\\d{2})/(\\d{2})/(\\d{4})"); // find a dateint const sub_matches[] = { 3, 1, 2 }; // year,month, day
sregex_token_iterator begin( input.begin(), input.end(), re, sub_matches ), end;// write all the words to std::cout
std::ostream_iterator< std::string > out_iter( std::cout, "\n" );std::copy( begin, end, out_iter );
}