C++相关闲碎记录(16)

1、正则表达式

(1)regex的匹配和查找接口
#include 
#include 
using namespace std;

void out (bool b)
{
    cout << ( b ? "found" : "not found") << endl;
}

int main()
{
    // find XML/HTML-tagged value (using default syntax):
    regex reg1("<.*>.*");
    bool found = regex_match ("value",   // data
                              reg1);                // regular expression
    out(found);
    
    // find XML/HTML-tagged value (tags before and after the value must match):
    regex reg2("<(.*)>.*");
    found = regex_match ("value",        // data
                         reg2);                     // regular expression
    out(found);

    // find XML/HTML-tagged value (using grep syntax):
    regex reg3("<\\(.*\\)>.*",regex_constants::grep);
    found = regex_match ("value",        // data
                         reg3);                     // regular expression
    out(found);

    // use C-string as regular expression (needs explicit cast to regex):
    found = regex_match ("value",        // data
                         regex("<(.*)>.*"));  // regular expression
    out(found);
    cout << endl;

    // regex_match() versus regex_search():
    found = regex_match ("XML tag: value", 
                         regex("<(.*)>.*"));         // fails to match
    out(found);
    found = regex_match ("XML tag: value", 
                         regex(".*<(.*)>.*.*"));     // matches
    out(found);
    found = regex_search ("XML tag: value", 
                          regex("<(.*)>.*"));        // matches
    out(found);
    found = regex_search ("XML tag: value", 
                          regex(".*<(.*)>.*.*"));    // matches
    out(found);
}
输出:
found
found
found
found

not found
found
found
found

regex_match()检验是否整个字符序列匹配某个正则表达式。

regex_search()检验是否部分字符序列匹配某个正则表达式。

regex_match(data, regex(pattern))

总是等价于

regex_search(data, regex("(.|\n)*" + pattern + "(.|\n)*")),其中(.|\n)*指任何数量和任意字符,.意指换行之外的任意字符,|表示or。

(2)处理次表达式
#include 
#include 
#include 
#include 
using namespace std;

int main()
{
    string data = "XML tag: the value.";
    cout << "data:             " << data << "\n\n";

    smatch m;  // for returned details of the match
    bool found = regex_search (data,
                               m, 
                               regex("<(.*)>(.*)"));  //出现\1则是代表与第一个小括号中要匹配的内容相同。

    // print match details:
    cout << "m.empty():        " << boolalpha << m.empty() << endl;
    cout << "m.size():         " << m.size() << endl;
    if (found) {
        cout << "m.str():          " << m.str() << endl;
        cout << "m.length():       " << m.length() << endl;
        cout << "m.position():     " << m.position() << endl;
        cout << "m.prefix().str(): " << m.prefix().str() << endl;
        cout << "m.suffix().str(): " << m.suffix().str() << endl;
        cout << endl;

        // iterating over all matches (using the match index):
        for (int i=0; ilength() << ")" << endl;
        }
    }
}
输出:
data:             XML tag: the value.

m.empty():        false
m.size():         4
m.str():          the value
m.length():       30
m.position():     9
m.prefix().str(): XML tag:
m.suffix().str(): .

m[0].str():       the value
m.str(0):         the value
m.position(0):    9
m[1].str():       tag-name
m.str(1):         tag-name
m.position(1):    10
m[2].str():       the value
m.str(2):         the value
m.position(2):    19
m[3].str():       tag-name
m.str(3):         tag-name
m.position(3):    30

matches:
 the value (length: 30)
 tag-name (length: 8)
 the value (length: 9)
 tag-name (length: 8)

smatch:针对“匹配string”而设计

cmatch:针对“匹配C-string(const char*)”而设计

wsmatch:针对“匹配wstring”而设计

wcmatch:针对“匹配wide C-string(const wchar_t*)”而设计

出现\1则是代表与第一个小括号中要匹配的内容相同。注意:\1必须与小括号配合使用

C++相关闲碎记录(16)_第1张图片

 (3)regex iterator
#include 
#include 
#include 
#include 
using namespace std;

int main()
{
    string data = "\n"
                  " Nico\n"
                  " Josuttis\n"
                  "\n";

    regex reg("<(.*)>(.*)");

    // iterate over all matches (using a regex_iterator):
    sregex_iterator pos(data.cbegin(),data.cend(),reg);
    sregex_iterator end;
    for ( ; pos!=end ; ++pos ) {
        cout << "match:  " << pos->str() << endl;
        cout << " tag:   " << pos->str(1) << endl;
        cout << " value: " << pos->str(2) << endl;
    }

    // use a regex_iterator to process each matched substring as element in an algorithm:
    sregex_iterator beg(data.cbegin(),data.cend(),reg);
    for_each (beg,end,[](const smatch& m) {
                          cout << "match:  " << m.str() << endl;
                          cout << " tag:   " << m.str(1) << endl;
                          cout << " value: " << m.str(2) << endl;
                      });
}
输出:
match:  Nico
 tag:   first
 value: Nico
match:  Josuttis
 tag:   last
 value: Josuttis
match:  Nico
 tag:   first
 value: Nico
match:  Josuttis
 tag:   last
 value: Josuttis
(4)regex token iterator
#include 
#include 
#include 
#include 
using namespace std;

int main()
{
    string data = "\n"
                  " Nico\n"
                  " Josuttis\n"
                  "\n";

    regex reg("<(.*)>(.*)");

    // iterate over all matches (using a regex_token_iterator):
    sregex_token_iterator pos(data.cbegin(),data.cend(), // sequence
                              reg,                       // token separator
                              {0,2});      // 0: full match, 2: second substring
    sregex_token_iterator end;
    for ( ; pos!=end ; ++pos ) {
        cout << "match:  " << pos->str() << endl;
    }
    cout << endl;

    string names = "nico, jim, helmut, paul, tim, john paul, rita";
    regex sep("[ \t\n]*[,;.][ \t\n]*");  // separated by , ; or . and spaces
    sregex_token_iterator p(names.cbegin(),names.cend(),  // sequence
                            sep,                          // separator
                            -1);        // -1: values between separators
    sregex_token_iterator e;
    for ( ; p!=e ; ++p ) {
        cout << "name:  " << *p << endl;
    }
}
输出:
match:  Nico
match:  Nico
match:  Josuttis
match:  Josuttis

name:  nico
name:  jim
name:  helmut
name:  paul
name:  tim
name:  john paul
name:  rita
(5)用于替换的正则表达式
#include 
#include 
#include 
#include 

using namespace std;

int main() {
    string data = "\n"
                  " Nico\n"
                  " Josuttis\n"
                  "\n";
    regex reg("<(.*)>(.*)");
    cout << regex_replace(data,
                          reg,                               
                          "<$1 value=\"$2\"/>") << endl;     //replacement
    cout << regex_replace(data,
                          reg,
                          "<\\1 value=\"\\2\"/>",
                           regex_constants::format_sed) << endl;
    string res2;
    regex_replace(back_inserter(res2),
                  data.begin(), data.end(),
                  reg,
                  "<$1 value=\"$2\"/>",
                  regex_constants::format_no_copy | regex_constants::format_first_only);
    cout << res2 << endl;
    return 0;
}
输出:

 
 



 
 


C++相关闲碎记录(16)_第2张图片

(6)regex flag
#include 
#include 
#include 
using namespace std;

int main()
{
    // case-insensitive find LaTeX index entries
    string pat1 = R"(\\.*index\{([^}]*)\})";       // first capture group
    string pat2 = R"(\\.*index\{(.*)\}\{(.*)\})";  // 2nd and 3rd capture group
    regex pat (pat1+"\n"+pat2,
               regex_constants::egrep|regex_constants::icase);

    // initialize string with characters from standard input:
    string data((istreambuf_iterator(cin)),
                istreambuf_iterator());

    // search and print matching index entries:
    smatch m;
    auto pos = data.cbegin();
    auto end = data.cend();
    for ( ; regex_search (pos,end,m,pat); pos=m.suffix().first) {
        cout << "match: " << m.str() << endl;
        cout << "  val: " << m.str(1)+m.str(2) << endl;
        cout << "  see: " << m.str(3) << endl;
    }
}

C++相关闲碎记录(16)_第3张图片

 (7)regex 异常
#include 
#include 

template 
std::string regexCode (T code)
{
    switch (code) {
      case std::regex_constants::error_collate:
        return "error_collate: "
               "regex has invalid collating element name";
      case std::regex_constants::error_ctype:
        return "error_ctype: "
               "regex has invalid character class name";
      case std::regex_constants::error_escape:
        return "error_escape: "
               "regex has invalid escaped char. or trailing escape";
      case std::regex_constants::error_backref:
        return "error_backref: "
               "regex has invalid back reference";
      case std::regex_constants::error_brack:
        return "error_brack: "
               "regex has mismatched '[' and ']'";
      case std::regex_constants::error_paren:
        return "error_paren: "
               "regex has mismatched '(' and ')'";
      case std::regex_constants::error_brace:
        return "error_brace: "
               "regex has mismatched '{' and '}'";
      case std::regex_constants::error_badbrace:
        return "error_badbrace: "
               "regex has invalid range in {} expression";
      case std::regex_constants::error_range:
        return "error_range: "
               "regex has invalid character range, such as '[b-a]'";
      case std::regex_constants::error_space:
        return "error_space: "
               "insufficient memory to convert regex into finite state";
      case std::regex_constants::error_badrepeat:
        return "error_badrepeat: "
               "one of *?+{ not preceded by valid regex";
      case std::regex_constants::error_complexity:
        return "error_complexity: "
               "complexity of match against regex over pre-set level";
      case std::regex_constants::error_stack:
        return "error_stack: "
               "insufficient memory to determine regex match";
    }
    return "unknown/non-standard regex error code";
}
#include 
#include 
#include "regexexception.hpp"
using namespace std;

int main()
{
  try {
    // initialize regular expression with invalid syntax:
    regex pat ("\\\\.*index\\{([^}]*)\\}",
               regex_constants::grep|regex_constants::icase);
    //...
  }
  catch (const regex_error& e) {
        cerr << "regex_error: \n"
             << " what(): " << e.what() << "\n"
             << " code(): " << regexCode(e.code()) << endl;
  }
}
(8)regex ECMAScript文法

C++相关闲碎记录(16)_第4张图片

C++相关闲碎记录(16)_第5张图片

C++相关闲碎记录(16)_第6张图片

 

你可能感兴趣的:(c++)