regex
就只是了解语法, 和cpp
的差异;#include
https://en.cppreference.com/w/cpp/regex
\0
结尾的字符串, std::string
;std::basic_regex
类型, std::regex
则是std::basic_regex
的别名;std::match_results
;std::basic_regex
std::match_results
()
生成的组std::sub_match
, 继承自std::pair
, 存储一对迭代器, 而不是string
; std::match_results::begin
使用;regex_match
: 正则匹配整个字符串;regex_search
: 正则匹配子串;regex_replace
: 将匹配内容按照指定规则进行替换;regex_iterator
: 规则搜索所有匹配串;regex_token_iterator
: 迭代所有组;regex_error
: 正则表达式非法syntax_option_type
: 使用正则版本, 是否区分大小写之类;match_flag_type
: 匹配行为控制template<
class CharT,
class Traits = std::regex_traits<CharT>
> class basic_regex;
CharT
数据类型;typedef
std::regex std::basic_regex<char>
std::wregex std::basic_regex<wchar_t>
常用的regex
就是别名;
// Constructor overloads of std::basic_regex (signatures as listed on cppreference).
// In every overload `f` selects the grammar/options used to compile the pattern;
// it defaults to std::regex_constants::ECMAScript.
//
// (1) Default constructor: builds an empty regular expression.
basic_regex(); // (1) (since C++11)
// (2) Compiles the null-terminated character string `s`.
explicit basic_regex(const CharT *s,
flag_type f = std::regex_constants::ECMAScript); // (2) (since C++11)
// (3) Compiles only the first `count` characters of `s`;
//     e.g. ("(aa)bcd", 4) compiles the pattern "(aa)".
basic_regex(const CharT *s, std::size_t count,
flag_type f = std::regex_constants::ECMAScript); // (3) (since C++11)
// (4) Copy constructor: copies the pattern held by `other`.
basic_regex(const basic_regex &other); // (4) (since C++11)
// (5) Move constructor (noexcept): takes over the pattern of `other`;
//     `other` is left in a valid but unspecified state.
basic_regex(basic_regex &&other) noexcept; // (5) (since C++11)
// (6) Compiles a std::basic_string with any traits/allocator (ST and SA are deduced);
//     the common std::string is std::basic_string<char>.
template <class ST, class SA>
explicit basic_regex(const std::basic_string<CharT, ST, SA> &str,
flag_type f = std::regex_constants::ECMAScript); // (6) (since C++11)
// (7) Compiles the character sequence in the iterator range [first, last).
template <class ForwardIt>
basic_regex(ForwardIt first, ForwardIt last,
flag_type f = std::regex_constants::ECMAScript); // (7) (since C++11)
// (8) Compiles the characters of an initializer list, e.g. {'a', 'b', 'c'}.
basic_regex(std::initializer_list<CharT> init,
flag_type f = std::regex_constants::ECMAScript); //(8) (since C++11)
1. 可能异常, 和实现有关
2. `std::regex_error` 如果正则非法就会抛出;
3. `std::regex_error` 如果正则非法就会抛出;
4. 可能异常, 和实现有关
5. 不会抛出异常 (移动构造函数声明为 `noexcept`)
6. `std::regex_error` 如果正则非法就会抛出;
7. `std::regex_error` 如果正则非法就会抛出;
8. `std::regex_error` 如果正则非法就会抛出;
// Overloads of std::regex_match (signatures as listed on cppreference).
// regex_match succeeds only if the regex matches the ENTIRE target sequence.
//
// BidirIt        bidirectional iterator type (an iterator category, not a linked list)
// [first, last)  the target character sequence
// m              match_results object that receives the result
// e              the regular expression object
// flags          flags that tune the matching behaviour (default: match_default)
//
// (1) Target given as an iterator range; the result is written to `m`.
template <class BidirIt,
class Alloc, class CharT, class Traits>
bool regex_match(BidirIt first, BidirIt last,
std::match_results<BidirIt, Alloc> &m,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(1) (since C++11)
// (2) Same as (1) but without a match_results output; only the bool is returned.
template <class BidirIt,
class CharT, class Traits>
bool regex_match(BidirIt first, BidirIt last,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(2) (since C++11)
// (3) Target given as a null-terminated character string; the result is written to `m`.
template <class CharT, class Alloc, class Traits>
bool regex_match(const CharT *str,
std::match_results<const CharT *, Alloc> &m,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(3) (since C++11)
// (4) Target given as a std::basic_string; the result is written to `m`,
//     whose iterator type is the string's const_iterator.
template <class STraits, class SAlloc,
class Alloc, class CharT, class Traits>
bool regex_match(const std::basic_string<CharT, STraits, SAlloc> &s,
std::match_results<
typename std::basic_string<CharT, STraits, SAlloc>::
const_iterator,
Alloc> &m,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(4) (since C++11)
// (5) Target given as a null-terminated character string; no match_results output.
template <class CharT, class Traits>
bool regex_match(const CharT *str,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); // (5) (since C++11)
// (6) Target given as a std::basic_string; no match_results output.
template <class STraits, class SAlloc,
class CharT, class Traits>
bool regex_match(const std::basic_string<CharT, STraits, SAlloc> &s,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(6) (since C++11)
// (7) Deleted: a temporary (rvalue) string cannot be combined with a match_results
//     output, because `m` would store iterators into a string that is destroyed
//     at the end of the full expression.
template <class STraits, class SAlloc,
class Alloc, class CharT, class Traits>
bool regex_match(const std::basic_string<CharT, STraits, SAlloc> &&,
std::match_results<
typename std::basic_string<CharT, STraits, SAlloc>::
const_iterator,
Alloc> &,
const std::basic_regex<CharT, Traits> &,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default) = delete; //(7) (since C++11)
true
匹配, false
不匹配
std::match_results
: 执行后不管成功与否都会修改;# If the match does not exist:
m.ready() == true
m.empty() == true
m.size() == 0
# If the match exists:
m.ready() true
m.empty() false
m.size() number of marked subexpressions plus 1, that is, 1 + e.mark_count()
# 用迭代器表示匹配部分前的结果; 比字符串或std::string更灵活;
m.prefix().first first
m.prefix().second first
m.prefix().matched false (the match prefix is empty)
# 匹配的后面字符串
m.suffix().first last
m.suffix().second last
m.suffix().matched false (the match suffix is empty)
# 0 表示整个串
m[0].first first
m[0].second last
m[0].matched true (the entire sequence is matched)
# n > 0 则表示第 n 个组;
m[n].first the start of the sequence that matched marked sub-expression n, or last if the subexpression did not participate in the match
m[n].second the end of the sequence that matched marked sub-expression n, or last if the subexpression did not participate in the match
m[n].matched true if sub-expression n participated in the match, false otherwise
# 具体可了解 match_results 类; 其元素 sub_match 重载了 operator<< , 所以 m[n] 可以直接输出;
// Overloads of std::regex_search (signatures as listed on cppreference).
// regex_search succeeds if the regex matches ANY sub-sequence of the target.
//
// BidirIt is a bidirectional iterator type (an iterator category, not a linked list).
// (1) Target given as the range [first, last), regex `e`, result written to `m`.
template<class BidirIt,
class Alloc, class CharT, class Traits> bool
regex_search(BidirIt first, BidirIt last,
std::match_results<BidirIt, Alloc> &m,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(1) (since C++11)
// (2) Like (1), but the target is a null-terminated character string.
template <class CharT, class Alloc, class Traits>
bool regex_search(const CharT *str,
std::match_results<const CharT *, Alloc> &m,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(2) (since C++11)
// (3) Like (1), but the target is a std::basic_string.
template <class STraits, class SAlloc,
class Alloc, class CharT, class Traits>
bool regex_search(const std::basic_string<CharT, STraits, SAlloc> &s,
std::match_results<
typename std::basic_string<CharT, STraits, SAlloc>::const_iterator,
Alloc> &m,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(3) (since C++11)
// (4) Like (1), but without a match_results output.
template <class BidirIt,
class CharT, class Traits>
bool regex_search(BidirIt first, BidirIt last,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(4) (since C++11)
// (5) Like (2), but without a match_results output.
template <class CharT, class Traits>
bool regex_search(const CharT *str,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(5) (since C++11)
// (6) Like (3), but without a match_results output.
template <class STraits, class SAlloc,
class CharT, class Traits>
bool regex_search(const std::basic_string<CharT, STraits, SAlloc> &s,
const std::basic_regex<CharT, Traits> &e,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(6) (since C++11)
// (7) Deleted counterpart of (3): a temporary (rvalue) string is rejected, because
//     `m` would store iterators into a string that is destroyed immediately.
template <class STraits, class SAlloc,
class Alloc, class CharT, class Traits>
bool regex_search(const std::basic_string<CharT, STraits, SAlloc> &&,
std::match_results<
typename std::basic_string<CharT, STraits, SAlloc>::const_iterator,
Alloc> &,
const std::basic_regex<CharT, Traits> &,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default) = delete; //(7) (since C++11)
If the match does not exist:
m.ready() == true
m.empty() == true
m.size() == 0
If the match exists:
m.ready() true
m.empty() false
m.size() number of marked subexpressions plus 1, that is, 1 + e.mark_count()
m.prefix().first first
m.prefix().second m[0].first
m.prefix().matched m.prefix().first != m.prefix().second
m.suffix().first m[0].second
m.suffix().second last
m.suffix().matched m.suffix().first != m.suffix().second
m[0].first the start of the matching sequence
m[0].second the end of the matching sequence
m[0].matched true
m[n].first the start of the sequence that matched marked sub-expression n, or last if the subexpression did not participate in the match
m[n].second the end of the sequence that matched marked sub-expression n, or last if the subexpression did not participate in the match
m[n].matched true if sub-expression n participated in the match, false otherwise
// Overloads of std::regex_replace (signatures as listed on cppreference).
// Every match of `re` in the input is replaced by the expansion of `fmt`.
//
// OutputIt  output iterator type; overloads (1)-(2) write through `out` and return
//           the iterator as it stands after all insertions.
// BidirIt   bidirectional iterator type (an iterator category, not a linked list).
//
// (1) Input is the range [first, last), regex `re`, `fmt` is a std::basic_string.
template <class OutputIt, class BidirIt,
class Traits, class CharT,
class STraits, class SAlloc>
OutputIt regex_replace(OutputIt out, BidirIt first, BidirIt last,
const std::basic_regex<CharT, Traits> &re,
const std::basic_string<CharT, STraits, SAlloc> &fmt,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(1) (since C++11)
// (2) Like (1), but `fmt` is a null-terminated character string.
template <class OutputIt, class BidirIt,
class Traits, class CharT>
OutputIt regex_replace(OutputIt out, BidirIt first, BidirIt last,
const std::basic_regex<CharT, Traits> &re,
const CharT *fmt,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(2) (since C++11)
// (3) Input is a std::basic_string and the result is returned as a new string
//     instead of being written through an iterator; `fmt` is a std::basic_string.
template <class Traits, class CharT,
class STraits, class SAlloc,
class FTraits, class FAlloc>
std::basic_string<CharT, STraits, SAlloc>
regex_replace(const std::basic_string<CharT, STraits, SAlloc> &s,
const std::basic_regex<CharT, Traits> &re,
const std::basic_string<CharT, FTraits, FAlloc> &fmt,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(3) (since C++11)
// (4) Like (3), but `fmt` is a null-terminated character string.
template <class Traits, class CharT,
class STraits, class SAlloc>
std::basic_string<CharT, STraits, SAlloc>
regex_replace(const std::basic_string<CharT, STraits, SAlloc> &s,
const std::basic_regex<CharT, Traits> &re,
const CharT *fmt,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(4) (since C++11)
// (5) Input `s` is a null-terminated character string, the result is returned as a
//     std::basic_string<CharT>; `fmt` is a std::basic_string.
template <class Traits, class CharT,
class STraits, class SAlloc>
std::basic_string<CharT>
regex_replace(const CharT *s,
const std::basic_regex<CharT, Traits> &re,
const std::basic_string<CharT, STraits, SAlloc> &fmt,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(5) (since C++11)
// (6) Like (5), but `fmt` is a null-terminated character string.
template <class Traits, class CharT>
std::basic_string<CharT>
regex_replace(const CharT *s,
const std::basic_regex<CharT, Traits> &re,
const CharT *fmt,
std::regex_constants::match_flag_type flags =
std::regex_constants::match_default); //(6) (since C++11)
fmt
#include <iostream>
#include <regex>

// Demonstrates the "$0" placeholder of regex_replace's format string.
int main() {
    // ".+" matches the whole input, and "$0" expands to that whole match,
    // so the replacement is simply the input followed by "!!".
    std::cout << std::regex_replace("hello, i'm xiaoli", std::regex(".+"), "$0!!") << std::endl;
}
$ g++ test.cpp && ./a.out
hello, i'm xiaoli!!
#include <iostream>
#include <regex>

// Demonstrates the "$1" placeholder (first capture group) of regex_replace.
int main() {
    // Group 1 is "hello, i'm " and already ends with a space, so the format string
    // must not add another one; otherwise the output contains a double space.
    std::cout << std::regex_replace("hello, i'm xiaoli", std::regex("(hello, i'm )(\\w+)"), "$1xiaowang") << std::endl;
}
$ g++ test.cpp && ./a.out
hello, i'm xiaowang
#include <iostream>
#include <regex>

// Demonstrates the "$`" placeholder: the part of the input that precedes the match.
int main() {
    // The regex matches the single quote; it is replaced by a newline, the text to
    // the left of the match ("hello, i"), and another newline.
    std::cout << std::regex_replace("hello, i'm xiaoli", std::regex("'"), "\n$`\n") << std::endl;
}
即正则表达式'
匹配串(不包含匹配字符串本身)
左边的内容;
$` == hello, i
$ g++ test.cpp && ./a.out
hello, i
hello, i
m xiaoli
#include <iostream>
#include <regex>

// Demonstrates the "$'" placeholder: the part of the input that follows the match.
int main() {
    // The regex matches the single quote; it is replaced by a newline, the text to
    // the right of the match ("m xiaoli"), and another newline.
    std::cout << std::regex_replace("hello, i'm xiaoli", std::regex("'"), "\n$'\n") << std::endl;
}
同样的道理, 是去除匹配串后的右边串;
$ g++ test.cpp && ./a.out
hello, i
m xiaoli
m xiaoli
https://www.geeksforgeeks.org/regex_replace-in-cpp-replace-the-match-of-a-string-using-regex_replace/
regex_iterator
regex_search
一样; 只是match_results
变为iter
;只输出匹配
#include <iostream>
#include <iterator>
#include <regex>
#include <string>

// Walks over every match of a regex in a string with std::sregex_iterator.
int main()
{
    const std::string s = "Quick brown fox.";

    // One "word" is a maximal run of non-whitespace characters.
    std::regex words_regex("[^\\s]+");
    auto words_begin = std::sregex_iterator(s.begin(), s.end(), words_regex);
    // A default-constructed iterator is the end-of-sequence marker.
    auto words_end = std::sregex_iterator();

    std::cout << "Found " << std::distance(words_begin, words_end) << " words:\n";

    for (std::sregex_iterator i = words_begin; i != words_end; ++i)
    {
        // Dereferencing yields the match_results of the current match;
        // bind by reference to avoid copying it on every iteration.
        const std::smatch &match = *i;
        std::string match_str = match.str();
        std::cout << match_str << '\n';
    }
}
Found 3 words:
Quick
brown
fox.
regex_token_iterator
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <regex>

// Shows the two common uses of std::sregex_token_iterator:
// splitting on a separator (sub-match index -1) and extracting one capture group.
int main()
{
    // Tokenization (non-matched fragments)
    // Note that regex is matched only two times; when the third value is obtained
    // the iterator is a suffix iterator.
    const std::string text = "Quick brown fox.";
    const std::regex ws_re("\\s+"); // whitespace
    std::copy(std::sregex_token_iterator(text.begin(), text.end(), ws_re, -1),
              std::sregex_token_iterator(),
              std::ostream_iterator<std::string>(std::cout, "\n"));

    std::cout << '\n';

    // Iterating the first submatches
    // The sample must contain two anchor tags so that both URLs are extracted.
    const std::string html = R"(<p><a href="http://google.com">google</a> )"
                             R"(< a HREF ="http://cppreference.com">cppreference</a>\n</p>)";
    // Capture group 1 is the quoted href value; icase makes "A"/"HREF" case-insensitive.
    const std::regex url_re(R"!!(<\s*A\s+[^>]*href\s*=\s*"([^"]*)")!!", std::regex::icase);
    std::copy(std::sregex_token_iterator(html.begin(), html.end(), url_re, 1),
              std::sregex_token_iterator(),
              std::ostream_iterator<std::string>(std::cout, "\n"));
}
Quick
brown
fox.
http://google.com
http://cppreference.com