实例 1
import re
s = 'abc, abc, defg, dds'
re.split('\W+', s) # 说明:\W 匹配任何非单词字符,即除字母、数字、下划线以外的字符
# 运行结果:
['abc', 'abc', 'defg', 'dds']
实例 2
import re
s = 'abc, abc, defg, dds'
re.split('(\W+)', s) # 说明:如果模式中使用了捕获括号 '()',结果会同时返回被匹配到的分隔符;字符集 '[]' 没有这个效果
# 运行结果:
['abc', ', ', 'abc', ', ', 'defg', ', ', 'dds']
实例 3
import re
s = 'abc, abc, defg, dds'
re.split('(\W+)', s, 1) # 说明:当前字符串只切分1次
运行结果:
['abc', ', ', 'abc, defg, dds']
实例 4
import re
s = 'abc, abc, defg, dds'
re.split('wxy*', s) # 说明:没有可匹配的项,返回原来的字符串。
运行结果:
['abc, abc, defg, dds']
实例 5
import re
line = 'aaa bbb ccc;ddd eee,fff'
re.split(r'[;,]',line) # 按多个不同的单字符分隔符切割时,把这些字符放在字符集 [ ] 中
运行结果:
['aaa bbb ccc', 'ddd eee', 'fff']
实例 6
import re
line = 'aaa bbb ccc;ddd eee,fff'
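re.split(r'[;,\s]',line) # 按分号、逗号以及任意空白字符切割
运行结果:
['aaa', 'bbb', 'ccc', 'ddd', 'eee', 'fff']
# 注意:如果分隔符连续出现(例如 'ddd   eee' 中间有三个空格),相邻分隔符之间会切出空字符串 '',结果变为 ['aaa', 'bbb', 'ccc', 'ddd', '', '', 'eee', 'fff']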
实例 7
import re
file_name = 'F:\\02-data\\data_standar\\0224整年-Exported.csv'
print(re.split('[\\\, .]', file_name))
['F:', '02-data', 'data_standar', '0224整年-Exported', 'csv']
实例8
>>> re.split(r'\W+', 'Words, words, words.')
# ['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.')
# ['Words', ', ', 'words', ', ', 'words', '.', '']
>>> re.split(r'\W+', 'Words, words, words.', 1)
# ['Words', 'words, words.']
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
# ['0', '3', '9']
>>> re.split(r'(\W+)', '...words, words...')
# ['', '...', 'words', ', ', 'words', '...', '']
>>> re.split(r'\b', 'Words, words, words.')
# ['', 'Words', ', ', 'words', ', ', 'words', '.']
实例9
>>> re.split(r'\W*', '...words...')
# ['', '', 'w', 'o', 'r', 'd', 's', '', '']
>>> re.split(r'(\W*)', '...words...')
# ['', '...', '', '', 'w', '', 'o', '', 'r', '', 'd', '', 's', '...', '', '', '']
str(re.split('[-.]', 'filter_Molas_(2019-11-15_2020-03-28)_高层研究-山东泰山队.csv'))
Out[4]: "['filter_Molas_(2019', '11', '15_2020', '03', '28)_高层研究', '山东泰山队', 'csv']"
str(re.split('[-.]', 'filter_Molas_(2019-11-15_2020-03-28)_高层研究-山东泰山队.csv')[-2])
Out[5]: '山东泰山队'
实例10
案例:用split()函数分割一个字符串并转换成列表
import re
s = "abcabcacc"
l = re.split("b", s)
print('运行结果为:', l)
#运行结果为:['a', 'ca', 'cacc']
对id这一列,提取前面的数字部分
b = data.loc[:, 'id'].apply(lambda x: re.search('\d+', x).group())
语法
re.search(pattern, string, flags=0)
>>> re.match("c", "abcdef") # No match
>>> re.search("c", "abcdef") # Match
<re.Match object; span=(2, 3), match='c'>
>>> re.match("c", "abcdef") # No match
>>> re.search("^c", "abcdef") # No match
>>> re.search("^a", "abcdef") # Match
<re.Match object; span=(0, 1), match='a'>
>>> re.match('X', 'A\nB\nX', re.MULTILINE) # No match
>>> re.search('^X', 'A\nB\nX', re.MULTILINE) # Match
<re.Match object; span=(4, 5), match='X'>
>>> m = re.search(r'(?<=-)\w+', 'spam-egg')
>>> m.group(0)
'egg'
>>> import re
>>> m = re.search('(?<=abc)def', 'abcdef')
>>> m.group(0)
'def'
ids_list = data.loc[:, 'id'].apply(lambda x: re.search('\d+', x).group())
id 列的元素形如 '00005、'00228、……、'00263(每个值开头带一个单引号),匹配后的结果为 00005、00228、……、00263(只保留数字部分)
>>> re.findall(r'\bf[a-z]*', 'which foot or hand fell fastest')
['foot', 'fell', 'fastest']
#r'\bfoo\b' matches 'foo', 'foo.', '(foo)', 'bar foo baz' but not 'foobar' or 'foo3'
>>> re.findall(r'(\w+)=(\d+)', 'set width=20 and height=10')
[('width', '20'), ('height', '10')]
>>> print(re.escape('https://www.python.org'))
https://www\.python\.org
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
>>> print('[%s]+' % re.escape(legal_chars))
[abcdefghijklmnopqrstuvwxyz0123456789!\#\$%\&'\*\+\-\.\^_`\|\~:]+
>>> operators = ['+', '-', '*', '/', '**']
>>> print('|'.join(map(re.escape, sorted(operators, reverse=True))))
/|\-|\+|\*\*|\*
[1] python 中re.split()的用法 2019.9
[2] 百科:python正则表达式;
[3] Python 中re.split()方法 2019.9
[4] Python正则表达式 ;
[5] re — Regular expression operations(Python 官方文档)