4.2 正则表达式
>>> import re #导入re模块
>>> text = 'alpha,beta....gamma delta' #测试用的字符串
>>> re.split('[\. ]+',text) #使用指定字符作为分隔符进行分隔
['alpha,beta', 'gamma', 'delta']
>>> re.split('[\. ]+',text,maxsplit = 1) #最多分隔1次
['alpha,beta', 'gamma delta']
>>> re.split('[\. ]+',text,maxsplit = 2) #最多分隔两次
['alpha,beta', 'gamma', 'delta']
>>> pat = '[a-zA-Z]+'
>>> re.findall(pat,text) #查找所有单词
['alpha', 'beta', 'gamma', 'delta']
>>> pat = '{name}'
>>> text = 'Dear {name}...'
>>> re.sub(pat,'Mr.Dong',text) #在text中寻找与pat匹配的项,用'Mr.Dong'替换
'Dear Mr.Dong...'
>>> s = 'a s d'
>>> re.sub('a|s|d','Good',s) #字符串替换
'Good Good Good'
>>> s = "It's a very good good idea"
>>> re.sub(r'(\b\w+) \1',r'\1',s) #处理连续的重复单词
"It's a very good idea"
>>> re.sub(r'((\w+) )\1',r'\2',s) #处理连续的重复单词
"It's a very goodidea"
>>> re.sub('a',lambda x:x.group(0).upper(),'aaa abc abde') #repl为可调用对象
'AAA Abc Abde'
>>> re.sub('[a-z]',lambda x:x.group(0).upper(),'aaa abc abde')
'AAA ABC ABDE'
>>> re.sub('[a-zA-Z]',lambda x:chr(ord(x.group(0))^32),'aaa abc abde') #ord返回字符的编码,chr把编码转换回字符;与32异或可切换ASCII字母的大小写
'AAA ABC ABDE'
>>> re.subn('a','dfg','aaa abc abde')
('dfgdfgdfg dfgbc dfgbde', 5)
>>> #返回新字符串和替换次数
>>> re.sub('a','dfg','aaa abc abde')
'dfgdfgdfg dfgbc dfgbde'
>>> re.escape('http://www.python.org') #字符串转义
'http://www\\.python\\.org'
4.2.4 使用正则表达式对象
4.2.5 子模式与match对象
>>> re.sub('a','dfg','aaa abc abde')
'dfgdfgdfg dfgbc dfgbde'
>>> re.escape('http://www.python.org') #字符串转义
'http://www\\.python\\.org'
>>>
>>>
>>> telNumber = '''Suppose my Phon No.is 0535-1234567,yours if 010-12345678,his is 025-87654321.'''
>>> pattern = re.compile(r'(\d{3,4})-(\d{7,8})')
>>> pattern.findall(telNumber)
[('0535', '1234567'), ('010', '12345678'), ('025', '87654321')]
>>> m = re.match(r"(\w+) (\w+)","Isaac Newton,physicist")
>>> m.group(0) #返回整个模式内容
'Isaac Newton'
>>> m.group(1) #返回第1个子模式内容
'Isaac'
>>> m.group(2) #返回第2个子模式内容
'Newton'
>>> m.group(1,2) #返回指定的多个子模式内容
('Isaac', 'Newton')