二十二、正则表达式补充

import re

'''
正则表达式:
re.match:从头匹配
re.search:浏览全部字符串,匹配第一个符合规则的字符串
re.findall():将匹配到的所有内容都放置在一个列表中
re.finditer():与findall类似,但返回的是由匹配对象组成的迭代器
re.split():按照匹配到的内容分割字符串
re.sub():替换字符串中的匹配项
'''

'''
1.match
'''
# match() only tries the pattern at the very start of the string.
# This pattern contains no groups at all.
origin = "hello tom bcd tom lge tom acd 19"
# Raw string: "\w" in a normal string is an invalid escape sequence.
r = re.match(r"h\w+", origin)
print(r.group())      # the whole matched text
print(r.groups())     # tuple of captured groups (empty here: no parentheses)
print(r.groupdict())  # dict of named groups (empty here: no named groups)
print("-------------------------match1----------------------------")
---------------------------------------------------------------------
hello
()
{}
-------------------------match1----------------------------
---------------------------------------------------------------------
# Wrapping the whole pattern in parentheses exposes the match as group 1.
# Relies on `origin` assigned by the preceding snippet.
# Raw string: "\w" in a normal string is an invalid escape sequence.
r = re.match(r"(h\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # one captured group
print(r.groupdict())  # still empty: the group is not named
print("--------------------------match2---------------------------")
---------------------------------------------------------------------
hello
('hello',)
{}
--------------------------match2---------------------------
---------------------------------------------------------------------
# Two groups: the leading "h" and the remaining word characters.
# Relies on `origin` assigned by the preceding snippet.
# Raw string: "\w" in a normal string is an invalid escape sequence.
r = re.match(r"(h)(\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # both captured groups, in pattern order
print(r.groupdict())  # still empty: neither group is named
print("--------------------------match3---------------------------")
---------------------------------------------------------------------
hello
('h', 'ello')
{}
--------------------------match3---------------------------
---------------------------------------------------------------------
# Named groups use the (?P<name>...) syntax; the names become the keys of
# groupdict(). Relies on `origin` assigned by the preceding snippet.
# The group names n1/n2 are required: "(?P" without "<name>" is rejected by
# the re module as an unknown extension.
r = re.match(r"(?P<n1>h)(?P<n2>\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # named groups are still returned positionally
print(r.groupdict())  # name -> captured text for every named group
print("--------------------------match4---------------------------")

---------------------------------------------------------------------


hello
('h', 'ello')
{'n1': 'h', 'n2': 'ello'}
--------------------------match4---------------------------
---------------------------------------------------------------------
'''
2.search:全字符串匹配
'''
# search() scans the whole string and returns the first location that matches.
origin = "hello tom bcd tom lge tom acd 19"
# Group 1 is unnamed, the trailing digit is captured under the name "name".
# "(?P" must be followed by "<name>", otherwise re raises an error.
r = re.search(r"(t\w+).*(?P<name>\d)$", origin)
print(r.group())      # the whole matched text
print(r.groups())     # every captured group, named or not
print(r.groupdict())  # only the named group
print("--------------------------search1---------------------------")
---------------------------------------------------------------------

tom bcd tom lge tom acd 19
('tom', '9')
{'name': '9'}
--------------------------search1---------------------------
---------------------------------------------------------------------
# The literal "t" sits outside the parentheses, so it is part of the match
# but not part of the captured group.
origin = "hello tom bcd tom lge tom acd 19"
# Raw string: "\w" in a normal string is an invalid escape sequence.
r = re.search(r"t(\w+)", origin)
print(r.group())      # the whole matched text
print(r.groups())     # the captured group only
print(r.groupdict())  # empty: no named groups
print("--------------------------search2---------------------------")
---------------------------------------------------------------------
tom
('om',)
{}
--------------------------search2---------------------------
---------------------------------------------------------------------
'''
3.findall:匹配到的字符串放到列表(分组和不分组)
分组提取:从左到右,从外到内,有几个括号就取几次
'''
# Without groups, findall() returns the matched substrings themselves.
# Matches do not overlap: scanning resumes after the end of each match.
# Raw string: "\d" and "\w" in a normal string are invalid escape sequences.
r = re.findall(r"\d+\w\d+", "a2b3c4d5")
print(r)
print("--------------------------findall1---------------------------")
---------------------------------------------------------------------

['2b3', '4d5']
--------------------------findall1---------------------------
---------------------------------------------------------------------
# An empty pattern matches the empty string at every position of the
# 8-character input, i.e. before each character and once at the end.
r = re.findall(pattern="", string="a2b3c4d5")
print(r)
print("--------------------------findall2---------------------------")
---------------------------------------------------------------------
['', '', '', '', '', '', '', '', '']
--------------------------findall2---------------------------
---------------------------------------------------------------------
# With groups in the pattern, findall() returns one tuple per match holding
# the same values that groups() would return for that match.
origin = "hello tomm bcd tomm lge tomm acd 19"
# Raw string: "\w" in a normal string is an invalid escape sequence.
r = re.findall(r"(t)(\w+)(m)", origin)
print(r)
print("--------------------------findall3---------------------------")
---------------------------------------------------------------------
[('t', 'om', 'm'), ('t', 'om', 'm'), ('t', 'om', 'm')]
--------------------------findall3---------------------------
---------------------------------------------------------------------
# An extra pair of parentheses around the whole pattern adds the full match
# as the first element of each tuple (outer groups are numbered before the
# groups nested inside them).
origin = "hello tomm bcd tomm lge tomm acd 19"
# Raw string: "\w" in a normal string is an invalid escape sequence.
r = re.findall(r"((t)(\w+)(m))", origin)
print(r)
print("--------------------------findall4---------------------------")
---------------------------------------------------------------------
[('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm'), ('tomm', 't', 'om', 'm')]
--------------------------findall4---------------------------
---------------------------------------------------------------------
# Nested groups: group 2 is "\w+(m)" as a whole, group 3 is the inner "(m)".
# Groups are extracted left to right, outer before inner.
origin = "hello tomn bcd tomn lge tomn acd 19"
# Raw string: "\w" in a normal string is an invalid escape sequence.
r = re.findall(r"(t)(\w+(m))(n)", origin)
print(r)
print("--------------------------findall5---------------------------")
---------------------------------------------------------------------
[('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n'), ('t', 'om', 'm', 'n')]
--------------------------findall5---------------------------
---------------------------------------------------------------------
'''
4.finditer():返回迭代器
'''
# finditer() yields one match object per match instead of building a list,
# so group(), groups() and groupdict() are available for every hit.
origin = "hello tomn bcd tomn lge tomn acd 19"
# The last group is named "name"; "(?P" must be followed by "<name>",
# otherwise re raises an error.
r = re.finditer(r"(t)(\w+(m))(?P<name>n)", origin)
print(r)
for i in r:
    print(r)  # NOTE: prints the iterator object itself, not the current match
    print(i.group())
    print(i.groups())
    print(i.groupdict())
print("--------------------------finditer1---------------------------")
---------------------------------------------------------------------



tomn
('t', 'om', 'm', 'n')
{'name': 'n'}

tomn
('t', 'om', 'm', 'n')
{'name': 'n'}

tomn
('t', 'om', 'm', 'n')
{'name': 'n'}
--------------------------finditer1---------------------------
---------------------------------------------------------------------
'''
5.re.split():分割
split(pattern, string, maxsplit=0, flags=0):
pattern:正则
string:字符串
maxsplit:最大分割次数
flags:标志位,用于控制正则表达式的匹配方式,如:是否区分大小写,多行匹配等等
'''
origin = "hello tomn bcd tomn lge tomn acd 19"
# str.split: literal separator, splits at every occurrence.
print(origin.split("t"))
# re.split: regex separator; maxsplit=1 stops after the first split.
# maxsplit is passed by keyword because passing it positionally is deprecated.
r = re.split(r"t\w+", origin, maxsplit=1)
print(r)
print("--------------------------split1---------------------------")
---------------------------------------------------------------------
['hello ', 'omn bcd ', 'omn lge ', 'omn acd 19']
['hello ', ' bcd tomn lge tomn acd 19']
--------------------------split1---------------------------
---------------------------------------------------------------------
# When the separator pattern contains a capturing group, the captured
# separator text is kept in the result list between the split pieces.
# Relies on `origin` assigned by the preceding snippet.
r = re.split(r"(t\w+)", origin, maxsplit=1)
print(r)
print("--------------------------split2---------------------------")
---------------------------------------------------------------------
['hello ', 'tomn', ' bcd tomn lge tomn acd 19']
--------------------------split2---------------------------
---------------------------------------------------------------------
# Placing the group after "t" and before "n" keeps only the text between
# them: the returned separator no longer contains the "t" and the "n".
# Relies on `origin` assigned by the preceding snippet.
r = re.split(r"t(\w+)n", origin, maxsplit=1)
print(r)
print("--------------------------split3---------------------------")

---------------------------------------------------------------------
['hello ', 'om', ' bcd tomn lge tomn acd 19']
--------------------------split3---------------------------
---------------------------------------------------------------------
'''
计算器
'''
# Skeleton of a calculator: repeatedly replace the innermost parenthesised
# sub-expression with its value until no parentheses remain.
source = "1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))"


def func(x):
    """Placeholder evaluator: ignore the expression text *x* and return 1."""
    return 1


while True:
    print(source)
    # "[^()]+" cannot contain parentheses, so only an innermost "(...)" matches.
    # With one capturing group and maxsplit=1 a hit yields exactly three items:
    # text before the parentheses, their content, and the text after them.
    result = re.split(r"\(([^()]+)\)", source, maxsplit=1)
    if len(result) == 3:
        before, content, after = result
        r = func(content)
        # Splice the computed value back in place of the "(...)" that was cut out.
        source = before + str(r) + after
    else:
        # No parentheses left: evaluate the remaining flat expression and stop.
        m = func(source)
        print(m)
        break
---------------------------------------------------------------------
1-2*((6 -30+(-40.0/5)*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*(-9-2*5/3+7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
1-2*((6 -30+1*1)-(-4*3)/(16-3*2))
1-2*(1-(-4*3)/(16-3*2))
1-2*(1-1/(16-3*2))
1-2*(1-1/1)
1-2*1
1
---------------------------------------------------------------------
'''
6.re.sub():
sub(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项
pattern:正则
repl:指定替换后的字符串
string:要替换的字符串
count:替换次数,默认所有
flags:标志位
subn(pattern, repl, string, count=0, flags=0):用于替换字符串中的匹配项,并返回替换次数
'''
origin = "fsd2agds3gsd4gsdga5gas7g8a8sdf"
# sub(): replace only the first two runs of digits.
# count is passed by keyword because passing it positionally is deprecated.
r = re.sub(r"\d+", "OOO", origin, count=2)
print(r)
# subn(): replace every run of digits and also report how many were replaced;
# the result is a (new_string, number_of_substitutions) tuple.
r = re.subn(r"\d+", "OOO", origin)
print(r)
---------------------------------------------------------------------
fsdOOOagdsOOOgsd4gsdga5gas7g8a8sdf
('fsdOOOagdsOOOgsdOOOgsdgaOOOgasOOOgOOOaOOOsdf', 7)
---------------------------------------------------------------------
============================================================================================================

你可能感兴趣的:(python,正则表达式,python)