###正则表达式有什么用?
###下面来看一些例子
import re
# Sample text shared by the four patterns below.
s = '38x1x234x35x612x3yxxx'

# 1. Greedy ".*": a single match that runs from the first x to the last x.
patten1 = re.compile(r"x.*x")
print('1\n' + str(patten1.findall(s)))

# 2. "\w" demands one word character right after the leading x, so a bare
#    "xx" cannot match; the lazy ".*?" then stops at the next x.
patten2 = re.compile(r"x\w.*?x")
print('2\n' + str(patten2.findall(s)))

# 3. Lazy ".*?": several short, non-overlapping matches; with nothing
#    required between the two x's, "xx" matches as well.
patten3 = re.compile(r"x.*?x")
print('3\n' + str(patten3.findall(s)))

# 4. With a capture group, findall returns only the group: the leading x's
#    consumed by "x+" are dropped and each result ends at the first x or y.
patten4 = re.compile(r"x+(.*?[xy])")
print('4\n' + str(patten4.findall(s)))
运行结果:
1
['x1x234x35x612x3yxxx']
2
['x1x', 'x35x', 'x3yx']
3
['x1x', 'x35x', 'x3yx', 'xx']
4
['1x', '35x', '3y', 'x']
注:最后一个 'x' 是特殊情况——对末尾的 "xxx",x+ 回溯后只匹配前两个 x,捕获组里只剩最后一个 x,所以开头和结果都是 x。
### 实际应用:解析 HTML
得到所有链接:
import re
# Matches a src/href attribute and captures its quoted value.
#   \b(?:src|href)  the attribute name as an alternation; the original
#                   "[(src)(href)]" was a character class that matched any
#                   ONE of "(", ")", s, r, c, h, e, f, so class="..." or
#                   title="..." were reported as links too
#   (["']) ... \1   the closing quote must be the same as the opening one
_LINK_ATTR = re.compile(r'''\b(?:src|href)\s*=\s*(["'])(.*?)\1''')


def fomatLink(content):
    """Return the values of every ``src`` and ``href`` attribute in *content*.

    Args:
        content: HTML text to scan.

    Returns:
        A list of attribute values (strings) in the order they appear;
        an empty list when nothing matches.
    """
    # Group 1 is the quote character, group 2 the attribute value.
    return [match.group(2) for match in _LINK_ATTR.finditer(content)]
# NOTE(review): the HTML markup of this sample was lost (only the anchor text
# remained), so the call returned []. The tags below are reconstructed from
# the documented result -- confirm against the original page.
s = '''<a href="http://news.4399.com/hxjy/"><img src="http://imga5.5054399.com/upload_pic/2017/9/20/4399_15094597623.jpg">4399火线精英</a>'''
print(fomatLink(s))
运行结果:
['http://news.4399.com/hxjy/', 'http://imga5.5054399.com/upload_pic/2017/9/20/4399_15094597623.jpg']