In [1]: line = "abc"
In [2]: line.startswith("a")
Out[2]: True
re模块
In [1]: str1 = 'imooc python'
In [2]: import re
In [3]: pa = re.compile(r'imooc')
In [5]: str2=pa.match('imooc')
In [7]: str2.group()
Out[7]: 'imooc'
In [8]: str2.span()
Out[8]: (0, 5)
# 不区分大小写
In [9]: pa = re.compile(r'imooc',re.I)
In [10]: str2 = pa.match(r'Imooc')
In [11]: str2.group()
Out[11]: 'Imooc'
. 匹配任意单个字符(换行符 \n 除外)
In [20]: ma = re.match(r'.',r'0')
In [21]: ma.group()
Out[21]: '0'
In [31]: ma = re.match(r'{..}','{ba}')
In [32]: ma.group()
Out[32]: '{ba}'
[]匹配一组字符中的单个字符
In [33]: ma = re.match(r'{[abc]}','{a}')
In [34]: ma.group()
Out[34]: '{a}'
In [35]: ma = re.match(r'{[abc]}','{ab}')
In [36]: ma.group()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-36-7c62fc675aee> in <module>()
----> 1 ma.group()
AttributeError: 'NoneType' object has no attribute 'group'
In [39]: ma = re.match(r'{[a-z]}','{a}')
In [40]: ma.group()
Out[40]: '{a}'
In [44]: ma = re.match(r'{[a-z]}','{A}',re.I)
In [45]: ma.group()
Out[45]: '{A}'
In [48]: ma = re.match(r'{[a-zA-Z0-9]}','{A}')
In [49]: ma.group()
Out[49]: '{A}'
\w匹配字母数字下划线,\W匹配相反
In [50]: ma = re.match(r'{[\w]}','{A}')
In [51]: ma.group()
Out[51]: '{A}'
\d匹配数字,\D匹配相反,\s匹配空白字符,\S匹配非空白字符
匹配 中括号 需要转义
In [54]: ma = re.match(r'\[\]','[]')
In [55]: ma
Out[55]: <_sre.SRE_Match at 0x7fbec557ad98>
In [56]: ma.group()
Out[56]: '[]'
多字符匹配(一次匹配多个字符)
In [4]: ma = re.match(r'[A-Z][a-z]','Aa')
In [5]: ma.group()
Out[5]: 'Aa'
* 代表匹配前一个字符0次或者多次
In [6]: ma = re.match(r'[A-Z][a-z]*','Aasadsasad')
In [7]: ma.group()
Out[7]: 'Aasadsasad'
In [8]: ma = re.match(r'[A-Z][a-z]*','AasAAAAAdsasad')
In [9]: ma.group()
Out[9]: 'Aas'
+ 代表匹配前一个字符1次或者多次
In [14]: ma = re.match(r'[_a-zA-Z]+[_\w]*','a79')
In [15]: ma.group()
Out[15]: 'a79'
In [16]: ma = re.match(r'[_a-zA-Z]+[_\w]*','_Aa79')
In [17]: ma.group()
Out[17]: '_Aa79'
? 代表匹配前一个字符0次或者1次
匹配0-99的数字([1-9]?[0-9] 的十位可省略,因此单个数字 0 也能匹配)
In [18]: ma = re.match(r'[1-9]?[0-9]','99')
In [19]: ma.group()
Out[19]: '99'
{m} 匹配前一个字符m次
In [21]: ma = re.match(r'[a-zA-Z0-9]{6}','abc123')
In [22]: ma.group()
Out[22]: 'abc123'
In [26]: ma = re.match(r'[a-zA-Z0-9]{6}','abc12')
In [27]: ma.group()
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-27-7c62fc675aee> in <module>()
----> 1 ma.group()
AttributeError: 'NoneType' object has no attribute 'group'
In [30]: ma = re.match(r'[a-zA-Z0-9]{6}@163.com','abc123@163.com')
In [31]: ma.group()
Out[31]: 'abc123@163.com'
{m,n} 匹配前一个字符m到n次
In [34]: ma = re.match(r'[a-zA-Z0-9]{6,10}@163.com','abc12345@163.com')
In [35]: ma.group()
Out[35]: 'abc12345@163.com'
非贪婪模式,尽可能少匹配
#匹配前一个0次
In [36]: ma = re.match(r'[0-9][a-z]*?','1bc')
In [37]: ma.group()
Out[37]: '1'
#匹配前一个1次
In [38]: ma = re.match(r'[0-9][a-z]+?','1bc')
In [39]: ma.group()
Out[39]: '1b'
匹配边界字符
^ 匹配字符串开头;写在 [] 内的开头时表示取非(例如 [^b] 匹配除 b 以外的任意字符)
>>> ma = re.match('[^b]+','tang123')
>>> ma.group()
'tang123'
>>>
$匹配字符串结尾
例子:只匹配以com结尾的
In [3]: ma = re.match('[\w]{4,10}@163.com','tang@163.comabc')
In [4]: ma.group()
Out[4]: 'tang@163.com'
In [5]: ma = re.match('[\w]{4,10}@163.com$','tang@163.comabc')
In [6]: ma
例子:只匹配以tang开头的
In [11]: ma = re.match(r'^tang[\w]{1,8}@163.com','tangxiao@163.com')
In [12]: ma
Out[12]: <_sre.SRE_Match at 0x7fe28101db28>
In [13]: ma = re.match(r'^tang[\w]{1,8}@163.com','xiaotang@163.com')
In [14]: ma
\A 匹配字符串的开头(指定的内容必须出现在整个字符串的起始位置)
a|b 代表匹配a或者b
In [15]: ma = re.match(r'abc|d','abc')
In [16]: ma.group()
Out[16]: 'abc'
In [17]: ma = re.match(r'abc|d','d')
In [18]: ma.group()
Out[18]: 'd'
(ab)括号中表达式作为一个分组
In [19]: ma = re.match(r'[\w]{4,6}@(163|126).com','tang@163.com')
In [20]: ma
Out[20]: <_sre.SRE_Match at 0x7fe281024e40>
In [22]: ma = re.match(r'[\w]{4,6}@(163|126).com','tang@127.com')
In [23]: ma
\<number> 匹配编号为 number 的分组所匹配到的字符串(且必须一样,例如 <book>book> 中前后两个 book> 相同)
In [24]: ma = re.match(r'<([\w]+>)\1','<book>book>' )
In [25]: ma.group()
Out[25]: '<book>book>'
起别名:(?P<name>...) 给分组起别名,(?P=name) 引用该别名分组匹配到的字符串
In [83]: ma = re.match(r'<(?P<mark>[\w]+>)(?P=mark)','<bo>bo>' )
In [84]: ma.groups()
Out[84]: ('bo>',)