作业笔记04_regex

按下面要求编写正则表达式

  1. 电子邮箱验证

    import re
    
    def validateEmail(email):
        """Return 1 if *email* looks like an e-mail address, otherwise 0.

        An address is accepted when it is longer than 7 characters and has
        the shape ``local@domain``: at least one character before the ``@``,
        then an optional ``[``, a run of letters, digits, hyphens or dots,
        a dot, and a final label of 2-3 letters or 1-3 digits, optionally
        followed by ``]``.

        The 1/0 integer return value (rather than a bool) is kept for
        backward compatibility with existing callers.
        """
        # Anything of 7 characters or fewer is rejected without running the regex.
        if len(email) <= 7:
            return 0

        # \Z anchors at the true end of the string. A plain "$" would also
        # match just before a trailing newline and let "user@example.com\n"
        # through.
        pattern = r"^.+\@(\[?)[a-zA-Z0-9\-\.]+\.([a-zA-Z]{2,3}|[0-9]{1,3})(\]?)\Z"
        if re.match(pattern, email) is not None:
            return 1
        return 0
    
    # 测试与结果
    In [47]: validateEmail('[email protected]')
    Out[47]: 1
    
    In [48]: validateEmail('[email protected]')
    Out[48]: 0
    
    In [49]: validateEmail('[email protected]')
    Out[49]: 1
    
    In [50]: validateEmail('[email protected]')
    Out[50]: 1
    
  2. URL地址验证

    # Case-insensitive pattern for absolute http/https/ftp/ftps URLs.
    # The host may be a dotted domain name, "localhost", or a dotted-quad of
    # 1-3 digit groups, optionally followed by a port and a path/query.
    # The pattern ends with \Z instead of "$" so that a URL followed by a
    # trailing newline is rejected ("$" also matches just before a final "\n").
    regex = re.compile(
            r'^(?:http|ftp)s?://' # scheme: http, https, ftp or ftps
            r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain...
            r'localhost|' # localhost...
            r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
            r'(?::\d+)?' # optional port
            r'(?:/?|[/?]\S+)\Z', re.IGNORECASE) # optional path/query, no whitespace
    
    # 测试与结果
    regex.match('http://bbs.smartisan.com/thread-107969-1-1.html')
    regex.match('http://www.spring.org.uk/')
    regex.match('http://www.pixiv.net/member_illust.php?mode=manga&illust_id=60181180')
    regex.match('https://www.google.com/ 886')
    
  3. 查找HTML代码中的标签内容

    # Matches a complete <img ...> tag: the literal "<img", one or more
    # characters that are not ">", then the closing ">".
    # NOTE(review): the published pattern was truncated to "]+>" (the leading
    # "<img[^>" looks to have been stripped as if it were an HTML tag);
    # restored here so the pattern can yield the <img> tags in the sample output.
    findImg = re.compile(r'<img[^>]+>')
    htmls = '
  4. ![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/292178/f0945a3f-eec6-4200-b0f6-b04803453312_f_150x150.jpg)

    マスキングテープ Illustrated by ...

    pixiv事務局
  5. ![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267365/93f192fc-ae83-4e98-bff5-096826bcb0c4_f_150x150.jpg)

    「あしょんでよッ~うちの犬ログ~」マグカップ

    pixiv事務局
  6. ![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267373/89474534-f844-4ff7-8dc0-9016dc46d4a7_f_150x150.jpg)

    「そうしそうあい」トートバッグ

    pixiv事務局
  7. ![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267356/99649852-40e6-456e-8979-f464f917e5ce_f_150x150.jpg)
  8. ![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267359/b64a023b-8ff3-4881-ac1d-52eae1b6bf54_f_150x150.jpg)

    まだBOOTHにショップがない方はこちらから登録

" data-filter="lazy-image">
' findImg.findall(htmls) # 结果输出 Out[3]: ['![](http://i1.pixiv.net/c/150x150/img-master/img/2016/12/03/15/18/51/60214620_p0_master1200.jpg)', '![](http://i3.pixiv.net/c/150x150/img-master/img/2016/12/03/15/18/41/60214618_p0_master1200.jpg)', '![](http://i2.pixiv.net/c/150x150/img-master/img/2016/12/03/15/18/39/60214617_p0_master1200.jpg)', '![](http://i3.pixiv.net/c/150x150/img-master/img/2016/12/03/15/17/53/60214614_p0_master1200.jpg)', '![](http://i2.pixiv.net/c/150x150/img-master/img/2016/12/03/15/17/52/60214613_p0_master1200.jpg)', '![](http://i4.pixiv.net/c/150x150/img-master/img/2016/12/03/15/17/33/60214611_p0_master1200.jpg)', '![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/292213/9965b565-0c8c-4aa8-b652-1bf84510b48c_f_150x150.jpg)', '![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/292178/f0945a3f-eec6-4200-b0f6-b04803453312_f_150x150.jpg)', '![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267365/93f192fc-ae83-4e98-bff5-096826bcb0c4_f_150x150.jpg)', '![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267373/89474534-f844-4ff7-8dc0-9016dc46d4a7_f_150x150.jpg)', '![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267356/99649852-40e6-456e-8979-f464f917e5ce_f_150x150.jpg)', '![](https://s.booth.pm/77c6444b-e8fa-46d9-a5e2-6463c889a13b/i/267359/b64a023b-8ff3-4881-ac1d-52eae1b6bf54_f_150x150.jpg)']
  • 匹配正整数

    # A positive integer: a non-zero leading digit followed by any number of
    # digits, spanning the whole string (no sign, no decimal point, no spaces).
    # A raw string is used so that "\d" reaches the regex engine unchanged,
    # and the stray space after "^" is removed: with it, none of the inputs
    # below could match, which contradicts the recorded output.
    findPositiveInt = re.compile(r'^[1-9]\d*$')
    stringI = ['123', '3', '-453', 'jid 5', '2.345', '-432','34', '5321']
    # Print the match list for each candidate; an empty list means "not a positive integer".
    for i in stringI:
        print(findPositiveInt.findall(i))
    # 结果输出
    ['123']
    ['3']
    []
    []
    []
    []
    ['34']
    ['5321']
    
  • 你可能感兴趣的:(作业笔记04_regex)