python爬虫——正则获取手机号

用正则匹配网页手机号

1. 安装requests库、beautifulsoup4库(re模块是Python标准库自带的,无需安装)

注意:re 属于标准库,执行 pip install re 会报错,直接 import re 即可
cmd → pip install requests → 回车
cmd → pip install beautifulsoup4 → 回车

2. 导入库

import requests
import re
from bs4 import BeautifulSoup

3. 调用网址

 # Send an HTTP GET request for the page to scrape; the response is kept in r.
 r = requests.get("http://www.haomahao.com/")

4. 编写正则表达式

# Phone-number pattern: a "1", then one digit from 3/5/7/8/9, then nine digits.
# Written as a raw string so the backslash in \d reaches the regex engine
# unchanged instead of being treated as an (invalid) string escape.
parttern = r"1[35789]\d{9}"

5.完整代码

import requests
import re
from bs4 import BeautifulSoup

def down(url="http://www.haomahao.com/", path="手机号.txt"):
    """Scrape phone numbers from a web page and write them to a text file.

    The page at ``url`` is downloaded, its markup is stripped with
    BeautifulSoup, and every substring of the remaining text that consists of
    a "1", one digit from 3/5/7/8/9 and nine further digits is collected.
    The matches are written to ``path`` one per line, in the order found
    (duplicates are kept). An existing file is overwritten.

    Args:
        url: Address of the page to scrape.
        path: Output file, written as UTF-8.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the output file cannot be opened or written.
    """
    # A timeout keeps the script from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    response.raise_for_status()
    # Decode with the encoding detected from the body to avoid garbled text.
    response.encoding = response.apparent_encoding

    page_text = BeautifulSoup(response.text, "html.parser").get_text()

    # Raw string: the backslash in \d must reach the regex engine unchanged.
    pattern = r"1[35789]\d{9}"
    numbers = re.findall(pattern, page_text)

    # The context manager closes the file even if the write raises.
    with open(path, mode="w", encoding="utf-8") as out_file:
        out_file.write("\n".join(numbers))
# Script entry point: run the scraper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
   down()

你可能感兴趣的:(python,正则表达式)