Python爬虫随笔:爬取iciba上的单词发音文件

不废话,上代码

# 抓取iciba网站上的发音文件并存储到系统中
import os
import random
import re

import requests

def donwload_voice(word):
    """Download the two pronunciation MP3s of *word* from iciba.

    Fetches ``http://www.iciba.com/<word>``, extracts the audio file URLs
    from the returned page, and, when exactly two are found, saves them as
    ``./mp3/<word>_en.mp3`` (first match, treated as the British
    pronunciation) and ``./mp3/<word>_us.mp3`` (second match, treated as the
    American pronunciation). The ``./mp3`` directory is created if missing.

    On success a banner is printed; if the page does not yield exactly two
    URLs, ``ERROR`` and the list of matches are printed and nothing is
    written. The misspelled function name is kept for existing callers.

    Args:
        word: The word to look up; it is appended to the site URL and used
            as the stem of the output file names.

    Raises:
        requests.RequestException: If a request fails or times out, or if
            an audio download returns an HTTP error status.
        OSError: If the output directory or files cannot be written.
    """
    # Browser disguise. The commas matter: without them Python concatenates
    # the adjacent literals into one string and there is nothing to choose from.
    uapools = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
    ]
    headers = {"User-Agent": random.choice(uapools)}
    timeout = 10  # seconds; requests waits forever when no timeout is given

    r = requests.get('http://www.iciba.com/' + word, headers=headers, timeout=timeout)

    # Regex for the audio file addresses.
    # NOTE(review): the pattern in the published source was empty (apparently
    # lost when the article was extracted), which can only ever match empty
    # strings. This generic "absolute URL ending in .mp3" pattern is a
    # best-effort replacement -- confirm it against the live page markup.
    pattern = re.compile(r'''https?://[^\s"'<>()]+?\.mp3''')
    voices = pattern.findall(r.text)

    if len(voices) == 2:
        out_dir = './mp3/'
        filename1 = out_dir + word + '_en.mp3'     # first file: British pronunciation
        filename2 = out_dir + word + '_us.mp3'     # second file: American pronunciation

        # Fetch both clips before touching the disk so a failed download
        # does not leave a half-finished pair behind.
        voi_en = requests.get(voices[0], headers=headers, timeout=timeout)
        voi_us = requests.get(voices[1], headers=headers, timeout=timeout)
        # Refuse to save an HTTP error page under an .mp3 name.
        voi_en.raise_for_status()
        voi_us.raise_for_status()

        os.makedirs(out_dir, exist_ok=True)
        with open(filename1, 'wb') as f:
            f.write(voi_en.content)
        with open(filename2, 'wb') as f:
            f.write(voi_us.content)
        print('----------success-----------')
    else:
        print('ERROR')
        print(voices)
donwload_voice('good')     # example run: fetch both pronunciations of "good"

没有做太多的校验和错误处理,有兴趣的朋友可以继续完善。

你可能感兴趣的:(爬虫)