# Downloaded audio files are named after the word they pronounce.
# -*- coding: utf-8 -*-
# @Time : 2022/07/19 22:00
# @Author : Gordon
import os
import requests
from pyquery import PyQuery as pq
import openpyxl
import pandas as pd
# HTTP headers sent with every request; the User-Agent is a desktop Chrome
# browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/103.0.0.0 Safari/537.36"
    ),
}
def main():
    """Load the word list, print it, then download audio for every word.

    Any exception raised by either step is caught here and printed instead
    of being propagated.
    """
    try:
        words = get_wordlist()
        print(words)
        look_up_words(words)
    except Exception as err:
        print(err)
def get_wordlist(path=r"C:\Users\cnliutz\Documents\python\words.xlsx"):
    """Read the words to look up from the ``list`` column of an Excel sheet.

    Args:
        path: Workbook to read. Defaults to the path this script was
            originally hard-coded to, so existing callers need no change.

    Returns:
        A list of strings, one per non-blank cell of the ``list`` column,
        in sheet order.

    Raises:
        KeyError: If the workbook has no column named ``list``.
    """
    df = pd.read_excel(path)
    if "list" not in df.columns:
        raise KeyError(f"column 'list' not found in {path}")
    # pandas reads blank cells as NaN (a float). Drop those and coerce the
    # remaining values to str so callers can safely build file names from them.
    return df["list"].dropna().astype(str).tolist()
def look_up_words(wordlist, output_dir=r"C:\Users\cnliutz\Documents\python"):
    """Download the Youdao pronunciation audio for each word in *wordlist*.

    Each word is saved as ``<word>.mp3`` inside *output_dir*. Words whose
    file already exists are skipped. A word for which the service returns a
    non-200 status or an empty body is printed and appended to
    ``missingwords.txt`` in the parent of the current working directory; no
    audio file is created for it.

    Args:
        wordlist: Iterable of words or phrases to fetch.
        output_dir: Directory that receives the ``.mp3`` files. Defaults to
            the directory this script was originally hard-coded to.

    Returns:
        None.
    """
    for text in wordlist:
        # Non-string entries (e.g. NaN from a blank spreadsheet cell) cannot
        # form a file name; skip them instead of aborting the whole run.
        if not isinstance(text, str) or not text.strip():
            print(f"skipping invalid entry: {text!r}")
            continue

        target_name = os.path.join(output_dir, text + ".mp3")
        if os.path.exists(target_name):
            continue

        try:
            data = {"audio": text, "lang": "zh", "type": 2}
            resp = requests.post(
                "https://dict.youdao.com/dictvoice",
                data=data,
                headers=headers,
                timeout=10,  # never hang forever on one unresponsive request
            )
            # Check the response BEFORE touching the target file: writing a
            # failed response to disk would leave a bogus .mp3 that makes
            # every later run skip this word.
            if resp.status_code != 200 or not resp.content:
                print(f"{text}单词不存在!")
                with open(r"..\missingwords.txt", "a+", encoding="utf-8") as f:
                    f.write(text + "\n")
                continue
            with open(target_name, "wb") as f:
                f.write(resp.content)
        except Exception as exc:
            # Deliberately best-effort: one failing word must not stop the
            # remaining downloads.
            print(exc)
# Run the downloader only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()