本来想找个资源,找来找去,没找到,只找到一个
https://kaoyan.koolearn.com/20190131/1035624.html
里面是 2019 年的,一共 110 个网址,每个网址里大约 50 个词。
爬虫吧,边查边写,写了俩小时。。
主要是,这鬼东西里面还有网址嵌套(单词外面还套着标签),想找个现成的方法,结果没找着,就自己写了一个 clear_inner 函数来清理。
写文件的时候也出了问题:报错说 txt 是 gbk 编码,写入 utf-8 内容有问题,于是搜到可以用 codecs.open() 指定编码来打开文件。
#-*- coding:UTF-8 -*-
import requests
import re
# Download the index article; the sub-page links are extracted from it below.
url = "https://kaoyan.koolearn.com/20190131/1035624.html"
page = requests.get(url)
# Print the HTTP status code so a failed request is visible on the console.
print(page.status_code)
# Decode the raw response bytes explicitly as UTF-8 instead of relying on
# the encoding guessed by requests.
res = page.content.decode(encoding='utf-8')
# Every substring of the page that has the shape of a 2019 article URL.
c = re.findall(
    r'http://kaoyan.koolearn.com/2019..../.......\.html',
    res,
)
# get_url(url): download the page at `url`, decode the response body as UTF-8
# and return the list that re.findall produces for the pattern below.
#
# NOTE(review): the indentation of this block is missing in this copy of the
# script; the four statements after the `def` line presumably form its body.
# NOTE(review): the pattern literal is split across two lines, which is not a
# valid single-quoted string. Presumably HTML tag delimiters around the
# capture group were lost when the code was pasted -- restore the original
# pattern before running.
# NOTE(review): `res = res = ...` is a doubled assignment; it has the same
# effect as a single assignment.
def get_url(url):
res = res = requests.get(url)
res = res.content.decode(encoding='utf-8')
res = re.findall(r' (..*?)
',res)
return res
#for i in res:
# print(i)
#print(res)
def clear_inner(s):
    """Strip one wrapping tag pair from *s* and return the result.

    The span from the first ``<`` to the first ``>`` and the span from the
    last ``<`` to the last ``>`` are removed; everything else is kept, e.g.
    ``"1 <a href='u'>abandon</a> v."`` becomes ``"1 abandon v."``.

    The string is returned unchanged unless it contains at least two ``<``
    and at least two ``>`` characters. Previously the position of the last
    ``<`` was never initialised, so an input such as ``"a<b>c>d"`` (one
    ``<``, two ``>``) raised ``NameError``; it is now returned as-is.

    Args:
        s: One extracted line of text, possibly containing markup.

    Returns:
        The text with the outermost opening and closing tag removed, or
        *s* itself when no complete pair of tags is present.
    """
    first_open = s.find('<')
    last_open = s.rfind('<')
    first_close = s.find('>')
    last_close = s.rfind('>')

    # A single occurrence makes find() and rfind() agree; in that case (or
    # when a bracket is missing entirely) there is no tag pair to strip.
    if first_open == -1 or first_open == last_open:
        return s
    if first_close == -1 or first_close == last_close:
        return s

    return s[:first_open] + s[first_close + 1:last_open] + s[last_close + 1:]
# Print one of the collected links as a quick sanity check.
#print(c[1])
print(c[18])
# Drop the first match; per the original author's note, c[0] is the index
# page itself rather than a sub-page.
c.remove(c[0])
# Debug helpers used while developing: inspect a single page and the effect
# of clear_inner on its lines before running the full crawl.
#x = get_url(c[18])
#print(x[6])
#print(clear_inner(x[6]))
#for i in get_url(c[31]):
# print(i)
# print(clear_inner(i))
#print()
#print(len(c))
import codecs
# Reference snippet for appending text to a UTF-8 file with codecs.open:
#with codecs.open(write_fileroute,'a',encoding='utf-8') as f:
# f.write(u'content to write')
#
# Visit every collected link, clean each line returned by get_url with
# clear_inner, and write the lines to a.txt (UTF-8), one per row.
# NOTE(review): indentation is missing in this copy of the script, so it is
# unclear whether `cnt` counts pages or written lines and whether print(cnt)
# runs per iteration or once at the end -- confirm against the original.
with codecs.open("a.txt",'w',encoding='utf-8') as f:
cnt = 0
for j in c:
for i in get_url(j):
f.write(clear_inner(i)+'\n')
cnt = cnt+1
print(cnt)
这样爬完之后,写一个py代码
代码找的现成的gui编程链接、
改了改
就是输入单词或者意思,自动填充另一个,例如输入单词abandon,在下面自动填充 遗弃,或者反过来
考虑到根据意思反过来找单词会有多个结果,所以用分号作为间隔;并且通过意思查找很不稳定,毕竟匹配是通过
查找子串来完成的,而不是真正理解了意思。
# -*- coding:utf-8 -*-
import wx
import codecs
class MyFrame(wx.Frame):
    """Main window of the word lookup tool.

    The window has a word field and a meaning field. Filling in one of them
    and pressing the confirm button fills the other from the lines of
    ``a.txt``. Each line is split on whitespace; the second token is used as
    the word and the tokens after it as the meaning.
    """

    def __init__(self, parent, id):
        """Build the widgets, load the data file and lay out the window.

        Args:
            parent: Parent window, or None for a top-level frame.
            id: Window identifier passed through to wx.Frame.
        """
        wx.Frame.__init__(self, parent, id, '单词查找', size=(400, 300))
        # Panel that hosts every control of the frame.
        panel = wx.Panel(self)
        # Lines of the data file; filled by GetData().
        self.data = []
        self.GetData()

        # "Confirm" and "Clear" buttons with their click handlers.
        self.bt_confirm = wx.Button(panel, label='确定')
        self.bt_confirm.Bind(wx.EVT_BUTTON, self.OnclickSubmit)
        self.bt_clear = wx.Button(panel, label='清除')
        self.bt_clear.Bind(wx.EVT_BUTTON, self.OnclickClear)

        # Title, field labels and left-aligned text inputs.
        self.title = wx.StaticText(panel, label="请输入单词或意思")
        self.label_English = wx.StaticText(panel, label="单词:")
        self.text_English = wx.TextCtrl(panel, style=wx.TE_LEFT)
        self.label_Chinese = wx.StaticText(panel, label="意思:")
        self.text_Chinese = wx.TextCtrl(panel, style=wx.TE_LEFT)

        # Horizontal rows: label + input for each field, then the buttons.
        hsizer_English = wx.BoxSizer(wx.HORIZONTAL)
        hsizer_English.Add(self.label_English, proportion=0, flag=wx.ALL, border=5)
        hsizer_English.Add(self.text_English, proportion=1, flag=wx.ALL, border=5)
        hsizer_Chinese = wx.BoxSizer(wx.HORIZONTAL)
        hsizer_Chinese.Add(self.label_Chinese, proportion=0, flag=wx.ALL, border=5)
        hsizer_Chinese.Add(self.text_Chinese, proportion=1, flag=wx.ALL, border=5)
        hsizer_button = wx.BoxSizer(wx.HORIZONTAL)
        hsizer_button.Add(self.bt_confirm, proportion=0, flag=wx.ALIGN_CENTER, border=5)
        hsizer_button.Add(self.bt_clear, proportion=0, flag=wx.ALIGN_CENTER, border=5)

        # Vertical stack: title, the two field rows, the button row.
        vsizer_all = wx.BoxSizer(wx.VERTICAL)
        vsizer_all.Add(self.title, proportion=0, flag=wx.BOTTOM | wx.TOP | wx.ALIGN_CENTER,
                       border=15)
        vsizer_all.Add(hsizer_English, proportion=0, flag=wx.EXPAND | wx.LEFT | wx.RIGHT, border=45)
        vsizer_all.Add(hsizer_Chinese, proportion=0, flag=wx.EXPAND | wx.LEFT | wx.RIGHT, border=45)
        vsizer_all.Add(hsizer_button, proportion=0, flag=wx.ALIGN_CENTER | wx.TOP, border=15)
        panel.SetSizer(vsizer_all)

    def OnclickSubmit(self, event):
        """Handle the confirm button: fill in whichever field is missing.

        If the word field is empty, every line containing the meaning text
        as a substring contributes its word, and the words are joined with
        ``;``. Otherwise the meaning of the line that contains the word as a
        whitespace-separated token is shown (the last matching line wins).
        """
        word = self.text_English.GetValue()
        meaning = self.text_Chinese.GetValue()

        if word == "":
            if meaning == "":
                # Nothing to look up. An empty substring would otherwise
                # match every line of the data file.
                return
            matches = []
            for line in self.data:
                if meaning not in line:
                    continue
                fields = line.split()
                # Skip lines too short to carry a word in the second
                # column; indexing them used to raise IndexError.
                if len(fields) > 1:
                    matches.append(fields[1])
            self.text_English.SetValue(";".join(matches))
        else:
            for line in self.data:
                fields = line.split()
                if word in fields:
                    self.text_Chinese.SetValue("".join(fields[2:]))

    def OnclickClear(self, event):
        """Handle the clear button: empty both text fields."""
        self.text_English.SetValue("")
        self.text_Chinese.SetValue("")

    def GetData(self):
        """Load the lines of ``a.txt`` (UTF-8) into ``self.data``.

        A message box is shown when the file cannot be opened or contains
        no lines; ``self.data`` is then an empty list.
        """
        try:
            with codecs.open("a.txt", 'r', encoding='utf-8') as f:
                self.data = f.readlines()
        except (IOError, OSError):
            # A missing or unreadable file used to abort start-up with a
            # traceback; report it to the user instead.
            self.data = []
            wx.MessageBox("找不到或无法打开a.txt文件")
            return
        if not self.data:
            message = "请检查a.txt文件是否为空"  # the data file has no content
            wx.MessageBox(message)
if __name__ == '__main__':
    # A wx.App must exist before any window is created.
    application = wx.App()
    # Top-level frame: no parent, default window id.
    window = MyFrame(None, -1)
    window.Show()
    # Hand control to the wx event loop until the window is closed.
    application.MainLoop()
然后看到评论说可以打包成exe,就又搜了下
链接、
就是先
pip install pyinstaller
然后cmd打开进入到py目录下,这里因为使用了第三方库,即wx,所以把wx复制到py文件同一目录下
pyinstaller -F -w main.py
等待完成,哦,对了记得把a.txt手动拿到exe同一目录下
然后就完成了