import requests
from bs4 import BeautifulSoup
import threading
import time
#from tkinter import *
import tkinter as tk
from tkinter import ttk
import pandas as pd
def _parse_demand_card(item):
    """Return the displayed fields of one ``div.demand-card`` element as a dict."""
    price = item.select('div.demand-card-body > div.demand-price')[0].get_text().strip()
    # Keep only the first line of the price text.
    n = price.find('\n', 1)
    if n > -1:
        price = price[0:n]
    link = item.select('a')[0]
    head_spans = item.select('div.demand-card-head > span')
    return {
        '标题': link.get_text().strip(),
        '报价': price,
        '发布时间': head_spans[0].get_text().strip(),
        '参与人数': head_spans[1].get_text().strip(),
        '链接': link.get('href'),
        '内容': item.select('div.demand-card-body > div.demand-card-desc')[0].get_text().replace('\n', '').strip(),
    }


def get_lst(pram="xxxxxxx"):
    """Scrape every page of the demand listing and return its cards.

    The first page is fetched to read the page count from the last
    ``#pagination-ul > li.pagin-li`` element; each further page is then
    fetched in turn.  When no pagination element exists the listing is
    treated as a single page instead of raising ``IndexError``.

    Args:
        pram: Query string appended to the listing URL.

    Returns:
        A list of dicts, one per ``div.demand-card``, with the keys
        '标题', '报价', '发布时间', '参与人数', '链接' and '内容'.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the last pagination entry is not an integer.
        IndexError: If a card lacks one of the expected child elements.
    """
    timeout = 10  # seconds; without it a stalled server blocks the caller forever

    url = "https://xxxxxx" + pram
    # Download the first listing page and parse it.
    wbdata = requests.get(url, timeout=timeout).text
    soup = BeautifulSoup(wbdata, 'html.parser')

    # The last pagination entry carries the total number of pages.
    page_items = soup.select("#pagination-ul > li.pagin-li")
    page_n = int(page_items[-1].get_text().strip()) if page_items else 1

    lst_tmp = []
    print("+++++++++第{0}页的内容+++++++++".format(1))
    for _p in range(page_n):
        if _p > 0:
            # Page 1 was already downloaded above; fetch the following pages here.
            time.sleep(0)  # zero-length pause kept from the original (presumably a throttle hook)
            url = "xxxxxxxx/page{0}.html?{1}".format(_p + 1, pram)
            wbdata = requests.get(url, timeout=timeout).text
            soup = BeautifulSoup(wbdata, 'html.parser')
            print("+++++++++第{0}页的内容+++++++++".format(_p + 1))
        item_lst = soup.select("#utopia_widget_6 > div.demand-list > div.demand-card")
        lst_tmp.extend(_parse_demand_card(item) for item in item_lst)
    return lst_tmp
def get_info(url):
    """Download a detail page and print its ``p.node-title`` elements.

    Args:
        url: Link to the detail page.  A protocol-relative link
            (``//host/path``) gets ``https:`` prepended; a link that already
            starts with ``http://`` or ``https://`` is used unchanged.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Only add the scheme when it is missing; prefixing blindly would turn
    # "https://host" into the invalid "https:https://host".
    if not url.startswith(("http://", "https://")):
        url = "https:" + url
    wbdata = requests.get(url, timeout=10).text
    soup = BeautifulSoup(wbdata, 'html.parser')
    item_lst = soup.select("div.list-item-content > p.node-title")
    print(item_lst)
#%%
# Scrape the listing once when the module is loaded.  GetDF() wraps this
# module-level list, and the table further down is initially filled from it.
lst_data=get_lst()
def GetDF():
    """Return the module-level ``lst_data`` records as a pandas DataFrame."""
    return pd.DataFrame(lst_data)
#%% Timer
import http.client
def get_webtime(host):
    """Return the time reported by ``host`` in its HTTP ``Date`` header.

    Sends ``GET /`` over plain HTTP, parses the ``Date`` response header and
    prints how long the round trip took, in seconds.

    Args:
        host: Host name to query, e.g. ``'www.baidu.com'``.

    Returns:
        A ``time.struct_time`` holding the header's date and time.  HTTP
        dates are expressed in GMT and no time-zone conversion is applied.

    Raises:
        ValueError: If the response has no ``Date`` header, or the header
            does not match the expected layout.
        OSError: On connection failure or timeout.
    """
    start = time.time()
    conn = http.client.HTTPConnection(host, timeout=10)
    try:
        conn.request("GET", "/")
        r = conn.getresponse()
        ts = r.getheader('date')  # only the Date header is needed
    finally:
        # Always release the socket, including when the request fails.
        conn.close()
    if ts is None:
        raise ValueError("no Date header in response from {0}".format(host))
    # Header looks like "Tue, 15 Nov 1994 08:12:31 GMT"; characters 5..24 are
    # the "15 Nov 1994 08:12:31" part.
    ltime = time.strptime(ts[5:25], "%d %b %Y %H:%M:%S")
    end = time.time()
    print(end - start)
    return ltime
def fun_timer():
    """Print the current Beijing time taken from www.baidu.com's HTTP Date header."""
    _tm = get_webtime('www.baidu.com')
    # The Date header is in GMT and struct_time fields are read-only, so the
    # UTC+8 offset is applied to a copy of the hour, wrapping past midnight.
    hour = (_tm.tm_hour + 8) % 24
    print('北京时间:{0}:{1}:{2}'.format(hour, _tm.tm_min, _tm.tm_sec))
# Schedule one call to fun_timer, one second from now, on a threading.Timer thread.
# NOTE(review): this file's indentation was lost; if these two lines originally
# sat inside fun_timer (so that it re-arms itself every second), move them back.
timer = threading.Timer(1, fun_timer)
timer.start()
def btn_refresh():
    """Re-scrape the listing and replace the table rows with the new data.

    Also rebinds the module-level ``lst_data`` so that ``GetDF()`` reflects
    the refreshed records rather than the ones loaded at start-up.
    """
    global lst_data
    # Fetch before clearing, so a failed request leaves the current rows visible.
    lst_data = get_lst()
    tv.delete(*tv.get_children())
    for item in lst_data:
        tv.insert('', 'end', values=(item['标题'], item['报价'], item['发布时间'], item['参与人数'], item['链接'], item['内容']))
def btn_Del():
    """Remove every row currently shown in the table."""
    tv.delete(*tv.get_children())
# Main window.
root = tk.Tk()
root.geometry("1280x800")

# Toolbar across the top, holding the refresh and clear buttons.
_width = 10
FrmTop = tk.Frame(root, height=50)
FrmTop.pack(side=tk.TOP, fill=tk.X)
tk.Button(
    FrmTop, name='bt1', text='刷新', width=_width, bg="LightGrey", command=btn_refresh,
).pack(fill=tk.Y, side=tk.LEFT)
tk.Button(
    FrmTop, name='bt2', text='清空', width=_width, bg="LightGrey", command=btn_Del,
).pack(fill=tk.Y, side=tk.LEFT)

# Table columns: (column id, pixel width, heading text).
_COLUMNS = (
    ('col1', 200, '标题'),
    ('col2', 70, '报价'),
    ('col3', 100, '发布时间'),
    ('col4', 160, '参与人数'),
    ('col5', 50, '链接'),
    ('col6', 300, '内容'),
)
tv = ttk.Treeview(
    root,
    height=18,
    show="headings",
    columns=tuple(spec[0] for spec in _COLUMNS),
)
for _col_id, _col_width, _col_title in _COLUMNS:
    tv.column(_col_id, width=_col_width, anchor='w')
for _col_id, _col_width, _col_title in _COLUMNS:
    tv.heading(_col_id, text=_col_title)
def onDBClick(event):
    """Print the selected row and fetch the detail page behind its link.

    Args:
        event: The Tk event that triggered the handler (unused).
    """
    selected = tv.selection()
    if not selected:
        # Nothing is selected (e.g. the click landed on a heading or on empty
        # space), so there is no row to open.
        return
    values = tv.item(selected[0], "values")
    print("you clicked on ", values)
    # Index 4 is the link column.
    get_info(values[4])
# Open a row's detail page on a left-button double-click.  The event sequence
# was an empty string, which Tk rejects; "<Double-1>" matches the handler's name.
tv.bind("<Double-1>", onDBClick)

# Fill the table with the records scraped at start-up.
for item in lst_data:
    tv.insert('', tk.END, values=(item['标题'], item['报价'], item['发布时间'], item['参与人数'], item['链接'], item['内容']))

# Vertical and horizontal scrollbars wired to the table.
vbar = ttk.Scrollbar(root, orient=tk.VERTICAL, command=tv.yview)
tv.configure(yscrollcommand=vbar.set)
vbar.pack(fill=tk.Y, side=tk.RIGHT)
hbar = ttk.Scrollbar(root, orient=tk.HORIZONTAL, command=tv.xview)
tv.configure(xscrollcommand=hbar.set)
hbar.pack(fill=tk.X, side=tk.BOTTOM)
tv.pack(fill=tk.BOTH, expand=tk.YES)

# Not read anywhere in the visible code; kept so the module-level name still exists.
items = tv.get_children()
root.mainloop()