# New features: choose how the saved results are sorted; search resources on multiple sites.
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 30 17:01:26 2018
@author: gzs10227
搜索电影资源
"""
import re,os
import requests
import time,datetime
import urllib
import sys
# Python 2 only: reload(sys) makes sys.setdefaultencoding() reachable again so
# the process-wide default str<->unicode codec can be switched to UTF-8.
# The current stream objects are remembered first and put back afterwards.
# NOTE(review): presumably because reloading sys can rebind sys.stdout /
# sys.stderr (e.g. inside an IDE console) -- confirm.
stderr = sys.stderr
stdout = sys.stdout
reload(sys)
sys.setdefaultencoding('utf8')
sys.stderr = stderr
sys.stdout = stdout
# Replace urllib's getproxies_registry with a stub that always reports an
# empty proxy mapping.
# NOTE(review): presumably meant to ignore system proxy settings on Windows --
# confirm it still has any effect on `requests`.
urllib.getproxies_registry = lambda: {}
# NOTE(review): `null` is not referenced anywhere in the visible code;
# possibly a leftover for eval()-ing JSON-like text -- confirm before removing.
null = ''
from lxml import etree
import locale
# HTTP headers passed to requests.get() by open_url(): an XMLHttpRequest
# marker plus a desktop Chrome User-Agent string.
HEADERS = dict([
    ('X-Requested-With', 'XMLHttpRequest'),
    ('User-Agent',
     'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
     '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'),
])
# ---------------------------------------------------------------------------
# Interactive start-up. Runs at module level and defines the globals that the
# scraping functions below read: keyword, save_path, GS_num and, when the
# user picks mode 4, the open output file `fle` (path kept in `save`).
# ---------------------------------------------------------------------------
def _read_line():
    """Read one line from stdin and decode it to unicode.

    Uses the console encoding when stdin reports one, otherwise the locale's
    preferred encoding.
    """
    encoding = sys.stdin.encoding or locale.getpreferredencoding(True)
    return raw_input().decode(encoding)


def _read_sort_choice(retry_message):
    """Prompt until the user enters a whole number from 1 to 4.

    Args:
        retry_message: Text printed after every rejected answer.

    Returns:
        The accepted choice as an int in the range 1..4.
    """
    while True:
        try:
            choice = int(_read_line())
        except ValueError:
            # Non-numeric answer: treat it like any other invalid choice
            # instead of crashing the script.
            choice = None
        if choice in (1, 2, 3, 4):
            return choice
        print(retry_message)


print(u'请输入您想搜索的电影:')
keyword = _read_line()
print(u'\n请输入您想保存文件的路径:')
save_path = _read_line()
print(u'\n请问您想按照哪种排序方式保存文件:')
print(u' 1、文件大小 2、创建时间 3、下载次数 4、无要求.以txt格式保存搜索结果')
GS_num = _read_sort_choice(u'输入要求有误,请重新输入:1、文件大小 2、创建时间 3、下载次数 4、无要求')
if GS_num == 4:
    # Mode 4 streams results straight into <save_path>/<keyword>.txt.
    # os.path.join supplies the separator itself, matching how the main loop
    # builds the same path.
    save = os.path.join(save_path, keyword + '.txt')
    fle = open(save, 'w')
def open_url(url, timeout=30):
    """Download *url* and return the page parsed by ``lxml.etree.HTML``.

    Args:
        url: Address of the page to fetch; sent with the module-level
            ``HEADERS``.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error. Without it a stalled connection would block the
            script forever.

    Returns:
        Whatever ``etree.HTML`` produces for the response body.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    return etree.HTML(response.content)
def get_url(keyword):
    """Return the detail-page URLs listed on the btyunsou search page for *keyword*.

    The search page is fetched through ``open_url``; every href found under
    ``li.media`` / ``h4`` / ``a`` is prefixed with the site root.
    """
    search_page = open_url('http://www.btyunsou.me/search/%s_ctime_1.html' % keyword)
    detail_urls = []
    # Collect the result links and turn each href into a full URL.
    for href in search_page.xpath('//li[@class="media"]//h4//a/@href'):
        detail_urls.append('http://www.btyunsou.me' + href)
    return detail_urls
def get_info(url):
    """Scrape one btyunsou detail page and record it when its title matches.

    Works on module-level state: reads ``keyword`` and ``GS_num``. With
    ``GS_num == 4`` the result is written to the already opened file ``fle``;
    otherwise it is appended to the lists ``titles``, ``cls``, ``size``,
    ``ctime`` and ``loadnum``. All five lists are extended together, or not
    at all, so they always stay the same length.

    Args:
        url: Detail-page URL as produced by ``get_url``.

    Returns:
        None.
    """
    web_data = open_url(url)
    try:
        title = web_data.xpath(r'//div[@class="row-flbtd tor-title"]/h2/text()')[0]
    except (IndexError, AttributeError):
        # No title node (or nothing was parsed): treat the page as a non-match.
        title = ''
    if keyword not in title:
        print('Sorry! None Search,Please change one: ')
        return

    print(u'%s %s' % (u'电影名:', title))
    # The slice skips the first 23 characters and the last 5 of the URL.
    # NOTE(review): presumably the site root plus '/' and the '.html' suffix,
    # leaving the torrent info-hash -- confirm against real links.
    mange_link = 'magnet:?xt=urn:btih:' + url[23:-5]
    print(u'%s %s' % (u'磁力链接: ', mange_link))
    cells = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')

    if GS_num == 4:
        fle.write(u'电影名:' + title + '\n')
        fle.write(u'磁力链接: ' + mange_link + '\n')
        datalist = cells[:10]
        # Cells are consumed two at a time; zip() simply drops a dangling
        # unpaired cell instead of raising IndexError.
        for first, second in zip(datalist[0::2], datalist[1::2]):
            print(u'%s %s' % (first, second))
            fle.write(first + second + '\n')
        return

    datalist = cells[4:10]
    if len(datalist) < 6:
        # Incomplete detail table: skip the record entirely rather than
        # leaving the five result lists with different lengths.
        return
    for first, second in zip(datalist[0::2], datalist[1::2]):
        print(u'%s %s' % (first, second))
    titles.append(title)
    cls.append(mange_link)
    size.append(datalist[1])     # file size
    ctime.append(datalist[3])    # creation time
    loadnum.append(datalist[5])  # download count
def get_info2(keyword):
    """Search ciliba.org for *keyword* and record every matching result.

    Reads the module globals ``GS_num`` and, depending on it, writes to the
    open file ``fle`` (``GS_num == 4``) or appends to the lists ``titles``,
    ``cls``, ``size``, ``ctime`` and ``loadnum``. A result page that cannot
    be fetched or lacks one of the expected fields is skipped on its own;
    the remaining results are still processed and the five lists are only
    ever extended together.

    Args:
        keyword: Search term; also used to filter results by title.

    Returns:
        None.
    """
    url = 'https://www.ciliba.org/s/%s.html' % keyword
    web_data = open_url(url)
    hrefs = web_data.xpath(r'//div[@class="item-title"]/h3/a/@href')
    for href in hrefs:
        try:
            page = open_url(href)
        except Exception:
            # Best effort: an unreachable or unparsable result page is skipped.
            continue
        try:
            title = page.xpath(r'//*[@id="wall"]/h1/text()')[0]
        except (IndexError, AttributeError):
            title = ''
        if keyword not in title:
            print('Sorry! None Search,Please change one: ')
            continue

        print(u'%s %s' % (u'电影名: ', title))
        try:
            xl_link = page.xpath(r'//*[@id="wall"]/div[1]/p[6]/a[2]/@href')[0]
            data1 = page.xpath('//*[@id="wall"]/div[1]/p[2]/text()')[0]
            data2 = page.xpath('//*[@id="wall"]/div[1]/p[3]/text()')[0]
            if GS_num != 4:
                # Extract the values before touching the result lists so a
                # malformed line cannot leave them half-updated.
                # NOTE(review): data1/data2 look like "label:value" lines
                # (size and creation time) -- confirm against the live page.
                size_text = data1.split(':')[1]
                ctime_text = data2.split(':')[1]
        except IndexError:
            # This page lacks an expected field; skip only this result.
            continue

        print(u'%s %s' % (u'迅雷链接: ', xl_link))
        print(data1)
        print(data2)
        if GS_num == 4:
            fle.write(u'电影名:' + title + '\n')
            fle.write(u'迅雷链接: ' + xl_link + '\n')
            fle.write(data1)
            fle.write('\n')
            fle.write(data2)
            fle.write('\n')
        else:
            titles.append(title)
            cls.append(xl_link)
            size.append(size_text)
            ctime.append(ctime_text)
            # This site exposes no download counter here; a constant 1 is stored.
            loadnum.append(1)
# Recognised size units and how many megabytes one of each is worth.
_SIZE_UNITS_IN_MB = (
    ('TB', 1024.0 * 1024.0),
    ('GB', 1024.0),
    ('MB', 1.0),
    ('KB', 1.0 / 1024.0),
)


def clear(i):
    """Convert a human-readable size string to whole megabytes.

    The unit is matched case-insensitively (``GB``, ``Gb``, ``gb`` ...), and
    ``KB`` / ``TB`` are understood in addition to ``MB`` / ``GB``. A string
    without a recognised unit is taken to be megabytes already. As before,
    the number is rounded to two decimals before scaling and the result is
    truncated to an int.

    Args:
        i: Size text such as ``'1.5 GB'`` or ``'700Mb'``.

    Returns:
        The size in megabytes as an int.

    Raises:
        ValueError: If no number can be parsed from the text.
    """
    text = i.upper()
    factor = 1.0
    for unit, unit_in_mb in _SIZE_UNITS_IN_MB:
        if unit in text:
            text = text.replace(unit, '')
            factor = unit_in_mb
            break
    # split()/join removes every kind of whitespace, not only plain spaces.
    value = round(float(''.join(text.split())), 2)
    return int(value * factor)
def _safe_size_mb(text):
    """Convert a scraped size string with clear(); 0 when it cannot be parsed."""
    try:
        return clear(text)
    except ValueError:
        return 0


def _download_count(row):
    """Sort key: the row's download count as an int (0 when not numeric).

    The counts are collected both as scraped text and as plain ints, so they
    are normalised here to get a numeric ordering instead of a string one.
    """
    try:
        return int(row[4])
    except (TypeError, ValueError):
        return 0


if __name__ == '__main__':
    # Endless search loop. The first round uses the answers collected at
    # start-up; every later round asks for a new keyword and sort mode.
    # keyword, GS_num, fle and the five result lists are module globals that
    # get_info() / get_info2() read and fill.
    i = 1
    while True:
        if i > 1:
            input_encoding = sys.stdin.encoding or locale.getpreferredencoding(True)
            print(u'请输入你想搜索的电影:')
            keyword = raw_input().decode(input_encoding)
            print(u'\n请问您想按照哪种排序方式保存文件:1、文件大小 2、创建时间 3、下载次数 4、无要求,以txt保存')
            while True:
                try:
                    GS_num = int(raw_input().decode(input_encoding))
                except ValueError:
                    # Non-numeric answer: handled like an out-of-range number.
                    GS_num = 0
                if 1 <= GS_num <= 4:
                    break
                print(u'输入要求有误,请重新输入[1-4]:1、文件大小 2、创建时间 3、下载次数 4、无要求,以txt保存')
            if GS_num == 4:
                save = os.path.join(save_path, keyword + '.txt')
                fle = open(save, 'w')

        links = get_url(keyword)
        titles = []
        cls = []
        size = []
        ctime = []
        loadnum = []
        for url in links:
            try:
                get_info(url)
            except Exception:
                # Best effort per page; `Exception` (not a bare except) keeps
                # Ctrl-C able to stop the otherwise endless loop.
                continue
            if GS_num == 4:
                fle.write('--------------------------------------------------')
                fle.write('\n')
        try:
            get_info2(keyword)
        except Exception:
            # The second site is optional; its failure must not lose the
            # results already collected from the first one.
            pass

        if GS_num != 4:
            size = [_safe_size_mb(raw_size) for raw_size in size]
            # One row per result: [title, link, size in MB, created, downloads].
            # zip() stops at the shortest list, so a record with a missing
            # field cannot raise IndexError here.
            df_list = [list(row) for row in zip(titles, cls, size, ctime, loadnum)]
            save = os.path.join(save_path, keyword + '.txt')
            # elif chain: with independent ifs the final else also ran for
            # choice 1 and replaced the size ordering with the download one.
            if GS_num == 1:
                df2 = sorted(df_list, key=lambda row: row[2], reverse=True)
            elif GS_num == 2:
                df2 = sorted(df_list, key=lambda row: row[3], reverse=True)
            else:
                df2 = sorted(df_list, key=_download_count, reverse=True)
            labels = (u'电影名:', u'磁力链接:', u'文件大小MB:', u'创建时间: ', u'热度: ')
            with open(save, 'w') as fle2:
                for row in df2:
                    for label, value in zip(labels, row):
                        fle2.write(str(u'%s%s' % (label, value)))
                        fle2.write('\n')
                    fle2.write('--------------------------------------------------')
                    fle2.write('\n')
        else:
            fle.close()
        i = i + 1
        print(u'\n如果您想再次搜索,请输入电影名!否则请手动关闭窗口.\n')