电影资源搜索助手2

新增功能:可按指定的排序方式保存结果文件;支持从多个网址搜索资源

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 30 17:01:26 2018

@author: gzs10227

搜索电影资源
"""
# -- Dependencies and Python-2 console/encoding setup --
import re,os
import requests
import time,datetime
import urllib
import sys
# Save the real stream objects: reload(sys) replaces stdout/stderr in some
# environments (e.g. IDE consoles), so they are restored right after.
stderr = sys.stderr
stdout = sys.stdout
reload(sys)                      # Python 2 only: re-exposes setdefaultencoding
sys.setdefaultencoding('utf8')   # allow implicit unicode<->str conversion for Chinese text
sys.stderr = stderr
sys.stdout = stdout
# Monkey-patch the Windows registry proxy lookup to return no proxies --
# presumably to stop requests/urllib picking up a system proxy; TODO confirm.
urllib.getproxies_registry = lambda: {}
null = ''                        # placeholder constant; unused in the visible code
from lxml import etree
import locale

# Browser-like headers so the scraped sites accept our requests.
HEADERS = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
}

print u'请输入您想搜索的电影:'
keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) 

print u'\n请输入您想保存文件的路径:'
save_path = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True))

print u'\n请问您想按照哪种排序方式保存文件:'
print u' 1、文件大小 2、创建时间  3、下载次数  4、无要求.以txt格式保存搜索结果'
GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))
while GS_num > 4:
    print u'输入要求有误,请重新输入:1、文件大小 2、创建时间  3、下载次数 4、无要求'
    GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))

if GS_num == 4:
    save = os.path.join(save_path+'\\',keyword+'.txt')
    fle = open(save,'w')



def open_url(url):
    """Fetch *url* with browser-like headers and return an lxml HTML tree."""
    response = requests.get(url, headers=HEADERS)
    return etree.HTML(response.content)


def get_url(keyword):
    """Search btyunsou for *keyword* and return absolute detail-page URLs."""
    search_page = 'http://www.btyunsou.me/search/%s_ctime_1.html' % keyword
    tree = open_url(search_page)
    # Relative hrefs of each result entry; prefix the site root.
    hrefs = tree.xpath('//li[@class="media"]//h4//a/@href')
    return ['http://www.btyunsou.me' + href for href in hrefs]


def get_info(url):    
    """Scrape one btyunsou detail page at *url*.

    Reads module globals ``keyword`` and ``GS_num``.  When ``GS_num == 4``
    writes results directly to the open file ``fle``; otherwise appends the
    parsed fields to the module-level lists ``titles``/``cls``/``size``/
    ``ctime``/``loadnum`` for later sorting in the main loop.
    """
    web_data = open_url(url)
    try:
        title = web_data.xpath(r'//div[@class="row-flbtd tor-title"]/h2/text()')[0]
    except:  # NOTE(review): bare except silently maps any parse failure to ''
        title = ''
        
    if keyword in title:        
        print u'电影名:',title
        # url[23:-5] strips the 'http://www.btyunsou.me/' prefix and the
        # trailing '.html', leaving the info-hash for the magnet link --
        # fragile if the URL layout changes.
        mange_link = 'magnet:?xt=urn:btih:' + url[23:-5]
        print u'磁力链接: ',mange_link
        if GS_num == 4:
            fle.write(u'电影名:'+ title + '\n')
            fle.write(u'磁力链接: ' + mange_link +'\n')
            # The detail table text alternates label/value; save pairs verbatim.
            datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[:10]
            for i in range(0,len(datalist),2):
                print datalist[i],datalist[i+1]
                fle.write(datalist[i] + datalist[i+1] + '\n')
        else:
            titles.append(title)
            cls.append(mange_link)
            # Slice [4:10] keeps the size / time / download-count pairs.
            datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[4:10]
            for i in range(0,len(datalist),2):
                print datalist[i],datalist[i+1]     
                if i == 0:
                    size.append(datalist[1])   # file size (e.g. '1.5 GB')
                if i == 2:
                    ctime.append(datalist[3])      # creation time
                if i == 4:
                    loadnum.append(datalist[5])      # download count
            
    else:
        print 'Sorry! None Search,Please change one: '


def get_info2(keyword):
    """Scrape ciliba.org search results for *keyword* (second source).

    Same output contract as ``get_info``: reads global ``GS_num``; writes to
    the open file ``fle`` when ``GS_num == 4``, otherwise appends to the
    module-level lists ``titles``/``cls``/``size``/``ctime``/``loadnum``.
    """
    url = 'https://www.ciliba.org/s/%s.html'%keyword
    web_data = open_url(url)
    hrefs = web_data.xpath(r'//div[@class="item-title"]/h3/a/@href')
    for href in hrefs:
        try:
            web_data = open_url(href)
        except:  # best-effort: skip detail pages that fail to download/parse
            continue
        try:
            title = web_data.xpath(r'//*[@id="wall"]/h1/text()')[0]
        except:  # NOTE(review): bare except maps any parse failure to ''
            title = ''
        if keyword in title:
            print u'电影名: ',title
            # Position-based XPaths: p[6]/a[2] appears to be the Thunder
            # (xunlei) link, p[2]/p[3] the size and date lines -- fragile if
            # the site layout changes.
            xl_link = web_data.xpath(r'//*[@id="wall"]/div[1]/p[6]/a[2]/@href')[0]
            print u'迅雷链接: ',xl_link
            data1 = web_data.xpath('//*[@id="wall"]/div[1]/p[2]/text()')[0]
            data2 = web_data.xpath('//*[@id="wall"]/div[1]/p[3]/text()')[0]
            print data1
            print data2
            if GS_num == 4:
                fle.write(u'电影名:'+ title + '\n')
                fle.write(u'迅雷链接: ' + xl_link +'\n')
                fle.write(data1)
                fle.write('\n')
                fle.write(data2)
                fle.write('\n')
            else:
                titles.append(title)
                cls.append(xl_link)
                size.append(data1.split(':')[1])   # value after the 'label:' prefix
                ctime.append(data2.split(':')[1])  # value after the 'label:' prefix
                loadnum.append(1)  # this source has no download count; use 1
        else:
            print 'Sorry! None Search,Please change one: '


def clear(i):
    """Convert a size string such as '1.5 GB' or '700 MB' to integer MB."""
    compact = i.replace(' ', '')
    if 'Gb' in i or 'GB' in i:
        gigabytes = round(float(compact.replace('GB', '').replace('Gb', '')), 2)
        return int(gigabytes * 1024)
    # Anything else is assumed to already be megabytes.
    megabytes = round(float(compact.replace('Mb', '').replace('MB', '')), 2)
    return int(megabytes)
        

if __name__ == '__main__':       
    i = 1
    while True:    
        if i > 1:
            print u'请输入你想搜索的电影:'
            keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) 
            print u'\n请问您想按照哪种排序方式保存文件:1、文件大小 2、创建时间  3、下载次数  4、无要求,以txt保存'
            GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))
            while GS_num > 4:
                print u'输入要求有误,请重新输入[1-4]:1、文件大小 2、创建时间  3、下载次数 4、无要求,以txt保存'
                GS_num = int(raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)))            
            if GS_num == 4:
                save = os.path.join(save_path,keyword+'.txt')
                fle = open(save,'w')
                
        links = get_url(keyword)
        #df = pd.DataFrame()
        titles = [];cls = [];size = [];ctime = [];loadnum = []
        for url in links:
            try:            
                get_info(url)
            except:
                continue
            if GS_num == 4:
                fle.write('--------------------------------------------------')
                fle.write('\n')
                
        try:
            get_info2(keyword)
        except:
            pass
        
        if GS_num != 4:
#            df[u'标题'] = titles
#            df[u'创建时间'] = ctime
#            df[u'文件大小MB'] = size
#            df[u'下载次数'] = loadnum
#            df[u'下载链接'] = cls
#            df[u'下载次数'] = df[u'下载次数'].astype(int)
#            df[u'文件大小MB'] = map(clear,df[u'文件大小MB'])
            size = map(clear,size)
            df_list = []
            for ii in range(len(titles)):
                df_list.append([titles[ii],cls[ii],size[ii],ctime[ii],loadnum[ii]])
            save = os.path.join(save_path,keyword+'.txt')
            fle2 = open(save,'w')
            if GS_num == 1:
                #df2 = df.sort_values(by = u'文件大小MB', ascending = False)
                df2 = sorted(df_list, key=lambda x: x[2],reverse = True)
            if GS_num == 2:
                #df2 = df.sort_values(by = u'创建时间', ascending = False)
                df2 = sorted(df_list, key=lambda x: x[3],reverse = True)
            else:
                #df2 = df.sort_values(by = u'下载次数', ascending = False)    
                df2 = sorted(df_list, key=lambda x: x[4],reverse = True)
            #df2.to_excel(save,index = False,encoding = 'gbk')
            
            for sl in df2:
                for s in range(len(sl)):
                    if s == 0: 
                        ss = u'电影名:' + sl[s]
                    if s == 1:
                        ss = u'磁力链接:' + sl[s]
                    if s == 2:
                        ss = u'文件大小MB:' + str(sl[s])
                    if s == 3:
                        ss = u'创建时间: ' + str(sl[s])
                    if s == 4:
                        ss = u'热度: ' + str(sl[s])
                    fle2.write(str(ss))
                    fle2.write('\n')
                fle2.write('--------------------------------------------------')
                fle2.write('\n')  
                
            fle2.close()
                
        else:                
            fle.close()
        i = i + 1
        print u'\n如果您想再次搜索,请输入电影名!否则请手动关闭窗口.\n'
    
    
    
    
    


你可能感兴趣的:(python爬虫那些坑)