搜索电影资源

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 30 17:01:26 2018

@author: gzs10227

Search for movie resources.

Reads a keyword from standard input, queries the btyunsou.me search page
for it, and prints/saves the title and magnet link of each matching result.
"""

# NOTE(review): time, datetime, pandas and get_mac are not referenced in the
# visible part of this file; they are kept because other code may rely on them.
import re
import requests
import time,datetime
import pandas as pd
import urllib
from uuid import getnode as get_mac
import sys
# Python 2 only: reload(sys) makes sys.setdefaultencoding callable again so
# the implicit str/unicode conversion codec can be switched to UTF-8.
# The current stream objects are saved first and put back afterwards --
# presumably because reload(sys) rebinds sys.stdout/sys.stderr in some
# environments (NOTE(review): confirm this is still needed).
stderr = sys.stderr
stdout = sys.stdout
reload(sys)
sys.setdefaultencoding('utf8')
sys.stderr = stderr
sys.stdout = stdout
# Replace urllib's registry-based proxy lookup with a stub that reports no
# proxies -- presumably to ignore Windows system proxy settings
# (NOTE(review): verify this actually influences the requests library).
urllib.getproxies_registry = lambda: {}
# Not referenced anywhere in the visible code; purpose unclear.
null = ''
from lxml import etree
import locale

HEADERS = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
}

print u'请输入你想搜索的电影:'

keyword = raw_input().decode(sys.stdin.encoding or locale.getpreferredencoding(True)) 

save_path = u'C:/Users/gzs10227/Desktop/电影搜索/'
fle = open('%s%s.txt'%(save_path,keyword),'w')


def open_url(url, timeout=10):
    """Fetch *url* with HTTP GET and parse the response body as HTML.

    Args:
        url: Address of the page to download.  The module-level HEADERS
            are sent with the request.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout, requests blocks indefinitely on a stalled
            connection, which would hang the whole script.

    Returns:
        The element tree root produced by ``etree.HTML`` from the raw
        response bytes.

    Raises:
        requests.exceptions.RequestException: if the connection fails or
            the timeout expires.
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    return etree.HTML(response.content)


def get_url(keyword):
    """Return the absolute detail-page URLs listed on the search page.

    Args:
        keyword: Search term substituted into the site's search URL.

    Returns:
        A list of URLs, one per ``href`` found on the page, each prefixed
        with the site host.  Empty when the page has no matching anchors.
    """
    # NOTE(review): the "_ctime_1" suffix presumably selects the sort order
    # and the first result page -- confirm against the site.
    search_url = 'http://www.btyunsou.me/search/%s_ctime_1.html' % keyword
    page = open_url(search_url)

    # Collect the link of every result: anchors under an <h4> inside
    # <li class="media"> elements.
    detail_urls = []
    for href in page.xpath('//li[@class="media"]//h4//a/@href'):
        detail_urls.append('http://www.btyunsou.me' + href)
    return detail_urls


def get_info(url):    
    web_data = open_url(url)
    try:
        title = web_data.xpath(r'//div[@class="row-flbtd tor-title"]/h2/text()')[0]
    except:
        title = ''
    if keyword in title:        
        print u'电影名:',title
        mange_link = 'magnet:?xt=urn:btih:' + url[23:-5]
        print u'磁力链接: ',mange_link
        fle.write(u'电影名:'+ title + '\n')
        fle.write(u'磁力链接: ' + mange_link +'\n')
        datalist = web_data.xpath(r'//table[@class="table detail table-hover"]/tbody//tr//td/text()')[:10]
        for i in range(0,len(datalist),2):
            print datalist[i],datalist[i+1]
            fle.write(datalist[i] + datalist[i+1] + '\n')
    else:
        print 'Sorry! None Search,Please change one: '
        
# Driver: look up the result links for the keyword, process each one and
# write a separator line after every result.  The try/finally guarantees the
# output file is flushed and closed even when a request or parse step raises.
try:
    links = get_url(keyword)
    for url in links:
        get_info(url)
        fle.write('--------------------------------------------------')
        fle.write('\n')
finally:
    fle.close()
    
    
    
    
    
接下来的计划是把脚本打包成 exe,这样没有安装 Python 的电脑也可以使用。

你可能感兴趣的:(python爬虫那些坑)