# Collect "meizitu" image links from the jandan.net "ooxx" board.
# coding: utf-8
import datetime, time
from pyquery import PyQuery as pq
# Output file shared by the whole script; mode 'w' truncates any existing file.
fp = open('meizitu_link.txt', 'w') # Save the image links to meizitu_link.txt
def meizitu_link(n):
    """Scrape one page of the jandan.net "ooxx" board and record its image links.

    Fetches ``http://jandan.net/ooxx/page-<n>`` with browser-like request
    headers. For every ``a.view_img_link`` anchor inside ``.commentlist`` the
    anchor's ``href`` is printed and written as one line to the module-level
    file object ``fp``. Afterwards the number of links found on the page is
    printed and the function sleeps for one second.

    Args:
        n: Page number; it is converted with ``str`` and appended to the URL.

    Returns:
        None.
    """
    url = 'http://jandan.net/ooxx/page-'
    headers = {
        'Host': 'jandan.net',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/'
                       '537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 '
                       'Safari/537.36'),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'DNT': '1',
        'Referer': 'http://jandan.net/ooxx',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Cookie': '',  # The cookie value needs to be filled in.
    }
    v_source = pq(url + str(n), headers=headers)

    number = 0
    for item in v_source('.commentlist a.view_img_link'):
        # Look the attribute up once and reuse it for both outputs.
        href = v_source(item).attr('href')
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(href)
        # Replaces the Python-2-only ``print >> fp, href``; '%s' keeps the
        # same text (including "None" when the anchor has no href).
        fp.write('%s\n' % (href,))
        number += 1
    print(number)

    # Pause after each page -- presumably to avoid hammering the server.
    time.sleep(1)
page = 2018  # Latest page number; update manually.
starttime = datetime.datetime.now()
try:
    # NOTE(review): ``page`` is described as the latest page number, so the
    # range is inclusive; the previous ``range(1, page)`` never fetched it.
    for num in range(1, page + 1):
        try:
            meizitu_link(num)
        except Exception as e:
            # Best-effort crawl: report the failure (with its cause) and
            # move on to the next page instead of aborting the whole run.
            print("Exception has occurred at page " + str(num) + ": " + repr(e))
            continue
        print('ALL/Already: ' + str(num) + '/' + str(page))
finally:
    # Flush and release the output file even if the run is interrupted.
    fp.close()
# total_seconds() does not wrap around after 24 hours the way .seconds does.
print(int((datetime.datetime.now() - starttime).total_seconds()))