python尤果网图片爬虫(简单)__selenium+phantomJS+urllib2

1. 首先为 Python 安装 selenium 库和 lxml 库(代码中用 lxml 解析页面),然后下载 PhantomJS,并把其可执行文件所在目录加入 PATH 环境变量(具体步骤网上有大量教程可参考)。

2. 完整的 Python 代码如下(保存为 youguo_image_spider.py):

#!/usr/bin/env python
#_*_coding:utf-8_*_

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from lxml import etree
import urllib2

class youguoSpider(object):
    """Log in to ugirls.com with a headless browser and download cover images.

    Workflow: open the home page in PhantomJS, submit the login form,
    click the element with id "1", extract the cover-image URLs from the
    resulting page source, and save every image into the current working
    directory.
    """

    def __init__(self):
        # Home page of the site; the login dialog is opened from here.
        self.url = "https://www.ugirls.com/"
        # Headless browser session used for every page interaction.
        self.driver = webdriver.PhantomJS()

    def start(self):
        """Run the whole crawl, then shut the browser down.

        The driver is quit in a ``finally`` clause so the PhantomJS process
        is not left running, whether the crawl succeeds or raises. The
        spider instance must not be reused after this call.
        """
        try:
            self.login_getIndexPage()
        finally:
            self.driver.quit()

    def login_getIndexPage(self):
        """Log in through the home-page dialog and pass the page HTML on.

        After logging in, clicks the element with id "1", prints the
        resulting page source and forwards it to ``get_personUrlList``.
        Calling ``self.driver.save_screenshot(path)`` after any step is a
        handy way to debug the headless session.
        """
        driver = self.driver

        # Open the site's home page.
        driver.get(self.url)

        # Click the "login" button so the login dialog pops up.
        driver.find_element_by_id("btnshowlogin").click()

        # Fill in the credentials (placeholders: put a real account here).
        driver.find_element_by_id("txtAccount").send_keys(u"XXXXXXXXXX")
        driver.find_element_by_id("txtPass").send_keys(u"XXXXXXXX")

        # Submit the login form.
        driver.find_element_by_id("btnLogin").click()
        # NOTE(review): printed unconditionally; the login result itself is
        # never verified.
        print("登录成功")

        # Presumably a pager/tab link of the image listing -- confirm
        # against the live site.
        driver.find_element_by_id("1").click()
        html = driver.page_source
        print(html)
        self.get_personUrlList(html)

    def get_personUrlList(self, html):
        """Extract the cover-image URLs from *html* and download them.

        Args:
            html: Page source as returned by the driver.
        """
        ehtml = etree.HTML(html)
        urllist = ehtml.xpath(
            '//div[@class="magazine_list_wrap"]/div/a'
            '/img[@class="magazine_img"]/@src'
        )
        self.download(urllist)
        print(urllist)

    @staticmethod
    def _image_filename(index, url):
        """Build the local file name for the *index*-th image at *url*."""
        # Same URL slice as before; path separators are replaced so an
        # unexpected URL shape cannot point the name into a directory.
        stem = url[-48:-16].replace("/", "_").replace("\\", "_")
        return str(index) + "_" + stem + ".jpg"

    def download(self, urllist):
        """Download every URL in *urllist* into the working directory.

        Files are named ``<n>_<url slice>.jpg`` with ``n`` counting from 1.

        Args:
            urllist: Iterable of image URLs.
        """
        print("开始下载图片:")
        for index, personurl in enumerate(urllist, 1):
            print(personurl)
            response = urllib2.urlopen(personurl)
            # Read before creating the file so a failed transfer does not
            # leave an empty .jpg behind; always release the connection.
            try:
                data = response.read()
            finally:
                response.close()
            with open(self._image_filename(index, personurl), "wb") as f:
                f.write(data)
        print("下载结束!")

def main():
    """Script entry point: create a spider and run the full crawl."""
    youguoSpider().start()

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

3.效果图

python尤果网图片爬虫(简单)__selenium+phantomJS+urllib2_第1张图片

4. 如有问题,欢迎在下方留言讨论。

你可能感兴趣的:(Python爬虫)