python 爬虫, 抓取百度美女吧图片

# ----2018-7-15 ------世界杯总决赛
import requests
from lxml import etree
import re


class TiBa_Image(object):


    # Initializer: configure the URLs, request headers and XPath expressions
    def __init__(self):
        self.base_url = 'http://tieba.baidu.com/f'
        self.second_url = 'https://tieba.baidu.com'
        self.headers = {"User-Agent": '"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1"'}
        self.first_xpath = '''//div[@class="grbm_row_wrapper"]/div[@class="grbm_ele_wrapper"]/a[@class="grbm_ele_a grbm_ele_big"]/@href'''
        self.second_xpath = '//div[@class="ag_main_list"]/div/a/@href'

    #发送请求
    def send_request(self,url, params=None):
        response = requests.get(url, params=params, headers=self.headers)
        # 解析数据
        data = response.content.decode()
        return data

    #保存文件,以文件名保存
    def write_file(self, data, pic_id, i):
        pic_id = str(pic_id)
        flie_name = "image/" + pic_id + "/" + str(i) + '.jpg'
        print(flie_name)
        with open(flie_name, 'wb') as f:
            f.write(data)
            print('保存成功')

    #解析数据   //div[@class="grbm_ele_wrapper"]/a/@href
    def JieXi_data(self, data, xpath_str):
        #1.转换类型
        html_data = etree.HTML(data)
        #2.解析  ----所有的主页图片连接
        result_list = html_data.xpath(xpath_str)
        # 3.返回数据
        return result_list

    #首页
    def Home_Page(self):
        # 1.发送请求
        dict_parms = {
            "kw": "美女",
            "ie": "utf-8",
            "tab": "album",
        }
        data = self.send_request(self.base_url,dict_parms)
        # 正则替换注释数据
        # re_data = re.compile('
                    

你可能感兴趣的:(python 爬虫, 抓取百度美女吧图片)