第九周作业:颜值打分

import requests
from lxml import etree
import time

# HTTP headers sent with every request; the user-agent is a desktop
# Chrome 68 on Windows 10 identification string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/68.0.3440.106 Safari/537.36'
    ),
}

# Site root that the relative article links are appended to.
path = 'https://www.jianshu.com'

def get_url(url):
    """Crawl one listing page and hand every article on it to ``get_img``.

    Fetches ``url``, walks each ``<li>`` under ``ul.note-list``, appends the
    entry's relative link to the module-level ``path`` and calls ``get_img``
    on the resulting address. Pauses for two seconds once the page has been
    processed (presumably to go easy on the server).

    Args:
        url: Address of the listing page to fetch.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)
    for info in html.xpath('//ul[@class="note-list"]/li'):
        hrefs = info.xpath('div/a/@href')
        if not hrefs:
            # An entry without a link cannot be followed; skip it rather
            # than raise IndexError and abandon the rest of the page.
            continue
        det_url = path + hrefs[0]
        print(det_url)
        get_img(det_url)
    time.sleep(2)

def get_img(url):
    """Download every image embedded in one article page.

    Fetches ``url``, reads the page title and an identifier string from
    fixed XPath locations, then downloads each image found inside a
    ``div.image-package`` element. Files are written to
    ``row_img/<title>+<identifier>+<n>.jpg`` where ``n`` counts the saved
    images from 1.

    Pages where the title or identifier cannot be located are reported and
    skipped; image containers without a ``data-original-src`` attribute are
    ignored.

    Args:
        url: Address of the article page to fetch.
    """
    # Local import: only this function needs to create the output directory.
    import os

    # A timeout keeps one stalled connection from hanging the whole crawl.
    res = requests.get(url, headers=headers, timeout=10)
    html = etree.HTML(res.text)

    titles = html.xpath('/html/body/div[1]/div[2]/div[1]/h1/text()')
    idents = html.xpath('/html/body/div[1]/div[2]/div[1]/div[1]/div/span/a/text()')
    if not titles or not idents:
        # The absolute XPaths depend on the page layout; a page that does
        # not match should not abort the crawl with IndexError.
        print('skipped, title or id not found: ' + url)
        return
    # Keep only the part of the title before the first ',' and '/'.
    title = titles[0].strip('|').split(',')[0].split('/')[0]
    ident = idents[0]  # named ``ident`` to avoid shadowing the builtin ``id``

    # Collect the image sources first so the saved files are numbered
    # consecutively even when some containers carry no image.
    sources = []
    for package in html.xpath('//div[@class="image-package"]'):
        found = package.xpath('div[1]/div[2]/img/@data-original-src')
        if found:
            sources.append(found[0])
    if not sources:
        return

    os.makedirs('row_img', exist_ok=True)
    for index, src in enumerate(sources, start=1):
        # The attribute is concatenated directly after the scheme, as the
        # original code did (it looks like a protocol-relative address).
        img_url = 'http:' + src
        print(img_url)
        res_1 = requests.get(img_url, headers=headers, timeout=10)
        file_name = 'row_img/' + title + '+' + ident + '+' + str(index) + '.jpg'
        # ``with`` closes the file even if the write fails.
        with open(file_name, 'wb') as fp:
            fp.write(res_1.content)


if __name__ == '__main__':
    # Build the addresses of listing pages 1 through 9 of the collection,
    # then crawl them in order.
    page_template = 'https://www.jianshu.com/c/bd38bd199ec6?order_by=added_at&page={}'
    urls = []
    for page_number in range(1, 10):
        urls.append(page_template.format(str(page_number)))
    for url in urls:
        get_url(url)

结果:


爬取图片
人脸识别
结果

你可能感兴趣的:(第九周作业:颜值打分)