将爬取的图片地址存入 MongoDB 数据库

  • 流程如下
import requests
from lxml import etree
import pymongo

# Open a client for the MongoDB server at localhost:27017.
conn = pymongo.MongoClient(host='localhost', port=27017)
# Select (or create) the "imgs" database.
db = conn['imgs']
# Select the "image" collection that receives the scraped records.
table = db['image']

# Store one scraped image record in MongoDB.
def save_data(dic):
    """Insert ``dic`` as a single document into the module-level ``table``.

    Args:
        dic: Mapping describing one image; passed unchanged to
            ``insert_one``.
    """
    collection = table
    collection.insert_one(dic)


# Scrape image addresses and titles from a listing page.
def cunimg(url):
    """Fetch ``url`` and save every listed image address via ``save_data``.

    The page is requested with a desktop-browser User-Agent, decoded as GBK
    and parsed as HTML. For each ``<li>`` under
    ``div#main > div.list > ul`` the first ``img/@src`` value and the joined
    text of its ``<b>`` elements are stored as ``{'img': ..., 'name': ...}``.
    Entries that contain no ``<img>`` are skipped instead of aborting the run.

    Args:
        url: Address of the listing page to scrape.
    """
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'
    }
    # Without a timeout requests waits indefinitely on a stalled server.
    res = requests.get(url=url, headers=headers, timeout=10)
    # Force GBK decoding before reading ``res.text``.
    res.encoding = 'gbk'
    tree = etree.HTML(res.text)

    for li in tree.xpath('//div[@id="main"]/div[@class="list"]/ul/li'):
        srcs = li.xpath('.//img/@src')
        if not srcs:
            # An item without an image (e.g. a text-only entry) would have
            # raised IndexError and dropped all remaining items.
            continue
        name = ''.join(li.xpath('.//b/text()'))
        save_data({'img': srcs[0], 'name': name})

if __name__ == '__main__':
    # Script entry point: scrape the listing page below and store its images.
    url = 'http://www.netbian.com/dongwu/'
    cunimg(url=url)

你可能感兴趣的:(爬虫)