Python 爬虫实例:爬取图片并保存到本地

# Import urllib (used for downloading files), requests (HTTP client) and re (regular expressions).
import urllib

import requests

import re

# Search keyword whose result page is scanned for images.
key_name = "python"

# Browser-like User-Agent sent with the search request
# (presumably so the site serves its regular HTML page -- confirm if needed).
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
}

# Matching rule: capture everything between the protocol-relative prefix
# `src="//` and the `.jpg"` suffix.  The dot is escaped so only a literal
# ".jpg" matches, and `[^"]` keeps one capture from running across the
# closing quote into a later attribute.
pat = r'src="//([^"]+?)\.jpg"'


def build_search_url(keyword):
    """Return the JD search-results URL for *keyword*."""
    return "https://search.jd.com/Search?keyword=" + keyword + "&enc=utf-8"


def find_image_paths(html):
    """Return the host/path stem of every ``src="//....jpg"`` found in *html*.

    Each stem excludes the leading ``//`` and the trailing ``.jpg``.
    An empty list is returned when nothing matches.
    """
    return re.findall(pat, html)


def download_images(image_paths):
    """Download every image in *image_paths* into the current directory.

    Each stem is turned back into a full URL, printed, and saved as
    ``./imagetb<index>.jpg`` where ``<index>`` is its position in the list.
    """
    # Import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.request` has been loaded.
    import urllib.request

    for index, image_path in enumerate(image_paths):
        # Full URL of this image.
        this_img_url = "http://" + image_path + ".jpg"
        print(this_img_url)

        img_path = "./imagetb" + str(index) + ".jpg"
        # urlretrieve() fetches the URL and writes it to the local path.
        urllib.request.urlretrieve(this_img_url, img_path)


def main():
    """Fetch the search page for ``key_name`` and save all of its .jpg images."""
    url = build_search_url(key_name)
    print(url)

    # Fetch the page source; the timeout makes a stalled connection fail
    # instead of hanging forever.
    data1 = requests.get(url, headers=headers, timeout=10).content.decode()

    # All image stems matched in the page.
    img_url = find_image_paths(data1)
    print(img_url)
    print(len(img_url))

    # Write every matched image to disk.
    download_images(img_url)


if __name__ == "__main__":
    main()

完整代码

# Import urllib (used for downloading files), requests (HTTP client) and re (regular expressions).
import urllib
import requests
import re
# Search keyword whose result page is scanned for images.
key_name = "python"

# Browser-like User-Agent sent with the search request
# (presumably so the site serves its regular HTML page -- confirm if needed).
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
}

# Matching rule: capture everything between the protocol-relative prefix
# `src="//` and the `.jpg"` suffix.  The dot is escaped so only a literal
# ".jpg" matches, and `[^"]` keeps one capture from running across the
# closing quote into a later attribute.
pat = r'src="//([^"]+?)\.jpg"'


def build_search_url(keyword):
    """Return the JD search-results URL for *keyword*."""
    return "https://search.jd.com/Search?keyword=" + keyword + "&enc=utf-8"


def find_image_paths(html):
    """Return the host/path stem of every ``src="//....jpg"`` found in *html*.

    Each stem excludes the leading ``//`` and the trailing ``.jpg``.
    An empty list is returned when nothing matches.
    """
    return re.findall(pat, html)


def download_images(image_paths):
    """Download every image in *image_paths* into the current directory.

    Each stem is turned back into a full URL, printed, and saved as
    ``./imagetb<index>.jpg`` where ``<index>`` is its position in the list.
    """
    # Import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.request` has been loaded.
    import urllib.request

    for index, image_path in enumerate(image_paths):
        # Full URL of this image.
        this_img_url = "http://" + image_path + ".jpg"
        print(this_img_url)

        img_path = "./imagetb" + str(index) + ".jpg"
        # urlretrieve() fetches the URL and writes it to the local path.
        urllib.request.urlretrieve(this_img_url, img_path)


def main():
    """Fetch the search page for ``key_name`` and save all of its .jpg images."""
    url = build_search_url(key_name)
    print(url)

    # Fetch the page source; the timeout makes a stalled connection fail
    # instead of hanging forever.
    data1 = requests.get(url, headers=headers, timeout=10).content.decode()

    # All image stems matched in the page.
    img_url = find_image_paths(data1)
    print(img_url)
    print(len(img_url))

    # Write every matched image to disk.
    download_images(img_url)


if __name__ == "__main__":
    main()

亲测可用

你可能感兴趣的:(python入门,pytorch,数据挖掘,url,正则表达式)