import os.path
import urllib
import urllib.request  # "import urllib" alone does not expose the request submodule

import requests
from bs4 import BeautifulSoup
def file_extension(path):
    """Return the extension of *path*, including the leading dot ('' if none)."""
    _, ext = os.path.splitext(path)
    return ext
# --- Flat-script variant: download every <img> from one search-result page ---
# Fetch the listing page (the URL query term is GB2312 percent-encoded).
res = requests.get('http://so.sccnn.com/search/%D1%F9%BB%FA/1.html')
soup = BeautifulSoup(res.text, "html.parser")

# Collect the src of every <img>; skip tags without a src attribute so one
# decorative/broken tag cannot abort the whole run with a KeyError.
imgS_url = [img['src'] for img in soup.find_all('img') if img.has_attr('src')]

# Download each image under a sequential numeric name; the extension is taken
# from the URL path with any query string stripped.
# NOTE: requires "import urllib.request" at the top of the file — plain
# "import urllib" does not make the submodule available.
for m, img_url in enumerate(imgS_url):
    print(img_url)
    urllib.request.urlretrieve(
        img_url,
        'e:/download/ss/' + str(m) + file_extension(img_url.split('?')[0]))
# Image scraping — a self-made Python crawler "library" (class-based rewrite below)
import requests
from bs4 import BeautifulSoup
import urllib
import os.path
class Crawler4j:
    """Minimal single-page attribute scraper.

    Fetches *url*, finds every *elements* tag (e.g. ``'img'``), reads the
    *src* attribute (e.g. ``'src'``) from each, and downloads every value to
    *location* under a sequential numeric file name whose extension is taken
    from the URL path (query string stripped).
    """

    def __init__(self, url, elements, src, location):
        self.__url = url            # page to scrape
        self.__elements = elements  # tag name passed to soup.find_all
        self.__src = src            # attribute holding the resource URL
        self.__location = location  # output path prefix — assumed to exist; TODO confirm
        # Per-instance state. The original declared these as *class*
        # attributes, so every instance shared one counter and one URL
        # list — a second crawler would re-download the first one's URLs.
        self.__count = 0
        self.__valueList = []

    def __file_extension(self, path):
        """Return the extension of *path*, including the leading dot."""
        return os.path.splitext(path)[1]

    def crawl(self):
        """Fetch the page, collect attribute values, download each one."""
        res = requests.get(self.__url)
        soup = BeautifulSoup(res.text, "html.parser")
        for tag in soup.find_all(self.__elements):
            # Skip elements missing the attribute instead of raising KeyError.
            if tag.has_attr(self.__src):
                self.__valueList.append(tag[self.__src])
        for value in self.__valueList:
            # Needs "import urllib.request" at the top of the file — plain
            # "import urllib" does not expose the submodule.
            urllib.request.urlretrieve(
                value,
                self.__location + str(self.__count)
                + self.__file_extension(value.split('?')[0]))
            self.__count += 1
if __name__ == '__main__':
    # Scrape every <img src=...> on the search page into e:/download/bb/.
    crawler = Crawler4j(
        'http://so.sccnn.com/search/%D1%F9%BB%FA/1.html',
        'img', 'src', 'e:/download/bb/')
    crawler.crawl()