python3
import urllib.request as ur
from urllib.parse import urljoin

from bs4 import BeautifulSoup

# Download the page once and keep the bytes: an HTTP response body can only
# be read a single time, so a second read() would hand the parser nothing.
page_url = 'https://www.zhihu.com'
with ur.urlopen(page_url) as s:
    sl = s.read()

# Name the parser explicitly so the result does not depend on which optional
# parser libraries happen to be installed.
bsObj = BeautifulSoup(sl, 'html.parser')
print(bsObj.h1)

# href of the first <link> tag in the document; it may be relative or
# protocol-relative, so resolve it against the page URL before fetching.
link_url = urljoin(page_url, bsObj.link['href'])
print(link_url)

path = '/Users/huangyong/Downloads/py/img.png'
with ur.urlopen(link_url) as resource:
    data = resource.read()

# Binary mode: the payload is written exactly as it was received.
with open(path, "wb") as f:
    f.write(data)
以上代码下载了一张图片。
下面的代码从百度图片下载 30 张图片(其中有些代码是没用的):
# -*- coding:utf-8 -*-
import urllib2
import urllib
from bs4 import BeautifulSoup
import re
# Baidu image-search results page; the search terms are part of the URL.
url = 'https://image.baidu.com/search/index?ct=201326592&cl=2&st=-1&lm=-1&nc=1&ie=utf-8&tn=baiduimage&ipn=r&rps=1&pv=&fm=rs5&word=仙剑奇侠传电视剧&oriquery=仙剑奇侠传&ofr=仙剑奇侠传'
response = urllib2.urlopen(url)
try:
    cont = response.read()
finally:
    # Python 2 urllib2 responses are not context managers, so close by hand.
    response.close()

# The page is scanned with a regex instead of being parsed as HTML: every
# "objURL":"..." pair found in the raw response is treated as an image URL.
picurl = re.findall(r'"objURL":"(.*?)"', cont)
print("共提取" + str(len(picurl)) + "个图片")

# Files are numbered from 1 in the order the URLs were found.
# NOTE: the ./py/ directory must already exist; it is not created here.
for i, each in enumerate(picurl, 1):
    try:
        # Fetch before opening the target file so that a failed download
        # does not leave an empty .jpg behind.
        data = urllib.urlopen(each).read()
    except IOError as err:
        # One unreachable image should not abort the rest of the batch.
        print(str(i) + ' download failed: ' + str(err))
        continue
    # 'wb': image bytes must be written in binary mode, not text mode.
    with open('./py/仙剑奇侠传' + str(i) + '.jpg', 'wb') as f:
        f.write(data)
    print(str(i) + '....')
print('over')