# 爬取煎蛋网妹子图片 (scrape pictures from the jandan.net "ooxx" picture board)

import requests
import bs4
import base64
import urllib.request
num_photo = 1  # 1-based running count of pictures, shown in the progress line


def download_photo(url, num, timeout=30):
    """Download one picture and store it as ``I:/a/<num>.jpg``.

    Args:
        url: Address of the picture to fetch.
        num: String that is concatenated into the target path as the file
            name; ``'.jpg'`` is always appended to it.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot block the whole run.

    Side effects:
        Writes the file, prints a progress line and increments the
        module-level ``num_photo`` counter.

    Raises:
        urllib.error.URLError: If the picture cannot be fetched.
        OSError: If the target file cannot be written.
    """
    global num_photo
    # The context manager closes the response even when read() fails;
    # previously the connection was never closed explicitly.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        payload = response.read()
    # NOTE(review): get_page() in this file passes a name that looks like it
    # already carries an extension, so files may end in e.g. ".jpg.jpg" --
    # confirm whether that is intended before changing the path.
    with open('I:/a/' + num + '.jpg', 'wb') as f:
        f.write(payload)
    print("当前已下载第%d张" % num_photo)
    num_photo += 1
  
def get_url(url, timeout=10):
    """Request a web page while presenting a desktop-browser User-Agent.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests.get`` may block indefinitely on an unresponsive host.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = { "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6735.400 QQBrowser/10.2.2614.400" }
    return requests.get(url, headers=headers, timeout=timeout)
def get_soup(res):
    """Parse the text of a response with Python's built-in HTML parser.

    Args:
        res: Object whose ``text`` attribute holds the HTML source.

    Returns:
        A ``bs4.BeautifulSoup`` tree built from ``res.text``.
    """
    return bs4.BeautifulSoup(res.text, "html.parser")

def get_page(num):
    """Download every picture referenced on one jandan.net/ooxx page.

    Args:
        num: Page number; it is converted with ``str`` and inserted into
            the page URL.

    Side effects:
        Calls ``download_photo`` once for each ``.img-hash`` element found
        on the page.
    """
    url = "http://jandan.net/ooxx/page-" + str(num) + "#comments"
    large_url = "http://wx2.sinaimg.cn/large/"
    soup = get_soup(get_url(url))
    for tag in soup.select('.img-hash'):
        # The element text is base64; decoding it yields a slash-separated
        # address for the picture.
        link = base64.b64decode(tag.text.encode('utf-8'))
        # Only the last path component is kept and re-attached to the
        # "large" base URL; the same string is used as the local file name.
        file_name = str(link, 'utf-8').split('/')[-1]
        download_photo(large_url + file_name, file_name)
def main():
    """Ask for two page numbers and download the pages in between.

    Both answers are read before either is converted with ``int``. Pages
    are processed with ``range(first, second)``, so the second number
    itself is not downloaded.
    """
    print("you should input tow number to request download some picture what you like:")
    first_answer = input("请输入要下载煎蛋网妹子图的页数:(当前输入第一个数字)")
    second_answer = input("请输入要下载煎蛋网妹子图的页数:(当前输入第二个数字)")
    start_page = int(first_answer)
    stop_page = int(second_answer)
    for page in range(start_page, stop_page):
        get_page(page)
    print("下载完成!")
# Start the interactive downloader only when the file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
   main()

# 你可能感兴趣的:(Python爬虫)