爬虫 Android app ------实战(绘本多多)

爬取 Android App 的数据(做过 App 开发的应该知道,App 基本都使用 JSON 传递数据)

1、通过 Fiddler 抓包,找到对应请求的 URL 和返回的 JSON

2、用 requests 编写代码,通过 for 循环发起网络请求并保存图片

未解决的问题:

绘本多多这个应用的图片可以直接拿到;也尝试过《咔哒故事》,但它的图片地址不能直接返回图片,所以放弃了。

 

# -*- coding: UTF-8 -*-
import requests
import json
from contextlib import closing
import os

class get_photos(object):
    """Crawler that lists picture books and downloads their page images.

    The two list endpoints are expected to answer with a JSON object whose
    ``list`` key holds entries that each carry an ``id`` field.

    Attributes:
        books_list_ids: Book ids collected by the last ``get_book_lists`` call.
        book_pic_list_ids: Page-picture ids collected by the last
            ``get_book_pics_lists`` call.
        download_book_list_server: URL of the book-list endpoint.
        download_book_pic_list_server: URL template of the per-book picture
            list; the ``ssss`` placeholder is replaced by the book id.
        download_book_pic: URL template of a single page image; ``xxxx`` is
            replaced by the book id and ``zzzz`` by the picture id.
        timeout: Seconds passed as ``timeout`` to every ``requests.get`` call.
    """

    def __init__(self, timeout=10):
        """Set up empty id lists, the endpoint templates and the timeout.

        Args:
            timeout: Timeout in seconds for every HTTP request, so a stalled
                connection cannot hang the crawl forever.
        """
        self.books_list_ids = []
        self.book_pic_list_ids = []
        self.download_book_list_server = 'http://book.ergeduoduo.com/baby/bbpic.php?&pg=0&act=booklist&ps=600&pid=0'
        self.download_book_pic_list_server = 'http://book.ergeduoduo.com/baby/bbpic.php?&act=bookdetail&page=0&rid=ssss'
        self.download_book_pic = 'http://cdnbbbd.shoujiduoduo.com/bb/book/xxxx/zzzz.jpg'
        self.timeout = timeout

    def _fetch_ids(self, url):
        """Return the ``id`` of every entry in the JSON ``list`` served at *url*."""
        resp = requests.get(url, timeout=self.timeout)
        # Surface an HTTP failure directly instead of a confusing JSON
        # decoding error or KeyError further down.
        resp.raise_for_status()
        data = json.loads(resp.text)
        return [each['id'] for each in data['list']]

    def download(self, book_id, pic_id):
        """Download one page image into ``<book_id>/<pic_id>.jpg``.

        Args:
            book_id: Book id as a string; also used as the target directory.
            pic_id: Picture id as a string; also used as the file name stem.

        Returns:
            True when the image was written, False when the server answered
            with an error status (nothing is written in that case).
            Exceptions raised by ``requests.get`` itself are not caught.
        """
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(book_id, exist_ok=True)
        target = self.download_book_pic.replace('xxxx', book_id).replace('zzzz', pic_id)
        # stream=True lets iter_content() fetch the body chunk by chunk
        # instead of loading the whole image into memory first.
        with closing(requests.get(url=target, stream=True, timeout=self.timeout)) as r:
            if not r.ok:
                # Do not store an HTTP error page under a .jpg name.
                print('下载失败(HTTP %d): %s' % (r.status_code, target))
                return False
            # 'wb' truncates: re-running the crawler must overwrite an existing
            # file, not append a second copy of the image to it.
            with open(os.path.join(book_id, '%s.jpg' % pic_id), 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
        return True

    def get_book_lists(self):
        """Fetch the book list and store its ids in ``books_list_ids``.

        The stored list is replaced on every call, so calling this twice does
        not duplicate ids.

        Returns:
            The list of book ids that was stored.
        """
        self.books_list_ids = self._fetch_ids(self.download_book_list_server)
        return self.books_list_ids

    def get_book_pics_lists(self, book_id):
        """Fetch the picture ids of one book into ``book_pic_list_ids``.

        Args:
            book_id: Book id as a string; substituted for the ``ssss``
                placeholder of the picture-list URL template.

        Returns:
            The list of picture ids that was stored.
        """
        url = self.download_book_pic_list_server.replace('ssss', book_id)
        self.book_pic_list_ids = self._fetch_ids(url)
        return self.book_pic_list_ids


def main():
    """Download every page image of every book in the remote book list.

    Progress is reported on stdout with 1-based book and page counters.
    """
    gp = get_photos()
    print('获取书本列表......')
    gp.get_book_lists()
    print('获取列表成功,开始下载')
    for book_no, raw_book_id in enumerate(gp.books_list_ids, 1):
        print('  正在下载第%d本书....' % book_no)
        # The crawler methods substitute ids into URL templates with
        # str.replace, so ids are converted to strings once up front.
        book_id = str(raw_book_id)
        gp.get_book_pics_lists(book_id)
        for page_no, pic_id in enumerate(gp.book_pic_list_ids, 1):
            print(' 正在下载第%d页码......' % page_no)
            gp.download(book_id, str(pic_id))
    print('......下载完成...... ')


if __name__ == '__main__':
    main()

 

你可能感兴趣的:(机器学习)