#环境python3.6
# 爬取APP 无忧借条
# 通过用户账号密码爬取用户个人信息(我这账号密码已打码)
爬虫部分代码
# coding:utf8
import hashlib
import time
import urllib3
import json
# Default request headers sent with every call; login() and getAccountInfo()
# add/overwrite the 'refer' key on this shared dict before each request.
headers = {'Accept': ' */*', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,en-US;q=0.8',
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 jinjiedao'}
origin_data = {} # raw API payloads, stored under the keys 'login' and 'account'
result={} # fields extracted from the account payload by getAccountInfo()
http = urllib3.PoolManager() # shared HTTP connection pool
urllib3.disable_warnings() # suppress urllib3 warnings
# Obtain a session token.
def login(phone, password):
    """Log in with a phone number and password and return the session token.

    The password is sent as the hex digest of MD5 applied twice
    (md5(md5(password))). The payload returned by ``excute`` is also stored
    in ``origin_data['login']``.

    Args:
        phone: Account phone number (str).
        password: Plain-text password (str).

    Returns:
        The value of the ``token`` field in the login payload.

    Raises:
        KeyError: If the payload has no ``token`` field, e.g. because the
            login was rejected and ``excute`` returned the error response.
    """
    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    # NOTE(review): the header key is 'refer', not the standard 'Referer';
    # kept as-is because the remote API's expectations are not visible here.
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/register/login.html?telephone=' + phone
    first_digest = hashlib.md5(password.encode('utf-8')).hexdigest()  # first MD5 pass
    second_digest = hashlib.md5(first_digest.encode('utf-8')).hexdigest()  # second MD5 pass
    # Build the login parameters.
    data = {'c_telephone': phone, 'c_pwd': second_digest}
    response = excute(login_url, 'GET', data)
    origin_data['login'] = response
    try:
        return response['token']
    except KeyError:
        # Same exception type as before, but with a message that explains it.
        raise KeyError(
            'login failed: no token in response (code=%r)' % (response.get('code'),)
        ) from None
# Fetch the account information available after login.
def getAccountInfo(token):
    """Fetch the home-page info for ``token`` and extract the account fields.

    The raw payload is stored in ``origin_data['account']``. The extracted
    fields are built in a fresh dict on every call, so an optional field
    (``id_card``) left over from an earlier call can never leak into a later
    result. The module-level ``result`` dict is reset to mirror the latest
    extraction.

    Args:
        token: Session token returned by ``login``.

    Returns:
        A new dict with the keys ``balance``, ``wait_repay_amt``,
        ``wait_receive``, ``lender_cnt``, ``phone``, ``name`` and, only when
        the payload provides it, ``id_card``.

    Raises:
        KeyError: If the payload lacks ``userInfo``, ``borrowInfo`` or one of
            the required fields (e.g. when ``excute`` returned an error
            response).
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    response = excute(account_url, 'GET', {'token': token})
    origin_data['account'] = response
    user_info = response['userInfo']
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount still to repay
        'wait_receive': user_info['n_to_receive_amt'],  # amount still to receive
        # presumably a lender count, judging by the field name -- TODO confirm
        'lender_cnt': response['borrowInfo']['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        # '未实名' marks an account without a verified real name
        'name': user_info.get('c_name', '未实名'),
    }
    if 'c_id_card_no' in user_info:  # ID card number, optional
        info['id_card'] = user_info['c_id_card_no']
    # Keep the module-level dict in sync, dropping fields from earlier calls.
    result.clear()
    result.update(info)
    return info
def excute(url, method, data):
    """Send a request through the shared pool and unwrap the JSON envelope.

    A millisecond ``timestamp`` field is added to a copy of ``data``, so the
    caller's dict is left untouched.

    Args:
        url: Target URL.
        method: HTTP method name, e.g. ``'GET'``.
        data: Dict of request fields, or ``None`` for no extra fields.

    Returns:
        ``response['object']`` when the envelope's ``code`` equals 200;
        otherwise the whole decoded response with an added ``error`` entry.
    """
    fields = {} if data is None else dict(data)
    fields['timestamp'] = int(round(time.time() * 1000))
    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))
    if response['code'] == 200:
        return response['object']
    response['error'] = '处理失败'
    return response
if __name__ == "__main__":
    # Placeholder credentials (masked in the original post).
    phone = '1516842****'
    pwd = '123456'
    token = login(phone, pwd)
    # Print the extracted account info as JSON instead of discarding it.
    print(json.dumps(getAccountInfo(token), ensure_ascii=False))
运行后得到的结果为 JSON 数据
以上是爬虫部分,接下来先把API接口模板写好:
from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth
# Flask application and Flask-RESTful API objects
app = Flask(__name__)
api = Api(app)
# Request-argument parser for the REST API: both "user" and "pwd" are required
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")
# Placeholder for the crawler logic.
def to_do(arg1, args2):
    """Return a fixed placeholder string; both arguments are ignored."""
    return "this is info"
# Resource handling POST requests on the collection endpoint.
class TodoList(Resource):
    """REST resource that passes the posted credentials to ``to_do``."""

    def post(self):
        """Handle POST: parse ``user`` and ``pwd`` and return the result.

        Example:
            curl http://127.0.0.1:5000/users -X POST -d "user=Brown&pwd=secret"

        Returns:
            A ``({"info": ...}, 201)`` tuple, where ``info`` is whatever
            ``to_do`` returned.
        """
        args = parser_put.parse_args()
        user = args['user']
        pwd = args['pwd']
        info = {"info": to_do(user, pwd)}
        # Status 201 is kept for compatibility with existing clients.
        return info, 201
# Register the route; the resource is served at http://127.0.0.1:5000/users
api.add_resource(TodoList, "/users")
if __name__ == "__main__":
    # NOTE: debug=True enables Flask's interactive debugger and reloader;
    # use it for local development only.
    app.run(debug=True)
接下来将爬虫部分嵌入到模板里面,完整代码如下:
from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth
import hashlib
import time
import urllib3
import json
# Flask application and Flask-RESTful API objects
app = Flask(__name__)
api = Api(app)
# Request-argument parser for the REST API: both "user" and "pwd" are required
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")
# Default request headers sent with every call; login() and getAccountInfo()
# add/overwrite the 'refer' key on this shared dict before each request.
headers = {'Accept': ' */*', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,en-US;q=0.8',
'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 Safari/537.36 jinjiedao'}
origin_data = {} # raw API payloads, stored under the keys 'login' and 'account'
result = {} # fields extracted from the account payload by getAccountInfo()
http = urllib3.PoolManager() # shared HTTP connection pool
urllib3.disable_warnings() # suppress urllib3 warnings
# Obtain a session token.
def login(phone, password):
    """Log in with a phone number and password and return the session token.

    The password is sent as the hex digest of MD5 applied twice
    (md5(md5(password))). The payload returned by ``excute`` is also stored
    in ``origin_data['login']``.

    Args:
        phone: Account phone number (str).
        password: Plain-text password (str).

    Returns:
        The value of the ``token`` field in the login payload.

    Raises:
        KeyError: If the payload has no ``token`` field, e.g. because the
            login was rejected and ``excute`` returned the error response.
    """
    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    # NOTE(review): the header key is 'refer', not the standard 'Referer';
    # kept as-is because the remote API's expectations are not visible here.
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/register/login.html?telephone=' + phone
    first_digest = hashlib.md5(password.encode('utf-8')).hexdigest()  # first MD5 pass
    second_digest = hashlib.md5(first_digest.encode('utf-8')).hexdigest()  # second MD5 pass
    # Build the login parameters.
    data = {'c_telephone': phone, 'c_pwd': second_digest}
    response = excute(login_url, 'GET', data)
    origin_data['login'] = response
    try:
        return response['token']
    except KeyError:
        # Same exception type as before, but with a message that explains it.
        raise KeyError(
            'login failed: no token in response (code=%r)' % (response.get('code'),)
        ) from None
# Fetch the account information available after login.
def getAccountInfo(token):
    """Fetch the home-page info for ``token`` and extract the account fields.

    The raw payload is stored in ``origin_data['account']``. The extracted
    fields are built in a fresh dict on every call, so an optional field
    (``id_card``) belonging to one request's user can never leak into the
    response for a later request. The module-level ``result`` dict is reset
    to mirror the latest extraction.

    Args:
        token: Session token returned by ``login``.

    Returns:
        A new dict with the keys ``balance``, ``wait_repay_amt``,
        ``wait_receive``, ``lender_cnt``, ``phone``, ``name`` and, only when
        the payload provides it, ``id_card``.

    Raises:
        KeyError: If the payload lacks ``userInfo``, ``borrowInfo`` or one of
            the required fields (e.g. when ``excute`` returned an error
            response).
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'
    headers['refer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    response = excute(account_url, 'GET', {'token': token})
    origin_data['account'] = response
    user_info = response['userInfo']
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount still to repay
        'wait_receive': user_info['n_to_receive_amt'],  # amount still to receive
        # presumably a lender count, judging by the field name -- TODO confirm
        'lender_cnt': response['borrowInfo']['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        # '未实名' marks an account without a verified real name
        'name': user_info.get('c_name', '未实名'),
    }
    if 'c_id_card_no' in user_info:  # ID card number, optional
        info['id_card'] = user_info['c_id_card_no']
    # Keep the module-level dict in sync, dropping fields from earlier calls.
    result.clear()
    result.update(info)
    return info
def excute(url, method, data):
    """Send a request through the shared pool and unwrap the JSON envelope.

    A millisecond ``timestamp`` field is added to a copy of ``data``, so the
    caller's dict is left untouched.

    Args:
        url: Target URL.
        method: HTTP method name, e.g. ``'GET'``.
        data: Dict of request fields, or ``None`` for no extra fields.

    Returns:
        ``response['object']`` when the envelope's ``code`` equals 200;
        otherwise the whole decoded response with an added ``error`` entry.
    """
    fields = {} if data is None else dict(data)
    fields['timestamp'] = int(round(time.time() * 1000))
    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))
    if response['code'] == 200:
        return response['object']
    response['error'] = '处理失败'
    return response
def to_do(phone, password):
    """Log in with the given credentials and return the account info.

    Args:
        phone: Account phone number passed to ``login``.
        password: Plain-text password passed to ``login``.

    Returns:
        Whatever ``getAccountInfo`` returns for the token from ``login``.
    """
    token = login(phone, password)
    return getAccountInfo(token)
# Resource handling POST requests on the collection endpoint.
class TodoList(Resource):
    """REST resource that passes the posted credentials to ``to_do``."""

    def post(self):
        """Handle POST: parse ``user`` and ``pwd`` and return the result.

        Example:
            curl http://127.0.0.1:5000/users -X POST -d "user=1516842xxxx&pwd=secret"

        Returns:
            A ``({"info": ...}, 201)`` tuple, where ``info`` is whatever
            ``to_do`` returned.
        """
        args = parser_put.parse_args()
        user = args['user']
        pwd = args['pwd']
        info = {"info": to_do(user, pwd)}
        # Status 201 is kept for compatibility with existing clients.
        return info, 201
# Register the route; the resource is served at /users
api.add_resource(TodoList, "/users")
if __name__ == "__main__":
    # NOTE: debug=True enables Flask's interactive debugger and reloader;
    # use it for local development only.
    app.run(debug=True)
这是用 Postman 测试工具模拟浏览器请求得到的结果