python爬虫爬取APP并封装成API接口调用,使用flask-restful

#环境python3.6

# 爬取APP 无忧借条

# 通过用户账号密码爬取用户个人信息(我这账号密码已打码)

爬虫部分代码

# coding:utf8
import hashlib
import time
import urllib3
import json


# Request headers shared by every call made through the connection pool.
headers = {
    'Accept': ' */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,en-US;q=0.8',
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
        'Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 '
        'Safari/537.36 jinjiedao'
    ),
}
origin_data = {}  # raw responses, keyed by request stage ('login', 'account')
result = {}  # fields extracted from the raw responses
http = urllib3.PoolManager()  # shared HTTP connection pool
urllib3.disable_warnings()  # silence all urllib3 warnings

# Log in and obtain a session token
def login(phone, password):
    """Log in with a phone number and password and return the session token.

    The request carries the hex digest of MD5 applied twice to the password
    (the MD5 of the first digest's hex string), not the password itself.

    Side effects: sets ``headers['Referer']`` on the shared request headers
    and stores the response in ``origin_data['login']``.

    Args:
        phone: Account phone number.
        password: Plain-text account password.

    Returns:
        The ``token`` value from the login response.

    Raises:
        KeyError: If the response carries no ``token``. ``excute`` hands back
            the whole error payload when the server code is not 200, so this
            is what a rejected login looks like.
    """
    from urllib.parse import quote  # function-scope import; file imports stay as they are

    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    # HTTP spells this header "Referer"; a 'refer' key is not a header servers
    # recognise. The phone number lands in a query string, so percent-encode it.
    headers['Referer'] = (
        'https://www.gushistory.com/jjdapp/html/register/login.html?telephone='
        + quote(str(phone), safe='')
    )

    # Two MD5 passes: hash the password, then hash that digest's hex string.
    first_pass = hashlib.md5(password.encode('utf-8')).hexdigest()
    hashed_pwd = hashlib.md5(first_pass.encode('utf-8')).hexdigest()

    # Login request parameters
    data = {'c_telephone': phone, 'c_pwd': hashed_pwd}
    response = excute(login_url, 'GET', data)
    origin_data['login'] = response

    if 'token' not in response:
        # Same exception type as a plain lookup would raise, but with a
        # message that says what went wrong; the raw payload is kept in
        # origin_data['login'] for inspection.
        raise KeyError("login response has no 'token' (login rejected?)")
    return response['token']

# Fetch the account overview available after login
def getAccountInfo(token):
    """Fetch the home-page info for a logged-in session and extract key fields.

    Side effects: sets ``headers['Referer']`` on the shared request headers,
    stores the raw response in ``origin_data['account']`` and replaces the
    contents of the module-level ``result`` dict with the extracted fields.

    Args:
        token: Session token returned by ``login``.

    Returns:
        A new dict with ``balance``, ``wait_repay_amt``, ``wait_receive``,
        ``lender_cnt``, ``phone`` and ``name`` (``'未实名'`` when the account
        has no ``c_name``), plus ``id_card`` only when the account has one.

    Raises:
        KeyError: If the response lacks ``userInfo``/``borrowInfo`` or one of
            the required fields (e.g. when the request failed).
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'
    # HTTP spells this header "Referer"; a 'refer' key is not a header servers
    # recognise.
    headers['Referer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    response = excute(account_url, 'GET', {'token': token})
    origin_data['account'] = response

    user_info = response['userInfo']
    # Build a fresh dict per call so that optional fields from an earlier
    # call (another user's id_card, for instance) can never carry over.
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount to repay
        'wait_receive': user_info['n_to_receive_amt'],  # amount to receive
        # Original note calls this a lending amount; the key name suggests a
        # lender count -- TODO confirm against the API.
        'lender_cnt': response['borrowInfo']['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        'name': user_info.get('c_name', '未实名'),  # name, or "not verified"
    }
    if 'c_id_card_no' in user_info:  # ID card number, only when present
        info['id_card'] = user_info['c_id_card_no']

    # Keep the module-level dict in sync for code that reads it directly,
    # dropping whatever a previous call left behind.
    result.clear()
    result.update(info)
    return info


def excute(url, method, data):
    """Send one API request and unwrap the JSON envelope of the response.

    A millisecond ``timestamp`` field is added to every request. The request
    goes through the shared ``http`` pool with the shared ``headers``.

    Args:
        url: Endpoint URL.
        method: HTTP method name, e.g. ``'GET'``.
        data: Mapping of request fields, or ``None`` for no extra fields.
            The caller's mapping is left untouched.

    Returns:
        ``response['object']`` when the envelope's ``code`` equals 200;
        otherwise the whole decoded response with an extra ``error`` entry
        set to ``'处理失败'`` ("processing failed").
    """
    # Work on a copy so the timestamp never ends up in the caller's dict.
    fields = {} if data is None else dict(data)
    fields['timestamp'] = int(round(time.time() * 1000))
    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))
    if response['code'] == 200:
        return response['object']
    response['error'] = '处理失败'
    return response




if __name__ == "__main__":
    # Demo run: the phone number is masked, substitute real credentials.
    phone = '1516842****'
    pwd = '123456'
    token = login(phone, pwd)
    # Show the extracted fields as JSON; ensure_ascii=False keeps Chinese
    # values readable instead of \u-escaped.
    print(json.dumps(getAccountInfo(token), ensure_ascii=False))

运行后得到的结果为 JSON 格式的数据



以上是爬虫部分,接下来先把API接口模板写好:


from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth


# Flask application and the flask-restful API wrapper around it
app = Flask(__name__)
api = Api(app)

# Request parser for the REST API: both "user" and "pwd" are required string
# arguments; the help text is what the parser reports for a missing one.
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")

# Placeholder for the crawler logic
def to_do(arg1, args2):
    """Stand-in for the crawler: ignores both arguments, returns a fixed string."""
    placeholder = "this is info"
    return placeholder

# Resource that handles POST requests carrying user credentials
class TodoList(Resource):
    """REST resource that passes the posted credentials to ``to_do``."""

    def post(self):
        """Run ``to_do`` on the posted ``user`` and ``pwd`` fields.

        Example:
            curl http://127.0.0.1:5000/users -X POST -d "user=<phone>&pwd=<password>"

        Returns:
            A ``({"info": ...}, 201)`` tuple: the payload and the HTTP status.
        """
        form = parser_put.parse_args()
        payload = {"info": to_do(form['user'], form['pwd'])}
        # Responds with 201 (Created), as the original template does.
        return payload, 201


# Register the route; the resource is served at http://127.0.0.1:5000/users
api.add_resource(TodoList, "/users")


if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode; keep it to local demos.
    app.run(debug=True)


接下来将爬虫部分嵌入到模板里面,完整代码如下:

from flask import Flask, g
from flask_restful import reqparse, Api, Resource
from flask_httpauth import HTTPTokenAuth
import hashlib
import time
import urllib3
import json


# Flask application and the flask-restful API wrapper around it
app = Flask(__name__)
api = Api(app)

# Request parser for the REST API: both "user" and "pwd" are required string
# arguments; the help text is what the parser reports for a missing one.
parser_put = reqparse.RequestParser()
parser_put.add_argument("user", type=str, required=True, help="need user data")
parser_put.add_argument("pwd", type=str, required=True, help="need pwd data")

# Request headers shared by every call made through the connection pool.
headers = {
    'Accept': ' */*',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,en-US;q=0.8',
    'User-Agent': (
        'Mozilla/5.0 (Linux; Android 6.0.1; Redmi 3S Build/MMB29M; wv) '
        'AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 '
        'Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043632 '
        'Safari/537.36 jinjiedao'
    ),
}
origin_data = {}  # raw responses, keyed by request stage ('login', 'account')
result = {}  # fields extracted from the raw responses
http = urllib3.PoolManager()  # shared HTTP connection pool
urllib3.disable_warnings()  # silence all urllib3 warnings


# Log in and obtain a session token
def login(phone, password):
    """Log in with a phone number and password and return the session token.

    The request carries the hex digest of MD5 applied twice to the password
    (the MD5 of the first digest's hex string), not the password itself.

    Side effects: sets ``headers['Referer']`` on the shared request headers
    and stores the response in ``origin_data['login']``.

    Args:
        phone: Account phone number.
        password: Plain-text account password.

    Returns:
        The ``token`` value from the login response.

    Raises:
        KeyError: If the response carries no ``token``. ``excute`` hands back
            the whole error payload when the server code is not 200, so this
            is what a rejected login looks like.
    """
    from urllib.parse import quote  # function-scope import; file imports stay as they are

    login_url = 'https://www.gushistory.com/jjdApi/user/login'
    # HTTP spells this header "Referer"; a 'refer' key is not a header servers
    # recognise. The phone number lands in a query string, so percent-encode it.
    headers['Referer'] = (
        'https://www.gushistory.com/jjdapp/html/register/login.html?telephone='
        + quote(str(phone), safe='')
    )

    # Two MD5 passes: hash the password, then hash that digest's hex string.
    first_pass = hashlib.md5(password.encode('utf-8')).hexdigest()
    hashed_pwd = hashlib.md5(first_pass.encode('utf-8')).hexdigest()

    # Login request parameters
    data = {'c_telephone': phone, 'c_pwd': hashed_pwd}
    response = excute(login_url, 'GET', data)
    origin_data['login'] = response

    if 'token' not in response:
        # Same exception type as a plain lookup would raise, but with a
        # message that says what went wrong; the raw payload is kept in
        # origin_data['login'] for inspection.
        raise KeyError("login response has no 'token' (login rejected?)")
    return response['token']


# Fetch the account overview available after login
def getAccountInfo(token):
    """Fetch the home-page info for a logged-in session and extract key fields.

    Side effects: sets ``headers['Referer']`` on the shared request headers,
    stores the raw response in ``origin_data['account']`` and replaces the
    contents of the module-level ``result`` dict with the extracted fields.

    Args:
        token: Session token returned by ``login``.

    Returns:
        A new dict with ``balance``, ``wait_repay_amt``, ``wait_receive``,
        ``lender_cnt``, ``phone`` and ``name`` (``'未实名'`` when the account
        has no ``c_name``), plus ``id_card`` only when the account has one.

    Raises:
        KeyError: If the response lacks ``userInfo``/``borrowInfo`` or one of
            the required fields (e.g. when the request failed).
    """
    account_url = 'https://www.gushistory.com/jjdApi/user/getHomepageInfo'
    # HTTP spells this header "Referer"; a 'refer' key is not a header servers
    # recognise.
    headers['Referer'] = 'https://www.gushistory.com/jjdapp/html/index.html'
    response = excute(account_url, 'GET', {'token': token})
    origin_data['account'] = response

    user_info = response['userInfo']
    # Build a fresh dict per request: this function serves many users, and an
    # optional field from an earlier call (another user's id_card) must never
    # carry over into the next response.
    info = {
        'balance': user_info['n_left_amt'],  # balance
        'wait_repay_amt': user_info['n_to_repay_amt'],  # amount to repay
        'wait_receive': user_info['n_to_receive_amt'],  # amount to receive
        # Original note calls this a lending amount; the key name suggests a
        # lender count -- TODO confirm against the API.
        'lender_cnt': response['borrowInfo']['n_lender_cnt'],
        'phone': user_info['c_telephone'],  # phone number
        'name': user_info.get('c_name', '未实名'),  # name, or "not verified"
    }
    if 'c_id_card_no' in user_info:  # ID card number, only when present
        info['id_card'] = user_info['c_id_card_no']

    # Keep the module-level dict in sync for code that reads it directly,
    # dropping whatever a previous call left behind.
    result.clear()
    result.update(info)
    return info


def excute(url, method, data):
    """Send one API request and unwrap the JSON envelope of the response.

    A millisecond ``timestamp`` field is added to every request. The request
    goes through the shared ``http`` pool with the shared ``headers``.

    Args:
        url: Endpoint URL.
        method: HTTP method name, e.g. ``'GET'``.
        data: Mapping of request fields, or ``None`` for no extra fields.
            The caller's mapping is left untouched.

    Returns:
        ``response['object']`` when the envelope's ``code`` equals 200;
        otherwise the whole decoded response with an extra ``error`` entry
        set to ``'处理失败'`` ("processing failed").
    """
    # Work on a copy so the timestamp never ends up in the caller's dict.
    fields = {} if data is None else dict(data)
    fields['timestamp'] = int(round(time.time() * 1000))
    r = http.request(url=url, method=method, headers=headers, fields=fields)
    response = json.loads(r.data.decode('utf-8'))
    if response['code'] == 200:
        return response['object']
    response['error'] = '处理失败'
    return response


def to_do(phone, password):
    """Log in with the given credentials and return the extracted account info."""
    return getAccountInfo(login(phone, password))




# Resource that handles POST requests carrying user credentials
class TodoList(Resource):
    """REST resource that returns account info for the posted credentials."""

    def post(self):
        """Look up account info for the posted ``user`` and ``pwd`` fields.

        Example:
            curl http://127.0.0.1:5000/users -X POST -d "user=<phone>&pwd=<password>"

        Returns:
            ``({"info": <account fields>}, 201)`` on success, or
            ``({"error": <message>}, 502)`` when the upstream login or
            account lookup does not return the expected data.
        """
        args = parser_put.parse_args()

        try:
            account = to_do(args['user'], args['pwd'])
        except KeyError:
            # A rejected login or failed lookup shows up as a missing key in
            # the upstream payload; answer with a JSON error instead of
            # letting it surface as an unhandled 500.
            return {"error": "upstream login or account lookup failed (check credentials)"}, 502

        # 201 is kept so existing clients see the same success status.
        return {"info": account}, 201


# Register the route for the resource
api.add_resource(TodoList, "/users")


if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode; keep it to local demos.
    app.run(debug=True)


这是用postman测试工具模拟浏览器请求,得出的结果

python爬虫爬取APP并封装成API接口调用,使用flask-restful_第1张图片

你可能感兴趣的:(python爬虫项目)