# 实现登录认证、cookie 管理,分页爬取数据
import requests
import sys,json,logging, base64, math
# Endpoint of the target service and the account used to sign in.
base_url = 'https://127.0.0.1:5667'
user_name = 'user'
user_pswd = 'user@123'

# Headers passed explicitly with every request made below.
g_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 11.0; Win64; x64)'}

# A single shared session, so cookies received at login are reused later.
opener = requests.Session()
def conf_log():
    """Configure root logging to emit DEBUG-and-above records on stdout."""
    formatter = logging.Formatter(
        fmt='[%(asctime)s %(levelname)s] %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    stdout_handler.setLevel(logging.DEBUG)
    logging.basicConfig(level=logging.DEBUG, handlers=[stdout_handler])
def login():
    """Sign in to the service with the module-level credentials.

    The credentials are sent as ``base64("<user_name>|<user_pswd>")`` in the
    ``ucode`` form field. Base64 is only an encoding, not encryption; it just
    keeps the raw text out of the request body.

    Returns:
        True when the JSON response carries ``code == 1``; False when the
        code differs or the response cannot be parsed (the error is logged).

    Raises:
        Whatever ``opener.post`` raises on a transport failure; the request
        itself is deliberately left outside the ``try``.
    """
    url = base_url + '/system/login'
    # b64encode never inserts line breaks, so no stripping is needed.
    # (base64.encodestring was removed in Python 3.9 and wrapped its output
    # every 76 characters.)
    credentials = (user_name + '|' + user_pswd).encode('utf8')
    ucode = base64.b64encode(credentials).decode('utf8')
    data = {
        'ucode': ucode,
        'rememberMe': False,
    }
    # NOTE(review): verify=False disables TLS certificate verification.
    res = opener.post(url=url, data=data, headers=g_headers, verify=False)
    try:
        return json.loads(res.text)['code'] == 1
    except Exception as e:
        logging.error(e)
        return False
def logout():
    """Sign out of the service; any failure is logged instead of raised."""
    logout_url = '{}/system/logout'.format(base_url)
    try:
        opener.get(url=logout_url, headers=g_headers, verify=False)
    except Exception as exc:
        logging.error(exc)
def _post_jz_page(offset, limit, page):
    """POST one page query to the jzlist endpoint and return the response."""
    url = base_url + "/system/gps/jzlist"
    data = {
        "devId": "", "dept": "", "description": "", "state": "",
        "offset": offset, "limit": limit, "page": page, "order": "asc",
    }
    # The query is sent as a JSON body (json=...), not as form data.
    return opener.post(url=url, json=data, headers=g_headers, verify=False)


def load_jz_info(offset, limit, page):
    """Fetch the jzlist records page by page and log each one at DEBUG level.

    Starting from the given page, keeps requesting the next page until the
    reported ``total`` is exhausted.

    Args:
        offset: Row offset of the first page to request.
        limit: Page size; also used to work out how many pages exist.
        page: 1-based number of the first page to request.

    Raises:
        Whatever ``opener.post`` raises for the first request. Errors from
        parsing or from later requests are logged and end the crawl.
    """
    # The first request stays outside the try so its failure propagates.
    res = _post_jz_page(offset, limit, page)
    try:
        while True:
            result = json.loads(res.text)
            total = result['total']
            for row in result['rows']:
                jz_info = {
                    'devId': row['devId'],
                    'description': row['description'],
                    'longitude': row['longitude'],
                    'latitude': row['latitude'],
                    'dept': row['dept'],
                    'devType': row['devType'],
                }
                logging.debug(jz_info)
            # Page count follows the requested page size, not a fixed 10.
            if limit <= 0 or math.ceil(total / limit) <= page:
                break
            # Iterate instead of recursing so long listings cannot hit the
            # interpreter's recursion limit.
            offset = page * limit
            page += 1
            res = _post_jz_page(offset, limit, page)
    except Exception as e:
        logging.error(e)
if __name__ == '__main__':
    # Script entry point: configure logging, sign in, crawl every page,
    # then sign out.
    conf_log()
    if not login():
        logging.debug('login error')
        # Exit non-zero so callers can detect the failed login.
        sys.exit(1)
    logging.debug('login ok')
    try:
        load_jz_info(0, 10, 1)
    finally:
        # Always sign out, even when the crawl raises.
        logout()
        logging.debug('login out')
from urllib import request, parse, error
from http import cookiejar # cookie管理模块
import sys, json,ssl, logging,base64, math
# Endpoint of the target service and the account used to sign in.
base_url = 'https://127.0.0.1:5667'
user_name = 'user'
user_pswd = 'user@123'

# Cookie jar attached to the opener through a cookie processor.
cookie = cookiejar.CookieJar()
cookie_handler = request.HTTPCookieProcessor(cookie)

# HTTPS handler built on an SSL context that skips certificate verification.
context = ssl._create_unverified_context()
https_handler = request.HTTPSHandler(context=context)
http_handler = request.HTTPHandler()

# Opener combining the handlers above; the User-Agent goes out on every call.
opener = request.build_opener(http_handler, https_handler, cookie_handler)
opener.addheaders = [
    ('User-Agent', 'Mozilla/5.0 (Windows NT 11.0; Win64; x64)'),
]
def conf_logging():
    """Configure root logging to emit DEBUG-and-above records on stdout."""
    formatter = logging.Formatter(
        fmt='[%(asctime)s %(levelname)s]%(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
    )
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    stdout_handler.setLevel(logging.DEBUG)
    logging.basicConfig(handlers=[stdout_handler], level=logging.DEBUG)
def login():
    """Sign in to the service with the module-level credentials.

    The credentials are sent as ``base64("<user_name>|<user_pswd>")`` in the
    url-encoded ``ucode`` form field.

    Returns:
        True when the JSON response carries ``code == 1``; False when the
        code differs or the response cannot be read or parsed (the error is
        logged).

    Raises:
        Whatever ``opener.open`` raises on a transport or HTTP failure; the
        request itself is deliberately left outside the ``try``.
    """
    url = base_url + '/system/login'
    # b64encode never inserts line breaks, so no stripping is needed.
    # (base64.encodestring was removed in Python 3.9 and wrapped its output
    # every 76 characters.)
    credentials = (user_name + '|' + user_pswd).encode('utf8')
    data = {
        'ucode': base64.b64encode(credentials).decode('utf8'),
        'rememberMe': False,
    }
    req = request.Request(url=url, data=parse.urlencode(data).encode('utf8'))
    res = opener.open(req)
    try:
        # Close the response once the body has been read.
        with res:
            result = json.loads(res.read().decode('utf8'))
        return result['code'] == 1
    except Exception as e:
        logging.error(e)
        return False
def logout():
    """Sign out of the service; request failures are logged, not raised."""
    url = base_url + '/system/logout'
    try:
        req = request.Request(url=url)
        # The body is not needed; the with-block only closes the response.
        with opener.open(req):
            pass
    except error.URLError as e:
        # URLError is the parent of HTTPError, so connection-level failures
        # are logged as well as HTTP error statuses.
        logging.error(e)
def _open_jz_page(offset, limit, page):
    """POST one page query to the jzlist endpoint and return the response."""
    url = base_url + "/system/gps/jzlist"
    data = {
        "devId": "", "dept": "", "description": "", "state": "",
        "offset": offset, "limit": limit, "page": page, "order": "asc",
    }
    # POST with a JSON body: the payload is serialized by hand and the
    # content type is declared explicitly.
    headers = {
        'Content-Type': 'application/json',
    }
    req = request.Request(
        url=url, data=json.dumps(data).encode('utf8'), headers=headers)
    return opener.open(req)


def load_jz_info(offset, limit, page):
    """Fetch the jzlist records page by page and log each one at DEBUG level.

    Starting from the given page, keeps requesting the next page until the
    reported ``total`` is exhausted.

    Args:
        offset: Row offset of the first page to request.
        limit: Page size; also used to work out how many pages exist.
        page: 1-based number of the first page to request.

    Raises:
        Whatever ``opener.open`` raises for the first request. Errors from
        reading, parsing or later requests are logged and end the crawl.
    """
    # The first request stays outside the try so its failure propagates.
    res = _open_jz_page(offset, limit, page)
    try:
        while True:
            # Close each response as soon as its body has been read.
            with res:
                result = json.loads(res.read().decode('utf8'))
            total = result['total']
            for row in result['rows']:
                jz_info = {
                    'devId': row['devId'],
                    'description': row['description'],
                    'longitude': row['longitude'],
                    'latitude': row['latitude'],
                    'dept': row['dept'],
                    'devType': row['devType'],
                }
                logging.debug(jz_info)
            # Page count follows the requested page size, not a fixed 10.
            if limit <= 0 or math.ceil(total / limit) <= page:
                break
            # Iterate instead of recursing so long listings cannot hit the
            # interpreter's recursion limit.
            offset = page * limit
            page += 1
            res = _open_jz_page(offset, limit, page)
    except Exception as e:
        logging.error(e)
if __name__ == '__main__':
    # Script entry point: configure logging, sign in, crawl every page,
    # then sign out.
    conf_logging()
    if not login():
        logging.debug('login error')
        # Exit non-zero so callers can detect the failed login.
        sys.exit(1)
    logging.debug('login ok')
    try:
        load_jz_info(0, 10, 1)
    finally:
        # Always sign out, even when the crawl raises.
        logout()
        logging.debug('login out')