本章主要记录一下这个脚本,以及编写过程中踩过的一些坑。
脚本里还有很多地方可以优化,
比如数据可以直接写入 Excel,不必先做格式转换;
我这里直接复用了以前的代码,所以是先转成 txt 再转成 Excel,就没有再改。
# 该类为相关文件转换类
import xlwt
class FormatConversion:
    """Helpers for converting data between list, txt and xls representations.

    The methods use no instance state. They are static so that they can be
    called on the class itself (``FormatConversion.txt_xls(src, dst)``) as
    well as on an instance.
    """

    # 1. Write a list of dicts to a txt file
    @staticmethod
    def list_to_txt(a, filename='1.txt'):
        """Write every key/value pair of the dicts in *a* as a ``key,value`` line.

        Args:
            a: Iterable of dicts whose keys and values are strings, e.g.
                ``[{"Jodie1": "123"}, {"Jodie2": "456"}]``.
            filename: Path of the text file to (over)write. Defaults to
                ``'1.txt'`` in the current working directory.

        Raises:
            TypeError: If a key or value is not a string.
        """
        # Written as UTF-8 so that txt_xls(), which reads UTF-8, can consume it.
        with open(filename, 'w', encoding='utf-8') as f:
            for record in a:
                for key, values in record.items():
                    # NOTE(review): lines are terminated with a bare "\r"
                    # (kept as-is); confirm whether "\n" was intended.
                    line = key + "," + values + "\r"
                    print(line)
                    f.write(line)

    # 2. Convert a txt file to an Excel (.xls) workbook
    @staticmethod
    def txt_xls(filename, xlsname):
        """Convert a comma-separated UTF-8 text file into an ``.xls`` workbook.

        Each line of the text file becomes one row of a sheet named
        ``'sheet'``; each comma-separated field becomes one cell, written as
        a string. The line terminator is stripped before splitting so that
        it does not end up inside the last cell of the row.

        Args:
            filename: Path of the text file to read.
            xlsname: Path of the workbook to save.
        """
        xls = xlwt.Workbook()
        sheet = xls.add_sheet('sheet', cell_overwrite_ok=True)
        # The context manager closes the file even when a write fails.
        with open(filename, encoding='utf-8') as f:
            for row, line in enumerate(f):
                cells = line.rstrip('\r\n').split(',')
                for col, item in enumerate(cells):
                    sheet.write(row, col, item)  # (row index, column index, value)
        xls.save(xlsname)
if __name__ == '__main__':
    # Script entry point: convert the comma-separated text export in the
    # current directory into an .xls workbook next to it.
    fm = FormatConversion
    filename, xlsname = 'export_data.txt', 'exp_export_data.xls'
    fm.txt_xls(filename, xlsname)
import os
import time
from os import listdir
from os.path import isfile, join
from selenium import webdriver
import pandas as pd
import requests
import json
from test.format_conversion import *
# Request headers sent with every API call made by this script.
headers_admin = {
    'content-type': 'application/json; charset=UTF-8',
    "zone": "1052650-recommend",
}

# Base address of the environment the requests are sent to.
host = "https://xxx.xxx.xxx"

# Session id appended to every request URL as the ``sid`` query parameter.
# NOTE(review): this looks like a hard-coded credential; consider loading it
# from configuration or the environment instead of committing it.
sid = "2951D93BC7AECFB5594BAD209EA8D16B1C19CB519F91CAAA5AFAA211F823E48EFDDE250C290D3794"
# url拼接
def get_total_url(url, sid):
    """Build the full request URL ``<host><url>?sid=<sid>`` and print it.

    Args:
        url: API path that is appended to the module-level ``host``.
        sid: Session id placed in the ``sid`` query parameter.

    Returns:
        The concatenated URL string.
    """
    full_url = "".join((host, url, "?sid=", sid))
    print("url:" + full_url)
    return full_url
# 1、导出推荐数据 -->加载到下载中心
def export_recommend_data(scene, memberId):
    """POST an export request for the recommend/like records and print the reply.

    Args:
        scene: Value sent as the ``scene`` field of the request body.
        memberId: Value sent as the ``memberId`` field of the request body.
    """
    print("========================================================\n第一步:点击导出,加载到下载中心")
    endpoint = "/api/matchmaker/matchmaking-manage/exportRecommendLikeRecord.do"
    # Every filter other than scene/memberId is sent as -1 or an empty string
    # (presumably "no filter" -- confirm against the API contract).
    body = {
        "scene": scene,
        "matchStatus": -1,
        "memberId": memberId,
        "objectId": "",
        "recommendStartTime": "",
        "recommendEndTime": "",
        "likeStatus": -1,
        "selectStatus": -1,
        "selectStartTime": "",
        "selectEndTime": "",
        "isRead": -1,
        "pageIndex": 1,
        "pageSize": 100,
    }
    target = get_total_url(endpoint, sid)
    reply = requests.post(target, data=json.dumps(body), headers=headers_admin)
    print("导出结果:", reply.json())
# 2、查询下载中心列表,获取导出文件url
def query_download_list(max_attempts=None, interval=8):
    """Poll the download-centre list until its first entry has a file URL.

    The previous implementation retried by calling itself but discarded the
    result of that call, so an empty string was returned even after the URL
    had become available. The retry is now a loop whose result is returned.

    Args:
        max_attempts: Maximum number of queries before giving up. ``None``
            (the default) keeps polling until a URL is available.
        interval: Seconds to wait between two queries.

    Returns:
        The ``fileUrl`` of the first list entry, or ``""`` when
        ``max_attempts`` queries were made without obtaining one.
    """
    print("========================================================\n第二步:查询下载中心列表,获取到返回的文件路径")
    url = "/api/common/export/query-export-file.do"
    payload_data = {"status": -1, "pageIndex": 1, "pageSize": 10}
    attempt = 0
    while True:
        attempt += 1
        response = requests.post(get_total_url(url, sid), data=json.dumps(payload_data), headers=headers_admin)
        res_data = response.json()["data"]["list"]
        # An empty list is treated like an entry whose file URL is not ready yet.
        latest_file_url = res_data[0]['fileUrl'] if res_data else ""
        if latest_file_url != "":
            print("打印第一条数据的fileUrl:", latest_file_url)
            return latest_file_url
        print("没查询到数据,再次查询:", latest_file_url)
        if max_attempts is not None and attempt >= max_attempts:
            return latest_file_url
        time.sleep(interval)  # avoid querying too frequently
# 3、将下载完的文件保存到当前文件下
def download_export_data():
    """Download the export file with Chrome into ``<cwd>/check_file``.

    The download centre is queried once (the earlier code queried it twice,
    once for the emptiness check and once for the value). The URL is then
    opened in a Chrome instance configured to save downloads into the
    ``check_file`` directory under the current working directory.

    Returns:
        Path of the first regular file found in the download directory, or
        ``None`` when the download centre returned an empty file URL.

    Raises:
        FileNotFoundError: If no finished file shows up in the download
            directory within the waiting period.
    """
    # Kept global on purpose: per the original author, Chrome closes
    # immediately when the driver object is only a local variable.
    global driver

    export_file_url = query_download_list()
    if export_file_url == "":
        print("文件数据为空,直接返回了")
        return None
    print("接收到的文件路径", export_file_url)

    aim_dir = join(os.getcwd(), 'check_file')
    print("存放excell文件目录", aim_dir)
    os.makedirs(aim_dir, exist_ok=True)  # listdir() below needs the directory to exist

    # Location of the local chromedriver binary -- adjust to your machine.
    # NOTE(review): ``executable_path`` is deprecated in Selenium 4 -- confirm
    # the installed Selenium version still accepts it.
    driver_path = (r'C:\Users\Administrator\AppData\Local\Google\Chrome\Application\chromedriver.exe')
    prefs = {'profile.default_content_settings.popups': 0,
             'download.default_directory': aim_dir}  # must be an absolute path
    options = webdriver.ChromeOptions()
    options.add_experimental_option('prefs', prefs)
    driver = webdriver.Chrome(executable_path=driver_path, options=options)
    driver.get(export_file_url)

    time.sleep(1)  # give the browser a moment to start writing the file
    print("========================================================\n第三步:把下载完的文件保存在当前路径下")

    # Wait (bounded) until the directory holds a file that is not one of
    # Chrome's partial ``.crdownload`` files, instead of reading it blindly.
    deadline = time.time() + 30
    while True:
        filenames = [f for f in listdir(aim_dir)
                     if isfile(join(aim_dir, f)) and not f.endswith('.crdownload')]
        if filenames or time.time() >= deadline:
            break
        time.sleep(1)
    if not filenames:
        raise FileNotFoundError("no downloaded file found in " + aim_dir)

    # Only the first listed file is used; remove stale files from the
    # directory beforehand if more than one may be present.
    export_data = filenames[0]
    print("导出文件名称", export_data)
    export_dir = join(aim_dir, export_data)
    return export_dir
# 4、获取推荐列表数据
def get_recommend_record(scene, memberId):
    """Query the recommend/like record list and return its entries.

    Args:
        scene: Value sent as the ``scene`` field of the request body.
        memberId: Value sent as the ``memberId`` field of the request body.

    Returns:
        The value found under ``data.list`` in the JSON response.
    """
    print("========================================================\n第四步:获取推荐列表数据")
    endpoint = "/api/matchmaker/matchmaking-manage/queryRecommendLikeRecord.do"
    # Every filter other than scene/memberId is sent as -1 or an empty string.
    body = {
        "scene": scene,
        "matchStatus": -1,
        "memberId": memberId,
        "objectId": "",
        "recommendStartTime": "",
        "recommendEndTime": "",
        "likeStatus": -1,
        "selectStatus": -1,
        "selectStartTime": "",
        "selectEndTime": "",
        "isRead": -1,
        "pageIndex": 1,
        "pageSize": 100,
    }
    reply = requests.post(get_total_url(endpoint, sid), data=json.dumps(body), headers=headers_admin)
    return reply.json()["data"]["list"]
# 5、将推荐列表数据写入到txt
# Record keys written per row, in the order of the header line below. The
# header's column order corresponds to these keys sorted alphabetically,
# which is the iteration order the earlier code implicitly relied on
# (NOTE(review): key-to-column pairing inferred from that order -- confirm).
_EXPORT_KEYS = (
    "likeStatus", "likeTime", "matchStatus", "memberId", "objectAge",
    "objectEducationDesc", "objectHeight", "objectId", "objectMarriageDesc",
    "objectName", "recommendTime", "selectStatus", "selectTime",
)

# Display text for the status codes (compared after conversion to str).
# Codes without an entry are written unchanged.
_STATUS_LABELS = {
    "likeStatus": {"0": "未表态", "1": "想认识", "2": "没感觉"},
    "selectStatus": {"0": "否", "1": "是"},
    "matchStatus": {"0": "未处理", "1": "牵线成功", "2": "牵线失败", "3": "无需处理"},
}

_EXPORT_HEADER = "表态状态,表态时间,红娘处理状态,会员ID,对方年龄,对方学历,对方身高,对方ID,对方婚况,对方姓名,推荐/曝光时间,是否互选,互选时间,\n"


def _recommend_row(record):
    """Return the export cells of one record as strings, in header order."""
    cells = []
    for key in _EXPORT_KEYS:
        value = record.get(key)
        if not isinstance(value, str):
            # Non-string values (numbers, None, ...) are written via str(),
            # so a missing or null field becomes the text "None".
            value = str(value)
        cells.append(_STATUS_LABELS.get(key, {}).get(value, value))
    return cells


def deal_data_to_txt(scene=1, memberId=1216709222):
    """Fetch the recommend records and write them to ``export_data.txt``.

    The file starts with a fixed header line, followed by one line per
    record. Every cell, including the last one, is followed by a comma.
    Cells are picked by key, so each row lines up with the header regardless
    of the key order in the response.

    NOTE(review): cell values are not escaped; a value containing a comma or
    a line break would shift the columns of that row.

    Args:
        scene: Passed to ``get_recommend_record``; defaults to the value that
            was previously hard-coded.
        memberId: Passed to ``get_recommend_record``; defaults to the value
            that was previously hard-coded.
    """
    res_data = get_recommend_record(scene, memberId) or []
    exp_lists = [_recommend_row(record) for record in res_data]
    with open("export_data.txt", "w", errors="ignore",
              encoding="utf-8") as output:
        output.write(_EXPORT_HEADER)
        for row in exp_lists:
            output.write(",".join(row) + ",\n")
    print("========================================================\n第五步:推荐数据写入txt文件成功")
# 6、将txt转为excell
def deal_txt_to_excell():
    """Convert ``export_data.txt`` into ``exp_export_data.xls`` and report success."""
    source_txt, target_xls = 'export_data.txt', 'exp_export_data.xls'
    FormatConversion.txt_xls(source_txt, target_xls)
    print("========================================================\n第六步:txt转换成excell成功")
# 8、要将导出来的excell里面的nan替换成空字符串或者None
def change_export_data():
    """Re-save the downloaded workbook with empty cells written as ``"None"``.

    Downloads the export file, reads it with pandas and writes it again to
    ``check_file\\a.xls`` under the current working directory, using
    ``na_rep="None"`` for missing values.

    Returns:
        Path of the rewritten workbook.
    """
    downloaded_path = download_export_data()
    # Wait a little before reading: reading a file that is still being
    # downloaded raises an error.
    time.sleep(2)
    print("导出数据的路径:", downloaded_path)
    frame = pd.read_excel(downloaded_path)
    print("替换前:", frame)

    target_path = "\\".join((os.getcwd(), "check_file", "a.xls"))
    print("最后的目标文件路径", target_path)
    # NOTE(review): writing ".xls" needs an xls writer engine in pandas --
    # confirm the installed pandas version still provides one.
    pd.DataFrame(frame).to_excel(target_path, sheet_name="sheet1", na_rep="None")
    print("存放转换后的excell文件目录", target_path)
    return target_path
# 7、比较两个excell表格的值是否一致
def check_excell_data():
    """Compare the downloaded workbook with the locally generated one.

    Reads the rewritten download and ``exp_export_data.xls``, selects the
    same thirteen columns from both and prints the result of comparing
    their values with ``==``.
    """
    # Workbook produced from the downloaded export
    actual = pd.read_excel(change_export_data())
    # Workbook generated from the API response, used as the reference
    expected = pd.read_excel("exp_export_data.xls")

    columns = [
        '会员ID', '对方ID', '对方姓名', '对方婚况', '对方年龄', '对方学历', '对方身高',
        '推荐/曝光时间', '表态状态', '表态时间', '是否互选', '互选时间', '红娘处理状态',
    ]
    flag = actual[columns].values == expected[columns].values
    print("========================================================\n第七步:请检验数据校验结果")
    print("对比结果:", flag)
if __name__ == '__main__':
    # 1. Ask the backend to export the recommend data.
    export_recommend_data(1, 1216709222)
    # 2. Looking up and downloading the exported file needs no call here:
    #    check_excell_data() -> change_export_data() -> download_export_data()
    #    performs it. The bare ``download_export_data`` expression that used
    #    to stand here never called the function and has been removed.
    # 3. Write the recommend data returned by the API to a txt file.
    deal_data_to_txt()
    # 4. Convert the txt file to an Excel workbook.
    deal_txt_to_excell()
    # 5./6. Rewrite the downloaded workbook with empty cells as "None" and
    #       compare it with the generated workbook.
    check_excell_data()