Requirements:
1. Download the same report from two different environments.
2. Compare the two spreadsheets for consistency, covering both the total row/column counts and the cell contents.
3. Return True if they match; otherwise report the reason for the mismatch.
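At a high level, the script below chains three steps: log in to each environment for a token, trigger and download the same export from both environments, then diff the two files. A hedged sketch of the chain (file names and arguments are placeholders, not real values):

# old_file = customer_settlement_bill_export(ip_39, exportUrl, body, headers, "post")
# new_file = customer_settlement_bill_export(ip_67, exportUrl, body, headers, "post")
# assert check(old_file, new_file, remarks) is True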
import json
import logging
import os
import time
import requests
from core.rest_client import RestClient
from common.analysis import *
from testcases.diffRes.pre_case import *
from testcases.diffRes.post_case import *
from common.deal_data import data
from common.logger import logger
from datetime import datetime, date, timedelta
"""通过Ip登陆"""
ip_39 = "https://xxxxx:443"
ip_67 = "https://xxxxx:443"
ip_address = [ip_39, ip_67]
basePath = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
data_file_path = os.path.join(basePath, "config", "setting.ini")
cur_time = datetime.now().strftime("%Y-%m-%d")
def login_xbb(ipAddress):
    url = ipAddress + "xxxxxx"
    headers = {"host": "xxxxx"}
    body = {"account": "xxxxx", "password": "xxxxxxxx", "login_style": 1}
    res = requests.post(url=url, json=body, headers=headers, verify=False)
    token = json.loads(res.content)["data"]["token"]
    return token
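# A hedged usage sketch: fetch one token per environment and attach it to the
# request headers. The "token" header key below is an assumption; use whatever
# field the backend actually expects.
# headers_39 = {"host": "xxxxx", "token": login_xbb(ip_39)}
# headers_67 = {"host": "xxxxx", "token": login_xbb(ip_67)}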
def customer_settlement_bill_export(ipAddress, exportUrl, body, headers, method):
    """Trigger an export task, poll /export/list.json until it finishes,
    then download the resulting file and return its name."""
    url = ipAddress + exportUrl
    logging.info("URL: %s", url)
    data_list_url = ipAddress + "/export/list.json"
    RestClient.request(url=url, method=method, body=body, headers=headers, verify=False)
    data_list_body = {"moduleId": "", "curPage": 0, "pageSize": 10}
    data_list_res = requests.post(url=data_list_url, data=data_list_body, headers=headers, verify=False)
    dataInfo = json.loads(data_list_res.content)["data"]["dataList"][0]
    print(dataInfo)
    # The status strings are values returned by the backend:
    # "导出中" = exporting, "导出成功" = export succeeded.
    while dataInfo["status"] == "导出中":
        time.sleep(3)
        print("Export in progress...")
        data_list_res = requests.post(url=data_list_url, data=data_list_body, headers=headers, verify=False)
        dataInfo = json.loads(data_list_res.content)["data"]["dataList"][0]
    if dataInfo["status"] == "导出成功":
        # Rewrite the download host so the file is fetched from this environment.
        downloadUrl = dataInfo["downloadUrl"].replace("https://xxxxx", ipAddress)
        taskName = dataInfo["taskName"]
        print("downloadUrl:", downloadUrl)
        f = requests.get(url=downloadUrl, headers=headers, verify=False)
        with open(taskName, "wb") as code:
            code.write(f.content)
    else:
        print("Export failed")
    return dataInfo["taskName"]
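# A hedged usage sketch: run the same export against both environments and keep
# the downloaded file names for check() below. The export path and body are
# placeholders, not real values.
# export_body = {"moduleId": ""}
# old_file = customer_settlement_bill_export(ip_39, "/xxxx/all/export.json", export_body, headers_39, "post")
# new_file = customer_settlement_bill_export(ip_67, "/xxxx/all/export.json", export_body, headers_67, "post")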
# --- Comparison script: read both exported files and diff them ---
import operator
import time
import pandas as pd
def make_df_from_excel(file_name, nrows=10000):
    """Read an Excel file in chunks and build one DataFrame per sheet.

    Parameters
    ----------
    file_name : str
    nrows : int
        Maximum number of rows to read per chunk. The Excel files are too
        big to read in one go, so each sheet is read in pieces of at most
        this many rows.
    """
    # Read the header separately, so all chunk reads are consistent
    # across loop iterations.
    df_file = pd.ExcelFile(file_name)
    num = []        # flattened per-sheet counts: [rows, cols, rows, cols, ...]
    df_list = []    # one merged DataFrame per sheet
    # The workbook may contain several sheets; each one must be compared.
    for sheet_name in df_file.sheet_names:
        # Read the header row once so every chunk uses the same column names.
        df_header = df_file.parse(nrows=1, sheet_name=sheet_name)
        sheet = df_file.parse(sheet_name=sheet_name)
        row_number = sheet.shape[0] + 1   # +1 for the header row
        column_number = sheet.shape[1]
        skiprows = 1   # the first row is the header, which is already read
        i_chunk = 0
        chunks = []
        # A sheet may hold fewer rows than one chunk, so cap the chunk size.
        chunk_size = min(nrows, row_number)
        while True:
            df_chunk = df_file.parse(nrows=chunk_size, skiprows=skiprows,
                                     header=None, sheet_name=sheet_name)
            skiprows += chunk_size
            # No data left: break out of the loop.
            if not df_chunk.shape[0]:
                break
            print(f" - chunk {i_chunk} ({df_chunk.shape[0]} rows)")
            chunks.append(df_chunk)
            i_chunk += 1
        df_chunks = pd.concat(chunks)
        # Rename the positional columns so the chunks align with the header.
        columns = {idx: col for idx, col in enumerate(df_header.columns.tolist())}
        df_chunks.rename(columns=columns, inplace=True)
        # Merge the header slice and the chunks, then de-duplicate (the first
        # data row is read twice: once with the header, once in the first chunk).
        df = pd.concat([df_header, df_chunks])
        df = df.drop_duplicates()
        df_list.append(df)
        num.append(row_number)
        num.append(column_number)
    return num, df_list
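# A small usage sketch (the file name is a placeholder): read a workbook and
# inspect what make_df_from_excel returns.
# counts, frames = make_df_from_excel("xxxxx.xlsx", 10000)
# print(counts)            # flat list: [rows_sheet1, cols_sheet1, rows_sheet2, ...]
# print(frames[0].head())  # merged, de-duplicated DataFrame for the first sheet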
def count(filename):
    """Return [column_count, row_count] of a single-sheet Excel file."""
    data = pd.DataFrame(pd.read_excel(filename))
    data.sort_index(axis=0, ascending=False, inplace=True)  # sort by index, descending
    row_number = data.shape[0] + 1   # +1 for the header row
    column_number = data.shape[1]
    return [column_number, row_number]
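# Quick sanity check (file name is a placeholder): count() gives the shape of
# a single-sheet export without the chunked read.
# print(count("xxxxx.xlsx"))  # -> [column_count, row_count]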
def check(old_task_name, new_task_name, remarks):
    timestamp = int(time.time() * 1000)
    new_num, new_data = make_df_from_excel(new_task_name, 10000)
    old_num, old_data = make_df_from_excel(old_task_name, 10000)
    if operator.eq(new_num, old_num):
        for j in range(len(new_data)):
            # keep_shape=True marks equal cells as NaN, so rows that are
            # entirely NaN carry no differences and can be dropped.
            ind = old_data[j].compare(new_data[j], keep_shape=True).dropna(axis=0, how="all")
            ind = ind.to_dict("records")
            if len(ind) == 0:
                continue
            # Record the first differing row in a report file and fail.
            filename = "test" + str(timestamp) + ".txt"
            with open(filename, "w", encoding="utf-8") as file:
                file.write("=========================================================\n")
                file.write(remarks + "\n")
                file.write(str(ind[0]) + "\n")
            return False
        return True
    else:
        print("Row/column counts from the new interface's export:", new_num)
        print("Row/column counts from the old interface's export:", old_num)
        return False
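# For reference, a minimal self-contained demo of the pandas API that check()
# relies on (DataFrame.compare, available since pandas 1.1). The column names
# here are made up for illustration:
_demo_old = pd.DataFrame({"id": [1, 2], "amount": [10, 20]})
_demo_new = pd.DataFrame({"id": [1, 2], "amount": [10, 25]})
# Prints one row showing amount: self=20 (old) vs other=25 (new).
print(_demo_old.compare(_demo_new, keep_shape=True).dropna(axis=0, how="all"))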
# case_info is expected to come from the pre_case/post_case wildcard imports.
result = check("xxxxx.xlsx", "xxxxxx.xlsx", case_info["remarks"])
assert result is True