【Python】| 基于Python实现对比Excel的小工具

目的:设计一个应用GUI用于对比两个Excel文件

背景:因工作中长于数据打交道,不可避免的会进行简单繁琐数据的对比,故人生苦短我用Python!

思路

1.参数
  • 同一个excel文件两个sheet页其中一个ODS(老数据),一个DWH(新数据)

  • 生成对比文件

  • 输入主键1 输入主键2

(默认新旧文件列名一致)

2.效果
  • 生成的文件
  • 数据量一样、每个字段不一致的数据前10
  • 数据量不一样、取两边不一样的数据前10、排除不一样的数据、每个字段不一致的数据前10
3.实现
  • 循环对比组合列(主键+对比列)
  • pandas处理差异数据、处理生成的sheet的数据格式. (先生成数据,然后调整格式)

最终实现效果

  • 选择需要对比的Excel文件
  • 输入主键1、主键2
  • 点击运行

【Python】| 基于Python实现对比Excel的小工具_第1张图片

  • 运行成功界面

【Python】| 基于Python实现对比Excel的小工具_第2张图片

  • 生成的对比结果文件

【Python】| 基于Python实现对比Excel的小工具_第3张图片

配置

import pandas as pd
from openpyxl import load_workbook

#选择文件路径
path=r"C:\Users\小管同学\Desktop\Migration_Data_Compari\对比文件.xls" #input("选择文件路径:")

TargetPath=r"C:\Users\小管同学\Desktop\Migration_Data_Comparison_Tool\目标文件\对比结果.xlsx"

DATA_ODS=pd.read_excel(r"C:\Users\小管同学\Desktop\Migration_Data_Comparison_Tool\对比文件.xls",sheet_name="ODS")
DATA_DWH=pd.read_excel(r"C:\Users\小管同学\Desktop\Migration_Data_Comparison_Tool\对比文件.xls",sheet_name="DWH")


#选择主键
Primarykey="员工编号"#input("选择主键1:")
Primarykey

# 员工编号

一、数据量

输出表格1–数据量


def write_to_excel_DataVolume(Data,TargetPath): # cor_df 为要保存的 dataframe 
    writer = pd.ExcelWriter(TargetPath, engine='xlsxwriter') # 这里用
    Data.to_excel(writer,sheet_name='Sheet1', encoding='utf8', header=False, startcol=0, startrow=2) # 把dataframe的数据从第2行开始
    workbook  = writer.book
    
    format1 = workbook.add_format({ # 先把样式打包,然后之后赋值即可
        'bold': True, # 字体加粗
        'text_wrap': True, # 是否自动换行
        'valign': 'bottom',  #垂直对齐方式
        'align': 'center', # 水平对齐方式
        'fg_color': '#C5D9F1', # 单元格背景颜色
        'border': 1,# 边框
    })    
    writer_sheet = writer.sheets['Sheet1']
    # 设置宽度
    writer_sheet.set_column("A:I", 16)
    writer_sheet.set_column('C:C',30)

    writer_sheet.merge_range(0,0,0,2,'对比结果',format1)
    writer_sheet.merge_range(4,2,4,0,'数据量差异',format1)
    writer_sheet.write(1,0,'',format1)
    writer_sheet.write(1,1,'ODS',format1)
    writer_sheet.write(1,2,'DWH',format1)
    writer.save()
    writer.close()
    
DataFrame_DataVolume=pd.DataFrame([[DATA_ODS.shape[0]],[DATA_DWH.shape[0]]]).T
DataFrame_DataVolume.columns =["ODS","DWH"]
DataFrame_DataVolume.index=["数据量"]
DataFrame_DataVolume
#writeFileDataVolume(DataFrame_DataVolume,TargetPath)

write_to_excel_DataVolume(DataFrame_DataVolume,TargetPath)


在这里插入图片描述

输出表格2–数据量差异合同


if DATA_ODS.shape[0]==DATA_DWH.shape[0]:
    pass
else:
    
    DATA_ODS_Primarykey=pd.DataFrame(DATA_ODS[Primarykey])
    DATA_DWH_Primarykey=pd.DataFrame(DATA_DWH[Primarykey])
    df_union = pd.concat([DATA_ODS_Primarykey,DATA_DWH_Primarykey])
    # 实现1
    df_diff_ODS = df_union.append(DATA_ODS_Primarykey).drop_duplicates(subset=df_union.columns.to_list(), keep=False)
    df_diff_DWH = df_union.append(DATA_DWH_Primarykey).drop_duplicates(subset=df_union.columns.to_list(), keep=False)

    #DWH多的合同
    df_diff_ODS

    #DWH少的合同
    df_diff_DWH


    df_diff_DWH_Data=[]
    df_diff_ODS_Data=[]
    for i in df_diff_ODS.head(10).values.tolist():
        for n in i:
            df_diff_ODS_Data.append(n)
            
    for i in df_diff_DWH.head(10).values.tolist():
        for n in i:
            df_diff_DWH_Data.append(n)
    
    while True:
        if len(df_diff_DWH_Data)>len(df_diff_ODS_Data):
            df_diff_ODS_Data.append("-")
        elif len(df_diff_DWH_Data)< len(df_diff_ODS_Data):
            df_diff_DWH_Data.append("-")
        elif len(df_diff_DWH_Data)== len(df_diff_ODS_Data):
            break
    
    DataFrame_DataVolume_Count_result=pd.DataFrame(df_diff_DWH_Data,df_diff_ODS_Data).reset_index()
    DataFrame_DataVolume_Count_result.columns=['DWH多的合同','DWH少的的合同']
    DataFrame_DataVolume_Count_result=DataFrame_DataVolume_Count_result.reset_index()
    DataFrame_DataVolume_Count_result.columns=['序号','DWH多的合同','DWH少的的合同']

DataFrame_DataVolume_Count_result


from openpyxl import load_workbook
 
def write_to_excel_Count_result(Data,TargetPath):
    df_Old = pd.DataFrame(pd.read_excel(TargetPath)) #读取原数据文件和表 
    writer = pd.ExcelWriter(TargetPath,engine='openpyxl')
    book=load_workbook(TargetPath)
    writer.book = book
    writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
    df_rows = df_Old.shape[0] #获取原数据的行数
    Data.to_excel(writer,startrow=df_rows+1, index=False,startcol=0,header=True)#将数据写入excel中的aa表,从第一个空行开始写
    writer.save()#保存
    
write_to_excel_Count_result(DataFrame_DataVolume_Count_result,TargetPath)

【Python】| 基于Python实现对比Excel的小工具_第4张图片

二、数据值

输出表格3–差异数据

def DifferenceColumnData(OriginalData,CurrentData,Primarykey):
    #列对比函数
    Comparison_Column=pd.merge(OriginalData, CurrentData, on=Primarykey)
    Comparison_Column.columns=[Primarykey,"ODS","DWH"] 
    Difference_Merge=Comparison_Column[Comparison_Column["ODS"]!=Comparison_Column["DWH"]]
    return Difference_Merge

Difference_Field_Comparison_Data=pd.DataFrame([["数据值差异","数据值差异","数据值差异"]],columns=[Primarykey,'ODS','DWH'])
Difference_Field_Comparison_Data.drop(Difference_Field_Comparison_Data.index, inplace=True) 
Difference_Field_Comparison_Data

   
M=[]
for i in DATA_ODS.columns:
    if i!=Primarykey:
        
        Difference_Division=pd.DataFrame([["字段:{Field_Name}".format(Field_Name=i),"ODS","DWH"]],columns=[Primarykey,'ODS','DWH'])
        ResultTable=DifferenceColumnData(DATA_ODS[[Primarykey,i]],DATA_DWH[[Primarykey,i]],Primarykey=Primarykey)
        if ResultTable.empty==False:
            M.append(i)
        ResultTable=ResultTable.head(10)
        Difference_Division=Difference_Division.append(ResultTable)
        EmptyLine=pd.DataFrame([["","",""]],columns=[Primarykey,'ODS','DWH'])
        Difference_Division=Difference_Division.append(EmptyLine)
        
        Difference_Field_Comparison_Data=Difference_Field_Comparison_Data.append(Difference_Division)
        
Difference_Field_Comparison_Data

三、按格式写入

workbook = xlsxwriter.Workbook(TargetPath)

# 添加不同的sheet.
worksheet1 = workbook.add_worksheet("对比结果")

# header格式.
bold = workbook.add_format({'bold': 2})

headersbold = workbook.add_format({'bold': 1})

#headers = list(Summary.columns)

# 列宽.
worksheet1.set_column('A:D', 16)
worksheet1.set_column('C:C', 20)
worksheet1.set_column('B:B', 25)
# 行高.
worksheet1.set_row(0, 18, bold)
worksheet1.write_row('A1', headers)
worksheet1.autofilter('A1:D9999')
# worksheet1.autofilter(0, 0, 10, 3)  # 和上边的作用一样,不同写法.

row = 1
for row_data in (Summary.values.tolist()):
    worksheet1.write_row(row, 0, row_data)
    row += 1

format1 = workbook.add_format({ # 先把样式打包,然后之后赋值即可
       'bold': True, # 字体加粗
        'text_wrap': True, # 是否自动换行
        'valign': 'bottom',  #垂直对齐方式
        'align': 'center', # 水平对齐方式
        'bg_color': '#C5D9F1', # 单元格背景颜色
        'border': 1,# 边框
    })  

for n in ['ODS','DWH','字段','序号','数据','合同量差异','Report完整度','完成','百分比']:
    worksheet1.conditional_format('A1:D999', {'type':     'text',
                                        'criteria': 'containing',
                                        'value':    n,
                                        'format':   format1})


#worksheet1.merge_range('A2:C2')
#worksheet1.merge_range('A6:C6')

worksheet1.merge_range(0,0,0,2,'生成自动对比结果',format1)
worksheet1.merge_range(1,0,1,2,'数据量',format1)
worksheet1.merge_range(5,2,5,0,'完整度',format1)
worksheet1.merge_range(9,2,9,0,'合同量差异',format1)
worksheet1.merge_range(18,2,18,0,'各个字段差异TOP10',format1)

workbook.close()

四、GUI

from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QFileDialog, QApplication,QInputDialog
from PyQt5.QtWidgets import QMessageBox
import pandas as pd
import xlsxwriter
from openpyxl import load_workbook
import sys 

class Ui_MainWindow(object):

        
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(935, 521)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget") 
        self.YunXingAnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.YunXingAnLiu.setGeometry(QtCore.QRect(490, 400, 75, 23))
        self.YunXingAnLiu.setObjectName("YunXingAnLiu")
        self.TuiChuAnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.TuiChuAnLiu.setGeometry(QtCore.QRect(330, 400, 75, 23))
        self.TuiChuAnLiu.setObjectName("TuiChuAnLiu")
        self.ZhuJian02Kuang = QtWidgets.QLineEdit(self.centralwidget)
        self.ZhuJian02Kuang.setGeometry(QtCore.QRect(300, 300, 311, 20))
        self.ZhuJian02Kuang.setObjectName("ZhuJian02Kuang")
        self.XuanZheAnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.XuanZheAnLiu.setGeometry(QtCore.QRect(210, 170, 75, 23))
        self.XuanZheAnLiu.setObjectName("XuanZheAnLiu")
        self.ShuRuZhuJian01AnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.ShuRuZhuJian01AnLiu.setGeometry(QtCore.QRect(210, 230, 75, 23))
        self.ShuRuZhuJian01AnLiu.setObjectName("ShuRuZhuJian01AnLiu")
        self.ShuRuZhuJian02AnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.ShuRuZhuJian02AnLiu.setGeometry(QtCore.QRect(210, 300, 75, 23))
        self.ShuRuZhuJian02AnLiu.setObjectName("ShuRuZhuJian02AnLiu")
        self.XuanZheWenJianKuang = QtWidgets.QLineEdit(self.centralwidget)
        self.XuanZheWenJianKuang.setGeometry(QtCore.QRect(300, 170, 311, 20))
        self.XuanZheWenJianKuang.setObjectName("XuanZheWenJianKuang")
        self.ZhuJian01Kuang = QtWidgets.QLineEdit(self.centralwidget)
        self.ZhuJian01Kuang.setGeometry(QtCore.QRect(300, 230, 311, 20))
        self.ZhuJian01Kuang.setObjectName("ZhuJian01Kuang")
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 935, 23))
        self.menubar.setObjectName("menubar")
        self.menu = QtWidgets.QMenu(self.menubar)
        self.menu.setObjectName("menu")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.actionTime = QtWidgets.QAction(MainWindow)
        self.actionTime.setObjectName("actionTime")
        self.actiontuivhu = QtWidgets.QAction(MainWindow)
        self.actiontuivhu.setObjectName("actiontuivhu")
        self.menubar.addAction(self.menu.menuAction())
        
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)
 
        
    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "BI Migration Tools V1.0"))
        self.YunXingAnLiu.setText(_translate("MainWindow", "运行"))
        self.TuiChuAnLiu.setText(_translate("MainWindow", "退出"))
        self.XuanZheAnLiu.setText(_translate("MainWindow", "选择文件"))
        self.ShuRuZhuJian01AnLiu.setText(_translate("MainWindow", "输入主键1"))
        self.ShuRuZhuJian02AnLiu.setText(_translate("MainWindow", "输入主键2"))
        self.menu.setTitle(_translate("MainWindow", "菜单"))
        self.actionTime.setText(_translate("MainWindow", "Time"))
        self.actiontuivhu.setText(_translate("MainWindow", "tuivhu"))


最终Code

使用GUI将配置的过程进行封装,让用户进行选择,并打包成可移植的exe文件

# -*- coding: utf-8 -*-
#
# Author: Kyle
# Creation time:April 10, 2022
# Version:V1.0

from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QFileDialog, QApplication,QInputDialog
from PyQt5.QtWidgets import QMessageBox
import pandas as pd
import xlsxwriter
from openpyxl import load_workbook
import sys 

class Ui_MainWindow(object):
    def __init__(self):
        self.path_file = ""
        self.Primarykey1=""#self.getText01()
        self.Primarykey2=""#self.getText01()
        #self.TargetPath="C:\\Users\\小管同学\\Desktop\\Migration_Data_Comparison_Tool\\对比结果.xlsx"
        self.TargetPath="./对比结果.xlsx"
        
    def setupUi(self, MainWindow):
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(935, 521)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget") 
        self.YunXingAnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.YunXingAnLiu.setGeometry(QtCore.QRect(490, 400, 75, 23))
        self.YunXingAnLiu.setObjectName("YunXingAnLiu")
        self.TuiChuAnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.TuiChuAnLiu.setGeometry(QtCore.QRect(330, 400, 75, 23))
        self.TuiChuAnLiu.setObjectName("TuiChuAnLiu")
        self.ZhuJian02Kuang = QtWidgets.QLineEdit(self.centralwidget)
        self.ZhuJian02Kuang.setGeometry(QtCore.QRect(300, 300, 311, 20))
        self.ZhuJian02Kuang.setObjectName("ZhuJian02Kuang")
        self.XuanZheAnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.XuanZheAnLiu.setGeometry(QtCore.QRect(210, 170, 75, 23))
        self.XuanZheAnLiu.setObjectName("XuanZheAnLiu")
        self.ShuRuZhuJian01AnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.ShuRuZhuJian01AnLiu.setGeometry(QtCore.QRect(210, 230, 75, 23))
        self.ShuRuZhuJian01AnLiu.setObjectName("ShuRuZhuJian01AnLiu")
        self.ShuRuZhuJian02AnLiu = QtWidgets.QPushButton(self.centralwidget)
        self.ShuRuZhuJian02AnLiu.setGeometry(QtCore.QRect(210, 300, 75, 23))
        self.ShuRuZhuJian02AnLiu.setObjectName("ShuRuZhuJian02AnLiu")
        self.XuanZheWenJianKuang = QtWidgets.QLineEdit(self.centralwidget)
        self.XuanZheWenJianKuang.setGeometry(QtCore.QRect(300, 170, 311, 20))
        self.XuanZheWenJianKuang.setObjectName("XuanZheWenJianKuang")
        self.ZhuJian01Kuang = QtWidgets.QLineEdit(self.centralwidget)
        self.ZhuJian01Kuang.setGeometry(QtCore.QRect(300, 230, 311, 20))
        self.ZhuJian01Kuang.setObjectName("ZhuJian01Kuang")
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 935, 23))
        self.menubar.setObjectName("menubar")
        self.menu = QtWidgets.QMenu(self.menubar)
        self.menu.setObjectName("menu")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.actionTime = QtWidgets.QAction(MainWindow)
        self.actionTime.setObjectName("actionTime")
        self.actiontuivhu = QtWidgets.QAction(MainWindow)
        self.actiontuivhu.setObjectName("actiontuivhu")
        self.menubar.addAction(self.menu.menuAction())
        
        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)


        #app = QtWidgets.QApplication(sys.argv)
        #self.QInputDialog=QtWidgets.QInputDialog()  #生成一个自定义的QInputDialog的实例
        
        self.XuanZheAnLiu.clicked.connect(self.openTextFile)#选择文件
        self.TuiChuAnLiu.clicked.connect(self.quitApp)#退出
        #self.ShuRuZhuJian01AnLiu.clicked.connect(self.getItem)#主键1
        #self.ShuRuZhuJian02AnLiu.clicked.connect(self.getText)#主键1
        self.YunXingAnLiu.clicked.connect(self.RunAnLiu)#主键1

        
    def retranslateUi(self, MainWindow):
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "BI Migration Tools V1.0"))
        self.YunXingAnLiu.setText(_translate("MainWindow", "运行"))
        self.TuiChuAnLiu.setText(_translate("MainWindow", "退出"))
        self.XuanZheAnLiu.setText(_translate("MainWindow", "选择文件"))
        self.ShuRuZhuJian01AnLiu.setText(_translate("MainWindow", "输入主键1"))
        self.ShuRuZhuJian02AnLiu.setText(_translate("MainWindow", "输入主键2"))
        self.menu.setTitle(_translate("MainWindow", "菜单"))
        self.actionTime.setText(_translate("MainWindow", "Time"))
        self.actiontuivhu.setText(_translate("MainWindow", "tuivhu"))


    #打开excel获取路径
    def openTextFile(self): 
        fd,fp = QFileDialog.getOpenFileName(self.centralwidget, "选择文件", "", "All Files(*)")
        f=open(fd,'rb')
        self.XuanZheWenJianKuang.setText(fd)
        f.close()
        print(fd)
        self.path_file=fd 
     
    def getText01(self):
        self.Primarykey1= self.ZhuJian01Kuang.text()
        print(self.Primarykey1) 
        return self.Primarykey1
        
    def getText02(self):
        self.Primarykey2= self.ZhuJian02Kuang.text()
        print(self.Primarykey2) 
        return self.Primarykey2
    
    def getItem(self): 
        text, ok = QInputDialog.getText(self, '文本输入框', '输入主键1')
        if ok and text:
            self.lineEdit1.setText(text)

    def getText(self):
        text, ok = QInputDialog.getText(self, '文本输入框', '输入主键2')
        if ok and text:
            self.lineEdit2.setText(text)

    def quitApp(self):
        print("退出成功!")
        sys.exit(0)
     
    
    def DifferenceColumnData(self,OriginalData,CurrentData,Primarykey):
        #列对比函数
        Comparison_Column=pd.merge(OriginalData, CurrentData, on=Primarykey)
        Comparison_Column.columns=[Primarykey,"ODS","DWH"] 
        Difference_Merge=Comparison_Column[Comparison_Column["ODS"]!=Comparison_Column["DWH"]]
        return Difference_Merge


    def retrieveData(self,TargetPath,Primarykey,DATA_ODS,DATA_DWH):
        DataFrame_DataVolume=pd.DataFrame([[DATA_ODS.shape[0]],[DATA_DWH.shape[0]]]).T
        DataFrame_DataVolume.columns =["ODS","DWH"]
        DataFrame_DataVolume.index=["数据量"]
        DataFrame_DataVolume=DataFrame_DataVolume.reset_index()
        DataFrame_DataVolume.columns=["序号","ODS","DWH"]
        DataFrame_DataVolume
        
        
        
        if DATA_ODS.shape[0]==DATA_DWH.shape[0]:
            pass
        else:
            DATA_ODS_Primarykey=pd.DataFrame(DATA_ODS[Primarykey])
            DATA_DWH_Primarykey=pd.DataFrame(DATA_DWH[Primarykey])
            df_union = pd.concat([DATA_ODS_Primarykey,DATA_DWH_Primarykey])
            # 实现1
            df_diff_ODS = df_union.append(DATA_ODS_Primarykey).drop_duplicates(subset=df_union.columns.to_list(), keep=False)
            df_diff_DWH = df_union.append(DATA_DWH_Primarykey).drop_duplicates(subset=df_union.columns.to_list(), keep=False)

            #DWH多的合同
            df_diff_ODS

            #DWH少的合同
            df_diff_DWH


            df_diff_DWH_Data=[]
            df_diff_ODS_Data=[]
            for i in df_diff_ODS.head(10).values.tolist():
                for n in i:
                    df_diff_ODS_Data.append(n)
            
            for i in df_diff_DWH.head(10).values.tolist():
                for n in i:
                    df_diff_DWH_Data.append(n)
    
            while True:
                if len(df_diff_DWH_Data)>len(df_diff_ODS_Data):
                    df_diff_ODS_Data.append("-")
                elif len(df_diff_DWH_Data)< len(df_diff_ODS_Data):
                    df_diff_DWH_Data.append("-")
                elif len(df_diff_DWH_Data)== len(df_diff_ODS_Data):
                    break
    
            DataFrame_DataVolume_Count_result=pd.DataFrame(df_diff_DWH_Data,df_diff_ODS_Data).reset_index()
            DataFrame_DataVolume_Count_result.columns=['DWH多的合同','DWH少的的合同']
            DataFrame_DataVolume_Count_result=DataFrame_DataVolume_Count_result.reset_index()
            DataFrame_DataVolume_Count_result.columns=['序号','DWH多的合同','DWH少的的合同']
            DataFrame_DataVolume_Count_result
            
            Difference_Field_Comparison_Data=pd.DataFrame([["数据值差异","数据值差异","数据值差异"]],columns=[Primarykey,'ODS','DWH'])
            Difference_Field_Comparison_Data.drop(Difference_Field_Comparison_Data.index, inplace=True) 
            Difference_Field_Comparison_Data

               
            M=[]
            for i in DATA_ODS.columns:
                if i!=Primarykey:
                    Difference_Division=pd.DataFrame([["字段:{Field_Name}".format(Field_Name=i),"ODS","DWH"]],columns=[Primarykey,'ODS','DWH'])
                    ResultTable=self.DifferenceColumnData(DATA_ODS[[Primarykey,i]],DATA_DWH[[Primarykey,i]],Primarykey=Primarykey)
                    if ResultTable.empty==False:
                        M.append(i)
                    ResultTable=ResultTable.head(10)
                    Difference_Division=Difference_Division.append(ResultTable)
                    EmptyLine=pd.DataFrame([["","",""]],columns=[Primarykey,'ODS','DWH'])
                    Difference_Division=Difference_Division.append(EmptyLine)
        
                    Difference_Field_Comparison_Data=Difference_Field_Comparison_Data.append(Difference_Division)
        
            Difference_Field_Comparison_Data
            
            DataFrame_DataVolume
            
            Percentagedifference = format(round(len(M)/DATA_ODS.shape[1], 6) , '.2%') 
            PercentageComplete=format(1-round(len(M)/DATA_ODS.shape[1], 6) , '.2%') 
            FieldValueDifferenceHeader=pd.DataFrame([["","Report完整度",""],["序号",'已完成','未完成'],["百分比",PercentageComplete,Percentagedifference]],columns=["序号",'ODS','DWH'])
            FieldValueDifferenceHeader

            #表格2
            DataFrame_DataVolume_Count_result.columns=["序号",'ODS','DWH']
            DataFrame_DataVolume_Count_result


            #表格3
            Difference_Field_Comparison_Data.columns=["序号",'ODS','DWH']
            Difference_Field_Comparison_Data
            
            
            #空行
            Empty_Line_DataFrame=pd.DataFrame([[" "," ",""]],columns=["序号",'ODS','DWH'])

            Summary=pd.DataFrame([["","数据量",""],["序号","ODS","DWH"]],columns=["序号","ODS","DWH"])
            Summary=Summary.append(DataFrame_DataVolume)
            Summary=Summary.append(Empty_Line_DataFrame)


            Summary=Summary.append(FieldValueDifferenceHeader)
            Summary=Summary.append(Empty_Line_DataFrame)

            
            SummaryDataVolumeVarianceHeader=pd.DataFrame([["","合同量差异",""],["序号","DWH多的合同","DWH少的的合同"]],columns=["序号",'ODS','DWH'])
            Summary=Summary.append(SummaryDataVolumeVarianceHeader)
            Summary=Summary.append(DataFrame_DataVolume_Count_result)


            if DataFrame_DataVolume_Count_result.shape[0]<5:
                for i in  range(5-DataFrame_DataVolume_Count_result.shape[0]+2):
                    Summary=Summary.append(Empty_Line_DataFrame)
                    
            
            
            
            FieldValueDifferenceHeader=pd.DataFrame([["","各个字段差异TOP10",""]],columns=["序号",'ODS','DWH'])

            Summary=Summary.append(FieldValueDifferenceHeader)
            Summary=Summary.append(Difference_Field_Comparison_Data)
            Summary=Summary.append(Empty_Line_DataFrame)

            
            
            
            workbook = xlsxwriter.Workbook(TargetPath)
            # 添加不同的sheet.
            worksheet1 = workbook.add_worksheet("对比结果")

            # header格式.
            bold = workbook.add_format({'bold': 2})

            headersbold = workbook.add_format({'bold': 1})

            headers = list(Summary.columns)

            # 列宽.
            worksheet1.set_column('A:D', 16)
            worksheet1.set_column('C:C', 20)
            worksheet1.set_column('B:B', 25)
            # 行高.
            worksheet1.set_row(0, 18, bold)
            worksheet1.write_row('A1', headers)
            worksheet1.autofilter('A1:D9999')
            # worksheet1.autofilter(0, 0, 10, 3)  # 和上边的作用一样,不同写法.

            row = 1
            for row_data in (Summary.values.tolist()):
                worksheet1.write_row(row, 0, row_data)
                row += 1

            format1 = workbook.add_format({ # 先把样式打包,然后之后赋值即可
                       'bold': True, # 字体加粗
                       'text_wrap': True, # 是否自动换行
                       'valign': 'bottom',  #垂直对齐方式
                       'align': 'center', # 水平对齐方式
                       'bg_color': '#C5D9F1', # 单元格背景颜色
                       'border': 1,# 边框
                   })  

            for n in ['ODS','DWH','字段','序号','数据','合同量差异','Report完整度','完成','百分比']:
                worksheet1.conditional_format('A1:D999', {'type':     'text',
                                        'criteria': 'containing',
                                        'value':    n,
                                        'format':   format1})


            #worksheet1.merge_range('A2:C2')
            #worksheet1.merge_range('A6:C6')

            worksheet1.merge_range(0,0,0,2,'生成自动对比结果',format1)
            worksheet1.merge_range(1,0,1,2,'数据量',format1)
            worksheet1.merge_range(5,2,5,0,'完整度',format1)
            worksheet1.merge_range(9,2,9,0,'合同量差异',format1)
            worksheet1.merge_range(18,2,18,0,'各个字段差异TOP10',format1)

            workbook.close()


                                
    def success(self):
        #使用infomation信息框
        QMessageBox.information(None,"运行成功!","已生成对比文件",QMessageBox.Yes|QMessageBox.No,QMessageBox.Yes)
 
    def RunAnLiu(self):
        print("运行")
        path=self.path_file
        #print(path)
        DATA_ODS=pd.read_excel(path,sheet_name="ODS")
        DATA_DWH=pd.read_excel(path,sheet_name="DWH")
        Primarykey1=self.getText01()
        Primarykey2=self.getText02()
        if Primarykey2=='':
            Primarykey=Primarykey1
        else:
            Primarykey=Primarykey1+ Primarykey2
        
        TargetPath=self.TargetPath
        
        self.retrieveData(TargetPath,Primarykey,DATA_ODS,DATA_DWH)
        
        self.success()

        print("运行成功!")



你可能感兴趣的:(Python,Python,Excel,数据对比,差异对比)