一、准备工作
1). 安装python,installed version:2.7.9 in Window.
2). 安装pip(可选)
3). excel文档规范化
4). 安装xlrd
什么是xlrd? xlrd是python中专门用于抽取Microsoft Excel表格数据的库。xlrd v0.9.3, 下载地址:https://pypi.python.org/pypi/xlrd
安装步骤:下载tar.gz,并解压到目录:xlrd\xlrd-0.9.3.tar\xlrd-0.9.3,然后转到该目录下,执行:python setup.py install
二、编写脚本
# -*- coding: cp936 -*- import xlrd class DataComparison(): compare_result = '' f_dir = r'' datasrc = '' datadst = '' f1 = '' f2 = '' col_sets_sheet1 = [] col_sets_sheet2 = [] f1_sheet_index = 0 f2_sheet_index = 0 if_with_title = '1' def __init__(self, dataList): ############### Set Parameters ############# compare_result = dataList[0] datasrc = dataList[1] datadst = dataList[2] # Read the 2 excel files to be compared. # Set data file name f1 = dataList[3] f2 = dataList[4] # Set col numbers in the 1st sheet col_sets_sheet1 = dataList[5] # col_sets_sheet2 = dataList[6] # # Set sheet in files to be compared. # The 1st sheet as default f1_sheet_index = dataList[7] f2_sheet_index = dataList[8] # Set if excel sheet has title(only 1st row as title) if_with_title = dataList[9] ############### Load Data & Compare ############# print "Start Loading..." book1 = xlrd.open_workbook(f1) book2 = xlrd.open_workbook(f2) print "Loading Files OK!" b1sheet = book1.sheet_by_index(f1_sheet_index) b2sheet = book2.sheet_by_index(f2_sheet_index) print "Ranging Data" col_1_b1sheet = b1sheet.col_values(col_sets_sheet1[0]) col_2_b1sheet = b1sheet.col_values(col_sets_sheet1[1]) col_1_b2sheet = b2sheet.col_values(col_sets_sheet2[0]) col_2_b2sheet = b2sheet.col_values(col_sets_sheet2[1]) if if_with_title == '1': print "Removing titles" col1_b1sheet = col_1_b1sheet[1:] col2_b1sheet = col_2_b1sheet[1:] col1_b2sheet = col_1_b2sheet[1:] col2_b2sheet = col_2_b2sheet[1:] print "Round columns with number type" for i, val in enumerate(col2_b1sheet): col2_b1sheet[i] = round(val,2) for i, val in enumerate(col2_b2sheet): col2_b2sheet[i] = round(val,2) # Get sum value of number cols sum1 = sum(col2_b1sheet) sum2 = sum(col2_b2sheet) print "Do zipping..." zip1 = zip(col1_b1sheet, col2_b1sheet) zip2 = zip(col1_b2sheet, col2_b2sheet) print "Sort ziped list" zip1.sort() zip2.sort() print "Compare rows in summary" len1 = len(zip1) len2 = len(zip2) print len1, '--', len2 compare_result = compare_result+":\n 总数比对:" if len1 == len2: print "记录总数比较一致!" compare_result = compare_result+"结果一致,均为"+str(len1)+"条;" else: print "记录总数不一致!" compare_result = compare_result+"结果不一致,"+ datasrc +"为"+str(len1)+"条,"+datadst +"为"+str(len2)+"条;" print sum1, '--', sum2 if round(sum1,2) == round(sum2,2): print "总金额比较一致!" compare_result = compare_result+"总金额比较一致,"+"均为"+str(sum1)+";\n" else: print "总金额不一致!" compare_result = compare_result+"总金额不一致,"+datasrc +"为"+str(sum1)+","+datadst +"为"+str(sum2)+";\n" print "Get different rows in detail" cnt = 0 for c in range(len(zip1)): if zip1[c] != zip2[c]: cnt += 1 print zip1[c], '--', zip2[c] if cnt >0: print cnt, "行比较有差异!" compare_result = compare_result + " 明细比对:存在"+str(cnt)+"条差异。" else: print "未发现明细差异数据" compare_result = compare_result + " 明细比对:无差异。" print compare_result self.compare_result = compare_result print "Finished!" def getResult(self): return self.compare_result datasrc = "源数据1" datadst = "源数据2" f1_sheet_index = 0 f2_sheet_index = 0 if_with_title = '1' compare_result = "比对结果:" f1 = r'D:\tmp\src001.xlsx' f2 = r'D:\tmp\tmp001.xlsx' col_sets_sheet1 = [2,4] #取第3、5列 col_sets_sheet2 = [1,2] #取第2、3列 # 组装dataList dataList = [compare_result, datasrc, datadst, f1, f2, col_sets_sheet1, col_sets_sheet2, f1_sheet_index, f2_sheet_index, if_with_title] dc1 = DataComparison(dataList) # 打印比较结果 print dc1.getResult()
总结:
本次主要练习了使用xlrd读取xls文件,并对文件记录进行加工、比较。下一步将练习xlwd并增加对文件写入的相关操作。