Python积累--python比较两个文件内容的区别

最简朴的方案

# Compare two text files as unordered sets of lines and write three reports:
#   matchedrecords.txt     - lines present in both srcone.txt and srctwo.txt
#   notInfirstSource.txt   - lines of srctwo.txt that are absent from srcone.txt
#   notInsecondSource.txt  - lines of srcone.txt that are absent from srctwo.txt
#
# NOTE: lines are compared together with their trailing newline, so a final
# line that lacks one will not match the same text followed by a newline in
# the other file. Duplicate lines collapse because sets are used.

with open('srcone.txt') as b:
    blines = set(b)

with open('srctwo.txt') as a:
    alines = set(a)

# Sets have no stable iteration order; sorting makes the reports reproducible
# from one run to the next.
with open('matchedrecords.txt', 'w') as matched:
    matched.writelines(sorted(alines & blines))

with open('notInfirstSource.txt', 'w') as result:
    result.writelines(sorted(alines - blines))

with open('notInsecondSource.txt', 'w') as non:
    non.writelines(sorted(blines - alines))

notInfirstSource.txt中保存的是srctwo.txt中有、但不在srcone.txt中的内容。
notInsecondSource.txt中保存的是srcone.txt中有、但不在srctwo.txt中的内容。
两者相同的内容保存在matchedrecords.txt中。

pandas对比文件内容

例子:查询bl.txt文件与ba.txt文件的数据是否一致对应

import pandas as pd


def parse_change_Record(rawsnppath="/Users/joe/workspace/diffrentgt/ba.txt",
                        fixsnppath="/Users/joe/workspace/diffrentgt/bl.txt"):
    """Print, for every row of one table, whether the other table has the same pair.

    Both files are loaded with ``pd.read_table`` (all columns as strings) and
    must contain the columns ``#rsid`` and ``genotype``. For each row of the
    table at ``fixsnppath`` a line is printed saying whether the table at
    ``rawsnppath`` contains a row with the same ``#rsid`` and ``genotype``.

    Args:
        rawsnppath: Path of the table that is searched. Defaults to the
            path that was previously hard-coded.
        fixsnppath: Path of the table whose rows are looked up. Defaults to
            the path that was previously hard-coded.

    Returns:
        None. The result is reported on standard output only.
    """
    rawsnp = pd.read_table(rawsnppath, dtype=str)
    fixsnp = pd.read_table(fixsnppath, dtype=str)
    for _, row in fixsnp.iterrows():
        # Boolean mask over rawsnp: True where both columns equal this row's values.
        same_pair = (rawsnp['#rsid'] == row['#rsid']) & (rawsnp['genotype'] == row['genotype'])
        if same_pair.any():
            print("找到对应%s, %s" % (row['#rsid'], row['genotype']))
        else:
            print("未找到对应%s, %s" % (row['#rsid'], row['genotype']))


# Run the comparison (executes as soon as this script is loaded).
parse_change_Record()

使用difflib生成对比html

import logging
import difflib
# Module-level logger named after this module; not referenced in the snippet shown here.
logger = logging.getLogger(__name__)


def diff(file_path_1, file_path_2, report_html_path):
    """Write an HTML report comparing the lines of two text files.

    Args:
        file_path_1: Path of the first file to compare.
        file_path_2: Path of the second file to compare.
        report_html_path: Path of the HTML file to create or overwrite.

    Returns:
        None. The report is written to ``report_html_path``.
    """
    txt_line1 = get_lines(file_path_1)
    txt_line2 = get_lines(file_path_2)

    # Build the whole report before touching the output file, so a failure
    # while reading or diffing does not leave a truncated report behind.
    report = difflib.HtmlDiff().make_file(txt_line1, txt_line2)

    # make_file() declares charset utf-8 in the generated page by default,
    # so the file is written with that same encoding.
    with open(report_html_path, 'w', encoding='utf-8') as fid:
        fid.write(report)


def get_lines(file_name):
    """Return the lines of a text file as a list, line endings included.

    Args:
        file_name: Path of the file to read.

    Returns:
        A list with one string per line; empty if the file is empty.
    """
    # The with-block closes the handle even if reading raises.
    with open(file_name) as source:
        return source.readlines()


# Example inputs: the two files to compare and the destination of the HTML report.
file_path_1="/Users/joe/Downloads/1.vcf"
file_path_2="/Users/joe/workspace/platform/diff-file/newvcf/1.vcf"
report_html_path="/Users/joe/workspace/platform/diff-file/newvcf/1.html"

# Write the HTML comparison report for the two files.
diff(file_path_1, file_path_2, report_html_path)

你可能感兴趣的:(python)