Filter the 3-column BLAST output according to the following rules

# Filter the 3-column BLAST output according to the following rules:
# 1. Remove self-matches and multiple matches
# 2. Order the two gene IDs within each pair lexicographically

import os
import sys

# Read the input and output paths from the command line.
try:
    infile = sys.argv[1]
    outfile = sys.argv[2]
except IndexError:
    # Too few arguments: show the usage text and stop here. Without the
    # exit the script would carry on and fail later with a NameError on
    # infile/outfile. Only IndexError is caught so that unrelated errors
    # (e.g. KeyboardInterrupt) are not mistaken for a usage problem.
    print("Usage: python filter_blast.py infile outfile")
    sys.exit(1)

# Open the BLAST input for line-by-line reading. open() is used instead of
# the Python 2-only file() constructor; it returns the same kind of object
# on Python 2 and also exists on Python 3.
fp = open(infile)
# Accumulator keyed by the "a&b" pair name built in the loop below.
pairs = {}
# Running count of input rows read.
j = 0
for row in fp:
    j+=1
    a,b,e = row.split()
    e = float(e)
    if a==b: continue
    if a>b: a,b=b,a
    pair_name = "%s&%s"%(a,b)
    if pair_name not in pairs or (pair_name in pairs and e

你可能感兴趣的:(Filter the 3-column BLAST output according to following rules)