RNA
AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA
按照密码子表,将RNA转换为protein(蛋白质)
MAMAPRTEINSTRING
代码
# Translate an RNA string into a protein string using a codon table.
#
# 'dd.txt' holds the codon table as whitespace-separated tokens and 'gg.txt'
# holds the RNA string on its first line.
CODON_LENGTH = 3
STOP_MARKER = "Stop"


def parse_codon_table(lines):
    """Build a codon -> amino-acid mapping from the lines of a codon table.

    Every whitespace-separated token that is exactly three characters long
    is taken as a codon; any other token is stored as the value of the most
    recently seen codon (for example a one-letter amino acid, or "Stop").

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        A dict mapping each codon to its value. A codon that is not
        followed by a value maps to "".
    """
    table = {}
    codon = ""
    for line in lines:
        for token in line.split():
            if len(token) == CODON_LENGTH:
                codon = token
                table[codon] = ""
            else:
                table[codon] = token
    return table


def translate(rna, codon_table):
    """Translate *rna* into a protein string.

    The string is read three characters at a time. Translation ends at the
    first codon mapped to "Stop"; the stop marker itself is not part of the
    result. A trailing fragment shorter than a full codon is ignored.

    Args:
        rna: RNA string to translate.
        codon_table: Mapping from codon to amino acid (or "Stop").

    Returns:
        The concatenated amino-acid letters.

    Raises:
        KeyError: If a complete codon is missing from *codon_table*.
    """
    protein = []
    # Stepping only over complete codons avoids a KeyError on a partial tail.
    for start in range(0, len(rna) - CODON_LENGTH + 1, CODON_LENGTH):
        amino = codon_table[rna[start:start + CODON_LENGTH]]
        if amino == STOP_MARKER:
            break
        protein.append(amino)
    return "".join(protein)


if __name__ == "__main__":
    with open('dd.txt') as table_file:
        codon_table = parse_codon_table(table_file)
    with open('gg.txt') as rna_file:
        rna = rna_file.readline().strip()
    print(translate(rna, codon_table))
统计各基因段每一列中各碱基出现的次数,并返回每列出现次数最多的碱基
# Count one base per column across all gene segments.
def FindGen(word, str1):
    """Count how often *word* occurs at each column of the gene segments.

    Args:
        word: Single character to count (for example "A").
        str1: Sequence of gene segments. Each segment is either a string or
            a list of string fragments; fragments are concatenated before
            counting.

    Returns:
        A list whose j-th entry is the number of segments that have *word*
        at position j. Its length is that of the longest segment, and it is
        empty when *str1* is empty.

    Side effects:
        Prints the counts on one line as "<word>: c0 c1 ... ", ending with
        a carriage return and a newline.
    """
    sequences = ["".join(parts) for parts in str1]
    # Size the result by the longest segment so that a segment longer than
    # the first one (or an empty input) cannot index past the end.
    width = max((len(seq) for seq in sequences), default=0)
    res = [0] * width
    for seq in sequences:
        for column, base in enumerate(seq):
            if base == word:
                res[column] += 1
    print("{}:".format(word), end=" ")
    for count in res:
        print(count, end=" ")
    # Output is kept exactly as before: a carriage return, then the newline
    # that print() appends.
    print("\r")
    return res
# Read the gene segments from 'gg.txt' (FASTA layout), print the per-column
# count of every base via FindGen, then print the most frequent base of
# each column.
def parse_fasta_fragments(lines):
    """Group FASTA sequence lines under their header names.

    A line starting with ">" opens a new record named by the rest of that
    line; every other non-blank line is appended to the current record.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        A dict mapping each record name to the list of its sequence lines,
        in file order.

    Raises:
        ValueError: If sequence data appears before the first header.
    """
    records = {}
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no data.
            continue
        if line.startswith(">"):
            current = []
            records[line[1:]] = current
        elif current is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            current.append(line)
    return records


def consensus(count_a, count_c, count_g, count_t):
    """Return the most frequent base of every column as one string.

    Args:
        count_a, count_c, count_g, count_t: Per-column counts of the bases
            A, C, G and T, all of the same length.

    Returns:
        A string with one base per column. When several bases share the
        highest count, the first in the order A, C, G, T is chosen.
    """
    bases = "ACGT"
    return "".join(
        bases[column.index(max(column))]
        for column in zip(count_a, count_c, count_g, count_t)
    )


if __name__ == "__main__":
    with open("gg.txt") as fasta_file:
        segments = list(parse_fasta_fragments(fasta_file).values())
    # FindGen prints each count row as it is computed.
    base_counts = [FindGen(base, segments) for base in "ACGT"]
    print(consensus(*base_counts))
找到一个基因段的最后三个字符与另一个基因段的最初三个字符一致的基因段对
# Read the gene segments from 'gg.txt' (FASTA layout) and print every pair
# of names where the last three characters of the first segment equal the
# first three characters of the second.
def parse_fasta_records(lines):
    """Parse FASTA text into a mapping of record name to full sequence.

    A line starting with ">" opens a new record named by the rest of that
    line; every other non-blank line is appended to the current record, so
    a sequence split over several lines becomes one string.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        A dict mapping each record name to its sequence, in file order.

    Raises:
        ValueError: If sequence data appears before the first header.
    """
    fragments = {}
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no data.
            continue
        if line.startswith(">"):
            current = []
            fragments[line[1:]] = current
        elif current is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            current.append(line)
    return {name: "".join(parts) for name, parts in fragments.items()}


def find_overlaps(sequences, k=3):
    """Find ordered pairs whose suffix and prefix of length *k* match.

    Args:
        sequences: Mapping of name to sequence string.
        k: Overlap length; must be at least 1.

    Returns:
        A list of (source, target) name pairs where the last *k* characters
        of source's sequence equal the first *k* characters of target's.
        A name is never paired with itself, and sequences shorter than *k*
        take no part. Pairs are ordered by source, then target, following
        the order of *sequences*.

    Raises:
        ValueError: If *k* is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    # Index names by prefix once so each suffix needs a single lookup.
    names_by_prefix = {}
    for name, seq in sequences.items():
        if len(seq) >= k:
            names_by_prefix.setdefault(seq[:k], []).append(name)
    pairs = []
    for name, seq in sequences.items():
        if len(seq) < k:
            continue
        for other in names_by_prefix.get(seq[-k:], ()):
            if other != name:
                pairs.append((name, other))
    return pairs


if __name__ == "__main__":
    with open("gg.txt") as fasta_file:
        sequences = parse_fasta_records(fasta_file)
    for source, target in find_overlaps(sequences):
        print(source, target)
找到所有基因段共有的最长公共碱基子串
# Read the gene segments from 'gg.txt' (FASTA layout) and print the longest
# substring that occurs in every segment.
def parse_fasta_sequences(lines):
    """Parse FASTA text into a mapping of record name to full sequence.

    A line starting with ">" opens a new record named by the rest of that
    line; every other non-blank line is appended to the current record, so
    a sequence split over several lines becomes one string.

    Args:
        lines: Iterable of text lines (an open file works).

    Returns:
        A dict mapping each record name to its sequence, in file order.

    Raises:
        ValueError: If sequence data appears before the first header.
    """
    fragments = {}
    current = None
    for raw in lines:
        line = raw.strip()
        if not line:
            # Blank lines (e.g. a trailing empty line) carry no data.
            continue
        if line.startswith(">"):
            current = []
            fragments[line[1:]] = current
        elif current is None:
            raise ValueError("sequence data found before the first '>' header")
        else:
            current.append(line)
    return {name: "".join(parts) for name, parts in fragments.items()}


def longest_common_substring(sequences):
    """Return the longest substring shared by every string in *sequences*.

    Candidates are taken from the first sequence, longest first and left to
    right, so among equally long answers the leftmost one in the first
    sequence wins.

    Args:
        sequences: List of strings.

    Returns:
        The longest common substring, or "" when *sequences* is empty or
        the strings share no character.
    """
    if not sequences:
        return ""
    reference = sequences[0]
    others = sequences[1:]
    # No common substring can be longer than the shortest sequence.
    longest_possible = min(len(seq) for seq in sequences)
    for size in range(longest_possible, 0, -1):
        for start in range(len(reference) - size + 1):
            candidate = reference[start:start + size]
            if all(candidate in other for other in others):
                return candidate
    return ""


if __name__ == "__main__":
    with open("gg.txt") as fasta_file:
        sequences = list(parse_fasta_sequences(fasta_file).values())
    shared = longest_common_substring(sequences)
    # As before, nothing is printed when there is no common substring.
    if shared:
        print(shared)
数组全排列和输出到文件
import itertools


def format_row(values):
    """Format *values* as one output line without brackets or commas.

    Args:
        values: Iterable of items; each is converted with str().

    Returns:
        The items separated by single spaces and terminated by a newline,
        e.g. [1, 2, 3] becomes "1 2 3" plus a newline.
    """
    return " ".join(str(value) for value in values) + "\n"


def write_permutations(items, out):
    """Write every permutation of *items* to *out*, one per line.

    Args:
        items: Iterable whose permutations are generated, in the order
            produced by itertools.permutations.
        out: Writable text file object (anything with writelines()).
    """
    out.writelines(format_row(perm) for perm in itertools.permutations(items))