# 2022-10-11 ROSALIND_14: Finding a Shared Motif

# ----ROSALIND_14: ---- 
# Finding a Shared Motif

# Read the input file into a list with one entry per line, dropping the
# newline characters from each entry.
with open("14_Finding a Shared Motif.txt") as f:
    DNA_file = [line.strip("\n") for line in f]

# Bare expression: shows the parsed lines in an interactive session
# (presumably a notebook cell); it has no effect when run as a script.
DNA_file
# [image.png - figure from the original post, not available here]
def _parse_fasta_sequences(lines):
    """Return the concatenated sequence of every FASTA record in *lines*.

    A line starts a new record when it begins with ">" or contains
    "Rosalind" (the marker the original implementation relied on).  All
    following non-header lines are joined into that record's sequence, so
    records may span any number of lines.
    """
    records = []
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            continue  # tolerate blank lines, e.g. at the end of the file
        if line.startswith(">") or "Rosalind" in line:
            records.append([])  # header line: open a new record
        elif records:
            records[-1].append(line)
        # Text before the first header belongs to no record and is ignored.
    return ["".join(parts) for parts in records]


def _common_substring_of_length(reference, others, length):
    """Return the first *length*-long substring of *reference* that occurs
    in every string of *others*, or None when there is none."""
    for start in range(len(reference) - length + 1):
        candidate = reference[start:start + length]
        if all(candidate in other for other in others):
            return candidate
    return None


def max_lcs(file):
    """Print and return the longest substring shared by all FASTA records.

    Args:
        file: Iterable of text lines in FASTA format (header lines followed
            by one or more sequence lines per record).

    Returns:
        The longest substring present in every sequence.  When several
        substrings have that length, the one occurring first in the
        shortest sequence is returned.  An empty string is returned when
        the sequences have nothing in common.  The same value is printed.

    Raises:
        ValueError: If *file* contains no FASTA record.
    """
    sequences = _parse_fasta_sequences(file)
    if not sequences:
        raise ValueError("no FASTA records found in input")

    # Every shared motif is a substring of the shortest sequence, so use it
    # as the source of candidates (sorted by length, not alphabetically).
    ordered = sorted(sequences, key=len)
    reference, others = ordered[0], ordered[1:]

    # If a shared substring of length L exists, one of length L - 1 exists
    # too (drop its last character), so the answer length can be found by
    # binary search instead of enumerating every substring.
    motif = ""
    low, high = 1, len(reference)
    while low <= high:
        length = (low + high) // 2
        found = _common_substring_of_length(reference, others, length)
        if found is None:
            high = length - 1
        else:
            motif = found
            low = length + 1

    print(motif)
    return motif
 
# [image.png - figure from the original post, not available here]

# You may also be interested in: (2022-10-11 ROSALIND_14: Finding a Shared Motif)