根据PMID批量获取NCBI摘要页内容

根据PMID批量获取NCBI摘要页内容

'根据PMID号,获取Pubmed摘要页面的所有内容(题目和摘要等)。NBK开头的PMID在程序自动搜索时会自动将NBK去掉,所以不适用于此代码生成。'

__author__ = 'shixq'
# source:https://www.jianshu.com/p/b36adf266c3d
from Bio import Entrez

# Batch-download PubMed abstract pages.
#
# Reads one PMID per line from 'PMID.txt', fetches the plain-text abstract
# page for each through Entrez.efetch, and writes one tab-separated line per
# PMID ("<pmid>\t<abstract text>") to 'PMID_abstract.txt'.
#
# NOTE: per the original author's remark, IDs starting with "NBK" should be
# removed from the input file beforehand; they are not handled here.

# Contact address passed to NCBI with every request; replace with your own.
Entrez.email = "[email protected]"

# Collect the PMIDs, dropping the trailing newline and skipping blank lines so
# that an empty ID is never sent to efetch.
with open('PMID.txt') as pmid_file:
    id_list = [line.strip() for line in pmid_file if line.strip()]

count = 0
# The output file must stay open for the whole loop; closing it after the
# first record makes every later write fail with "I/O operation on closed
# file". The context manager closes it once, even if a request raises.
with open('PMID_abstract.txt', 'w', encoding='utf-8') as output:
    for pid in id_list:
        # Fetch the abstract page of this PMID as plain text.
        handle = Entrez.efetch(db="pubmed", id=pid, rettype="abstract", retmode="text")
        try:
            abstract_lines = [line.strip() for line in handle]
        finally:
            # Release the HTTP connection even when reading fails.
            handle.close()

        # Flatten the page onto a single line. Non-empty lines are joined
        # with one space so that words at line breaks are not fused together.
        flattened = ' '.join(part for part in abstract_lines if part)
        output.write(pid + '\t' + flattened + '\n')

        count += 1
        # Print the percentage of PMIDs processed so far.
        print('complete', '%.1f%%' % ((count / len(id_list)) * 100))

你可能感兴趣的:(根据PMID批量获取NCBI摘要页内容)