利用 Biopython 根据 PMID 快速下载参考文献信息

之前用常规的爬虫思路(import requests、from bs4 import BeautifulSoup)下载文章题目、作者、来源等信息时,偶尔会出现各种问题。与其花时间去调试,不如自己基于 Biopython 快速写一个脚本,简单好用。

#!/usr/bin/env python
# encoding:utf-8

from Bio import Entrez
from  Bio  import Medline
# Contact address that Biopython attaches to every Entrez request sent to NCBI.
Entrez.email = '[email protected]'
# Module-level output file shared with downref(), which appends one
# tab-separated line per record. Mode 'w+' truncates any existing ref.txt.
ref = open('ref.txt','w+')
def downref(pmid):
    """Fetch the MEDLINE record(s) for *pmid* and report title, authors, source.

    For every record returned, the title, at most the first three authors
    and the source string are printed, and one line of the form
    ``title<TAB>authors<TAB>source`` is written to the module-level ``ref``
    file object. A field missing from the record is reported as "?".

    Args:
        pmid: PubMed identifier, passed unchanged as the ``id`` argument of
            ``Entrez.efetch``.
    """
    handle = Entrez.efetch(db="pubmed", id=pmid, rettype="medline", retmode="text")
    try:
        # Medline.parse yields records lazily, so materialise them before the
        # handle is closed.
        records = list(Medline.parse(handle))
    finally:
        # Release the network handle even if parsing fails.
        handle.close()

    for record in records:
        title = record.get("TI", "?")
        # Slicing to three entries also handles shorter author lists and the
        # "?" placeholder, so no length check is needed.
        author = ",".join(record.get("AU", "?")[:3])
        source = record.get("SO", "?")

        # Single-argument print(...) with %-formatting emits the same text
        # under both the Python 2 print statement and the Python 3 function.
        print("title: %s" % title)
        print("authors: %s" % author)
        print("source: %s" % source)

        ref.write(title + "\t" + author + "\t" + source + "\n")

if __name__=="__main__":
    # PMIDs to download; the list could instead be read from a file.
    ids = ['21142915','25007187','27574448','16330681','24404132','22868256','23314736','18221820','24338217','17537913','19016765','22124095','18551042','17194903','22546611','22304580','22838950','21642870','19376514','20665488','24647007','23603345','23816762','19203783','25495407','21057378','22329723','21902499','21635146','24215845','22188361','19786980','21362365','21449681','24193570','17457342','19307503','20634689','17700593','23736036','20385995','25331073','24732178','20638924','21605004','22134350','17602053','20226083','18538455','18538445','24361227','16609362','18294295','21995462','26641474','18347181','26696550','19052714','18357466','24533712','23588304','19898482','2953022','26857783','20562211','19450125','15007088','16636344','23860526','20125120','24061601','25069034','20078613','19620936','27498158','25592234','24446315','19362955','16875718','22821704','17352537','18377430','19214144','23222202','24300978','22838949','23230131','16551864','24980784','27150640','25084203','22843789','20212519','16890579','26763541','22441531','21521023']
    try:
        # Loop variable is named pmid rather than id to avoid shadowing the
        # builtin id().
        for pmid in ids:
            downref(pmid)
    finally:
        # Close the shared output file so every line written by downref()
        # is flushed to ref.txt, even if a download fails partway through.
        ref.close()

参考:https://blog.csdn.net/Cassiel60/article/details/90664558

你可能感兴趣的:(python)