接口说明请参考 RCSB 官方提供的接口文档。
由于 Python 提供的接口比较简单,所以选择用它来实现。
1. 从 thefile.txt 文件中读取蛋白序列
2. 用该序列向 RCSB 发起查询,获取 XML 格式的查询结果
3. 解析 XML 结果
4. 根据解析结果下载对应的 pdb 文件
将代码保存为 name.py 后,可以用 python name.py 直接运行(代码使用 Python 2 语法)。
#!/usr/bin/python
# -*- coding: UTF-8 -*-
import os
import urllib
import urllib2
import xml.sax
global_url = "https://files.rcsb.org/download/"
class PdbHandler(xml.sax.ContentHandler):
    """SAX content handler for a BLAST XML result.

    The text of a fixed set of elements (see ``_FIELD_BY_TAG``) is collected
    into attributes of this object.  When ``BlastOutput_program`` closes, the
    program name is printed.  When a ``Hit_def`` element closes, the first
    four characters of its text are printed as the file name and handed to
    ``download_pdb`` (presumably the 4-character PDB ID -- verify against the
    actual service output).
    """

    # Element name -> name of the attribute that receives the element's text.
    _FIELD_BY_TAG = {
        "BlastOutput_program": "BlastOutput_program",
        "BlastOutput_version": "BlastOutput_version",
        "BlastOutput_db": "BlastOutput_db",
        "BlastOutput_query-ID": "BlastOutput_query_ID",
        "BlastOutput_query-def": "BlastOutput_query_def",
        "BlastOutput_query-len": "BlastOutput_query_len",
        "Parameters_matrix": "Parameters_matrix",
        "Hit_def": "Hit_def",
    }

    def __init__(self):
        # Explicit base call (rather than super()) works on Python 2 and 3.
        xml.sax.ContentHandler.__init__(self)
        # Name of the element whose text is currently being collected;
        # empty while the parser is between tracked elements.
        self.CurrentData = ""
        self.BlastOutput_program = ""
        self.BlastOutput_version = ""
        self.BlastOutput_db = ""
        # Previously only created lazily in characters(); initialise it so
        # reading it never raises AttributeError.
        self.BlastOutput_query_ID = ""
        self.BlastOutput_query_def = ""
        self.BlastOutput_query_len = ""
        self.blastOutput_iterations = ""
        self.blastOutput_query_len = 0
        self.BlastOutput_param = ""
        self.Parameters_matrix = ""
        self.Parameters_expect = ""
        self.Hit_def = ""

    # Element start event.
    def startElement(self, tag, attributes):
        """Remember the element being opened and reset its text buffer."""
        self.CurrentData = tag
        if tag == "BlastOutput":
            print("-------start---BlastOutput--")
        field = self._FIELD_BY_TAG.get(tag)
        if field is not None:
            # Start from an empty value so characters() can append chunks.
            setattr(self, field, "")

    # Element end event.
    def endElement(self, tag):
        """Act on the element that just closed.

        Dispatch is on ``tag`` (the element actually closing), not on the
        last element that was opened, so a closing parent element cannot
        re-trigger the action of its last child.
        """
        if tag == "BlastOutput_program":
            print("BlastOutput_program: %s" % self.BlastOutput_program)
        elif tag == "Hit_def":
            pdb_id = self.Hit_def.strip()[0:4]
            # Skip empty definitions instead of requesting a nameless file.
            if pdb_id:
                print("file name : %s" % pdb_id)
                download_pdb(pdb_id)
        # Stop collecting: whitespace between elements must not overwrite
        # the value that was just captured.
        self.CurrentData = ""

    # Character data event.
    def characters(self, content):
        """Append a chunk of text to the attribute of the open element.

        A SAX parser may deliver one element's text in several calls, so the
        chunks are concatenated rather than assigned.
        """
        field = self._FIELD_BY_TAG.get(self.CurrentData)
        if field is not None:
            setattr(self, field, getattr(self, field) + content)
def download_pdb(file_name):
    """Download ``<file_name>.pdb`` and store it under the ``pdb/`` directory.

    The URL is built as ``global_url + file_name + ".pdb"`` and the payload
    is written unchanged to ``pdb/<file_name>.pdb``.  The ``pdb`` directory
    is created when it does not exist yet.

    The function is best-effort: any failure is reported on stdout (URL plus
    reason) and swallowed, so one failed download does not abort the caller.

    Args:
        file_name: Base name of the file to fetch, without the ``.pdb``
            extension.
    """
    url = global_url + file_name + ".pdb"
    try:
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            # Python 2 response objects are not context managers, so close
            # the connection explicitly.
            response.close()
        if not os.path.isdir("pdb"):
            os.makedirs("pdb")
        with open("pdb/" + file_name + ".pdb", "wb") as out_file:
            out_file.write(data)
    except Exception as e:
        # Deliberately broad: keep going with the remaining downloads, but
        # say why this one failed instead of printing only the URL.
        print("%s (%s)" % (url, e))
def setup_connect(seq=None):
    """Send a BLAST query for a protein sequence and return the response body.

    Args:
        seq: Protein sequence to search for.  When omitted or ``None`` the
            built-in sample sequence is used, which is what the function did
            before it accepted an argument.

    Returns:
        The response body exactly as returned by ``read()``.  XML output is
        requested through the ``outputFormat`` parameter.

    Raises:
        Whatever ``urllib2.urlopen`` raises on network or HTTP failure; errors
        are not handled here.
    """
    params = {
        'sequence': 'TDMLTLTRYVMEKGRQAKGTGELTQLLNSMLTAIKAISSAVRKAGLAHLYGIAGSVNVDQ',
        'eCutOff': '10.0',
        'matrix': 'BLOSUM62',
        'outputFormat': 'XML',
    }
    if seq is not None:
        params['sequence'] = seq

    # NOTE(review): this looks like the legacy RCSB REST endpoint -- confirm
    # that it is still served before relying on it.
    requrl = "https://www.rcsb.org/pdb/rest/postBLAST"
    # Supplying ``data`` makes urllib2 issue a POST request.
    req = urllib2.Request(url=requrl, data=urllib.urlencode(params))
    print(req)  # debug output kept from the original script
    res_data = urllib2.urlopen(req)
    try:
        print(res_data)  # debug output kept from the original script
        return res_data.read()
    finally:
        # The response was previously left open.
        res_data.close()
def save_to_file(file_name, contents):
    """Write ``contents`` to ``file_name``, replacing any existing file.

    The function is best-effort: a failure is reported on stdout together
    with the path and the reason, and is not raised to the caller.

    Args:
        file_name: Path of the file to (over)write; opened in text mode 'w'.
        contents: String to write.
    """
    try:
        # ``with`` closes the handle even when write() fails.
        with open(file_name, 'w') as fh:
            fh.write(contents)
    except Exception as e:
        # Deliberately broad to keep the original never-raise behaviour.
        print("save to file error! %s: %s" % (file_name, e))
def read_file():
    """Placeholder for reading ``thefile.txt``; not implemented yet.

    The function performs no work and returns ``None``.
    """
    # TODO: the earlier sketch opened 'thefile.txt', iterated over its lines
    # and closed the file in a ``finally`` clause; the per-line processing
    # was never written.
    return None
if __name__ == "__main__":
    print('__main__')

    # Parse a previously saved BLAST result: build a SAX reader with
    # namespace processing switched off and let PdbHandler react to it.
    sax_reader = xml.sax.make_parser()
    sax_reader.setFeature(xml.sax.handler.feature_namespaces, 0)
    blast_handler = PdbHandler()
    sax_reader.setContentHandler(blast_handler)
    sax_reader.parse("output1.xml")

    # Disabled batch workflow, kept for reference only (never executed):
    #
    #   xml_data = setup_connect()
    #   save_to_file('output1.xml', xml_data)
    #   file_object = open('thefile.txt')
    #   try:
    #       while 1:
    #           line = file_object.readline()
    #           if not line:
    #               break
    #           line = line.split()
    #           print( line )
    #           print( line[1] )
    #           xml_data = setup_connect(line[1])
    #           # save_to_file('output.txt', data)
    #           parser = xml.sax.parseString(xml_data, PdbHandler())
    #           parser.parse()
    #   finally:
    #       file_object.close()
    #
    # NOTE(review): the sketch needs fixing before it is re-enabled:
    #   * xml.sax.parseString() parses immediately and returns None, so the
    #     following parser.parse() call cannot work.
    #   * In the sample thefile.txt line the sequence is the first field,
    #     so line[1] looks like the wrong index -- confirm the file format.
下面是 thefile.txt 的文件格式示例:每行一条记录,字段之间用空格分隔。
TDMLTLTRYVMEKGRQAKGTGELTQLLNSMLTAIKAISSAVRKAGLAHLYGIAGSVNVDQ 1