用http协议在rcsb.org自动下载pdb文件

请参考rcsb官方提供文档接口
由于python提供接口简单,所以选择了它

程序流程

1.从thefile.txt 文件中读取蛋白序列
2.然后从rcsb 查询,获取查询XML数据
3.解析XML文件
4.下载pdb文件

python代码

可以用python name.py 直接运行.

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import urllib
import urllib2
import xml.saxglobal_url = "https://files.rcsb.org/download/"class PdbHandler( xml.sax.ContentHandler ):def __init__(self):self.CurrentData = ""self.BlastOutput_program = ""self.BlastOutput_version = ""self.BlastOutput_db = ""self.BlastOutput_query_def = ""self.BlastOutput_query_len = ""self.blastOutput_iterations = ""self.blastOutput_query_len = 0self.BlastOutput_param = ""self.Parameters_matrix = ""self.Parameters_expect = ""self.Hit_def = ""       # 元素开始事件处理def startElement(self, tag, attributes):self.CurrentData = tagif tag == "BlastOutput":print "-------start---BlastOutput--"# 元素结束事件处理def endElement(self, tag):if self.CurrentData == "BlastOutput_program":print "BlastOutput_program:", self.BlastOutput_programelif self.CurrentData == "Hit_def":print "file name :", self.Hit_def[0:4] download_pdb(self.Hit_def[0:4])# 内容事件处理def characters(self, content):if self.CurrentData == "BlastOutput_program":self.BlastOutput_program = contentelif self.CurrentData == "BlastOutput_version":self.BlastOutput_version = contentelif self.CurrentData == "BlastOutput_db":self.BlastOutput_db = contentelif self.CurrentData == "BlastOutput_query-ID":self.BlastOutput_query_ID = contentelif self.CurrentData == "BlastOutput_query-def":self.BlastOutput_query_def = contentelif self.CurrentData == "BlastOutput_query-len":self.BlastOutput_query_len = contentelif self.CurrentData == "Parameters_matrix":self.Parameters_matrix = contentelif self.CurrentData == "Hit_def":self.Hit_def = contentdef download_pdb(file_name):try:f = urllib2.urlopen(global_url + file_name + ".pdb") data = f.read()with open("pdb/" + file_name + ".pdb", "wb") as code:     code.write(data)code.close()except Exception as e:print global_url + file_name + ".pdb"def setup_connect():test_data = {'sequence':'TDMLTLTRYVMEKGRQAKGTGELTQLLNSMLTAIKAISSAVRKAGLAHLYGIAGSVNVDQ', 'eCutOff':'10.0', 'matrix':'BLOSUM62', 'outputFormat':'XML'}#test_data['sequence'] = seqtest_data_urlencode = urllib.urlencode(test_data)requrl = "https://www.rcsb.org/pdb/rest/postBLAST"req = urllib2.Request(url = requrl,data =test_data_urlencode)print reqres_data = urllib2.urlopen(req)print res_datares = res_data.read()return resdef save_to_file(file_name, contents):try:fh = open(file_name, 'w')fh.write(contents)fh.close()      except Exception as e:print("save to file error!")def read_file():'''file_object = open('thefile.txt')try:#:  for line in file_object:#  process linefinally:file_object.close()'''if ( __name__ == "__main__"):print '__main__'# 创建一个 XMLReaderparser = xml.sax.make_parser()# turn off namepsacesparser.setFeature(xml.sax.handler.feature_namespaces, 0)# 重写 ContextHandlerHandler = PdbHandler()parser.setContentHandler( Handler )parser.parse("output1.xml")'''xml_data = setup_connect()save_to_file('output1.xml', xml_data)file_object = open('thefile.txt')try:while 1:line = file_object.readline()if not line:breakline = line.split()print( line )print( line[1] )xml_data = setup_connect(line[1])
#   save_to_file('output.txt', data)parser = xml.sax.parseString(xml_data, PdbHandler())#parser.setFeature(xml.sax.handler.feature_namespaces, 0)#Handler = #parser.setContentHandler( Handler )parser.parse()finally:file_object.close()
'''

下面是thefile.txt文件格式,字段之间用空格分隔.

TDMLTLTRYVMEKGRQAKGTGELTQLLNSMLTAIKAISSAVRKAGLAHLYGIAGSVNVDQ  1

下面是我的运行结果,下载了很多PDB文件.
这里写图片描述


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部