BitTorrent文件使用bencode编码,其中包括了4种数据类型:
'd' 开头表示是dict类型,'e'表示结束
'l' (小写字母L)开头表示是list类型,'e'表示结束
'i'开头表示是integer类型,'e'表示结束,可以表示负数
以数字开头表示string类型,数字为string的长度,长度与string内容之间以':'分隔
默认所有text类型的属性为utf-8编码,但是大多数BitTorrent文件包含codepage和encoding属性,用来指定text的编码格式
BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html
以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。
import os
import sys
from datetime import tzinfo
from datetime import datetime

import bcodec

_READ_MAX_LEN = -1
# Length in bytes of one piece hash inside the concatenated 'pieces' string.
_HASH_LEN = 20


class BTFormatError(Exception):
    """Raised when a file does not decode to a bencoded metainfo dictionary."""


class TorrentFile(object):
    """Read-only accessor for the metainfo dictionary of a .torrent file.

    Call read_file() first; until then every getter returns None or an
    empty list because the metainfo dictionary is empty.
    """

    def __init__(self):
        # Per-instance state, so two TorrentFile objects never share a dict.
        self.__metainfo = {}
        self.__file_name = ''

    def read_file(self, filename):
        """Load and decode *filename*.

        Raises IOError if the file cannot be read and BTFormatError if its
        content does not decode to a non-empty dictionary.
        """
        # 'with' closes the handle even when read() raises.
        with open(filename, 'rb') as torrent_file:
            data = torrent_file.read(_READ_MAX_LEN)

        metainfo = bcodec.bdcode(data)
        if not metainfo or not isinstance(metainfo, dict):
            raise BTFormatError('%s is not a valid torrent file' % filename)

        self.__file_name = filename
        self.__metainfo = metainfo

    def __is_singlefile(self):
        """Return True when the 'info' dictionary carries a 'length' entry."""
        # 'length' is read from 'info' (see get_files), so test it there.
        return self.__get_meta_info('length') is not None

    def __decode_text(self, text):
        """Decode a byte string using the torrent's declared text encoding.

        The 'encoding' entry wins over 'codepage'; utf-8 is the fallback.
        Returns None for empty input and the undecoded input when decoding
        fails or the declared encoding is unknown.
        """
        if not text:
            return None
        if not isinstance(text, bytes):
            # Already decoded (or not text at all): nothing to do.
            return text

        encoding = 'utf-8'
        if self.get_encoding():
            encoding = self.get_encoding()
        elif self.get_codepage():
            encoding = 'cp' + str(self.get_codepage())

        try:
            return text.decode(encoding)
        except (ValueError, LookupError):
            # ValueError covers UnicodeDecodeError; LookupError is raised
            # for an encoding name Python does not know.
            return text

    def __get_meta_top(self, key):
        """Return the top-level metainfo entry *key*, or None if absent."""
        return self.__metainfo.get(key)

    def __get_meta_info(self, key):
        """Return entry *key* of the 'info' dictionary, or None if absent."""
        meta_info = self.__get_meta_top('info')
        if isinstance(meta_info, dict):
            return meta_info.get(key)
        return None

    def get_codepage(self):
        """Return the raw 'codepage' entry, or None."""
        return self.__get_meta_top('codepage')

    def get_encoding(self):
        """Return the raw 'encoding' entry, or None."""
        return self.__get_meta_top('encoding')

    def get_announces(self):
        """Return the tracker tiers as a list of lists of URLs.

        The single 'announce' URL becomes the first one-element tier and
        the tiers of 'announce-list', when present, follow it.
        """
        announces = []
        announce = self.__get_meta_top('announce')
        if announce:
            announces.append([announce])

        announce_list = self.__get_meta_top('announce-list')
        if isinstance(announce_list, list):
            # Extend rather than append: 'announce-list' is itself a list
            # of tiers, and a missing list must not add a None element.
            announces.extend(announce_list)
        return announces

    def get_publisher(self):
        """Return the decoded 'publisher' entry, or None."""
        return self.__decode_text(self.__get_meta_top('publisher'))

    def get_publisher_url(self):
        """Return the decoded 'publisher-url' entry, or None."""
        return self.__decode_text(self.__get_meta_top('publisher-url'))

    def get_creater(self):
        """Return the decoded 'created by' entry, or None."""
        return self.__decode_text(self.__get_meta_top('created by'))

    def get_creation_date(self):
        """Return 'creation date' as a naive UTC datetime, or None."""
        utc_date = self.__get_meta_top('creation date')
        if utc_date is None:
            return None
        return datetime.utcfromtimestamp(utc_date)

    def get_comment(self):
        """Return the raw (undecoded) 'comment' entry, or None."""
        return self.__get_meta_top('comment')

    def get_nodes(self):
        """Return the raw 'nodes' entry, or None."""
        return self.__get_meta_top('nodes')

    def get_piece_length(self):
        """Return the 'piece length' entry of 'info', or None."""
        return self.__get_meta_info('piece length')

    def get_files(self):
        """Return one dictionary per file described by the torrent.

        Each dictionary has the keys 'name' (list of path components,
        starting with the torrent name), 'length' (size in bytes) and
        'pieces' (list of the 20-byte hashes of the pieces the file
        overlaps).  An empty list is returned when required entries are
        missing; for multi-file torrents the listing stops at the first
        malformed entry and returns the files collected so far.
        """
        pieces = self.__get_meta_info('pieces')
        name = self.__decode_text(self.__get_meta_info('name'))
        piece_length = self.get_piece_length()
        # A missing or zero piece length would break the divisions below.
        if not pieces or not name or not piece_length:
            return []

        if self.__is_singlefile():
            return self.__single_file_entry(name, pieces, piece_length)
        return self.__multi_file_entries(name, pieces, piece_length)

    def __piece_hashes(self, pieces, first_piece, pieces_num):
        """Slice *pieces_num* consecutive hashes starting at *first_piece*."""
        start = first_piece * _HASH_LEN
        stop = start + pieces_num * _HASH_LEN
        return [pieces[i:i + _HASH_LEN] for i in range(start, stop, _HASH_LEN)]

    def __single_file_entry(self, name, pieces, piece_length):
        """Build the one-element file list of a single-file torrent."""
        file_length = self.__get_meta_info('length')
        if not file_length:
            return []

        # Ceiling division: a trailing partial piece still has a hash.
        pieces_num = (file_length + piece_length - 1) // piece_length
        if _HASH_LEN * pieces_num > len(pieces):
            return []

        return [{'name': [name],
                 'length': file_length,
                 'pieces': self.__piece_hashes(pieces, 0, pieces_num)}]

    def __multi_file_entries(self, folder, pieces, piece_length):
        """Build the file list of a multi-file torrent."""
        meta_files = self.__get_meta_info('files')
        if not meta_files:
            return []

        files = []
        offset = 0  # byte offset of the current file in the whole payload
        for one_file in meta_files:
            if (not isinstance(one_file, dict)
                    or 'path' not in one_file
                    or 'length' not in one_file):
                break

            file_length = one_file['length']
            first_piece = offset // piece_length
            offset += file_length
            if file_length:
                # The file covers bytes [start, offset); its last byte
                # lies in piece (offset - 1) // piece_length.
                last_piece = (offset - 1) // piece_length
                pieces_num = last_piece - first_piece + 1
            else:
                pieces_num = 0

            if (first_piece + pieces_num) * _HASH_LEN > len(pieces):
                break

            path_list = [folder]
            path_list.extend(self.__decode_text(part)
                             for part in one_file['path'])
            files.append({
                'name': path_list,
                'length': file_length,
                'pieces': self.__piece_hashes(pieces, first_piece,
                                              pieces_num),
            })
        return files


def _show(label, value):
    """Print *label* and *value* separated by one space."""
    print("%s %s" % (label, value))


def main():
    """Dump the metadata of a torrent file to standard output.

    The path is taken from the first command-line argument and falls back
    to the sample file name used by the original script.
    """
    filename = sys.argv[1] if len(sys.argv) > 1 else r".\huapi2.1.torrent"

    torrent = TorrentFile()

    print("begin to read file")
    try:
        torrent.read_file(filename)
    except (IOError, BTFormatError) as reason:
        print("Read bittorrent file error! Error:%s" % reason)
    print("end to read file")

    _show("announces: ", torrent.get_announces())
    _show("peace length:", torrent.get_piece_length())
    _show("code page:", torrent.get_codepage())
    _show("encoding:", torrent.get_encoding())
    _show("publisher:", torrent.get_publisher())
    _show("publisher url:", torrent.get_publisher_url())
    _show("creater:", torrent.get_creater())
    _show("creation date:", torrent.get_creation_date())
    _show("commnent:", torrent.get_comment())
    _show("nodes:", torrent.get_nodes())

    for one_file in torrent.get_files():
        _show('file name:', '\\'.join(one_file['name']))
        _show('file length:', one_file['length'])
        _show('pieces:', list(one_file['pieces']))


if __name__ == '__main__':
    main()
"""Minimal bencode decoder.

Handles the four bencode types: dictionaries ('d' ... 'e'), lists
('l' ... 'e'), integers ('i' ... 'e') and length-prefixed strings
('<length>:<content>').

Created on 2012-9-30

@author: ddt
"""

_DIGITS = '0123456789'


class _DecodeError(ValueError):
    """Internal signal that the input is not well-formed bencode."""


def bdcode(data):
    """Decode the first bencoded value found in *data*.

    *data* is a string or any sequence of one-character strings.  Returns
    the decoded dict, list, int or string, or None when the input is empty
    or malformed.  Anything following the first complete value is ignored.
    """
    if not isinstance(data, str):
        data = ''.join(data)
    try:
        value, _ = _read_chunk(data, 0)
    except _DecodeError:
        return None
    return value


def _read_chunk(data, pos):
    """Decode the value starting at *pos*; return (value, next position)."""
    if pos >= len(data):
        raise _DecodeError('unexpected end of data')

    leading_chr = data[pos]
    if leading_chr in _DIGITS:
        return _read_string(data, pos)
    if leading_chr == 'd':
        return _read_dict(data, pos)
    if leading_chr == 'i':
        return _read_integer(data, pos)
    if leading_chr == 'l':
        return _read_list(data, pos)
    raise _DecodeError('unknown type marker at offset %d' % pos)


def _expect_end(data, pos):
    """Check for the 'e' terminator at *pos*; return the position after it."""
    if pos >= len(data) or data[pos] != 'e':
        raise _DecodeError('missing terminator at offset %d' % pos)
    return pos + 1


def _read_dict(data, pos):
    """Decode the dictionary whose 'd' marker is at *pos*."""
    pos += 1  # skip the 'd' marker
    chunk = {}
    while pos < len(data) and data[pos] != 'e':
        key, pos = _read_chunk(data, pos)
        if not isinstance(key, str):
            raise _DecodeError('dictionary key is not a string')
        # Failures are reported by exception, so falsy values such as 0,
        # '' or an empty list are stored like any other value.
        value, pos = _read_chunk(data, pos)
        chunk[key] = value
    return chunk, _expect_end(data, pos)


def _read_list(data, pos):
    """Decode the list whose 'l' marker is at *pos*."""
    pos += 1  # skip the 'l' marker
    chunk = []
    while pos < len(data) and data[pos] != 'e':
        value, pos = _read_chunk(data, pos)
        chunk.append(value)
    return chunk, _expect_end(data, pos)


def _read_string(data, pos):
    """Decode the length-prefixed string whose length starts at *pos*."""
    colon = pos
    while colon < len(data) and data[colon] in _DIGITS:
        colon += 1
    if colon >= len(data) or data[colon] != ':':
        raise _DecodeError('missing ":" after string length')

    start = colon + 1
    end = start + int(data[pos:colon])
    if end > len(data):
        raise _DecodeError('string runs past the end of data')
    return data[start:end], end


def _read_integer(data, pos):
    """Decode the integer whose 'i' marker is at *pos*."""
    start = pos + 1  # skip the 'i' marker
    end = start
    if end < len(data) and data[end] == '-':
        end += 1

    first_digit = end
    while end < len(data) and data[end] in _DIGITS:
        end += 1
    if end == first_digit:
        # Covers 'ie' and 'i-e', which carry no digits at all.
        raise _DecodeError('integer without digits')

    return int(data[start:end]), _expect_end(data, end)