Python边学边用--BT客户端实现之BitTorrent文件解析

BitTorrent文件使用bencode编码,其中包括了4种数据类型:

'd' 开头表示是dict类型,'e'表示结束

'l' (小写字母L)开头表示是list类型,'e'表示结束

'i'开头表示是integer类型,'e'表示结束,可以表示负数

以数字开头表示string类型,数字为string长度,长度与string内容以':'分隔

默认所有text类型的属性为utf-8编码,但是大多数BitTorrent包含codepage 和 encoding属性,指定了text的编码格式

BitTorrent的标准参见:http://www.bittorrent.org/beps/bep_0003.html

以下是自己写的Python实现,初学Python,代码写起来还都是C/C++风格,慢慢改进吧。

 

 torrent_file.py
复制代码
import os
from datetime import tzinfo
from datetime import datetime

import bcodec

_READ_MAX_LEN = -1

class BTFormatError(Exception):
    """Raised when a torrent file does not decode to a bencoded dictionary.

    Derives from Exception rather than BaseException so that ordinary
    ``except Exception`` handlers catch it; BaseException is meant for
    interpreter-level events such as SystemExit and KeyboardInterrupt.
    """
    
class TorrentFile(object):
    """Read-only view of the metainfo dictionary decoded from a torrent file.

    Call read_file() first.  Until a file has been loaded, or when a key is
    absent, the getters return None and the list getters return [].
    """

    # Size in bytes of each hash packed into the info dictionary's
    # 'pieces' string.
    _HASH_LEN = 20

    def __init__(self):
        # Per-instance state, so instances never share one dictionary.
        self.__metainfo = {}
        self.__file_name = ''

    def read_file(self, filename):
        """Load and decode the torrent file at *filename*.

        Raises IOError if the file cannot be read and BTFormatError if the
        content does not decode to a non-empty dictionary.
        """
        # 'with' closes the handle even when read() raises.
        with open(filename, 'rb') as torrent_file:
            data = torrent_file.read(_READ_MAX_LEN)

        metainfo = bcodec.bdcode(data)
        if not metainfo or not isinstance(metainfo, dict):
            raise BTFormatError(
                'not a bencoded dictionary: %s' % (filename,))
        self.__file_name = filename
        self.__metainfo = metainfo

    def __is_singlefile(self):
        """Return True if the info dictionary carries a 'length' key."""
        # 'length' belongs to the info dictionary, not the top level.
        return self.__get_meta_info('length') is not None

    def __decode_text(self, text):
        """Decode a raw metainfo string.

        The codec is the torrent's 'encoding' value if set, otherwise
        'cp' + its 'codepage' value, otherwise utf-8.  Returns None for
        missing or empty text, and the value unchanged if it cannot be
        decoded.
        """
        if not text:
            return None
        if not hasattr(text, 'decode'):
            # Nothing to decode (e.g. a Python 3 str).
            return text

        encoding = 'utf-8'
        if self.get_encoding():
            encoding = self.get_encoding()
        elif self.get_codepage():
            encoding = 'cp' + str(self.get_codepage())

        try:
            return text.decode(encoding)
        except (ValueError, LookupError, TypeError):
            # ValueError covers UnicodeError; LookupError is an unknown
            # codec name; TypeError is a codec name of the wrong type.
            return text

    def __get_meta_top(self, key):
        """Return the top-level metainfo value for *key*, or None."""
        return self.__metainfo.get(key)

    def __get_meta_info(self, key):
        """Return the info-dictionary value for *key*, or None."""
        meta_info = self.__get_meta_top('info')
        if isinstance(meta_info, dict):
            return meta_info.get(key)
        return None

    def __piece_hashes(self, pieces, first, count):
        """Return *count* hashes cut from *pieces*, from hash index *first*."""
        size = self._HASH_LEN
        return [pieces[i * size:(i + 1) * size]
                for i in range(first, first + count)]

    def get_codepage(self):
        """Return the top-level 'codepage' value, or None."""
        return self.__get_meta_top('codepage')

    def get_encoding(self):
        """Return the top-level 'encoding' value, or None."""
        return self.__get_meta_top('encoding')

    def get_announces(self):
        """Return the tracker URLs as a list of tiers (lists of URLs).

        The 'announce' URL, if any, forms the first tier; the tiers of
        'announce-list', if any, follow it.  Returns [] when neither key
        is present.
        """
        announces = []
        ann = self.__get_meta_top('announce')
        if ann:
            announces.append([ann])
        ann_list = self.__get_meta_top('announce-list')
        if isinstance(ann_list, list):
            # 'announce-list' is already a list of tiers, so extend.
            announces.extend(ann_list)
        return announces

    def get_publisher(self):
        """Return the decoded 'publisher' value, or None."""
        return self.__decode_text(self.__get_meta_top('publisher'))

    def get_publisher_url(self):
        """Return the decoded 'publisher-url' value, or None."""
        return self.__decode_text(self.__get_meta_top('publisher-url'))

    def get_creater(self):
        """Return the decoded 'created by' value, or None."""
        return self.__decode_text(self.__get_meta_top('created by'))

    def get_creation_date(self):
        """Return 'creation date' as a naive UTC datetime, or None."""
        utc_date = self.__get_meta_top('creation date')
        if utc_date is None:
            return None
        return datetime.utcfromtimestamp(utc_date)

    def get_comment(self):
        """Return the decoded 'comment' value, or None."""
        return self.__decode_text(self.__get_meta_top('comment'))

    def get_nodes(self):
        """Return the top-level 'nodes' value, or None."""
        return self.__get_meta_top('nodes')

    def get_piece_length(self):
        """Return the info dictionary's 'piece length' value, or None."""
        return self.__get_meta_info('piece length')

    def get_files(self):
        """Return one dictionary per file described by the torrent.

        Each dictionary has 'name' (list of path components, starting with
        the torrent's name), 'length' (the file's length value) and
        'pieces' (list of the hashes of every piece the file overlaps).
        Returns [] when 'pieces', 'name' or 'piece length' is missing.
        In a multi-file torrent the listing stops at the first malformed
        entry.
        """
        files = []
        pieces = self.__get_meta_info('pieces')
        name = self.__decode_text(self.__get_meta_info('name'))
        piece_length = self.get_piece_length()

        if not pieces or not name or not piece_length:
            return files

        if self.__is_singlefile():
            file_length = self.__get_meta_info('length')
            if not file_length:
                return files

            # Ceiling division: a trailing partial piece still has a hash.
            pieces_num = (file_length + piece_length - 1) // piece_length
            if self._HASH_LEN * pieces_num > len(pieces):
                return files

            files.append({
                'name': [name],
                'length': file_length,
                'pieces': self.__piece_hashes(pieces, 0, pieces_num),
            })
            return files

        meta_files = self.__get_meta_info('files')
        if not meta_files:
            return files

        # Files are laid end to end; 'offset' is where the current file
        # starts in that concatenated stream.
        offset = 0
        for one_file in meta_files:
            if (not isinstance(one_file, dict)
                    or 'path' not in one_file
                    or 'length' not in one_file):
                # Without a length, later offsets cannot be computed.
                break

            length = one_file['length']
            path_list = [name]
            path_list.extend(
                self.__decode_text(path) for path in one_file['path'])

            first_piece = offset // piece_length
            offset += length
            if length > 0:
                # Piece holding the file's last byte, inclusive.
                last_piece = (offset - 1) // piece_length
                pieces_num = last_piece - first_piece + 1
            else:
                pieces_num = 0

            if (first_piece + pieces_num) * self._HASH_LEN > len(pieces):
                break

            files.append({
                'name': path_list,
                'length': length,
                'pieces': self.__piece_hashes(
                    pieces, first_piece, pieces_num),
            })

        return files
    
if __name__ == '__main__':
    # Demo: dump the fields of one torrent file.  Each print takes a
    # single pre-formatted string so the block runs on Python 2 and 3.
    import sys

    # An optional command-line argument overrides the sample path.  Other
    # samples used during development: huapi2.torrent, mh5t3tJ0EC.torrent.
    filename = sys.argv[1] if len(sys.argv) > 1 else r".\huapi2.1.torrent"
    torrent = TorrentFile()

    print("begin to read file")
    try:
        torrent.read_file(filename)
    except (IOError, BTFormatError) as reason:
        print("Read bittorrent file error! Error:%s" % (reason,))

    print("end to read file")

    # After a failed read the getters return None or empty lists, so the
    # dump below is still safe to run.
    print("announces: %s" % (torrent.get_announces(),))
    print("piece length: %s" % (torrent.get_piece_length(),))
    print("code page: %s" % (torrent.get_codepage(),))
    print("encoding: %s" % (torrent.get_encoding(),))
    print("publisher: %s" % (torrent.get_publisher(),))
    print("publisher url: %s" % (torrent.get_publisher_url(),))
    print("creator: %s" % (torrent.get_creater(),))
    print("creation date: %s" % (torrent.get_creation_date(),))
    print("comment: %s" % (torrent.get_comment(),))
    print("nodes: %s" % (torrent.get_nodes(),))
    for one_file in torrent.get_files():
        print('file name: %s' % ('\\'.join(one_file['name']),))
        print('file length: %s' % (one_file['length'],))
        print('pieces: %s' % (list(one_file['pieces']),))
复制代码
bcodec.py
复制代码
'''
Created on 2012-9-30

@author: ddt
'''
  6 def bdcode(data):
  7     data = list(data)
  8     return _read_chunk(data)
  9     
 10 def _read_chunk(data):
 11     
 12     chunk = None
 13     
 14     if len(data) == 0:
 15         return chunk
 16     
 17     leading_chr = data[0]
 18                      
 19     if leading_chr.isdigit():
 20         chunk = _read_string(data)
 21     elif leading_chr == 'd':
 22         chunk = _read_dict(data)
 23     elif leading_chr == 'i':
 24         chunk = _read_integer(data)
 25     elif leading_chr == 'l':
 26         chunk = _read_list(data)
 27 
 28     #print leading_chr, chunk
 29     return chunk
 30                            
 31 def _read_dict(data):
 32     
 33     if  len(data) == 0 or data.pop(0) != 'd': 
 34         return None
 35     
 36     chunk = {} 
 37     while len(data) > 0 and data[0] != 'e':
 38         
 39         key = _read_chunk(data)
 40         value = _read_chunk(data)
 41         
 42         if key and value and type(key) == type(''):
 43             chunk[key] = value
 44         else:
 45             return None
 46         
 47     if len(data) == 0 or data.pop(0) != 'e':
 48         return None
 49     
 50     return chunk
 51 
 52 def _read_list(data):
 53 
 54     if  len(data) == 0 or data.pop(0) != 'l': 
 55         return None
 56     
 57     chunk = []
 58     while len(data) > 0 and data[0] != 'e':
 59         value = _read_chunk(data)
 60         if value:
 61             chunk.append(value)
 62         else:
 63             return None
 64         
 65     if len(data) == 0 or data.pop(0) != 'e': 
 66         return None
 67 
 68     return chunk
 69 
 70 def _read_string(data):
 71     
 72     str_len = ''
 73     while len(data) > 0 and data[0].isdigit():
 74         str_len +=  data.pop(0)
 75     
 76     if len(data) == 0 or data.pop(0) != ':':
 77         return None
 78     
 79     str_len = int(str_len)
 80     if str_len > len(data):
 81         return None
 82     
 83     value = data[0:str_len]
 84     del data[0:str_len]
 85     return ''.join(value)
 86 
 87 def _read_integer(data):
 88    
 89     integer = ''
 90     if len(data) < len('i2e') or data.pop(0) != 'i': 
 91         return None
 92     
 93     sign = data.pop(0)
 94     if sign != '-' and not sign.isdigit():
 95         return None
 96     integer += sign
 97     
 98     while len(data) > 0 and data[0].isdigit():
 99         integer += data.pop(0)
100     
101     if len(data) == 0 or data.pop(0) != 'e':
102         return None
103 
104     return  int(integer)
复制代码

你可能感兴趣的:(Python边学边用--BT客户端实现之BitTorrent文件解析)