BT种子文件使用了一种叫bencoding的编码方法来保存数据。
bencoding有四种类型的数据:strings(字符串),integers(整数),lists(列表),dictionaries(字典)
编码规则如下:
(1)strings(字符串)编码为:<字符串长度>:<字符串>
例如: 4:test 表示为字符串"test"
4:例子 表示为字符串“例子”(按GBK编码时每个汉字占2个字节;若按UTF-8编码则应写为 6:例子)
字符串长度单位为字节
没有开始或结束标记
(2)integers(整数)编码为:i<整数>e
开始标记i,结束标记为e
例如: i1234e 表示为整数1234
i-1234e 表示为整数-1234
整数没有大小限制
i0e 表示为整数0
i-0e 为非法
以0开头的为非法如: i01234e 为非法
(3)lists(列表)编码为:l<bencoding编码类型>e
开始标记为l,结束标记为e
列表里可以包含任何bencoding编码类型,包括整数,字符串,列表,字典。
例如: l4:test5:abcdee 表示为二个字符串["test","abcde"]
(4)dictionaries(字典)编码为d<bencoding字符串><bencoding编码类型>e
开始标记为d,结束标记为e
关键字必须为bencoding字符串
值可以为任何bencoding编码类型
例如: d3:agei20ee 表示为{"age"=20}
d4:path3:C:\8:filename8:test.txte
表示为{"path"="C:\","filename"="test.txt"}
(5)具体文件结构如下:
全部内容必须都为bencoding编码类型。
整个文件为一个字典结构,包含如下关键字
announce:tracker服务器的URL(字符串)
announce-list(可选):备用tracker服务器列表(列表)
creation date(可选):种子创建的时间,Unix标准时间格式,从1970年1月1日 00:00:00到创建时间的秒数(整数)
comment(可选):备注(字符串)
created by(可选):创建人或创建程序的信息(字符串)
info:一个字典结构,包含文件的主要信息,分为两种情况:单文件结构或多文件结构
单文件结构如下:
length:文件长度,单位字节(整数)
md5sum(可选):长32个字符的文件的MD5校验和,BT不使用这个值,只是为了兼容一些程序所保留!(字符串)
name:文件名(字符串)
piece length:每个块的大小,单位字节(整数)
pieces:每个块的20个字节的SHA1 Hash的值(二进制格式)
多文件结构如下:
files:一个列表结构,其中每个元素是一个字典,包含如下关键字
length:文件长度,单位字节(整数)
md5sum(可选):同单文件结构中相同
path:文件的路径和名字,是一个列表结构,如 test\test.txt 对应的列表为 l4:test8:test.txte
name:最上层的目录名字(字符串)
piece length:同单文件结构中相同
pieces:同单文件结构中相同
(6)实例:
用记事本打开一个.torrent文件,可以看到类似如下的内容
d8:announce35:http://www.manfen.net:7802/announce13:creation datei1076675108e4:infod6:lengthi17799e4:name62:MICROSOFT.WINDOWS.2000.AND.NT4.SOURCE.CODE-SCENELEADER.torrent12:piece lengthi32768e6:pieces20:?W ?躐?緕排T酆ee
很容易看出
announce=http://www.manfen.net:7802/announce
creation date=1076675108秒(02/13/04 20:25:08)
文件名=MICROSOFT.WINDOWS.2000.AND.NT4.SOURCE.CODE-SCENELEADER.torrent
文件大小=17799字节
文件块大小=32768字节
对Azureus中解析Torrent种子文件的源代码进行了适度裁剪,得到下面这样一个解析torrent文件的示例代码,如下所示:
/*
* BDecoder.java
*
*/
package
com.vista.test;
import
java.io.
*
;
import
java.nio.ByteBuffer;
import
java.nio.CharBuffer;
import
java.nio.charset.Charset;
import
java.util.ArrayList;
import
java.util.HashMap;
import
java.util.Iterator;
import
java.util.List;
import
java.util.Map;
/**
 * Decodes bencoded data (the encoding used by .torrent files) into plain Java objects.
 *
 * <p>Mapping of bencoded types to Java types:
 * <ul>
 *   <li>integer - {@link Long}</li>
 *   <li>string - {@code byte[]} (raw bytes, not decoded as text)</li>
 *   <li>list - {@link List}</li>
 *   <li>dictionary - {@link Map} whose keys are {@link String}s decoded with {@link #BYTE_CHARSET}</li>
 * </ul>
 *
 * <p>The class also contains helpers that dump a decoded structure as indented text.
 * Hex rendering of byte arrays is done by a private helper so that the class only
 * depends on the JDK.
 */
public class BDecoder {

    /** Name of the character set used to turn dictionary keys into Strings. */
    public static final String BYTE_ENCODING = "UTF8";

    /** Charset looked up from {@link #BYTE_ENCODING}; remains null if the lookup fails. */
    public static Charset BYTE_CHARSET;

    static {
        try {
            BYTE_CHARSET = Charset.forName(BYTE_ENCODING);
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }

    /**
     * When true every decoded dictionary entry is echoed to System.out.
     * It is switched on in this sample; set it to false to decode silently.
     */
    private static final boolean TRACE = true;

    /**
     * Upper bound for a single bencoded string. Torrent piece hashes can be large
     * (a 55GB file with 2MB pieces yields about half a megabyte), so the limit is generous.
     */
    private static final int MAX_BYTE_ARRAY_LENGTH = 8 * 1024 * 1024;

    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /** When set, errors inside a dictionary or list are swallowed and the partial result is returned. */
    private boolean recovery_mode;

    /**
     * Scratch buffer for number parsing. It is created once per decoder instance; this is
     * safe with the recursive decoder because it is only used in a leaf method.
     */
    private final char[] numberChars = new char[32];

    /**
     * Decodes a complete bencoded stream whose top-level element is a dictionary.
     *
     * @param is stream positioned at the start of the bencoded data
     * @return the decoded top-level dictionary
     * @throws Exception if the stream is empty, malformed, or its top level is not a dictionary
     */
    public static Map decode(BufferedInputStream is) throws Exception {
        return new BDecoder().decodeStream(is);
    }

    public BDecoder() {
    }

    /**
     * Decodes a complete bencoded stream whose top-level element is a dictionary.
     *
     * @param data stream positioned at the start of the bencoded data
     * @return the decoded top-level dictionary
     * @throws Exception if the stream is empty, malformed, or its top level is not a dictionary
     */
    public Map decodeStream(BufferedInputStream data) throws Exception {
        // 0 = recursion starts at the outermost level
        Object res = decodeInputStream(new BDecoderInputStreamStream(data), 0);

        if (res == null) {
            throw new Exception("BDecoder: zero length file");
        }
        if (!(res instanceof Map)) {
            throw new Exception("BDecoder: top level isn't a Map");
        }
        return (Map) res;
    }

    /**
     * Decodes the next bencoded element from the stream.
     *
     * @param dbis    source of bytes
     * @param nesting recursion depth, 0 for the outermost element
     * @return a Map, List, Long or byte[]; null when an 'e' terminator or end of stream is read
     * @throws Exception if the data is malformed
     */
    private Object decodeInputStream(BDecoderInputStream dbis, int nesting) throws Exception {
        if (nesting == 0 && !dbis.markSupported()) {
            throw new IOException("InputStream must support the mark() method");
        }

        // Remember the position so that a string's first length digit can be pushed back.
        dbis.mark(Integer.MAX_VALUE);

        int tempByte = dbis.read();

        switch (tempByte) {
            case 'd':
                return decodeDictionary(dbis, nesting);

            case 'l':
                return decodeList(dbis, nesting);

            case 'e':
            case -1:
                // end of the current container, or end of the stream
                return null;

            case 'i':
                return Long.valueOf(getNumberFromStream(dbis, 'e'));

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                // The digit belongs to the string length: step back and read "<length>:<bytes>".
                dbis.reset();
                return getByteArrayFromStream(dbis);

            default: {
                // Include up to 256 bytes of what follows to help diagnose the bad input.
                int rem_len = dbis.available();
                if (rem_len > 256) {
                    rem_len = 256;
                }
                byte[] rem_data = new byte[rem_len];
                dbis.read(rem_data);
                throw new Exception(
                        "BDecoder: unknown command '" + tempByte + ", remainder = " + new String(rem_data));
            }
        }
    }

    /**
     * Decodes the body of a dictionary; the leading 'd' has already been consumed.
     *
     * @return the entries read; in recovery mode this may be a partial result
     * @throws IOException if the data is malformed and recovery mode is off
     */
    private Map decodeDictionary(BDecoderInputStream dbis, int nesting) throws IOException {
        Map tempMap = new HashMap();

        try {
            while (true) {
                Object rawKey = decodeInputStream(dbis, nesting + 1);
                if (rawKey == null) {
                    break; // hit the closing 'e' or end of stream
                }
                if (!(rawKey instanceof byte[])) {
                    // Keys must be bencoded strings; report that instead of a bare ClassCastException.
                    throw new IOException("BDecoder: invalid input data, dictionary key is not a string");
                }

                Object value = decodeInputStream(dbis, nesting + 1);

                String key = BYTE_CHARSET.decode(ByteBuffer.wrap((byte[]) rawKey)).toString();

                if (TRACE) {
                    System.out.println(key + "->" + value + ";");
                }

                // Recover from broken encodings where the value was not encoded at all, e.g.
                //     18:azureus_propertiesd0:e
                // A null here means decoding hit an 'e' or end of file, which is never a valid
                // value, so the entry is ignored and the dictionary is considered finished.
                if (value == null) {
                    break;
                }

                tempMap.put(key, value);
            }

            requireMoreInput(dbis, nesting, "dictionary");
        } catch (Throwable e) {
            if (!recovery_mode) {
                throw asIOException(e);
            }
        }

        return tempMap;
    }

    /**
     * Decodes the body of a list; the leading 'l' has already been consumed.
     *
     * @return the elements read; in recovery mode this may be a partial result
     * @throws IOException if the data is malformed and recovery mode is off
     */
    private List decodeList(BDecoderInputStream dbis, int nesting) throws IOException {
        ArrayList tempList = new ArrayList();

        try {
            Object tempElement;
            while ((tempElement = decodeInputStream(dbis, nesting + 1)) != null) {
                tempList.add(tempElement);
            }
            tempList.trimToSize();

            requireMoreInput(dbis, nesting, "list");
        } catch (Throwable e) {
            if (!recovery_mode) {
                throw asIOException(e);
            }
        }

        return tempList;
    }

    /**
     * Peeks at the next byte without consuming it and fails if a nested container is
     * followed directly by end of stream.
     *
     * @param what container name used in the error message
     */
    private static void requireMoreInput(BDecoderInputStream dbis, int nesting, String what)
            throws Exception {
        dbis.mark(Integer.MAX_VALUE);
        int next = dbis.read();
        dbis.reset();

        if (nesting > 0 && next == -1) {
            throw new Exception("BDecoder: invalid input data, 'e' missing from end of " + what);
        }
    }

    /** Returns e itself if it is an IOException, otherwise an IOException that keeps e as its cause. */
    private static IOException asIOException(Throwable e) {
        if (e instanceof IOException) {
            return (IOException) e;
        }
        IOException wrapped = new IOException(e.getMessage());
        wrapped.initCause(e);
        return wrapped;
    }

    /**
     * Reads decimal characters up to (and including) the terminator and parses them.
     *
     * @param parseChar terminator: 'e' for integers, ':' for string lengths
     * @return the parsed value; 0 if the terminator came first; -1 if the stream ended before
     *         the terminator was seen (NOTE: indistinguishable from a genuine -1 for integers)
     * @throws NumberFormatException if the text is not a number or has 32 or more characters
     */
    private long getNumberFromStream(BDecoderInputStream dbis, char parseChar) throws IOException {
        int tempByte = dbis.read();
        int pos = 0;

        while ((tempByte != parseChar) && (tempByte >= 0)) {
            numberChars[pos++] = (char) tempByte;
            if (pos == numberChars.length) {
                throw new NumberFormatException(
                        "Number too large: " + new String(numberChars, 0, pos) + "...");
            }
            tempByte = dbis.read();
        }

        if (tempByte < 0) {
            // reached the end of the stream
            return -1;
        }
        if (pos == 0) {
            // support broken implementations that sometimes don't bother encoding anything
            return 0;
        }
        return parseLong(numberChars, 0, pos);
    }

    /**
     * Parses a signed decimal number from a slice of a char array.
     *
     * @param chars  characters to parse
     * @param start  index of the first character
     * @param length number of characters
     * @return the parsed value
     * @throws NumberFormatException if the slice is empty, is only "-", contains a non-digit,
     *                               or does not fit in a long
     */
    public static long parseLong(char[] chars, int start, int length) {
        if (length <= 0) {
            throw new NumberFormatException(new String(chars, start, length));
        }

        long result = 0;
        boolean negative = false;
        int i = start;
        int max = start + length;
        long limit;

        if (chars[i] == '-') {
            negative = true;
            limit = Long.MIN_VALUE;
            i++;
        } else {
            limit = -Long.MAX_VALUE;
        }

        if (i < max) {
            int digit = chars[i++] - '0';
            if (digit < 0 || digit > 9) {
                throw new NumberFormatException(new String(chars, start, length));
            }
            result = -digit;
        }

        long multmin = limit / 10;

        while (i < max) {
            // Accumulating negatively avoids surprises near MAX_VALUE
            int digit = chars[i++] - '0';
            if (digit < 0 || digit > 9) {
                throw new NumberFormatException(new String(chars, start, length));
            }
            if (result < multmin) {
                throw new NumberFormatException(new String(chars, start, length));
            }
            result *= 10;
            if (result < limit + digit) {
                throw new NumberFormatException(new String(chars, start, length));
            }
            result -= digit;
        }

        if (!negative) {
            return -result;
        }
        if (i > start + 1) {
            return result;
        }
        // Only got "-"
        throw new NumberFormatException(new String(chars, start, length));
    }

    /**
     * Reads a bencoded string of the form {@code <length>:<bytes>}.
     *
     * @return the raw bytes, or null if the stream ended while reading the length
     * @throws IOException if the declared length exceeds the limit or the data is truncated
     */
    private byte[] getByteArrayFromStream(BDecoderInputStream dbis) throws IOException {
        // Validate as a long first: narrowing to int before the check would let a huge
        // declared length wrap around to a small or negative value.
        long declared = getNumberFromStream(dbis, ':');

        if (declared < 0) {
            return null;
        }
        if (declared > MAX_BYTE_ARRAY_LENGTH) {
            throw new IOException("Byte array length too large (" + declared + ")");
        }

        int length = (int) declared;
        byte[] tempArray = new byte[length];
        int count = 0;
        int len;

        // A single read may return fewer bytes than requested, so loop until full.
        while (count != length && (len = dbis.read(tempArray, count, length - count)) > 0) {
            count += len;
        }

        if (count != tempArray.length) {
            throw new IOException("BDecoder::getByteArrayFromStream: truncated");
        }
        return tempArray;
    }

    /**
     * Enables or disables recovery mode.
     *
     * @param r true to return partial dictionaries/lists instead of failing on bad input
     */
    public void setRecoveryMode(boolean r) {
        recovery_mode = r;
    }

    /**
     * Writes a human-readable dump of a decoded structure.
     *
     * @param writer destination
     * @param obj    a Long, byte[], String, List or Map as produced by this decoder
     */
    public static void print(PrintWriter writer, Object obj) {
        print(writer, obj, "", false);
    }

    /**
     * Recursive worker for {@link #print(PrintWriter, Object)}.
     *
     * @param indent      indentation for the lines of this element
     * @param skip_indent true when the caller already wrote a prefix on the current line
     */
    private static void print(PrintWriter writer, Object obj, String indent, boolean skip_indent) {
        String use_indent = skip_indent ? "" : indent;

        if (obj instanceof Long) {
            writer.println(use_indent + obj);

        } else if (obj instanceof byte[]) {
            byte[] b = (byte[]) obj;

            if (b.length == 20) {
                // 20 bytes is the size of a SHA1 hash: show it as hex only
                writer.println(use_indent + " { " + toHex(b) + " }");
            } else if (b.length < 64) {
                String text = BYTE_CHARSET.decode(ByteBuffer.wrap(b)).toString();
                writer.println(use_indent + text + " [" + toHex(b) + "]");
            } else {
                writer.println(use_indent + "[byte array length " + b.length + "]");
            }

        } else if (obj instanceof String) {
            writer.println(use_indent + obj);

        } else if (obj instanceof List) {
            List l = (List) obj;

            writer.println(use_indent + "[");
            for (int i = 0; i < l.size(); i++) {
                writer.print(indent + "  (" + i + ") ");
                print(writer, l.get(i), indent + "    ", true);
            }
            writer.println(indent + "]");

        } else {
            Map m = (Map) obj;

            Iterator it = m.keySet().iterator();
            while (it.hasNext()) {
                String key = (String) it.next();

                if (key.length() > 256) {
                    // very long keys are cut; the dots mark the truncation
                    writer.print(indent + key.substring(0, 256) + "... = ");
                } else {
                    writer.print(indent + key + " = ");
                }
                print(writer, m.get(key), indent + "  ", true);
            }
        }
    }

    /** Renders bytes as upper-case hexadecimal, two characters per byte. */
    private static String toHex(byte[] data) {
        char[] out = new char[data.length * 2];
        for (int i = 0; i < data.length; i++) {
            int v = data[i] & 0xFF;
            out[2 * i] = HEX_DIGITS[v >>> 4];
            out[2 * i + 1] = HEX_DIGITS[v & 0x0F];
        }
        return new String(out);
    }

    /**
     * Decodes the bencoded file f and writes a textual dump to output.
     * Failures are reported on stderr; both files are always closed.
     */
    private static void print(File f, File output) {
        PrintWriter pw = null;
        InputStream in = null;

        try {
            BDecoder decoder = new BDecoder();

            pw = new PrintWriter(new FileWriter(output));
            in = new FileInputStream(f);

            print(pw, decoder.decodeStream(new BufferedInputStream(in)));

            pw.flush();
        } catch (Throwable e) {
            e.printStackTrace();
        } finally {
            if (pw != null) {
                pw.close();
            }
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing the input fails
                }
            }
        }
    }

    /** Minimal view of an input stream as needed by the decoder. */
    private interface BDecoderInputStream {

        int read() throws IOException;

        int read(byte[] buffer) throws IOException;

        int read(byte[] buffer, int offset, int length) throws IOException;

        int available() throws IOException;

        boolean markSupported();

        void mark(int limit);

        void reset() throws IOException;
    }

    /** {@link BDecoderInputStream} that forwards every call to a {@link BufferedInputStream}. */
    private static class BDecoderInputStreamStream implements BDecoderInputStream {

        private final BufferedInputStream is;

        private BDecoderInputStreamStream(BufferedInputStream _is) {
            is = _is;
        }

        /** Reads the next byte as a value from 0 to 255, or returns -1 at end of stream. */
        public int read() throws IOException {
            return is.read();
        }

        /** Reads up to buffer.length bytes into buffer. */
        public int read(byte[] buffer) throws IOException {
            return is.read(buffer);
        }

        /** Reads up to length bytes into buffer starting at offset. */
        public int read(byte[] buffer, int offset, int length) throws IOException {
            return is.read(buffer, offset, length);
        }

        /** Returns an estimate of the number of bytes that can be read without blocking. */
        public int available() throws IOException {
            return is.available();
        }

        /** Tells whether the underlying stream supports mark and reset. */
        public boolean markSupported() {
            return is.markSupported();
        }

        /**
         * Marks the current position so that a later reset() returns to it.
         *
         * @param limit maximum number of bytes that may be read before the mark becomes invalid
         */
        public void mark(int limit) {
            is.mark(limit);
        }

        /** Repositions the stream to where mark() was last called. */
        public void reset() throws IOException {
            is.reset();
        }
    }

    /**
     * Dumps a torrent file as text.
     *
     * @param args optional: args[0] = torrent file to read, args[1] = text file to write;
     *             the sample paths are used for any argument that is missing
     */
    public static void main(String[] args) {
        String source = (args != null && args.length > 0) ? args[0] : "C:\\1001.torrent";
        String target = (args != null && args.length > 1) ? args[1] : "C:\\tables.txt";

        print(new File(source), new File(target));
    }
}