# Python下各种格式的编码效率初步测试 (JSON, BSON, bz2, lzma, msgpack)


# -*- coding:utf-8 -*-
'''
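Tests show that BSON needs about as many bytes as JSON, sometimes even more.
How well a compression algorithm performs depends on how redundant the input data is.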
'''
from __future__ import division
import collections  # From Python standard library.

import datetime
# Sample document used as the payload for every encoder measured below.
# Each row is (name, grade, code) for one entry of the "scores" list.
_score_rows = [
    ("english", 3.0, 215000),
    ("chinese", 2.0, 215000),
    ("kerea", 3.0, 232000),
    ("france", 4.0, 235000),
    ("japanese", 4.0, 235000),
    ("kerea", 4.0, 235000),
    ("kerea", 4.0, 235300),
]

test = {
    "name": "lemo",
    "age": 12,
    "address": {
        "city": "suzhou",
        "country": "china",
        "code": 215000,
    },
    # Disabled: a datetime value is not part of the measured payload.
    #"timestamp":datetime.datetime.now(),
    "scores": [
        {"name": row[0], "grade": row[1], "code": row[2]}
        for row in _score_rows
    ],
}

#import bson
#from bson.codec_options import CodecOptions
#data = bson.BSON.encode(test)
#print ("bson:", len(data) )
#decoded_doc = bson.BSON.decode(data)
import json

# Serialize the sample document with json.dumps' default settings. The length
# of this text is the denominator for every size ratio printed further down.
json_str = json.dumps(test)
json_size = len(json_str)
print("json:", json_size)
### 
import bz2

# Compress the JSON text with bz2 and report the size and the ratio to JSON.
# compress() may already return part of the stream and flush() returns the
# rest, so both pieces are kept (the lzma section below does the same).
compressor = bz2.BZ2Compressor()
bz2_str = compressor.compress(json_str.encode('utf-8')) + compressor.flush()
print("bz2:", len(bz2_str), len(bz2_str) / len(json_str))

import sys

# Only run the lzma measurement on Python 3.3 or newer; older interpreters
# skip this section entirely.
if sys.version_info >= (3, 3):
    import lzma

    lzc = lzma.LZMACompressor()
    out1 = lzc.compress(json_str.encode('utf-8'))
    out2 = lzc.flush()
    # The full stream is whatever compress() returned plus the flushed tail.
    result = out1 + out2
    print("lzma:", len(result), len(result) / len(json_str))

    # Round-trip check: decompressing must give back the original UTF-8 bytes.
    lzd = lzma.LZMADecompressor()
    dec_str = lzd.decompress(result)
    assert dec_str == json_str.encode('utf-8')

### msgpack
import datetime
import msgpack
#useful_dict = {
    #"id": 1,
    #"created": datetime.datetime.now(),
#}

def decode_datetime(obj):
    """Turn a mapping tagged by ``encode_datetime`` back into a datetime.

    Used as the ``object_hook`` for ``msgpack.unpackb``. Depending on how the
    data was unpacked, the keys may be text or bytes, so the marker and the
    payload are looked up with the same key type. The original checked a
    bytes marker but read a text payload key, which never matched on
    Python 3.

    Args:
        obj: A decoded mapping.

    Returns:
        A ``datetime.datetime`` if ``obj`` carries the ``__datetime__``
        marker, otherwise ``obj`` unchanged.

    Raises:
        KeyError: If the marker is present but ``as_str`` is missing.
        ValueError: If ``as_str`` does not match ``%Y%m%dT%H:%M:%S.%f``.
    """
    key_pairs = (
        ('__datetime__', 'as_str'),
        (b'__datetime__', b'as_str'),
    )
    for marker, field in key_pairs:
        if marker in obj:
            stamp = obj[field]
            # A raw (bytes) payload must become text before parsing.
            if isinstance(stamp, bytes):
                stamp = stamp.decode('ascii')
            return datetime.datetime.strptime(stamp, "%Y%m%dT%H:%M:%S.%f")
    return obj

def encode_datetime(obj):
    """Replace a ``datetime.datetime`` with a tagged, serializable mapping.

    Passed as the ``default`` hook to ``msgpack.packb``.

    Args:
        obj: The value handed to the hook.

    Returns:
        ``{'__datetime__': True, 'as_str': <formatted text>}`` when ``obj`` is
        a ``datetime.datetime`` (format ``%Y%m%dT%H:%M:%S.%f``); any other
        value is returned unchanged.
    """
    if not isinstance(obj, datetime.datetime):
        return obj
    stamp = obj.strftime("%Y%m%dT%H:%M:%S.%f")
    return {'__datetime__': True, 'as_str': stamp}

# Pack the sample document with msgpack, passing encode_datetime as the
# `default` hook, then report the packed size and its ratio to the JSON text.
packed_dict = msgpack.packb(test, default=encode_datetime)
packed_size = len(packed_dict)
print("msgpack:", packed_size, packed_size / len(json_str))
# Disabled experiment: bz2 applied on top of the msgpack payload.
#compressor = bz2.BZ2Compressor(compresslevel=1)
#compressor.compress(packed_dict) 
#bz2_str = compressor.flush() 
#print ("msgpack_bz2:",len(bz2_str),  len(bz2_str)/len(json_str) )


# Unpack the payload again, with decode_datetime installed as the object hook.
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)

# 你可能感兴趣的: (json, python, bz2, BSON, msgpack, lzma)