一、原始问题
1.执行如下代码
importjsondeftest_dumps():
data={"keys":"string",1:[2,3],"dict":{"a":"b"},"key_bytes":b'123'}
ans=json.dumps(data)print(ans)if __name__ == "__main__":
test_dumps()
2.对于如上代码,我们会遇到如下错误
Traceback (most recent call last):
  File "test_dumps.py", line 8, in <module>
    test_dumps()
  File "test_dumps.py", line 4, in test_dumps
    ans = json.dumps(data)
  File "/usr/lib/python3.6/json/__init__.py", line 231, in dumps
    return _default_encoder.encode(obj)
  File "/usr/lib/python3.6/json/encoder.py", line 199, in encode
    chunks = self.iterencode(o, _one_shot=True)
  File "/usr/lib/python3.6/json/encoder.py", line 257, in iterencode
    return _iterencode(o, 0)
  File "/usr/lib/python3.6/json/encoder.py", line 180, in default
    o.__class__.__name__)
TypeError: Object of type 'bytes' is not JSON serializable
二、代码追踪
1.dumps函数
针对以上问题,我们一步一步看源码,进入到json.dumps源码,可以看到如下内容,这里删除了源码中的注释。可以看到是通过JSONEncoder这个类的encode方法来编码输入的obj数据
def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
          allow_nan=True, cls=None, indent=None, separators=None,
          default=None, sort_keys=False, **kw):
    """Serialize ``obj`` to a JSON string by delegating to an encoder object.

    When every option is left at its default, the shared module-level
    ``_default_encoder`` is reused. Otherwise an encoder is built from
    ``cls`` (``JSONEncoder`` when ``cls`` is None) with the given options,
    and its ``encode`` method produces the result.
    """
    # cached encoder
    if (not skipkeys and ensure_ascii and check_circular and allow_nan
            and cls is None and indent is None and separators is None
            and default is None and not sort_keys and not kw):
        return _default_encoder.encode(obj)
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, default=default, sort_keys=sort_keys,
        **kw).encode(obj)
2.encode函数实现
再次进入到encode中,如果数据o是字符串,则有两种编码方式。
一种(encode_basestring_ascii)只输出ASCII字符:它会把中文等非ASCII字符转义为\uXXXX形式的Unicode转义序列,比如会把"中"转化为"\u4e2d"。标准的JSON解析器会把该转义序列还原为"中";但如果其他语言的程序没有按JSON解析,而是直接把它当作普通文本,就会将其看作6个字符,而非一个"中"字。
另一种(encode_basestring)不转义非ASCII字符,而是原样保留Unicode字符(例如"中"仍输出为"中"),只对引号、反斜杠和控制字符进行转义;其返回值仍然是str,而不是二进制数据。
在这个函数中,主要是利用self.iterencode这个方法处理数据。
def encode(self, o):
    """Return the JSON representation of ``o`` as a single ``str``.

    A top-level ``str`` is encoded directly: with ASCII-only escaping when
    ``self.ensure_ascii`` is true, otherwise with ``encode_basestring``.
    Any other value is passed to ``self.iterencode`` and the resulting
    chunks are joined into one string.
    """
    # This is for extremely simple cases and benchmarks.
    if isinstance(o, str):
        if self.ensure_ascii:
            return encode_basestring_ascii(o)
        else:
            return encode_basestring(o)
    chunks = self.iterencode(o, _one_shot=True)
    # ''.join needs a concrete sequence; materialize any other iterable first.
    if not isinstance(chunks, (list, tuple)):
        chunks = list(chunks)
    return ''.join(chunks)
3.核心处理函数_make_iterencode,其中包含了可扩展的_default
真正对数据进行编码的部分如下。可以看到,_iterencode通过一系列if条件按类型处理数据,其中dict和list各自单独由一个函数处理;如果数据的类型不在这些if条件之中,则会调用_default来处理。
def_make_iterencode(markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,## HACK: hand-optimized bytecode; turn globals into locals
ValueError=ValueError,
dict=dict,
float=float,
id=id,
int=int,
isinstance=isinstance,
list=list,
str=str,
tuple=tuple,
_intstr=int.__str__,
):if _indent is not None and notisinstance(_indent, str):
_indent= ' ' *_indentdef _iterencode_list(lst, _current_indent_level): #只给出函数定义,具体方法这里不列出
def _iterencode_dict(dct, _current_indent_level): #只给出函数定义,具体方法这里不列出
def_iterencode(o, _current_indent_level):ifisinstance(o, str):yield_encoder(o)elif o isNone:yield 'null'
elif o isTrue:yield 'true'
elif o isFalse:yield 'false'
elifisinstance(o, int):#see comment for int/float in _make_iterencode
yield_intstr(o)elifisinstance(o, float):#see comment for int/float in _make_iterencode
yield_floatstr(o)elifisinstance(o, (list, tuple)):yield from_iterencode_list(o, _current_indent_level)elifisinstance(o, dict):yield from_iterencode_dict(o, _current_indent_level)else:if markers is notNone:
markerid=id(o)if markerid inmarkers:raise ValueError("Circular reference detected")
markers[markerid]=o
o=_default(o)yield from_iterencode(o, _current_indent_level)if markers is notNone:delmarkers[markerid]return _iterencode
三、解决问题
再次回到开始的问题,我们需要重写json.JSONEncoder中的default函数,这个default函数就是上述提到的_default函数,在default中添加处理bytes类型,修改后代码如下。
import json

import numpy as np


class Encoder(json.JSONEncoder):
    """``JSONEncoder`` subclass that also serializes numpy arrays and bytes."""

    def default(self, obj):
        """Convert objects the base encoder cannot handle.

        ``np.ndarray`` becomes a (nested) list via ``tolist()`` and ``bytes``
        is decoded as UTF-8 text. Anything else is passed to the base class
        ``default``.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        elif isinstance(obj, bytes):
            # Decoding assumes the payload is valid UTF-8.
            return str(obj, encoding='utf-8')
        return json.JSONEncoder.default(self, obj)


def test_dumps():
    """Serialize the sample dict (including a ``bytes`` value) using ``Encoder``."""
    data = {"keys": "string", 1: [2, 3], "dict": {"a": "b"}, "key_bytes": b'123'}
    ans = json.dumps(data, cls=Encoder)
    print(ans)


if __name__ == "__main__":
    test_dumps()
再次运行,可以获得如下结果,成功解决问题
{"keys": "string", "1": [2, 3], "dict": {"a": "b"}, "key_bytes": "123"}