Python--编码转换

 1 # -*- coding:gbk -*-
 2 # 即使设置文件编码为gbk,下方定义的字符串s1依旧为unicode
 3 
 4 # 获取默认编码格式
 5 import sys
 6 print(sys.getdefaultencoding())
 7 # >>> utf-8
 8 
 9 # 编码转换
10 # --------------python 2----------------
11 # utf-8  -->   decode  -->  unicode
12 # unicode --> encode --> gbk
13 
14 s = "你好"
15 # 无decode方法,由于python3默认unicode, utf-8为原本的字符集,传给decode识别
16 s_unicode = s.decode("utf-8")
17 # gbk为目标字符集,指定给encode进行转换
18 s_gbk = s_unicode.encode("gbk")
19 
20 # 示例
21 s1 = "大家好"
22 s1_to_utf8 = s1.encode("utf-8")
23 print(s1_to_utf8) # >>> b'\xe5\xa4\xa7\xe5\xae\xb6\xe5\xa5\xbd'
24 s1_to_gbk = s1_to_utf8.decode("utf-8").encode("gbk")
25 s1_to_gb2312 = s1_to_utf8.decode("utf-8").encode("gb2312")
26 s1_to_gb2312_chinese = s1_to_gb2312.decode("gb2312")  
27 # decode转换为unicode后可以输出中文,所以证明默认为unicode
28 print(s1_to_gb2312_chinese) # >>> 大家好
29 print(s1_to_gb2312) # >>> b'\xb4\xf3\xbc\xd2\xba\xc3'
30 print(s1_to_gbk) # >>> b'\xb4\xf3\xbc\xd2\xba\xc3'
31 print(s1) # >>> 大家好

你可能感兴趣的:(Python--编码转换)