Golang 将结构体 struct 进行 Marshal 后,会将其中的 HTML 字符 <、>、& 等以 Unicode 转义形式(\u003c、\u003e、\u0026)输出,如下所示:第一行是 Marshal 后的输出,第二行是期望的原格式。有时候需要的是原格式的 JSON,但是 Golang 中没有类似 Python 的 decode 功能,需要使用者自己转换。
"\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e"
<span><font color=\"black\">hand-rolled </font></span>
本文主要解决两个问题:
1、Go JSON 的 Unicode 反转义,以原格式输出;
2、Go JSON 的格式化(带缩进)输出。
该方法是将 Marshal 后的 JSON 文本中被转义的字符替换回原有格式,主要采用 strconv.Quote、strconv.Unquote 和 strings.Replace 三个方法,具体使用请看源代码。
源代码
// Quote wraps s in double quotes and returns it as a Go string literal.
// Control characters and characters that IsPrint reports as non-printable
// are written with Go escape sequences (\t, \n, \xFF, \u0100).
func Quote(s string) string {
	const doubleQuote = '"'
	return quoteWith(s, doubleQuote, false, false)
}
// quoteWith quotes s with the given quote byte. It starts from an empty
// buffer with capacity 3*len(s)/2 and lets appendQuotedWith do the work,
// forwarding the ASCIIonly and graphicOnly flags unchanged.
func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
	scratch := make([]byte, 0, 3*len(s)/2)
	quoted := appendQuotedWith(scratch, s, quote, ASCIIonly, graphicOnly)
	return string(quoted)
}
// appendQuotedWith appends the quoted form of s to buf and returns the
// extended slice. The result is delimited by quote on both sides; bytes that
// do not decode as UTF-8 are written as \xNN, and every other rune is
// handed to appendEscapedRune together with the ASCIIonly and graphicOnly
// flags.
func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
	// Callers often pass large strings, so make room up front. When escapes
	// are needed this estimate is too small, but it still saves regrowth.
	if free := cap(buf) - len(buf); free < len(s) {
		grown := make([]byte, len(buf), len(buf)+1+len(s)+1)
		copy(grown, buf)
		buf = grown
	}

	buf = append(buf, quote)
	for len(s) > 0 {
		// Single-byte fast path; only decode when the lead byte is non-ASCII.
		r, width := rune(s[0]), 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRuneInString(s)
		}

		if width == 1 && r == utf8.RuneError {
			// Invalid UTF-8 byte: emit it as a two-digit hex escape.
			hi, lo := s[0]>>4, s[0]&0xF
			buf = append(buf, `\x`...)
			buf = append(buf, lowerhex[hi])
			buf = append(buf, lowerhex[lo])
		} else {
			buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
		}
		s = s[width:]
	}
	return append(buf, quote)
}
// Unquote treats s as a Go string literal delimited by single quotes,
// double quotes or backquotes and returns the value it denotes. A
// single-quoted s is a Go character literal, for which Unquote returns the
// corresponding one-character string. Input that is not a well-formed
// literal yields ErrSyntax (or the error reported by UnquoteChar).
func Unquote(s string) (string, error) {
	// A literal needs an opening and a matching closing delimiter.
	if len(s) < 2 || s[0] != s[len(s)-1] {
		return "", ErrSyntax
	}
	quote, body := s[0], s[1:len(s)-1]

	switch quote {
	case '`':
		// Raw string: no escapes, no embedded backquote; carriage returns
		// are dropped from the value.
		if contains(body, '`') {
			return "", ErrSyntax
		}
		if !contains(body, '\r') {
			return body, nil
		}
		// At least one \r will be removed, hence the -1.
		stripped := make([]byte, 0, len(body)-1)
		for i := 0; i < len(body); i++ {
			if ch := body[i]; ch != '\r' {
				stripped = append(stripped, ch)
			}
		}
		return string(stripped), nil
	case '"', '\'':
		// Interpreted literal; handled below.
	default:
		return "", ErrSyntax
	}

	if contains(body, '\n') {
		return "", ErrSyntax
	}

	// Fast path: with no backslash and no embedded delimiter the body is
	// already the value, provided it is valid, so no allocation is needed.
	needsDecoding := contains(body, '\\') || contains(body, quote)
	if !needsDecoding {
		if quote == '"' {
			if utf8.ValidString(body) {
				return body, nil
			}
		} else {
			r, size := utf8.DecodeRuneInString(body)
			if size == len(body) && !(r == utf8.RuneError && size == 1) {
				return body, nil
			}
		}
	}

	var scratch [utf8.UTFMax]byte
	out := make([]byte, 0, 3*len(body)/2) // Sized to make regrowth unlikely.
	for len(body) > 0 {
		c, multibyte, rest, err := UnquoteChar(body, quote)
		if err != nil {
			return "", err
		}
		body = rest

		if multibyte && c >= utf8.RuneSelf {
			w := utf8.EncodeRune(scratch[:], c)
			out = append(out, scratch[:w]...)
		} else {
			out = append(out, byte(c))
		}

		if quote == '\'' && len(body) != 0 {
			// A character literal holds exactly one character.
			return "", ErrSyntax
		}
	}
	return string(out), nil
}
反转义代码
// UnescapeUnicodeCharactersInJSON rewrites the \uXXXX escapes found in
// jsonRaw as the UTF-8 characters they stand for, so that for example
// "\u003c" becomes "<". The input is not modified; a new slice is returned.
//
// The output stays valid JSON whenever the input was valid JSON:
//   - escapes for '"', '\\' and control characters (below U+0020) are left
//     as they are, because those characters may not appear raw in a JSON
//     string;
//   - a \uXXXX that follows an escaped backslash ("\\u0041") is literal text
//     and is left untouched;
//   - a UTF-16 surrogate pair ("\ud83d\ude00") is decoded to one character,
//     while an unpaired surrogate is left escaped.
//
// It returns strconv.ErrSyntax when a \u is not followed by four hexadecimal
// digits.
func UnescapeUnicodeCharactersInJSON(jsonRaw []byte) ([]byte, error) {
	src := string(jsonRaw)
	if !strings.Contains(src, `\u`) {
		// No candidate escape anywhere; return an independent copy.
		return []byte(src), nil
	}

	var out strings.Builder
	out.Grow(len(src))
	for i := 0; i < len(src); {
		// Ordinary byte, or a lone backslash at the very end.
		if src[i] != '\\' || i+1 == len(src) {
			out.WriteByte(src[i])
			i++
			continue
		}
		// Any other two-byte escape (\\, \", \n, ...) is copied as a unit so
		// that the byte after an escaped backslash is never taken for the
		// start of a \u escape.
		if src[i+1] != 'u' {
			out.WriteString(src[i : i+2])
			i += 2
			continue
		}

		r, ok := hex4Rune(src[i+2:])
		if !ok {
			return nil, strconv.ErrSyntax
		}
		width := 6 // len(`\uXXXX`)

		// A high surrogate directly followed by a low surrogate escape
		// encodes a single code point above U+FFFF.
		if r >= 0xD800 && r <= 0xDBFF && strings.HasPrefix(src[i+6:], `\u`) {
			if lo, ok := hex4Rune(src[i+8:]); ok && lo >= 0xDC00 && lo <= 0xDFFF {
				r = 0x10000 + ((r - 0xD800) << 10) + (lo - 0xDC00)
				width = 12
			}
		}

		if mustStayEscaped(r) {
			out.WriteString(src[i : i+width])
		} else {
			out.WriteRune(r)
		}
		i += width
	}
	return []byte(out.String()), nil
}

// hex4Rune parses the first four bytes of s as a hexadecimal code unit.
func hex4Rune(s string) (rune, bool) {
	if len(s) < 4 {
		return 0, false
	}
	v, err := strconv.ParseUint(s[:4], 16, 32)
	if err != nil {
		return 0, false
	}
	return rune(v), true
}

// mustStayEscaped reports whether r cannot be written raw inside a JSON
// string (quote, backslash, control character) or cannot be encoded as
// UTF-8 at all (unpaired surrogate).
func mustStayEscaped(r rune) bool {
	switch {
	case r < 0x20, r == '"', r == '\\':
		return true
	case r >= 0xD800 && r <= 0xDFFF:
		return true
	}
	return false
}
测试
原有处理方式–转义输出
// @File: unescapeChar
// @Version: 1.0.0
// @Creator: JoeLang
// @Date: 2020/5/16 12:07
package main
import (
"encoding/json"
"fmt"
)
// Track is the sample value marshalled by this example program. It has a
// single string field.
type Track struct {
// XmlRequest is encoded under the JSON key "xmlRequest".
XmlRequest string `json:"xmlRequest"`
}
// main prints a Track before and after json.Marshal to show that Marshal
// escapes the < and > of the embedded markup as \u003c and \u003e.
func main() {
	// The markup is the point of the demo: without it nothing is escaped.
	message := &Track{XmlRequest: "<car><mirror>XML</mirror></car>"}
	fmt.Println("Before Marshal", message)

	messageJSON, err := json.Marshal(message)
	if err != nil {
		fmt.Println("marshal failed:", err)
		return
	}
	fmt.Println("After marshal", string(messageJSON))
}
//---------output---------------
Before Marshal &{<car><mirror>XML</mirror></car>}
After marshal {"xmlRequest":"\u003ccar\u003e\u003cmirror\u003eXML\u003c/mirror\u003e\u003c/car\u003e"}
非转义代码
// main marshals a Track that contains markup, then prints both the escaped
// output of json.Marshal and the result of running it through
// UnescapeUnicodeCharactersInJSON.
func main() {
	// The markup is the point of the demo: without it nothing is escaped.
	message := &Track{XmlRequest: "<car><mirror>XML</mirror></car>"}
	fmt.Println("Before Marshal", message)

	messageJSON, err := json.Marshal(message)
	if err != nil {
		fmt.Println("marshal failed:", err)
		return
	}

	unescapeJson, err := UnescapeUnicodeCharactersInJSON(messageJSON)
	if err != nil {
		fmt.Println("unescape failed:", err)
		return
	}

	fmt.Println("After marshal", string(messageJSON))
	fmt.Println("After marshal", string(unescapeJson))
}
//--------output---------
Before Marshal &{<car><mirror>XML</mirror></car>}
After marshal {"xmlRequest":"\u003ccar\u003e\u003cmirror\u003eXML\u003c/mirror\u003e\u003c/car\u003e"}
After marshal {"xmlRequest":"<car><mirror>XML</mirror></car>"}
Go encoding/json 包(stream.go)源代码
// An Encoder writes JSON values to an output stream.
type Encoder struct {
// w is the destination that Encode writes each encoded value to.
w io.Writer
// err holds the error from a failed write to w; once it is set, Encode
// returns it immediately without encoding anything.
err error
// escapeHTML is passed to the marshaller as encOpts.escapeHTML. NewEncoder
// sets it to true and SetEscapeHTML overrides it.
escapeHTML bool
// indentBuf is a scratch buffer for indented output. Encode allocates it
// the first time indentation is in effect and resets it before each use.
indentBuf *bytes.Buffer
// indentPrefix and indentValue are the prefix and indent arguments given
// to Indent. SetIndent stores them; Encode skips indentation when both
// are empty.
indentPrefix string
indentValue string
}
// NewEncoder creates an Encoder whose output goes to w. HTML escaping is
// switched on in the returned encoder.
func NewEncoder(w io.Writer) *Encoder {
	enc := new(Encoder)
	enc.w = w
	enc.escapeHTML = true
	return enc
}
// Encode writes the JSON encoding of v to the stream,
// followed by a newline character.
//
// See the documentation for Marshal for details about the
// conversion of Go values to JSON.
func (enc *Encoder) Encode(v interface{}) error {
if enc.err != nil {
return enc.err
}
e := newEncodeState()
err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}
if err != nil {
return err
}
// Terminate each value with a newline.
// This makes the output look a little nicer
// when debugging, and some kind of space
// is required if the encoded value was a number,
// so that the reader knows there aren't more
// digits coming.
e.WriteByte('\n')
b := e.Bytes()
if enc.indentPrefix != "" || enc.indentValue != "" {
if enc.indentBuf == nil {
enc.indentBuf = new(bytes.Buffer)
}
enc.indentBuf.Reset()
err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
if err != nil {
return err
}
b = enc.indentBuf.Bytes()
}
if _, err = enc.w.Write(b); err != nil {
enc.err = err
}
encodeStatePool.Put(e)
return err
}
// SetIndent makes the encoder format every value encoded from now on as the
// package-level function Indent(dst, src, prefix, indent) would.
// SetIndent("", "") turns indentation off again.
func (enc *Encoder) SetIndent(prefix, indent string) {
	enc.indentPrefix, enc.indentValue = prefix, indent
}
// SetEscapeHTML controls whether problematic HTML characters inside JSON
// quoted strings are escaped.
//
// By default &, < and > are written as \u0026, \u003c and \u003e, which
// avoids certain safety problems when JSON is embedded in HTML.
//
// Where the output is not destined for HTML and the escapes only hurt
// readability, call SetEscapeHTML(false) to switch them off.
func (enc *Encoder) SetEscapeHTML(on bool) {
	enc.escapeHTML = on
}
根据 encoding/json 包 stream.go 的源代码实现反转义功能。
该函数具有一般性:
// UnescapeJsonMarshal encodes jsonRaw as indented JSON with HTML escaping
// disabled, so &, < and > appear as themselves instead of as \u escapes.
// The encoding is produced by json.Encoder and therefore ends with a
// newline. The bytes written so far are returned together with any error
// reported by Encode.
func UnescapeJsonMarshal(jsonRaw interface{}) ([]byte, error) {
	var out bytes.Buffer

	enc := json.NewEncoder(&out)
	enc.SetEscapeHTML(false)
	// Indented output: no line prefix, one space per nesting level.
	enc.SetIndent("", " ")

	if err := enc.Encode(jsonRaw); err != nil {
		return out.Bytes(), err
	}
	return out.Bytes(), nil
}
测试
// @File: unescapeChar
// @Version: 1.0.0
// @Creator: JoeLang
// @Date: 2020/5/16 12:07
package main
import (
"bytes"
"encoding/json"
"fmt"
"strconv"
"strings"
)
// Track is a sample value with a single string field.
type Track struct {
// XmlRequest is encoded under the JSON key "xmlRequest".
XmlRequest string `json:"xmlRequest"`
}
// Word is one entry of a WordSlice. Each field is encoded under the JSON
// key given in its tag.
type Word struct {
// Index is encoded as "index".
Index int `json:"index"`
// Category is encoded as "category".
Category string `json:"category"`
// ScreenText is encoded as "screen_text".
ScreenText string `json:"screen_text"`
// Answers is the list of Result values, encoded as "answers".
Answers []Result `json:"answers"`
}
// Result is one element of Word.Answers: a piece of text and a boolean flag.
type Result struct {
// Content is encoded as "content".
Content string `json:"content"`
// Correct is encoded as "correct".
Correct bool `json:"correct"`
}
// WordSlice is the top-level value encoded by the example: a list of Word
// values wrapped in an object.
type WordSlice struct {
// Items is encoded as "items".
Items []Word `json:"items"`
}
// main builds a WordSlice of three identical entries whose ScreenText
// contains HTML markup, then prints it twice: once encoded by
// UnescapeJsonMarshal (markup kept as is) and once by json.MarshalIndent
// (markup escaped as \u003c / \u003e).
func main() {
	// The markup is what makes the two encodings differ.
	const screenText = "<span><font color=\"black\">hand-rolled </font></span>"

	answer := []Result{
		{Content: "手のひら", Correct: true},
		{Content: "毎~,~ごとに", Correct: false},
		{Content: "飛行機", Correct: false},
		{Content: "ゲル《モンゴル高原で使われる円形のテント式住居》", Correct: false},
	}
	item := []Word{
		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
	}
	word := WordSlice{Items: item}

	jsonRaw, err := UnescapeJsonMarshal(word)
	if err != nil {
		fmt.Println("UnescapeJsonMarshal failed:", err)
		return
	}
	fmt.Println("After Escaping", string(jsonRaw))

	// MarshalIndent is the indenting variant of Marshal; it keeps the
	// default HTML escaping.
	jsonRaw1, err := json.MarshalIndent(word, "", " ")
	if err != nil {
		fmt.Println("MarshalIndent failed:", err)
		return
	}
	fmt.Println("NO Escaping", string(jsonRaw1))
}
// UnescapeJsonMarshal encodes jsonRaw as indented JSON with HTML escaping
// disabled, so &, < and > appear as themselves instead of as \u escapes.
// The encoding is produced by json.Encoder and therefore ends with a
// newline. The bytes written so far are returned together with any error
// reported by Encode.
func UnescapeJsonMarshal(jsonRaw interface{}) ([]byte, error) {
	var out bytes.Buffer

	enc := json.NewEncoder(&out)
	enc.SetEscapeHTML(false)
	// Indented output: no line prefix, one space per nesting level.
	enc.SetIndent("", " ")

	if err := enc.Encode(jsonRaw); err != nil {
		return out.Bytes(), err
	}
	return out.Bytes(), nil
}
输出结果
After Escaping
{
"items": [
{
"index": 1,
"category": "read_word",
"screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
"answers": [
{
"content": "手のひら",
"correct": true
},
{
"content": "毎~,~ごとに",
"correct": false
},
{
"content": "飛行機",
"correct": false
},
{
"content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
"correct": false
}
]
},
{
"index": 1,
"category": "read_word",
"screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
"answers": [
{
"content": "手のひら",
"correct": true
},
{
"content": "毎~,~ごとに",
"correct": false
},
{
"content": "飛行機",
"correct": false
},
{
"content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
"correct": false
}
]
},
{
"index": 1,
"category": "read_word",
"screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
"answers": [
{
"content": "手のひら",
"correct": true
},
{
"content": "毎~,~ごとに",
"correct": false
},
{
"content": "飛行機",
"correct": false
},
{
"content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
"correct": false
}
]
}
]
}
NO Escaping
{
"items": [
{
"index": 1,
"category": "read_word",
"screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
"answers": [
{
"content": "手のひら",
"correct": true
},
{
"content": "毎~,~ごとに",
"correct": false
},
{
"content": "飛行機",
"correct": false
},
{
"content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
"correct": false
}
]
},
{
"index": 1,
"category": "read_word",
"screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
"answers": [
{
"content": "手のひら",
"correct": true
},
{
"content": "毎~,~ごとに",
"correct": false
},
{
"content": "飛行機",
"correct": false
},
{
"content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
"correct": false
}
]
},
{
"index": 1,
"category": "read_word",
"screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
"answers": [
{
"content": "手のひら",
"correct": true
},
{
"content": "毎~,~ごとに",
"correct": false
},
{
"content": "飛行機",
"correct": false
},
{
"content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
"correct": false
}
]
}
]
}