GolangJson Marshal UN-Escape Unicode Characters/解决go的Marshal的转义和格式化输出

GoLang Escape characters for Json format

  • 前言
    • go-json 反转义(1)
    • go-json 反转义(2)
    • 参考连接

前言

GoLang 将结构体 struct 进行 Marshal 后,会将 HTML 中的 <、>、& 等字符以 Unicode 转义形式输出,如下所示:第一行是 Marshal 后的输出,第二行是原始内容。有时候需要的是原格式的 json,但是 Golang 中没有类似 Python decode 的现成功能,需要使用者自己转化。

"\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e"

<span><font color=\"black\">hand-rolled </font></span>

本文主要解决两个问题:
1、go json的反转义Unicode,以原格式输出;
2、go json 的格式化(缩进)输出。

go-json 反转义(1)

该方法是将 marshal 后的 json 文本中的转义序列替换回原有格式,主要采用 strconv.Quote、strconv.Unquote、strings.Replace 三个方法,具体使用看源代码

源代码

// Quote wraps s in double quotes and returns it as a Go string literal.
// Control characters and characters that IsPrint reports as non-printable
// are written with Go escape sequences (\t, \n, \xFF, \u0100).
func Quote(s string) string {
	const delim = '"'
	return quoteWith(s, delim, false, false)
}
// quoteWith quotes s with the given quote byte and returns the result as a
// string. ASCIIonly and graphicOnly are forwarded unchanged to
// appendQuotedWith, which does the actual escaping.
func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
	// Reserve 1.5x the input length up front as the initial capacity.
	scratch := make([]byte, 0, 3*len(s)/2)
	quoted := appendQuotedWith(scratch, s, quote, ASCIIonly, graphicOnly)
	return string(quoted)
}
// appendQuotedWith appends s to buf as a literal delimited by quote and
// returns the extended buffer. Each rune of s is passed to appendEscapedRune
// together with the ASCIIonly and graphicOnly flags; bytes that are not valid
// UTF-8 are written as a \x escape followed by two hex digits.
func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
	// Often called with big strings, so preallocate. If there's quoting,
	// this is conservative but still helps a lot.
	// The new capacity leaves room for s plus the opening and closing quote.
	if cap(buf)-len(buf) < len(s) {
		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
		copy(nBuf, buf)
		buf = nBuf
	}
	buf = append(buf, quote)
	// Consume s one rune at a time; width is the byte length of the rune
	// handled in the current iteration.
	for width := 0; len(s) > 0; s = s[width:] {
		// Bytes below utf8.RuneSelf are single-byte runes; only larger
		// values need a full UTF-8 decode.
		r := rune(s[0])
		width = 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRuneInString(s)
		}
		// A RuneError of width 1 means the byte is not valid UTF-8: emit the
		// raw byte as \x plus its high and low nibble looked up in lowerhex.
		if width == 1 && r == utf8.RuneError {
			buf = append(buf, `\x`...)
			buf = append(buf, lowerhex[s[0]>>4])
			buf = append(buf, lowerhex[s[0]&0xF])
			continue
		}
		buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
	}
	buf = append(buf, quote)
	return buf
}

// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes.  (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
//
// It returns ErrSyntax when s is shorter than two bytes, when its first and
// last bytes differ or are not a recognized quote character, when a
// backquoted literal contains a backquote, when a single- or double-quoted
// literal contains a newline, or when a single-quoted literal holds more
// than one character. Errors from UnquoteChar are returned unchanged.
func Unquote(s string) (string, error) {
	n := len(s)
	if n < 2 {
		return "", ErrSyntax
	}
	// The literal must start and end with the same quote byte.
	quote := s[0]
	if quote != s[n-1] {
		return "", ErrSyntax
	}
	// Strip the surrounding quotes; from here on s is the literal's body.
	s = s[1 : n-1]

	// Backquoted (raw) literal: no escape processing, only '\r' is dropped.
	if quote == '`' {
		if contains(s, '`') {
			return "", ErrSyntax
		}
		if contains(s, '\r') {
			// -1 because we know there is at least one \r to remove.
			buf := make([]byte, 0, len(s)-1)
			for i := 0; i < len(s); i++ {
				if s[i] != '\r' {
					buf = append(buf, s[i])
				}
			}
			return string(buf), nil
		}
		return s, nil
	}
	if quote != '"' && quote != '\'' {
		return "", ErrSyntax
	}
	// Interpreted literals may not contain a raw newline.
	if contains(s, '\n') {
		return "", ErrSyntax
	}

	// Is it trivial? Avoid allocation.
	// With no backslash and no embedded quote there is nothing to decode, so
	// the body can be returned as is once its UTF-8 form has been checked.
	if !contains(s, '\\') && !contains(s, quote) {
		switch quote {
		case '"':
			if utf8.ValidString(s) {
				return s, nil
			}
		case '\'':
			// A character literal must decode to exactly one valid rune
			// spanning the whole body.
			r, size := utf8.DecodeRuneInString(s)
			if size == len(s) && (r != utf8.RuneError || size != 1) {
				return s, nil
			}
		}
	}

	// General case: decode one (possibly escaped) character at a time.
	var runeTmp [utf8.UTFMax]byte
	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
	for len(s) > 0 {
		c, multibyte, ss, err := UnquoteChar(s, quote)
		if err != nil {
			return "", err
		}
		s = ss
		// Values below utf8.RuneSelf, and values UnquoteChar reports as not
		// multibyte, are appended as a single byte; everything else is
		// UTF-8 encoded.
		if c < utf8.RuneSelf || !multibyte {
			buf = append(buf, byte(c))
		} else {
			n := utf8.EncodeRune(runeTmp[:], c)
			buf = append(buf, runeTmp[:n]...)
		}
		if quote == '\'' && len(s) != 0 {
			// single-quoted must be single character
			return "", ErrSyntax
		}
	}
	return string(buf), nil
}

反转义代码

// UnescapeUnicodeCharactersInJSON returns a copy of jsonRaw in which \uXXXX
// escape sequences are replaced by the UTF-8 encoding of the character they
// denote, e.g. \u003c becomes <.
//
// The result stays valid JSON whenever the input is valid JSON:
//   - escapes for '"', '\\' and control characters below U+0020 are kept
//     escaped, because JSON forbids those characters unescaped in a string;
//   - a UTF-16 surrogate pair (\ud83d\ude00) is combined into one character,
//     while an unpaired surrogate is left escaped;
//   - every other two-byte escape (such as \\ or \n) is copied verbatim, so
//     an escaped backslash followed by the letter u is not mistaken for a
//     Unicode escape.
//
// It returns strconv.ErrSyntax if a \u is not followed by four hex digits.
func UnescapeUnicodeCharactersInJSON(jsonRaw []byte) ([]byte, error) {
	out := make([]byte, 0, len(jsonRaw))
	for i := 0; i < len(jsonRaw); {
		// Ordinary bytes, and a backslash that is the very last byte, are
		// copied unchanged.
		if jsonRaw[i] != '\\' || i+1 == len(jsonRaw) {
			out = append(out, jsonRaw[i])
			i++
			continue
		}
		// Consume non-\u escapes as a unit so that the second byte of `\\`
		// can never start a new escape.
		if jsonRaw[i+1] != 'u' {
			out = append(out, jsonRaw[i], jsonRaw[i+1])
			i += 2
			continue
		}

		r, ok := parseJSONHex4(jsonRaw[i+2:])
		if !ok {
			return nil, strconv.ErrSyntax
		}
		end := i + 6 // index just past this \uXXXX escape

		// A high surrogate directly followed by a low surrogate escape forms
		// a single code point above U+FFFF.
		if r >= 0xD800 && r <= 0xDBFF && end+1 < len(jsonRaw) && jsonRaw[end] == '\\' && jsonRaw[end+1] == 'u' {
			if lo, ok := parseJSONHex4(jsonRaw[end+2:]); ok && lo >= 0xDC00 && lo <= 0xDFFF {
				r = 0x10000 + ((r - 0xD800) << 10) + (lo - 0xDC00)
				end += 6
			}
		}

		switch {
		case r < 0x20, r == '"', r == '\\', r >= 0xD800 && r <= 0xDFFF:
			// Must stay escaped (or, for a lone surrogate, cannot be
			// expressed in UTF-8): keep the original escape text.
			out = append(out, jsonRaw[i:end]...)
		default:
			out = append(out, string(r)...)
		}
		i = end
	}
	return out, nil
}

// parseJSONHex4 decodes the first four bytes of b as a hexadecimal number.
// It reports false if b is shorter than four bytes or contains a non-hex byte.
func parseJSONHex4(b []byte) (rune, bool) {
	if len(b) < 4 {
		return 0, false
	}
	v, err := strconv.ParseUint(string(b[:4]), 16, 16)
	if err != nil {
		return 0, false
	}
	return rune(v), true
}

测试

原有处理方式–转义输出

// @File:    unescapeChar
// @Version: 1.0.0
// @Creator: JoeLang
// @Date:    2020/5/16 12:07

package main

import (
	"encoding/json"
	"fmt"
)

// Track is the sample payload that the example marshals to JSON.
type Track struct {
	// XmlRequest is encoded under the JSON key "xmlRequest".
	XmlRequest string `json:"xmlRequest"`
}

// main prints a Track holding an XML fragment before and after json.Marshal,
// showing that Marshal writes < and > as \u003c and \u003e.
func main() {
	message := new(Track)
	// The markup is part of the value; it is what the escaping acts on.
	message.XmlRequest = "<car><mirror>XML</mirror></car>"
	fmt.Println("Before Marshal", message)
	messageJSON, err := json.Marshal(message)
	if err != nil {
		fmt.Println("marshal Track:", err)
		return
	}
	fmt.Println("After marshal", string(messageJSON))
}
//---------output---------------
Before Marshal &{<car><mirror>XML</mirror></car>}
After marshal {"xmlRequest":"\u003ccar\u003e\u003cmirror\u003eXML\u003c/mirror\u003e\u003c/car\u003e"}

非转义代码

// main marshals a Track holding an XML fragment and prints the JSON twice:
// first as produced by json.Marshal, then after passing it through
// UnescapeUnicodeCharactersInJSON.
func main() {
	message := new(Track)
	// The markup is part of the value; it is what the escaping acts on.
	message.XmlRequest = "<car><mirror>XML</mirror></car>"
	fmt.Println("Before Marshal", message)
	messageJSON, err := json.Marshal(message)
	if err != nil {
		fmt.Println("marshal Track:", err)
		return
	}
	//-------------
	unescapeJson, err := UnescapeUnicodeCharactersInJSON(messageJSON)
	if err != nil {
		fmt.Println("unescape JSON:", err)
		return
	}
	//-------------
	fmt.Println("After marshal", string(messageJSON))
	fmt.Println("After marshal", string(unescapeJson))
}

//--------output---------
Before Marshal &{<car><mirror>XML</mirror></car>}
After marshal {"xmlRequest":"\u003ccar\u003e\u003cmirror\u003eXML\u003c/mirror\u003e\u003c/car\u003e"}
After marshal {"xmlRequest":"<car><mirror>XML</mirror></car>"}

go-json 反转义(2)

Go encoding/json 包 stream.go 源代码

// An Encoder writes JSON values to an output stream.
type Encoder struct {
	w          io.Writer // destination that Encode writes to
	err        error     // error from a failed write; returned by later Encode calls
	escapeHTML bool      // passed to the marshaller as encOpts.escapeHTML

	indentBuf    *bytes.Buffer // scratch buffer for indented output, allocated on first use by Encode
	indentPrefix string        // prefix argument given to Indent; set by SetIndent
	indentValue  string        // indent argument given to Indent; set by SetIndent
}

// NewEncoder creates an Encoder whose output goes to w.
// HTML escaping is switched on for the returned encoder.
func NewEncoder(w io.Writer) *Encoder {
	enc := new(Encoder)
	enc.w = w
	enc.escapeHTML = true
	return enc
}

// Encode writes the JSON encoding of v to the stream,
// followed by a newline character.
//
// See the documentation for Marshal for details about the
// conversion of Go values to JSON.
func (enc *Encoder) Encode(v interface{}) error {
	if enc.err != nil {
		return enc.err
	}
	e := newEncodeState()
	err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML}
	if err != nil {
		return err
	}

	// Terminate each value with a newline.
	// This makes the output look a little nicer
	// when debugging, and some kind of space
	// is required if the encoded value was a number,
	// so that the reader knows there aren't more
	// digits coming.
	e.WriteByte('\n')

	b := e.Bytes()
	if enc.indentPrefix != "" || enc.indentValue != "" {
		if enc.indentBuf == nil {
			enc.indentBuf = new(bytes.Buffer)
		}
		enc.indentBuf.Reset()
		err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
		if err != nil {
			return err
		}
		b = enc.indentBuf.Bytes()
	}
	if _, err = enc.w.Write(b); err != nil {
		enc.err = err
	}
	encodeStatePool.Put(e)
	return err
}

// SetIndent makes every later Encode call format its output as the
// package-level function Indent(dst, src, prefix, indent) would.
// Passing two empty strings, SetIndent("", ""), turns indentation off.
func (enc *Encoder) SetIndent(prefix, indent string) {
	enc.indentPrefix, enc.indentValue = prefix, indent
}

// SetEscapeHTML specifies whether problematic HTML characters
// should be escaped inside JSON quoted strings.
// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e
// to avoid certain safety problems that can arise when embedding JSON in HTML.
//
// In non-HTML settings where the escaping interferes with the readability
// of the output, SetEscapeHTML(false) disables this behavior.
func (enc *Encoder) SetEscapeHTML(on bool) {
	// Stored on the encoder; Encode reads it on every call.
	enc.escapeHTML = on
}

根据 encoding/json 的 stream.go 源代码实现反转义功能

该函数具有一般性

// UnescapeJsonMarshal encodes jsonRaw as JSON with HTML escaping turned off,
// so &, < and > appear literally instead of as \u0026, \u003c and \u003e.
// The output is indented by two spaces per level and, because it is produced
// by json.Encoder.Encode, ends with a newline.
// It returns the encoder's error together with whatever was written.
func UnescapeJsonMarshal(jsonRaw interface{}) ([]byte, error) {
	var out bytes.Buffer
	enc := json.NewEncoder(&out)
	// Pretty-print with a two-space indent and no line prefix.
	enc.SetIndent("", "  ")
	enc.SetEscapeHTML(false)
	encodeErr := enc.Encode(jsonRaw)
	return out.Bytes(), encodeErr
}

测试

// @File:    unescapeChar
// @Version: 1.0.0
// @Creator: JoeLang
// @Date:    2020/5/16 12:07

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"strconv"
	"strings"
)

// Track is a sample payload with a single string field.
type Track struct {
	// XmlRequest is encoded under the JSON key "xmlRequest".
	XmlRequest string `json:"xmlRequest"`
}

// Word is one entry of the sample data set; every field is encoded under
// the snake_case JSON key given in its tag.
type Word struct {
	Index           int      `json:"index"`
	Category        string   `json:"category"`
	// ScreenText is the field whose HTML markup the example uses to compare
	// escaped and unescaped output.
	ScreenText      string   `json:"screen_text"`
	// Answers lists the candidate answers attached to this word.
	Answers         []Result `json:"answers"`
}

// Result is a single candidate answer belonging to a Word.
type Result struct {
	// Content is the answer text.
	Content string `json:"content"`
	// Correct is the flag encoded under the JSON key "correct".
	Correct bool   `json:"correct"`
}

// WordSlice is the top-level document: a list of Word values encoded under
// the JSON key "items".
type WordSlice struct {
	Items []Word `json:"items"`
}


// main builds a WordSlice whose ScreenText contains HTML markup and prints it
// twice: once through UnescapeJsonMarshal (markup left as is) and once
// through json.MarshalIndent (markup written as \u003c / \u003e escapes).
func main() {
	answer := []Result{
		{
			Content: "手のひら",
			Correct: true,
		},
		{
			Content: "毎~,~ごとに",
			Correct: false,
		},
		{
			Content: "飛行機",
			Correct: false,
		},
		{
			Content: "ゲル《モンゴル高原で使われる円形のテント式住居》",
			Correct: false,
		},
	}

	// The markup is what the two encoders treat differently; without it both
	// outputs would be identical.
	const screenText = "<span><font color=\"black\">hand-rolled </font></span>"

	item := []Word{
		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
		{Index: 1, Category: "read_word", ScreenText: screenText, Answers: answer},
	}
	word := WordSlice{
		Items: item,
	}

	jsonRaw, err := UnescapeJsonMarshal(word)
	if err != nil {
		fmt.Println("encode without HTML escaping:", err)
		return
	}
	fmt.Println("After Escaping", string(jsonRaw))

	// MarshalIndent is the indenting variant of Marshal; it keeps the
	// default HTML escaping.
	jsonRaw1, err := json.MarshalIndent(word, "", "  ")
	if err != nil {
		fmt.Println("marshal with indent:", err)
		return
	}
	fmt.Println("NO Escaping", string(jsonRaw1))
}

// UnescapeJsonMarshal serializes jsonRaw to indented JSON (two spaces per
// level) with HTML escaping disabled, so &, < and > are emitted literally.
// The bytes come from json.Encoder.Encode and therefore end with a newline.
// The encoder's error is returned alongside the bytes written so far.
func UnescapeJsonMarshal(jsonRaw interface{}) ([]byte, error) {
	var sink bytes.Buffer
	enc := json.NewEncoder(&sink)
	// Indented, human-readable formatting.
	enc.SetIndent("", "  ")
	enc.SetEscapeHTML(false)
	if err := enc.Encode(jsonRaw); err != nil {
		return sink.Bytes(), err
	}
	return sink.Bytes(), nil
}

输出结果

After Escaping 
{
  "items": [
    {
      "index": 1,
      "category": "read_word",
      "screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
      "answers": [
        {
          "content": "手のひら",
          "correct": true
        },
        {
          "content": "毎~,~ごとに",
          "correct": false
        },
        {
          "content": "飛行機",
          "correct": false
        },
        {
          "content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
          "correct": false
        }
      ]
    },
    {
      "index": 1,
      "category": "read_word",
      "screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
      "answers": [
        {
          "content": "手のひら",
          "correct": true
        },
        {
          "content": "毎~,~ごとに",
          "correct": false
        },
        {
          "content": "飛行機",
          "correct": false
        },
        {
          "content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
          "correct": false
        }
      ]
    },
    {
      "index": 1,
      "category": "read_word",
      "screen_text": "<span><font color=\"black\">hand-rolled </font></span>",
      "answers": [
        {
          "content": "手のひら",
          "correct": true
        },
        {
          "content": "毎~,~ごとに",
          "correct": false
        },
        {
          "content": "飛行機",
          "correct": false
        },
        {
          "content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
          "correct": false
        }
      ]
    }
  ]
}

NO Escaping 
{
  "items": [
    {
      "index": 1,
      "category": "read_word",
      "screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
      "answers": [
        {
          "content": "手のひら",
          "correct": true
        },
        {
          "content": "毎~,~ごとに",
          "correct": false
        },
        {
          "content": "飛行機",
          "correct": false
        },
        {
          "content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
          "correct": false
        }
      ]
    },
    {
      "index": 1,
      "category": "read_word",
      "screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
      "answers": [
        {
          "content": "手のひら",
          "correct": true
        },
        {
          "content": "毎~,~ごとに",
          "correct": false
        },
        {
          "content": "飛行機",
          "correct": false
        },
        {
          "content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
          "correct": false
        }
      ]
    },
    {
      "index": 1,
      "category": "read_word",
      "screen_text": "\u003cspan\u003e\u003cfont color=\"black\"\u003ehand-rolled \u003c/font\u003e\u003c/span\u003e",
      "answers": [
        {
          "content": "手のひら",
          "correct": true
        },
        {
          "content": "毎~,~ごとに",
          "correct": false
        },
        {
          "content": "飛行機",
          "correct": false
        },
        {
          "content": "ゲル《モンゴル高原で使われる円形のテント式住居》",
          "correct": false
        }
      ]
    }
  ]
}

参考连接

  1. https://play.golang.org/p/bdqv3TUGr3
  2. https://play.golang.org/p/pUsrzrrcDG-

你可能感兴趣的:(GoLang)