tiny-rdm/backend/utils/convert/unicode_json_convert.go

package convutil

import (
	"bytes"
	"strconv"
	"strings"
	strutil "tinyrdm/backend/utils/string"
	"unicode"
	"unicode/utf16"
	"unicode/utf8"
)

// UnicodeJsonConvert is a stateless (empty struct) converter. Its Decode
// method formats JSON text and rewrites string literals via
// UnquoteUnicodeJson; its Encode method minifies the text again.
type UnicodeJsonConvert struct{}

// Enable reports whether this converter can be used. It unconditionally
// returns true.
func (UnicodeJsonConvert) Enable() bool {
	return true
}

// Decode formats str for display when it looks like a JSON document.
//
// The input is trimmed of surrounding whitespace and is only processed when
// it is wrapped in "{...}" or "[...]". In that case it is passed through
// strutil.JSONBeautify with a two-space indent and then through
// UnquoteUnicodeJson. On success the rewritten text and true are returned;
// in every other case the original, untrimmed str is returned with false.
func (UnicodeJsonConvert) Decode(str string) (string, bool) {
	text := strings.TrimSpace(str)

	looksLikeObject := strings.HasPrefix(text, "{") && strings.HasSuffix(text, "}")
	looksLikeArray := strings.HasPrefix(text, "[") && strings.HasSuffix(text, "]")
	if !looksLikeObject && !looksLikeArray {
		return str, false
	}

	pretty := strutil.JSONBeautify(text, "  ")
	decoded, ok := UnquoteUnicodeJson([]byte(pretty))
	if !ok {
		return str, false
	}
	return string(decoded), true
}

// Encode returns str after it has been passed through strutil.JSONMinify.
// The boolean result is always true.
func (UnicodeJsonConvert) Encode(str string) (string, bool) {
	minified := strutil.JSONMinify(str)
	return minified, true
}

// UnquoteUnicodeJson rewrites every double-quoted string literal found in s so
// that escape sequences such as "\u4e2d" are shown as the characters they
// denote, while the result remains valid JSON. Bytes outside string literals
// are copied through unchanged.
//
// It returns (nil, false) when a string literal is not terminated or when
// unquoteBytes rejects its content (for example an unknown escape or a raw
// control character).
func UnquoteUnicodeJson(s []byte) ([]byte, bool) {
	var out bytes.Buffer
	out.Grow(len(s))

	for pos := 0; pos < len(s); {
		if s[pos] != '"' {
			out.WriteByte(s[pos])
			pos++
			continue
		}

		end := unicodeJsonStringEnd(s, pos)
		if end < 0 {
			// No closing quote before the end of the content.
			return nil, false
		}
		raw, ok := unquoteBytes(s[pos : end+1])
		if !ok {
			return nil, false
		}
		writeUnicodeJsonString(&out, raw)
		pos = end + 1
	}
	return out.Bytes(), true
}

// unicodeJsonStringEnd returns the index of the quote that closes the string
// literal opened at s[start], or -1 if there is none.
//
// Escapes are tracked by skipping the byte that follows every backslash, so a
// literal ending in an escaped backslash ("a\\") is terminated correctly;
// looking only at the byte before the quote would mistake that quote for an
// escaped one.
func unicodeJsonStringEnd(s []byte, start int) int {
	for i := start + 1; i < len(s); i++ {
		switch s[i] {
		case '\\':
			i++ // the escaped byte can never close the literal
		case '"':
			return i
		}
	}
	return -1
}

// writeUnicodeJsonString appends raw to dst as a JSON string literal.
//
// Printable runes (as defined by strconv.IsPrint) are written as-is so that
// non-ASCII text stays readable. Everything else is escaped using only the
// escapes JSON defines: the short forms \" \\ \b \f \n \r \t, and \uXXXX with
// a surrogate pair for runes above U+FFFF. strconv.Quote is not used because
// it may produce \a, \v, \xNN and \UXXXXXXXX, none of which are legal in JSON.
func writeUnicodeJsonString(dst *bytes.Buffer, raw []byte) {
	const hexDigits = "0123456789abcdef"
	writeU4 := func(v rune) {
		dst.WriteString(`\u`)
		dst.WriteByte(hexDigits[v>>12&0xf])
		dst.WriteByte(hexDigits[v>>8&0xf])
		dst.WriteByte(hexDigits[v>>4&0xf])
		dst.WriteByte(hexDigits[v&0xf])
	}

	dst.WriteByte('"')
	for i := 0; i < len(raw); {
		r, size := utf8.DecodeRune(raw[i:])
		switch {
		case r == utf8.RuneError && size == 1:
			// Malformed UTF-8 byte: emit the replacement character escaped.
			writeU4(unicode.ReplacementChar)
		case r == '"' || r == '\\':
			dst.WriteByte('\\')
			dst.WriteByte(byte(r))
		case r == '\b':
			dst.WriteString(`\b`)
		case r == '\f':
			dst.WriteString(`\f`)
		case r == '\n':
			dst.WriteString(`\n`)
		case r == '\r':
			dst.WriteString(`\r`)
		case r == '\t':
			dst.WriteString(`\t`)
		case strconv.IsPrint(r):
			dst.Write(raw[i : i+size])
		case r > 0xFFFF:
			hi, lo := utf16.EncodeRune(r)
			writeU4(hi)
			writeU4(lo)
		default:
			writeU4(r)
		}
		i += size
	}
	dst.WriteByte('"')
}

// getu4 decodes the four hexadecimal digits of a `\uXXXX` escape located at
// the very start of s and returns the resulting code unit.
//
// It returns -1 when s is shorter than six bytes, does not begin with `\u`,
// or contains a non-hexadecimal character among the four digits. Both upper
// and lower case hex digits are accepted.
func getu4(s []byte) rune {
	if len(s) < 6 {
		return -1
	}
	if s[0] != '\\' || s[1] != 'u' {
		return -1
	}

	var code rune
	for i := 2; i < 6; i++ {
		var digit byte
		switch ch := s[i]; {
		case ch >= '0' && ch <= '9':
			digit = ch - '0'
		case ch >= 'a' && ch <= 'f':
			digit = ch - 'a' + 10
		case ch >= 'A' && ch <= 'F':
			digit = ch - 'A' + 10
		default:
			return -1
		}
		// Shift in one nibble at a time, most significant first.
		code = code<<4 | rune(digit)
	}
	return code
}

// unquoteBytes converts a double-quoted JSON string literal, quotes included,
// into the bytes it represents.
//
// s must start and end with '"'. When the content between the quotes has no
// backslash, no quote, no control character and no malformed UTF-8, that
// sub-slice of s is returned directly without copying. Otherwise a new slice
// is built in which:
//   - the escapes \" \\ \/ \' \b \f \n \r \t are replaced by the byte they
//     name (\' is accepted here although it is not a standard JSON escape);
//   - \uXXXX escapes are decoded, combining a valid surrogate pair into one
//     rune and replacing an unpaired surrogate with U+FFFD;
//   - malformed UTF-8 is coerced to U+FFFD.
//
// ok is false, and t is nil, when s is not properly quoted, contains an
// unescaped quote or control character, or has an unknown or truncated escape.
func unquoteBytes(s []byte) (t []byte, ok bool) {
	n := len(s)
	if n < 2 || s[0] != '"' || s[n-1] != '"' {
		return nil, false
	}
	body := s[1 : n-1]

	// Measure the leading run that can be used verbatim.
	clean := 0
	for clean < len(body) {
		ch := body[clean]
		if ch == '\\' || ch == '"' || ch < ' ' {
			break
		}
		if ch < utf8.RuneSelf {
			clean++
			continue
		}
		dr, width := utf8.DecodeRune(body[clean:])
		if dr == utf8.RuneError && width == 1 {
			break
		}
		clean += width
	}
	if clean == len(body) {
		// Nothing to rewrite: hand back a view of the input.
		return body, true
	}

	out := make([]byte, 0, len(body)+2*utf8.UTFMax)
	out = append(out, body[:clean]...)

	var scratch [utf8.UTFMax]byte
	putRune := func(r rune) {
		out = append(out, scratch[:utf8.EncodeRune(scratch[:], r)]...)
	}

	for pos := clean; pos < len(body); {
		ch := body[pos]
		switch {
		case ch == '\\':
			if pos+1 >= len(body) {
				// A lone backslash at the end of the content.
				return nil, false
			}
			switch esc := body[pos+1]; esc {
			case '"', '\\', '/', '\'':
				out = append(out, esc)
				pos += 2
			case 'b':
				out = append(out, '\b')
				pos += 2
			case 'f':
				out = append(out, '\f')
				pos += 2
			case 'n':
				out = append(out, '\n')
				pos += 2
			case 'r':
				out = append(out, '\r')
				pos += 2
			case 't':
				out = append(out, '\t')
				pos += 2
			case 'u':
				// pos still points at the backslash, which is what getu4 expects.
				first := getu4(body[pos:])
				if first < 0 {
					return nil, false
				}
				pos += 6
				if utf16.IsSurrogate(first) {
					second := getu4(body[pos:])
					if combined := utf16.DecodeRune(first, second); combined != unicode.ReplacementChar {
						// A valid surrogate pair: consume the second escape too.
						pos += 6
						putRune(combined)
						break
					}
					// Unpaired surrogate: substitute the replacement rune.
					first = unicode.ReplacementChar
				}
				putRune(first)
			default:
				return nil, false
			}

		case ch == '"', ch < ' ':
			// Unescaped quotes and control characters are not allowed.
			return nil, false

		case ch < utf8.RuneSelf:
			out = append(out, ch)
			pos++

		default:
			// Re-encode so that malformed sequences become U+FFFD.
			dr, width := utf8.DecodeRune(body[pos:])
			pos += width
			putRune(dr)
		}
	}
	return out, true
}
feat: add "Unicode JSON" format to display JSON string contains unescaped unicode charactor #158 2024-02-21 15:07:39 +08:00			`package convutil`

			`import (`
			`"bytes"`
fix: keep key sorting under "Unicode JSON" format #158 2024-02-27 14:50:19 +08:00			`"strconv"`
feat: add "Unicode JSON" format to display JSON string contains unescaped unicode charactor #158 2024-02-21 15:07:39 +08:00			`"strings"`
perf: json format compatible with nonstandard value 2024-05-17 16:25:40 +08:00			`strutil "tinyrdm/backend/utils/string"`
fix: keep key sorting under "Unicode JSON" format #158 2024-02-27 11:54:14 +08:00			`"unicode"`
			`"unicode/utf16"`
			`"unicode/utf8"`
feat: add "Unicode JSON" format to display JSON string contains unescaped unicode charactor #158 2024-02-21 15:07:39 +08:00			`)`

			`type UnicodeJsonConvert struct{}`

			`func (UnicodeJsonConvert) Enable() bool {`
			`return true`
			`}`

			`func (UnicodeJsonConvert) Decode(str string) (string, bool) {`
			`trimedStr := strings.TrimSpace(str)`
fix: keep key sorting under "Unicode JSON" format #158 2024-02-27 11:54:14 +08:00			`if (strings.HasPrefix(trimedStr, "{") && strings.HasSuffix(trimedStr, "}")) \|\|`
			`(strings.HasPrefix(trimedStr, "[") && strings.HasSuffix(trimedStr, "]")) {`
perf: json format compatible with nonstandard value 2024-05-17 16:25:40 +08:00			`resultStr := strutil.JSONBeautify(trimedStr, " ")`
			`if quoteStr, ok := UnquoteUnicodeJson([]byte(resultStr)); ok {`
			`return string(quoteStr), true`
feat: add "Unicode JSON" format to display JSON string contains unescaped unicode charactor #158 2024-02-21 15:07:39 +08:00			`}`
			`}`
			`return str, false`
			`}`

			`func (UnicodeJsonConvert) Encode(str string) (string, bool) {`
perf: json format compatible with nonstandard value 2024-05-17 16:25:40 +08:00			`return strutil.JSONMinify(str), true`
feat: add "Unicode JSON" format to display JSON string contains unescaped unicode charactor #158 2024-02-21 15:07:39 +08:00			`}`
fix: keep key sorting under "Unicode JSON" format #158 2024-02-27 11:54:14 +08:00
			`func UnquoteUnicodeJson(s []byte) ([]byte, bool) {`
			`var unquoted bytes.Buffer`
			`r := 0`
			`ls := len(s)`
			`for r < ls {`
			`c := s[r]`
			`offset := 1`
			`if c == '"' {`
			`// find next '"'`
			`for ; r+offset < ls; offset++ {`
			`if s[r+offset] == '"' && s[r+offset-1] != '\\' {`
			`offset += 1`
			`if ub, ok := unquoteBytes(s[r : r+offset]); ok {`
fix: keep key sorting under "Unicode JSON" format #158 2024-02-27 14:50:19 +08:00			`unquoted.WriteString(strconv.Quote(string(ub)))`
fix: keep key sorting under "Unicode JSON" format #158 2024-02-27 11:54:14 +08:00			`} else {`
			`return nil, false`
			`}`
			`break`
			`}`
			`}`
			`// can not find close '"' until reach to the end of content`
			`if r+offset >= ls {`
			`return nil, false`
			`}`
			`} else {`
			`unquoted.WriteByte(c)`
			`}`
			`r += offset`
			`}`
			`return unquoted.Bytes(), true`
			`}`

			`func getu4(s []byte) rune {`
			`if len(s) < 6 \|\| s[0] != '\\' \|\| s[1] != 'u' {`
			`return -1`
			`}`
			`var r rune`
			`for _, c := range s[2:6] {`
			`switch {`
			`case '0' <= c && c <= '9':`
			`c = c - '0'`
			`case 'a' <= c && c <= 'f':`
			`c = c - 'a' + 10`
			`case 'A' <= c && c <= 'F':`
			`c = c - 'A' + 10`
			`default:`
			`return -1`
			`}`
			`r = r*16 + rune(c)`
			`}`
			`return r`
			`}`

			`func unquoteBytes(s []byte) (t []byte, ok bool) {`
			`if len(s) < 2 \|\| s[0] != '"' \|\| s[len(s)-1] != '"' {`
			`return`
			`}`
			`s = s[1 : len(s)-1]`

			`// Check for unusual characters. If there are none,`
			`// then no unquoting is needed, so return a slice of the`
			`// original bytes.`
			`r := 0`
			`for r < len(s) {`
			`c := s[r]`
			`if c == '\\' \|\| c == '"' \|\| c < ' ' {`
			`break`
			`}`
			`if c < utf8.RuneSelf {`
			`r++`
			`continue`
			`}`
			`rr, size := utf8.DecodeRune(s[r:])`
			`if rr == utf8.RuneError && size == 1 {`
			`break`
			`}`
			`r += size`
			`}`
			`if r == len(s) {`
			`return s, true`
			`}`

			`b := make([]byte, len(s)+2*utf8.UTFMax)`
			`w := copy(b, s[0:r])`
			`for r < len(s) {`
			`// Out of room? Can only happen if s is full of`
			`// malformed UTF-8 and we're replacing each`
			`// byte with RuneError.`
			`if w >= len(b)-2*utf8.UTFMax {`
			`nb := make([]byte, (len(b)+utf8.UTFMax)*2)`
			`copy(nb, b[0:w])`
			`b = nb`
			`}`
			`switch c := s[r]; {`
			`case c == '\\':`
			`r++`
			`if r >= len(s) {`
			`return`
			`}`
			`switch s[r] {`
			`default:`
			`return`
			`case '"', '\\', '/', '\'':`
			`b[w] = s[r]`
			`r++`
			`w++`
			`case 'b':`
			`b[w] = '\b'`
			`r++`
			`w++`
			`case 'f':`
			`b[w] = '\f'`
			`r++`
			`w++`
			`case 'n':`
			`b[w] = '\n'`
			`r++`
			`w++`
			`case 'r':`
			`b[w] = '\r'`
			`r++`
			`w++`
			`case 't':`
			`b[w] = '\t'`
			`r++`
			`w++`
			`case 'u':`
			`r--`
			`rr := getu4(s[r:])`
			`if rr < 0 {`
			`return`
			`}`
			`r += 6`
			`if utf16.IsSurrogate(rr) {`
			`rr1 := getu4(s[r:])`
			`if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {`
			`// A valid pair; consume.`
			`r += 6`
			`w += utf8.EncodeRune(b[w:], dec)`
			`break`
			`}`
			`// Invalid surrogate; fall back to replacement rune.`
			`rr = unicode.ReplacementChar`
			`}`
			`w += utf8.EncodeRune(b[w:], rr)`
			`}`

			`// Quote, control characters are invalid.`
			`case c == '"', c < ' ':`
			`return`

			`// ASCII`
			`case c < utf8.RuneSelf:`
			`b[w] = c`
			`r++`
			`w++`

			`// Coerce to well-formed UTF-8.`
			`default:`
			`rr, size := utf8.DecodeRune(s[r:])`
			`r += size`
			`w += utf8.EncodeRune(b[w:], rr)`
			`}`
			`}`
			`return b[0:w], true`
			`}`