From 3a799b7b4adc3e77c43a9b902809799d10ad1844 Mon Sep 17 00:00:00 2001 From: Lykin <137850705+tiny-craft@users.noreply.github.com> Date: Tue, 27 Feb 2024 11:54:14 +0800 Subject: [PATCH] fix: keep key sorting under "Unicode JSON" format #158 --- backend/utils/convert/unicode_json_convert.go | 192 ++++++++++++++++-- 1 file changed, 178 insertions(+), 14 deletions(-) diff --git a/backend/utils/convert/unicode_json_convert.go b/backend/utils/convert/unicode_json_convert.go index 2ebc313..97293c9 100644 --- a/backend/utils/convert/unicode_json_convert.go +++ b/backend/utils/convert/unicode_json_convert.go @@ -4,6 +4,9 @@ import ( "bytes" "encoding/json" "strings" + "unicode" + "unicode/utf16" + "unicode/utf8" ) type UnicodeJsonConvert struct{} @@ -14,20 +17,12 @@ func (UnicodeJsonConvert) Enable() bool { func (UnicodeJsonConvert) Decode(str string) (string, bool) { trimedStr := strings.TrimSpace(str) - if strings.HasPrefix(trimedStr, "{") && strings.HasSuffix(trimedStr, "}") { - var obj map[string]any - if err := json.Unmarshal([]byte(trimedStr), &obj); err == nil { - var out []byte - if out, err = json.MarshalIndent(obj, "", " "); err == nil { - return string(out), true - } - } - } else if strings.HasPrefix(trimedStr, "[") && strings.HasSuffix(trimedStr, "]") { - var arr []any - if err := json.Unmarshal([]byte(trimedStr), &arr); err == nil { - var out []byte - if out, err = json.MarshalIndent(arr, "", " "); err == nil { - return string(out), true + if (strings.HasPrefix(trimedStr, "{") && strings.HasSuffix(trimedStr, "}")) || + (strings.HasPrefix(trimedStr, "[") && strings.HasSuffix(trimedStr, "]")) { + var out bytes.Buffer + if err := json.Indent(&out, []byte(trimedStr), "", " "); err == nil { + if quoteStr, ok := UnquoteUnicodeJson([]byte(trimedStr)); ok { + return string(quoteStr), true } } } @@ -41,3 +36,172 @@ func (UnicodeJsonConvert) Encode(str string) (string, bool) { } return dst.String(), true } + +func UnquoteUnicodeJson(s []byte) ([]byte, bool) { + var unquoted bytes.Buffer + r := 0 + ls := len(s) + for r < ls { + c := s[r] + offset := 1 + if c == '"' { + // find next '"' + for ; r+offset < ls; offset++ { + if s[r+offset] == '"' && s[r+offset-1] != '\\' { + offset += 1 + if ub, ok := unquoteBytes(s[r : r+offset]); ok { + unquoted.WriteByte('"') + unquoted.Write(ub) + unquoted.WriteByte('"') + } else { + return nil, false + } + break + } + } + // can not find close '"' until reach to the end of content + if r+offset >= ls { + return nil, false + } + } else { + unquoted.WriteByte(c) + } + r += offset + } + return unquoted.Bytes(), true +} + +func getu4(s []byte) rune { + if len(s) < 6 || s[0] != '\\' || s[1] != 'u' { + return -1 + } + var r rune + for _, c := range s[2:6] { + switch { + case '0' <= c && c <= '9': + c = c - '0' + case 'a' <= c && c <= 'f': + c = c - 'a' + 10 + case 'A' <= c && c <= 'F': + c = c - 'A' + 10 + default: + return -1 + } + r = r*16 + rune(c) + } + return r +} + +func unquoteBytes(s []byte) (t []byte, ok bool) { + if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' { + return + } + s = s[1 : len(s)-1] + + // Check for unusual characters. If there are none, + // then no unquoting is needed, so return a slice of the + // original bytes. + r := 0 + for r < len(s) { + c := s[r] + if c == '\\' || c == '"' || c < ' ' { + break + } + if c < utf8.RuneSelf { + r++ + continue + } + rr, size := utf8.DecodeRune(s[r:]) + if rr == utf8.RuneError && size == 1 { + break + } + r += size + } + if r == len(s) { + return s, true + } + + b := make([]byte, len(s)+2*utf8.UTFMax) + w := copy(b, s[0:r]) + for r < len(s) { + // Out of room? Can only happen if s is full of + // malformed UTF-8 and we're replacing each + // byte with RuneError. + if w >= len(b)-2*utf8.UTFMax { + nb := make([]byte, (len(b)+utf8.UTFMax)*2) + copy(nb, b[0:w]) + b = nb + } + switch c := s[r]; { + case c == '\\': + r++ + if r >= len(s) { + return + } + switch s[r] { + default: + return + case '"', '\\', '/', '\'': + b[w] = s[r] + r++ + w++ + case 'b': + b[w] = '\b' + r++ + w++ + case 'f': + b[w] = '\f' + r++ + w++ + case 'n': + b[w] = '\n' + r++ + w++ + case 'r': + b[w] = '\r' + r++ + w++ + case 't': + b[w] = '\t' + r++ + w++ + case 'u': + r-- + rr := getu4(s[r:]) + if rr < 0 { + return + } + r += 6 + if utf16.IsSurrogate(rr) { + rr1 := getu4(s[r:]) + if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar { + // A valid pair; consume. + r += 6 + w += utf8.EncodeRune(b[w:], dec) + break + } + // Invalid surrogate; fall back to replacement rune. + rr = unicode.ReplacementChar + } + w += utf8.EncodeRune(b[w:], rr) + } + + // Quote, control characters are invalid. + case c == '"', c < ' ': + return + + // ASCII + case c < utf8.RuneSelf: + b[w] = c + r++ + w++ + + // Coerce to well-formed UTF-8. + default: + rr, size := utf8.DecodeRune(s[r:]) + r += size + w += utf8.EncodeRune(b[w:], rr) + } + } + return b[0:w], true +}