fix: encode non-BMP characters as UTF-16 surrogate pairs in AsciiJSON

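The AsciiJSON Render method used fmt.Appendf(buf, "\\u%04x", r) for all
non-ASCII runes. For non-BMP characters (code points > 0xFFFF, e.g. emoji),
this produced incorrect output: escapes with five or more hex digits, such
as \u1f389. A JSON \u escape takes exactly four hex digits, so parsers read
that as \u1f38 followed by a literal "9", and the original character is lost.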

Fix: use utf16.Encode([]rune{r}) to convert non-BMP runes into UTF-16
surrogate pairs, producing valid JSON like \ud83c\udf89.

Fixes #4688

Signed-off-by: Md Mushfiqur Rahim <20mahin2020@gmail.com>
This commit is contained in:
Md Mushfiqur Rahim 2026-06-13 09:38:33 +00:00
parent d75fcd4c9a
commit 3ddf111092

View File

@ -10,6 +10,7 @@ import (
"html/template"
"net/http"
"unicode"
"unicode/utf16"
"github.com/gin-gonic/gin/codec/json"
"github.com/gin-gonic/gin/internal/bytesconv"
@ -160,11 +161,16 @@ func (r AsciiJSON) Render(w http.ResponseWriter) error {
}
var buffer bytes.Buffer
escapeBuf := make([]byte, 0, 6) // Preallocate 6 bytes for Unicode escape sequences
escapeBuf := make([]byte, 0, 12) // Preallocate for up to two \uXXXX sequences
for _, r := range bytesconv.BytesToString(ret) {
if r > unicode.MaxASCII {
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r) // Reuse escapeBuf
if r > 0xFFFF {
surrogates := utf16.Encode([]rune{r})
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x\\u%04x", surrogates[0], surrogates[1])
} else {
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r)
}
buffer.Write(escapeBuf)
} else {
buffer.WriteByte(byte(r))