fix(render): correctly escape non-BMP Unicode in AsciiJSON

AsciiJSON escaped every non-ASCII rune with "\u%04x", which only yields a
valid escape for the Basic Multilingual Plane (U+0000-U+FFFF). For a code
point above U+FFFF such as U+1F600 it emitted five hex digits ("\u1f600").
A JSON parser reads \u as exactly four hex digits, so this decoded to "ὠ0"
instead of "😀".

Per RFC 8259, code points above U+FFFF must be encoded as a UTF-16
surrogate pair (two \uXXXX escapes). Detect r > 0xFFFF and emit the pair via
unicode/utf16.EncodeRune. ASCII and BMP output is unchanged.

Add a regression test asserting AsciiJSON output is ASCII-only and
round-trips back to the original value.

Fixes #4688

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
Sarthak Srivastav 2026-06-03 18:54:55 +05:30
parent d75fcd4c9a
commit 25173ee381
2 changed files with 51 additions and 2 deletions

View File

@ -0,0 +1,42 @@
package render
import (
"encoding/json"
"net/http/httptest"
"testing"
"unicode"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestRenderAsciiJSONNonBMP is a regression test for AsciiJSON corrupting
// non-BMP Unicode characters (code points above U+FFFF, such as emoji).
//
// It asserts only AsciiJSON's two user-visible contracts: the output must be
// ASCII-only, and it must decode back to the original value. The exact escape
// sequence used (a UTF-16 surrogate pair, a raw rune, etc.) is an
// implementation detail and is intentionally not asserted.
func TestRenderAsciiJSONNonBMP(t *testing.T) {
const grinningFace = "😀" // U+1F600 GRINNING FACE, a non-BMP code point
w := httptest.NewRecorder()
require.NoError(t, (AsciiJSON{map[string]string{"msg": grinningFace}}).Render(w))
out := w.Body.String()
// Contract 1: AsciiJSON must emit ASCII-only output.
for i := 0; i < len(out); i++ {
require.LessOrEqualf(t, out[i], byte(unicode.MaxASCII),
"AsciiJSON must emit ASCII-only output, got non-ASCII byte at %d in %q", i, out)
}
// Contract 2 (the bug): the rendered output must round-trip back to the
// original value. The buggy output {"msg":"ὠ0"} is valid JSON but
// decodes to "ὠ0" instead of "😀".
var decoded map[string]string
require.NoErrorf(t, json.Unmarshal([]byte(out), &decoded),
"AsciiJSON output is not valid JSON: %q", out)
assert.Equalf(t, grinningFace, decoded["msg"],
"AsciiJSON corrupted a non-BMP character; rendered output was %q", out)
}

View File

@ -10,6 +10,7 @@ import (
"html/template"
"net/http"
"unicode"
"unicode/utf16"
"github.com/gin-gonic/gin/codec/json"
"github.com/gin-gonic/gin/internal/bytesconv"
@ -160,11 +161,17 @@ func (r AsciiJSON) Render(w http.ResponseWriter) error {
}
var buffer bytes.Buffer
escapeBuf := make([]byte, 0, 6) // Preallocate 6 bytes for Unicode escape sequences
escapeBuf := make([]byte, 0, 12) // Preallocate for a \uXXXX\uXXXX surrogate pair
for _, r := range bytesconv.BytesToString(ret) {
if r > unicode.MaxASCII {
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r) // Reuse escapeBuf
if r > 0xFFFF {
// Non-BMP code points must be escaped as a UTF-16 surrogate pair.
r1, r2 := utf16.EncodeRune(r)
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x\\u%04x", r1, r2)
} else {
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r) // Reuse escapeBuf
}
buffer.Write(escapeBuf)
} else {
buffer.WriteByte(byte(r))