mirror of
https://github.com/gin-gonic/gin.git
synced 2026-06-06 20:18:19 +08:00
AsciiJSON escaped every non-ASCII rune with "\u%04x", which only yields a
valid escape for the Basic Multilingual Plane (U+0000-U+FFFF). For a code
point above U+FFFF such as U+1F600 it emitted five hex digits ("\u1f600").
A JSON parser reads \u as exactly four hex digits, so this decoded to "ὠ0"
instead of "😀".
Per RFC 8259, code points above U+FFFF must be encoded as a UTF-16
surrogate pair (two \uXXXX escapes). Detect r > 0xFFFF and emit the pair via
unicode/utf16.EncodeRune. ASCII and BMP output is unchanged.
Add a regression test asserting AsciiJSON output is ASCII-only and
round-trips back to the original value.
Fixes #4688
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
43 lines
1.5 KiB
Go
43 lines
1.5 KiB
Go
package render
|
|
|
|
import (
|
|
"encoding/json"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"unicode"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
// TestRenderAsciiJSONNonBMP is a regression test for AsciiJSON corrupting
|
|
// non-BMP Unicode characters (code points above U+FFFF, such as emoji).
|
|
//
|
|
// It asserts only AsciiJSON's two user-visible contracts: the output must be
|
|
// ASCII-only, and it must decode back to the original value. The exact escape
|
|
// sequence used (a UTF-16 surrogate pair, a raw rune, etc.) is an
|
|
// implementation detail and is intentionally not asserted.
|
|
func TestRenderAsciiJSONNonBMP(t *testing.T) {
|
|
const grinningFace = "😀" // U+1F600 GRINNING FACE, a non-BMP code point
|
|
|
|
w := httptest.NewRecorder()
|
|
require.NoError(t, (AsciiJSON{map[string]string{"msg": grinningFace}}).Render(w))
|
|
|
|
out := w.Body.String()
|
|
|
|
// Contract 1: AsciiJSON must emit ASCII-only output.
|
|
for i := 0; i < len(out); i++ {
|
|
require.LessOrEqualf(t, out[i], byte(unicode.MaxASCII),
|
|
"AsciiJSON must emit ASCII-only output, got non-ASCII byte at %d in %q", i, out)
|
|
}
|
|
|
|
// Contract 2 (the bug): the rendered output must round-trip back to the
|
|
// original value. The buggy output {"msg":"ὠ0"} is valid JSON but
|
|
// decodes to "ὠ0" instead of "😀".
|
|
var decoded map[string]string
|
|
require.NoErrorf(t, json.Unmarshal([]byte(out), &decoded),
|
|
"AsciiJSON output is not valid JSON: %q", out)
|
|
assert.Equalf(t, grinningFace, decoded["msg"],
|
|
"AsciiJSON corrupted a non-BMP character; rendered output was %q", out)
|
|
}
|