From 25173ee381384076f749995ee951560e8bc90a97 Mon Sep 17 00:00:00 2001 From: Sarthak Srivastav Date: Wed, 3 Jun 2026 18:54:55 +0530 Subject: [PATCH] fix(render): correctly escape non-BMP Unicode in AsciiJSON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AsciiJSON escaped every non-ASCII rune with "\u%04x", which only yields a valid escape for the Basic Multilingual Plane (U+0000-U+FFFF). For a code point above U+FFFF such as U+1F600 it emitted five hex digits ("\u1f600"). A JSON parser reads \u as exactly four hex digits, so this decoded to "ὠ0" instead of "😀". Per RFC 8259, code points above U+FFFF must be encoded as a UTF-16 surrogate pair (two \uXXXX escapes). Detect r > 0xFFFF and emit the pair via unicode/utf16.EncodeRune. ASCII and BMP output is unchanged. Add a regression test asserting AsciiJSON output is ASCII-only and round-trips back to the original value. Fixes #4688 Co-Authored-By: Claude Opus 4.8 --- render/ascii_nonbmp_test.go | 42 +++++++++++++++++++++++++++++++++++++ render/json.go | 11 ++++++++-- 2 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 render/ascii_nonbmp_test.go diff --git a/render/ascii_nonbmp_test.go b/render/ascii_nonbmp_test.go new file mode 100644 index 00000000..2a84e6e6 --- /dev/null +++ b/render/ascii_nonbmp_test.go @@ -0,0 +1,42 @@ +package render + +import ( + "encoding/json" + "net/http/httptest" + "testing" + "unicode" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestRenderAsciiJSONNonBMP is a regression test for AsciiJSON corrupting +// non-BMP Unicode characters (code points above U+FFFF, such as emoji). +// +// It asserts only AsciiJSON's two user-visible contracts: the output must be +// ASCII-only, and it must decode back to the original value. The exact escape +// sequence used (a UTF-16 surrogate pair, a raw rune, etc.) is an +// implementation detail and is intentionally not asserted. 
+func TestRenderAsciiJSONNonBMP(t *testing.T) { + const grinningFace = "😀" // U+1F600 GRINNING FACE, a non-BMP code point + + w := httptest.NewRecorder() + require.NoError(t, (AsciiJSON{map[string]string{"msg": grinningFace}}).Render(w)) + + out := w.Body.String() + + // Contract 1: AsciiJSON must emit ASCII-only output. + for i := 0; i < len(out); i++ { + require.LessOrEqualf(t, out[i], byte(unicode.MaxASCII), + "AsciiJSON must emit ASCII-only output, got non-ASCII byte at %d in %q", i, out) + } + + // Contract 2 (the bug): the rendered output must round-trip back to the + // original value. The buggy output {"msg":"\u1f600"} is valid JSON but + // decodes to "ὠ0" instead of "😀". + var decoded map[string]string + require.NoErrorf(t, json.Unmarshal([]byte(out), &decoded), + "AsciiJSON output is not valid JSON: %q", out) + assert.Equalf(t, grinningFace, decoded["msg"], + "AsciiJSON corrupted a non-BMP character; rendered output was %q", out) +} diff --git a/render/json.go b/render/json.go index 2f98676c..4af1bc07 100644 --- a/render/json.go +++ b/render/json.go @@ -10,6 +10,7 @@ import ( "html/template" "net/http" "unicode" + "unicode/utf16" "github.com/gin-gonic/gin/codec/json" "github.com/gin-gonic/gin/internal/bytesconv" @@ -160,11 +161,17 @@ func (r AsciiJSON) Render(w http.ResponseWriter) error { } var buffer bytes.Buffer - escapeBuf := make([]byte, 0, 6) // Preallocate 6 bytes for Unicode escape sequences + escapeBuf := make([]byte, 0, 12) // Preallocate for a \uXXXX\uXXXX surrogate pair for _, r := range bytesconv.BytesToString(ret) { if r > unicode.MaxASCII { - escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r) // Reuse escapeBuf + if r > 0xFFFF { + // Non-BMP code points must be escaped as a UTF-16 surrogate pair. + r1, r2 := utf16.EncodeRune(r) + escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x\\u%04x", r1, r2) + } else { + escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r) // Reuse escapeBuf + } buffer.Write(escapeBuf) } else { buffer.WriteByte(byte(r))