mirror of
https://github.com/gin-gonic/gin.git
synced 2026-06-06 03:08:11 +08:00
fix(render): correctly escape non-BMP Unicode in AsciiJSON
AsciiJSON escaped every non-ASCII rune with "\u%04x", which only yields a
valid escape for the Basic Multilingual Plane (U+0000-U+FFFF). For a code
point above U+FFFF such as U+1F600 it emitted five hex digits ("\u1f600").
A JSON parser reads \u as exactly four hex digits, so this decoded to "ὠ0"
instead of "😀".
Per RFC 8259, code points above U+FFFF must be encoded as a UTF-16
surrogate pair (two \uXXXX escapes). Detect r > 0xFFFF and emit the pair via
unicode/utf16.EncodeRune. ASCII and BMP output is unchanged.
Add a regression test asserting AsciiJSON output is ASCII-only and
round-trips back to the original value.
Fixes #4688
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
d75fcd4c9a
commit
25173ee381
42
render/ascii_nonbmp_test.go
Normal file
42
render/ascii_nonbmp_test.go
Normal file
@ -0,0 +1,42 @@
|
||||
package render
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"unicode"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestRenderAsciiJSONNonBMP is a regression test for AsciiJSON corrupting
|
||||
// non-BMP Unicode characters (code points above U+FFFF, such as emoji).
|
||||
//
|
||||
// It asserts only AsciiJSON's two user-visible contracts: the output must be
|
||||
// ASCII-only, and it must decode back to the original value. The exact escape
|
||||
// sequence used (a UTF-16 surrogate pair, a raw rune, etc.) is an
|
||||
// implementation detail and is intentionally not asserted.
|
||||
func TestRenderAsciiJSONNonBMP(t *testing.T) {
|
||||
const grinningFace = "😀" // U+1F600 GRINNING FACE, a non-BMP code point
|
||||
|
||||
w := httptest.NewRecorder()
|
||||
require.NoError(t, (AsciiJSON{map[string]string{"msg": grinningFace}}).Render(w))
|
||||
|
||||
out := w.Body.String()
|
||||
|
||||
// Contract 1: AsciiJSON must emit ASCII-only output.
|
||||
for i := 0; i < len(out); i++ {
|
||||
require.LessOrEqualf(t, out[i], byte(unicode.MaxASCII),
|
||||
"AsciiJSON must emit ASCII-only output, got non-ASCII byte at %d in %q", i, out)
|
||||
}
|
||||
|
||||
// Contract 2 (the bug): the rendered output must round-trip back to the
|
||||
// original value. The buggy output {"msg":"ὠ0"} is valid JSON but
|
||||
// decodes to "ὠ0" instead of "😀".
|
||||
var decoded map[string]string
|
||||
require.NoErrorf(t, json.Unmarshal([]byte(out), &decoded),
|
||||
"AsciiJSON output is not valid JSON: %q", out)
|
||||
assert.Equalf(t, grinningFace, decoded["msg"],
|
||||
"AsciiJSON corrupted a non-BMP character; rendered output was %q", out)
|
||||
}
|
||||
@ -10,6 +10,7 @@ import (
|
||||
"html/template"
|
||||
"net/http"
|
||||
"unicode"
|
||||
"unicode/utf16"
|
||||
|
||||
"github.com/gin-gonic/gin/codec/json"
|
||||
"github.com/gin-gonic/gin/internal/bytesconv"
|
||||
@ -160,11 +161,17 @@ func (r AsciiJSON) Render(w http.ResponseWriter) error {
|
||||
}
|
||||
|
||||
var buffer bytes.Buffer
|
||||
escapeBuf := make([]byte, 0, 6) // Preallocate 6 bytes for Unicode escape sequences
|
||||
escapeBuf := make([]byte, 0, 12) // Preallocate for a \uXXXX\uXXXX surrogate pair
|
||||
|
||||
for _, r := range bytesconv.BytesToString(ret) {
|
||||
if r > unicode.MaxASCII {
|
||||
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r) // Reuse escapeBuf
|
||||
if r > 0xFFFF {
|
||||
// Non-BMP code points must be escaped as a UTF-16 surrogate pair.
|
||||
r1, r2 := utf16.EncodeRune(r)
|
||||
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x\\u%04x", r1, r2)
|
||||
} else {
|
||||
escapeBuf = fmt.Appendf(escapeBuf[:0], "\\u%04x", r) // Reuse escapeBuf
|
||||
}
|
||||
buffer.Write(escapeBuf)
|
||||
} else {
|
||||
buffer.WriteByte(byte(r))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user