Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 121 additions & 33 deletions datamodel/spec_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import (
"fmt"
"strings"
"time"
"unicode/utf16"
"unicode/utf8"

"github.com/pb33f/libopenapi/utils"
"go.yaml.in/yaml/v4"
Expand Down Expand Up @@ -97,12 +99,11 @@ func extractSpecInfoInternal(spec []byte, bypass bool, skipJSON bool) (*SpecInfo

specInfo.NumLines = bytes.Count(spec, []byte{'\n'}) + 1

// Pre-process JSON to handle \/ escape sequences that YAML parser doesn't recognize.
// JSON (RFC 8259) allows \/ as an optional escape for forward slash, but YAML does not.
// See: https://github.com/pb33f/libopenapi/issues/479
// Pre-process JSON escapes that YAML parsers do not accept even though
// they are valid JSON, while preserving the existing YAML-node parse path.
parseBytes := spec
if specInfo.SpecFileType == JSONFileType {
parseBytes = unescapeJSONSlashes(spec)
parseBytes = normalizeJSONForYAMLParser(spec)
}

err := yaml.Unmarshal(parseBytes, &parsedSpec)
Expand Down Expand Up @@ -324,39 +325,126 @@ func parseVersionTypeData(d interface{}) (string, int, error) {
return string(r), int(r[0]) - '0', nil
}

// unescapeJSONSlashes replaces the optional \/ escape sequence in JSON with /
// JSON (RFC 8259) allows \/ as an optional escape for forward slash, but YAML
// parsers (including go.yaml.in/yaml/v4) do not recognize it.
// This handles escaped backslashes correctly: \\/ becomes \/ not //
// Returns the original slice if no transformation is needed (zero allocation).
func unescapeJSONSlashes(jsonBytes []byte) []byte {
// fast path: check if transformation is needed
if !bytes.Contains(jsonBytes, []byte(`\/`)) {
// normalizeJSONForYAMLParser rewrites the small set of JSON escapes accepted by
// RFC 8259 but rejected by go.yaml.in/yaml/v4. It returns the original slice
// without allocation unless a rewrite is required.
func normalizeJSONForYAMLParser(jsonBytes []byte) []byte {
if bytes.IndexByte(jsonBytes, '\\') < 0 {
return jsonBytes
}

result := make([]byte, 0, len(jsonBytes))
i := 0
for i < len(jsonBytes) {
if jsonBytes[i] == '\\' && i+1 < len(jsonBytes) {
switch jsonBytes[i+1] {
case '/':
// \/ -> / (json optional escape for solidus)
result = append(result, '/')
i += 2
case '\\':
// preserve escaped backslash to prevent \\/ becoming //
result = append(result, '\\', '\\')
i += 2
default:
// preserve other escape sequences (\n, \t, \", etc.)
result = append(result, jsonBytes[i])
i++
}
} else {
result = append(result, jsonBytes[i])
i++
var result []byte
var runeBytes [utf8.UTFMax]byte
last := 0
scan := 0

for scan < len(jsonBytes) {
rel := bytes.IndexByte(jsonBytes[scan:], '\\')
if rel < 0 {
break
}

escape := scan + rel
replacement, consumed, ok := jsonEscapeReplacement(jsonBytes, escape, &runeBytes)
if !ok {
scan = nextJSONEscapeScanOffset(jsonBytes, escape)
continue
}

if result == nil {
result = make([]byte, 0, len(jsonBytes))
}
result = append(result, jsonBytes[last:escape]...)
result = append(result, replacement...)
scan = escape + consumed
last = scan
}

if result == nil {
return jsonBytes
}

result = append(result, jsonBytes[last:]...)
return result
}

// jsonEscapeReplacement inspects the backslash escape that begins at
// jsonBytes[escape] and reports whether it has to be rewritten.
//
// Two escapes are rewritten:
//   - `\/` becomes a bare `/`;
//   - a `\uXXXX\uXXXX` pair made of a UTF-16 high surrogate followed by a low
//     surrogate becomes the UTF-8 encoding of the rune the pair decodes to.
//
// On a rewrite it returns the replacement bytes, the number of input bytes the
// escape occupied, and true. The replacement is a slice of the caller-supplied
// scratch array runeBytes, so it is overwritten by the next call that uses the
// same array. For any other escape, or when the input is too short or the hex
// digits are malformed, it returns nil, 0, false.
func jsonEscapeReplacement(jsonBytes []byte, escape int, runeBytes *[utf8.UTFMax]byte) ([]byte, int, bool) {
	const (
		slashEscapeLen   = 2                    // `\/`
		unicodeEscapeLen = 6                    // `\uXXXX`
		surrogatePairLen = 2 * unicodeEscapeLen // `\uXXXX\uXXXX`
	)

	// A backslash that ends the input has nothing to escape.
	if escape+1 >= len(jsonBytes) {
		return nil, 0, false
	}

	kind := jsonBytes[escape+1]
	if kind == '/' {
		runeBytes[0] = '/'
		return runeBytes[:1], slashEscapeLen, true
	}
	if kind != 'u' {
		return nil, 0, false
	}

	// Only complete surrogate pairs are rewritten, so both escapes must fit.
	if escape+surrogatePairLen > len(jsonBytes) {
		return nil, 0, false
	}

	hi, hiOK := decodeJSONUnicodeEscape(jsonBytes[escape+2 : escape+unicodeEscapeLen])
	if !hiOK || !isHighSurrogate(hi) {
		return nil, 0, false
	}

	second := escape + unicodeEscapeLen
	if jsonBytes[second] != '\\' || jsonBytes[second+1] != 'u' {
		return nil, 0, false
	}

	lo, loOK := decodeJSONUnicodeEscape(jsonBytes[second+2 : second+unicodeEscapeLen])
	if !loOK || !isLowSurrogate(lo) {
		return nil, 0, false
	}

	size := utf8.EncodeRune(runeBytes[:], utf16.DecodeRune(rune(hi), rune(lo)))
	return runeBytes[:size], surrogatePairLen, true
}

// nextJSONEscapeScanOffset returns the index at which scanning should resume
// after the backslash at jsonBytes[escape]. The backslash and the byte that
// follows it are skipped together; a backslash that is the last byte of the
// input is skipped on its own.
func nextJSONEscapeScanOffset(jsonBytes []byte, escape int) int {
	skip := 2
	if escape+1 >= len(jsonBytes) {
		skip = 1
	}
	return escape + skip
}

// decodeJSONUnicodeEscape parses hexBytes as exactly four hexadecimal digits
// (either case) and returns the 16-bit value they spell, most significant
// digit first. It returns 0, false when hexBytes is not four bytes long or
// contains a byte that is not a hex digit.
func decodeJSONUnicodeEscape(hexBytes []byte) (uint16, bool) {
	const digits = 4
	if len(hexBytes) != digits {
		return 0, false
	}

	var unit uint16
	for i := 0; i < digits; i++ {
		nibble, valid := jsonHexValue(hexBytes[i])
		if !valid {
			return 0, false
		}
		// Make room for the next nibble, then add it in.
		unit = unit*16 + uint16(nibble)
	}
	return unit, true
}

// jsonHexValue converts a single ASCII hexadecimal digit (0-9, a-f, A-F) to
// its numeric value in the range 0..15. The boolean is false, and the value 0,
// for any other byte.
func jsonHexValue(b byte) (byte, bool) {
	if '0' <= b && b <= '9' {
		return b - '0', true
	}
	if 'a' <= b && b <= 'f' {
		return 10 + (b - 'a'), true
	}
	if 'A' <= b && b <= 'F' {
		return 10 + (b - 'A'), true
	}
	return 0, false
}

// isHighSurrogate reports whether value lies in 0xD800..0xDBFF, the UTF-16
// high (leading) surrogate range.
func isHighSurrogate(value uint16) bool {
	// Every value in 0xD800..0xDBFF, and no other, has 110110 as its top six bits.
	return value&0xFC00 == 0xD800
}

// isLowSurrogate reports whether value lies in 0xDC00..0xDFFF, the UTF-16
// low (trailing) surrogate range.
func isLowSurrogate(value uint16) bool {
	// Every value in 0xDC00..0xDFFF, and no other, has 110111 as its top six bits.
	return value&0xFC00 == 0xDC00
}
147 changes: 137 additions & 10 deletions datamodel/spec_info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -454,9 +454,12 @@ $self: something`
assert.Equal(t, "something", r.Self)
}

// TestUnescapeJSONSlashes tests the unescapeJSONSlashes helper function
// This addresses issue #479 where JSON files with \/ escape sequences fail to parse
func TestUnescapeJSONSlashes(t *testing.T) {
// TestNormalizeJSONForYAMLParser tests JSON escapes that are valid JSON but
// rejected by the YAML parser used for YAML-node construction.
func TestNormalizeJSONForYAMLParser(t *testing.T) {
thumbsUp := string(rune(0x1F44D))
rocket := string(rune(0x1F680))

tests := []struct {
name string
input string
Expand All @@ -475,22 +478,61 @@ func TestUnescapeJSONSlashes(t *testing.T) {
{"other escapes preserved", `\n\t\/`, `\n\t/`},
{"multiple escaped slashes", `\/one\/two\/three`, `/one/two/three`},
{"mixed content", `{"path":"\/test","url":"https:\/\/example.com"}`, `{"path":"/test","url":"https://example.com"}`},
{"valid surrogate pair", `\ud83d\udc4d`, thumbsUp},
{"valid uppercase surrogate pair", `\uD83D\uDC4D`, thumbsUp},
{"multiple surrogate pairs", `\ud83d\udc4d \ud83d\ude80`, thumbsUp + " " + rocket},
{"surrogate pair with escaped slash", `https:\/\/example.com\/\ud83d\udc4d`, `https://example.com/` + thumbsUp},
{"double escaped surrogate pair", `\\ud83d\\udc4d`, `\\ud83d\\udc4d`},
{"trailing backslash", `\`, `\`},
{"lone high surrogate", `\ud83d`, `\ud83d`},
{"high surrogate without low escape", `\ud83dxxxxxx`, `\ud83dxxxxxx`},
{"high surrogate followed by non-low surrogate", `\ud83d\u0041`, `\ud83d\u0041`},
{"lone low surrogate", `\udc4d`, `\udc4d`},
{"invalid high surrogate hex", `\ud83x\udc4d`, `\ud83x\udc4d`},
{"invalid low surrogate hex", `\ud83d\udc4x`, `\ud83d\udc4x`},
{"truncated unicode escape", `\u12`, `\u12`},
{"non surrogate unicode escape", `\u003c`, `\u003c`},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := unescapeJSONSlashes([]byte(tt.input))
result := normalizeJSONForYAMLParser([]byte(tt.input))
assert.Equal(t, tt.expected, string(result))
})
}
}

// TestUnescapeJSONSlashes_NoAllocation tests that the fast path returns original slice
func TestUnescapeJSONSlashes_NoAllocation(t *testing.T) {
input := []byte(`{"path":"/test"}`)
result := unescapeJSONSlashes(input)
// Should return same slice when no \/ present
assert.Equal(t, &input[0], &result[0], "Should return original slice when no \\/ present")
// TestDecodeJSONUnicodeEscape checks that four hex digits decode to the
// expected 16-bit value and that inputs of the wrong length or with non-hex
// bytes are rejected.
func TestDecodeJSONUnicodeEscape(t *testing.T) {
	unit, decoded := decodeJSONUnicodeEscape([]byte("D83D"))
	assert.True(t, decoded)
	assert.Equal(t, uint16(0xD83D), unit)

	// "123" is too short; "12xz" has the right length but non-hex bytes.
	for _, malformed := range []string{"123", "12xz"} {
		_, decoded = decodeJSONUnicodeEscape([]byte(malformed))
		assert.False(t, decoded)
	}
}

// TestNormalizeJSONForYAMLParser_NoAllocation tests that inputs which need no
// rewrite come back as the very same slice, not merely as an equal copy.
func TestNormalizeJSONForYAMLParser_NoAllocation(t *testing.T) {
	tests := []string{
		`{"path":"/test"}`,
		`{"text":"line\nquoted\"tab\tunicode\u003c"}`,
		`{"text":"\\ud83d\\udc4d"}`,
		`{"text":"\ud83d"}`,
		`{"text":"\udc4d"}`,
	}

	for _, tt := range tests {
		t.Run(tt, func(t *testing.T) {
			input := []byte(tt)
			result := normalizeJSONForYAMLParser(input)
			assert.Equal(t, tt, string(result))
			// assert.Equal falls back to reflect.DeepEqual for pointers, which
			// also accepts two different pointers to equal bytes, so a copied
			// slice would pass. assert.Same compares pointer identity.
			assert.Same(t, &input[0], &result[0], "Should return original slice when no rewrite is needed")
		})
	}
}

// TestExtractSpecInfo_JSON_EscapedSlashes tests issue #479
Expand All @@ -506,6 +548,49 @@ func TestExtractSpecInfo_JSON_EscapedSlashes(t *testing.T) {
assert.Equal(t, utils.OpenApi3, r.SpecType)
}

// TestExtractSpecInfo_JSON_SurrogatePairInExample feeds ExtractSpecInfo a JSON
// OpenAPI 3.0.1 document whose response example contains a \ud83d\udc4d
// surrogate-pair escape and expects it to be parsed without error.
func TestExtractSpecInfo_JSON_SurrogatePairInExample(t *testing.T) {
	spec := []byte(`{
"openapi": "3.0.1",
"info": {"title": "r", "version": "1"},
"paths": {
"/t": {
"post": {
"operationId": "t",
"responses": {
"201": {
"description": "ok",
"content": {
"application/json": {
"schema": {"type": "object", "properties": {"x": {"type": "string"}}},
"examples": {
"e": {"value": {"x": "Hello \ud83d\udc4d"}}
}
}
}
}
}
}
}
}
}`)

	info, err := ExtractSpecInfo(spec)
	assert.NoError(t, err)
	assert.Equal(t, "3.0.1", info.Version)
	assert.Equal(t, JSONFileType, info.SpecFileType)
	assert.Equal(t, utils.OpenApi3, info.SpecType)
}

// TestExtractSpecInfo_JSON_SurrogatePairInDescription feeds ExtractSpecInfo a
// single-line JSON OpenAPI 3.0.1 document whose info.description contains a
// \ud83d\udc4d surrogate-pair escape and expects it to be parsed without error.
func TestExtractSpecInfo_JSON_SurrogatePairInDescription(t *testing.T) {
	spec := []byte(`{"openapi":"3.0.1","info":{"title":"r","version":"1","description":"Hello \ud83d\udc4d"},"paths":{}}`)

	info, err := ExtractSpecInfo(spec)
	assert.NoError(t, err)
	assert.Equal(t, "3.0.1", info.Version)
	assert.Equal(t, JSONFileType, info.SpecFileType)
	assert.Equal(t, utils.OpenApi3, info.SpecType)
}

// TestExtractSpecInfo_JSON_EscapedSlashes_URL tests URL paths with escaped slashes
func TestExtractSpecInfo_JSON_EscapedSlashes_URL(t *testing.T) {
jsonWithURL := `{"openapi":"3.0.0","info":{"title":"Test","version":"1.0.0"},"servers":[{"url":"https:\/\/api.example.com\/v1"}],"paths":{}}`
Expand Down Expand Up @@ -619,6 +704,48 @@ func TestSpecInfo_Release(t *testing.T) {
assert.Equal(t, "3.1.0", s.Version)
}

// normalizeJSONForYAMLParserSink receives the result of every
// normalizeJSONForYAMLParser benchmark iteration, presumably so that the
// compiler cannot discard the call under measurement.
var normalizeJSONForYAMLParserSink []byte

// BenchmarkNormalizeJSONForYAMLParser_NoEscapes measures
// normalizeJSONForYAMLParser on a document that contains no backslash.
func BenchmarkNormalizeJSONForYAMLParser_NoEscapes(b *testing.B) {
	doc := []byte(`{"openapi":"3.0.1","info":{"title":"r","version":"1"},"paths":{}}`)
	b.SetBytes(int64(len(doc)))
	b.ReportAllocs()

	for remaining := b.N; remaining > 0; remaining-- {
		normalizeJSONForYAMLParserSink = normalizeJSONForYAMLParser(doc)
	}
}

// BenchmarkNormalizeJSONForYAMLParser_CommonEscapesNoRewrite measures
// normalizeJSONForYAMLParser on a document whose only escapes (\n, \", \t and
// a non-surrogate \u003c) are ones the function leaves untouched.
func BenchmarkNormalizeJSONForYAMLParser_CommonEscapesNoRewrite(b *testing.B) {
	doc := []byte(`{"openapi":"3.0.1","info":{"title":"line\nquoted\"tab\tunicode\u003c","version":"1"},"paths":{}}`)
	b.SetBytes(int64(len(doc)))
	b.ReportAllocs()

	for remaining := b.N; remaining > 0; remaining-- {
		normalizeJSONForYAMLParserSink = normalizeJSONForYAMLParser(doc)
	}
}

// BenchmarkNormalizeJSONForYAMLParser_EscapedSlashes measures
// normalizeJSONForYAMLParser on a document containing several \/ escapes,
// each of which has to be rewritten to a bare slash.
func BenchmarkNormalizeJSONForYAMLParser_EscapedSlashes(b *testing.B) {
	doc := []byte(`{"openapi":"3.0.1","info":{"title":"r","version":"1"},"servers":[{"url":"https:\/\/api.example.com\/v1"}],"paths":{"\/test":{}}}`)
	b.SetBytes(int64(len(doc)))
	b.ReportAllocs()

	for remaining := b.N; remaining > 0; remaining-- {
		normalizeJSONForYAMLParserSink = normalizeJSONForYAMLParser(doc)
	}
}

// BenchmarkNormalizeJSONForYAMLParser_SurrogatePair measures
// normalizeJSONForYAMLParser on a document containing one \ud83d\udc4d
// surrogate-pair escape that has to be rewritten to UTF-8.
func BenchmarkNormalizeJSONForYAMLParser_SurrogatePair(b *testing.B) {
	doc := []byte(`{"openapi":"3.0.1","info":{"title":"r","version":"1","description":"Hello \ud83d\udc4d"},"paths":{}}`)
	b.SetBytes(int64(len(doc)))
	b.ReportAllocs()

	for remaining := b.N; remaining > 0; remaining-- {
		normalizeJSONForYAMLParserSink = normalizeJSONForYAMLParser(doc)
	}
}

func TestSpecInfo_Release_Nil(t *testing.T) {
var s *SpecInfo
s.Release() // must not panic
Expand Down
Loading
Loading