From 680c63b914e9e6908999bcf6943317077d372558 Mon Sep 17 00:00:00 2001
From: Dayna Blackwell
Date: Wed, 29 Apr 2026 19:38:57 -0700
Subject: [PATCH] fix(sanitize): preserve angle brackets inside code blocks and inline code

bluemonday's StrictPolicy treats angle brackets inside markdown code
blocks and inline code spans as HTML tags and strips them. This causes
content like `mut_raw_ptr<int>` to become `mut_raw_ptr` when read
through MCP issue/PR endpoints.

The fix protects angle brackets inside fenced code blocks (```) and
inline code spans (`) with sentinels before HTML sanitization, then
restores them after. Angle brackets outside code are still sanitized
normally, preserving XSS protection.

An inline code span is closed only by a backtick run of exactly the
same length as the run that opened it, so the tail of a longer run is
never mistaken for the closer. NUL characters are removed from the
input before sentinels are inserted, so untrusted content cannot carry
a pre-built sentinel past the HTML sanitizer.

Fixes #2202

Signed-off-by: Dayna Blackwell
---
Review notes:
- The code in this patch was amended during review, so the post-image
  blob hashes on the `index` lines below are those of the original
  submission.
- The angle-bracket examples in the message, the Sanitize comment and
  the test inputs had been stripped in transit and were reconstructed;
  please confirm them against the author's copy.

 pkg/sanitize/sanitize.go      | 152 ++++++++++++++++++++++++++++++++-
 pkg/sanitize/sanitize_test.go |  56 ++++++++++++
 2 files changed, 207 insertions(+), 1 deletion(-)

diff --git a/pkg/sanitize/sanitize.go b/pkg/sanitize/sanitize.go
index e6401e4fb3..504e4fa6bb 100644
--- a/pkg/sanitize/sanitize.go
+++ b/pkg/sanitize/sanitize.go
@@ -12,7 +12,14 @@ var policy *bluemonday.Policy
 var policyOnce sync.Once
 
 func Sanitize(input string) string {
-	return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input)))
+	cleaned := FilterCodeFenceMetadata(FilterInvisibleCharacters(input))
+	// Protect angle brackets inside code blocks and inline code spans
+	// from being stripped by the HTML sanitizer. bluemonday treats <int>,
+	// <T>, etc. as unknown HTML tags and removes them.
+	// See https://github.com/github/github-mcp-server/issues/2202
+	protected := protectCodeAngleBrackets(cleaned)
+	sanitized := FilterHTMLTags(protected)
+	return restoreCodeAngleBrackets(sanitized)
 }
 
 // FilterInvisibleCharacters removes invisible or control characters that should not appear
@@ -145,6 +152,149 @@ func isSafeCodeFenceToken(token string) bool {
 	return true
 }
 
+// Sentinels used to protect angle brackets inside code from HTML sanitization.
+// protectCodeAngleBrackets drops NUL characters from its input, so untrusted
+// content cannot contain a pre-built sentinel.
+const (
+	ltSentinel = "\x00LT\x00"
+	gtSentinel = "\x00GT\x00"
+)
+
+// protectCodeAngleBrackets replaces < and > inside fenced code blocks and
+// inline code spans with sentinels so bluemonday does not strip them as HTML.
+// Text outside code is copied unchanged, except that NUL characters are
+// removed everywhere so the input cannot forge a sentinel.
+//
+// NOTE: a run of three or more backticks opens a fenced block wherever it
+// appears, not only at the start of a line, and an unclosed fence extends to
+// the end of the input.
+func protectCodeAngleBrackets(input string) string {
+	runes := []rune(strings.ReplaceAll(input, "\x00", ""))
+	n := len(runes)
+
+	var b strings.Builder
+	b.Grow(len(input))
+
+	i := 0
+	for i < n {
+		if runes[i] != '`' {
+			b.WriteRune(runes[i])
+			i++
+			continue
+		}
+
+		openLen := backtickRunLen(runes, i)
+		b.WriteString(strings.Repeat("`", openLen))
+		i += openLen
+
+		if openLen >= 3 {
+			i = protectFencedBlock(&b, runes, i, openLen)
+		} else {
+			i = protectInlineSpan(&b, runes, i, openLen)
+		}
+	}
+
+	return b.String()
+}
+
+// backtickRunLen returns the number of consecutive backticks in runes starting
+// at index start.
+func backtickRunLen(runes []rune, start int) int {
+	end := start
+	for end < len(runes) && runes[end] == '`' {
+		end++
+	}
+	return end - start
+}
+
+// writeProtectedRune writes r to b, substituting the matching sentinel when r
+// is an angle bracket.
+func writeProtectedRune(b *strings.Builder, r rune) {
+	switch r {
+	case '<':
+		b.WriteString(ltSentinel)
+	case '>':
+		b.WriteString(gtSentinel)
+	default:
+		b.WriteRune(r)
+	}
+}
+
+// protectFencedBlock copies the remainder of a fenced code block whose opening
+// fence of fenceLen backticks has already been written. start is the index just
+// past that fence. It returns the index of the first rune after the block.
+func protectFencedBlock(b *strings.Builder, runes []rune, start, fenceLen int) int {
+	i, n := start, len(runes)
+
+	// The rest of the opening line is the info string; copy it verbatim,
+	// including the newline that ends it.
+	for i < n && runes[i] != '\n' {
+		b.WriteRune(runes[i])
+		i++
+	}
+	if i < n {
+		b.WriteRune(runes[i])
+		i++
+	}
+
+	for i < n {
+		if runes[i] != '`' {
+			writeProtectedRune(b, runes[i])
+			i++
+			continue
+		}
+		// Backtick runs are copied whole; one at least as long as the
+		// opening fence closes the block.
+		runLen := backtickRunLen(runes, i)
+		b.WriteString(strings.Repeat("`", runLen))
+		i += runLen
+		if runLen >= fenceLen {
+			break
+		}
+	}
+	return i
+}
+
+// protectInlineSpan handles an inline code span whose opening run of openLen
+// backticks has already been written. start is the index just past that run.
+//
+// When a closing run of exactly openLen backticks exists, the span content is
+// written with its angle brackets protected, followed by the closing run, and
+// the index after the span is returned. Otherwise nothing is written and start
+// is returned, so the opening backticks end up as literal text.
+func protectInlineSpan(b *strings.Builder, runes []rune, start, openLen int) int {
+	closeStart := -1
+	for k := start; k < len(runes); {
+		if runes[k] != '`' {
+			k++
+			continue
+		}
+		runLen := backtickRunLen(runes, k)
+		if runLen == openLen {
+			closeStart = k
+			break
+		}
+		// A run of any other length cannot close the span. Skip all of it so
+		// that the tail of a longer run is not mistaken for the closer.
+		k += runLen
+	}
+	if closeStart == -1 {
+		return start
+	}
+
+	for _, r := range runes[start:closeStart] {
+		writeProtectedRune(b, r)
+	}
+	b.WriteString(strings.Repeat("`", openLen))
+	return closeStart + openLen
+}
+
+// restoreCodeAngleBrackets converts sentinels back to angle brackets.
+func restoreCodeAngleBrackets(input string) string {
+	s := strings.ReplaceAll(input, ltSentinel, "<")
+	return strings.ReplaceAll(s, gtSentinel, ">")
+}
+
 func getPolicy() *bluemonday.Policy {
 	policyOnce.Do(func() {
 		p := bluemonday.StrictPolicy()
diff --git a/pkg/sanitize/sanitize_test.go b/pkg/sanitize/sanitize_test.go
index 35b23e6abe..05db3785db 100644
--- a/pkg/sanitize/sanitize_test.go
+++ b/pkg/sanitize/sanitize_test.go
@@ -300,3 +300,59 @@ func TestSanitizeRemovesInvisibleCodeFenceMetadata(t *testing.T) {
 	result := Sanitize(input)
 	assert.Equal(t, expected, result)
 }
+
+func TestSanitizePreservesAngleBracketsInCodeBlocks(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected string
+	}{
+		{
+			name:     "fenced code block with angle brackets",
+			input:    "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
+			expected: "```\nlet ptr: mut_raw_ptr<int> = raw_new int;\n```",
+		},
+		{
+			name:     "inline code with angle brackets",
+			input:    "Use `Vec<T>` for collections.",
+			expected: "Use `Vec<T>` for collections.",
+		},
+		{
+			name:     "angle brackets outside code are sanitized",
+			input:    "This has <script>alert(1)</script> in it.",
+			expected: "This has  in it.",
+		},
+		{
+			name:     "fenced code block with generic types",
+			input:    "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
+			expected: "Example:\n```go\nfunc Foo[T comparable](x T) {}\n```\nDone.",
+		},
+		{
+			name:     "multiple inline code spans with angle brackets",
+			input:    "Compare `Map<K, V>` and `Set<T>`.",
+			expected: "Compare `Map<K, V>` and `Set<T>`.",
+		},
+		{
+			name:     "no code blocks passes through",
+			input:    "No code here, just text.",
+			expected: "No code here, just text.",
+		},
+		{
+			name:     "inline code containing a longer backtick run",
+			input:    "Use `a``<b>` here.",
+			expected: "Use `a``<b>` here.",
+		},
+		{
+			name:     "forged sentinels do not become angle brackets",
+			input:    "\x00LT\x00script\x00GT\x00",
+			expected: "LTscriptGT",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := Sanitize(tt.input)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}