From 076989abc8ed3208426837b7f9802f98a47778bb Mon Sep 17 00:00:00 2001
From: Tianzhou <t@bytebase.com>
Date: Tue, 24 Mar 2026 09:45:48 -0700
Subject: [PATCH 1/4] fix: preserve string literals during schema qualification
 stripping (#371)

stripSchemaQualificationsFromText used regex patterns that matched
schema prefixes inside single-quoted SQL string literals. For example,
with schema "s", Pattern 4 treated the single quote in 's.manage' as a
valid non-double-quote prefix character, corrupting 's.manage' into
'manage'. This caused pgschema plan to generate destructive false-positive
ALTER POLICY statements that silently truncated function arguments.

Add stripSchemaQualificationsPreservingStrings which splits text on
single-quoted string boundaries (handling '' escapes) and applies schema
stripping only to non-string segments. Also add a fast-path
strings.Contains check to skip all work when the schema name is absent.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 internal/postgres/desired_state.go            | 53 +++++++++++++++++-
 internal/postgres/desired_state_test.go       | 55 +++++++++++++++++++
 .../create_policy/alter_policy_using/new.sql  | 12 +++-
 .../create_policy/alter_policy_using/old.sql  | 12 +++-
 .../alter_policy_using/plan.json              |  2 +-
 5 files changed, 129 insertions(+), 5 deletions(-)

diff --git a/internal/postgres/desired_state.go b/internal/postgres/desired_state.go
index 114a9008..bd20bc16 100644
--- a/internal/postgres/desired_state.go
+++ b/internal/postgres/desired_state.go
@@ -92,7 +92,7 @@ func GenerateTempSchemaName() string {
 // Only qualifications matching the specified schemaName are stripped.
 // All other schema qualifications are preserved as intentional cross-schema references.
 func stripSchemaQualifications(sql string, schemaName string) string {
-	if schemaName == "" {
+	if schemaName == "" || !strings.Contains(sql, schemaName) {
 		return sql
 	}
 
@@ -108,7 +108,56 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 			// Preserve dollar-quoted content as-is
 			result.WriteString(seg.text)
 		} else {
-			result.WriteString(stripSchemaQualificationsFromText(seg.text, schemaName))
+			// Further split on single-quoted string literals to avoid stripping
+			// schema prefixes from inside string constants (Issue #371).
+			// e.g., has_scope('s.manage') must NOT become has_scope('manage')
+			result.WriteString(stripSchemaQualificationsPreservingStrings(seg.text, schemaName))
+		}
+	}
+	return result.String()
+}
+
+// stripSchemaQualificationsPreservingStrings splits text on single-quoted string
+// literals, applies schema stripping only to non-string parts, and reassembles.
+func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string {
+	var result strings.Builder
+	result.Grow(len(text))
+	inString := false
+
+	i := 0
+	segStart := 0
+	for i < len(text) {
+		ch := text[i]
+		if ch == '\'' {
+			if !inString {
+				// End of non-string segment — strip schema qualifications from it
+				result.WriteString(stripSchemaQualificationsFromText(text[segStart:i], schemaName))
+				segStart = i
+				inString = true
+				i++
+			} else {
+				// Check for escaped quote ('')
+				if i+1 < len(text) && text[i+1] == '\'' {
+					i += 2 // skip ''
+				} else {
+					// End of string literal — write it as-is
+					inString = false
+					i++
+					result.WriteString(text[segStart:i])
+					segStart = i
+				}
+			}
+		} else {
+			i++
+		}
+	}
+	// Handle remaining text
+	if segStart < len(text) {
+		if inString {
+			// Unterminated string literal — write as-is
+			result.WriteString(text[segStart:])
+		} else {
+			result.WriteString(stripSchemaQualificationsFromText(text[segStart:], schemaName))
 		}
 	}
 	return result.String()
diff --git a/internal/postgres/desired_state_test.go b/internal/postgres/desired_state_test.go
index 77fb31f4..ee301e71 100644
--- a/internal/postgres/desired_state_test.go
+++ b/internal/postgres/desired_state_test.go
@@ -194,3 +194,58 @@ func TestReplaceSchemaInSearchPath(t *testing.T) {
 		})
 	}
 }
+
+func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) {
+	tests := []struct {
+		name     string
+		sql      string
+		schema   string
+		expected string
+	}{
+		{
+			name:     "strips schema from table reference",
+			sql:      "CREATE TABLE public.items (id int);",
+			schema:   "public",
+			expected: "CREATE TABLE items (id int);",
+		},
+		{
+			name:     "preserves schema prefix inside single-quoted string",
+			sql:      "CREATE POLICY p ON items USING (has_scope('public.manage'));",
+			schema:   "public",
+			expected: "CREATE POLICY p ON items USING (has_scope('public.manage'));",
+		},
+		{
+			name:     "preserves schema prefix inside string with short schema name",
+			sql:      "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
+			schema:   "s",
+			expected: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
+		},
+		{
+			name:     "strips schema from identifier but preserves string literal",
+			sql:      "CREATE POLICY p ON s.items USING (auth.has_scope('s.manage'));",
+			schema:   "s",
+			expected: "CREATE POLICY p ON items USING (auth.has_scope('s.manage'));",
+		},
+		{
+			name:     "preserves escaped quotes in string literals",
+			sql:      "SELECT 'it''s public.test' FROM public.t;",
+			schema:   "public",
+			expected: "SELECT 'it''s public.test' FROM t;",
+		},
+		{
+			name:     "handles multiple string literals",
+			sql:      "SELECT 'public.a', public.t, 'public.b';",
+			schema:   "public",
+			expected: "SELECT 'public.a', t, 'public.b';",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := stripSchemaQualifications(tt.sql, tt.schema)
+			if result != tt.expected {
+				t.Errorf("stripSchemaQualifications(%q, %q)\n  got:  %q\n  want: %q", tt.sql, tt.schema, result, tt.expected)
+			}
+		})
+	}
+}
diff --git a/testdata/diff/create_policy/alter_policy_using/new.sql b/testdata/diff/create_policy/alter_policy_using/new.sql
index de61326a..8eea5526 100644
--- a/testdata/diff/create_policy/alter_policy_using/new.sql
+++ b/testdata/diff/create_policy/alter_policy_using/new.sql
@@ -1,3 +1,6 @@
+CREATE FUNCTION has_scope(p_scope text) RETURNS boolean
+LANGUAGE sql STABLE AS $$ SELECT p_scope IS NOT NULL $$;
+
 CREATE TABLE users (
     id SERIAL PRIMARY KEY,
     name VARCHAR(100) NOT NULL,
@@ -10,4 +13,11 @@ ALTER TABLE users ENABLE ROW LEVEL SECURITY;
 CREATE POLICY user_tenant_isolation ON users
     FOR ALL
     TO PUBLIC
-    USING (tenant_id = 2);
\ No newline at end of file
+    USING (tenant_id = 2);
+
+-- Policy with string literal containing schema prefix (Issue #371)
+-- This must NOT produce a false-positive diff
+CREATE POLICY scope_check ON users
+    FOR SELECT
+    TO PUBLIC
+    USING (has_scope('public.manage'));
\ No newline at end of file
diff --git a/testdata/diff/create_policy/alter_policy_using/old.sql b/testdata/diff/create_policy/alter_policy_using/old.sql
index e0c906c4..47448493 100644
--- a/testdata/diff/create_policy/alter_policy_using/old.sql
+++ b/testdata/diff/create_policy/alter_policy_using/old.sql
@@ -1,3 +1,6 @@
+CREATE FUNCTION has_scope(p_scope text) RETURNS boolean
+LANGUAGE sql STABLE AS $$ SELECT p_scope IS NOT NULL $$;
+
 CREATE TABLE users (
     id SERIAL PRIMARY KEY,
     name VARCHAR(100) NOT NULL,
@@ -9,4 +12,11 @@ ALTER TABLE users ENABLE ROW LEVEL SECURITY;
 CREATE POLICY user_tenant_isolation ON users
     FOR ALL
     TO PUBLIC
-    USING (tenant_id = 1);
\ No newline at end of file
+    USING (tenant_id = 1);
+
+-- Policy with string literal containing schema prefix (Issue #371)
+-- This must NOT produce a false-positive diff
+CREATE POLICY scope_check ON users
+    FOR SELECT
+    TO PUBLIC
+    USING (has_scope('public.manage'));
\ No newline at end of file
diff --git a/testdata/diff/create_policy/alter_policy_using/plan.json b/testdata/diff/create_policy/alter_policy_using/plan.json
index c294dd57..e7bb6c22 100644
--- a/testdata/diff/create_policy/alter_policy_using/plan.json
+++ b/testdata/diff/create_policy/alter_policy_using/plan.json
@@ -3,7 +3,7 @@
   "pgschema_version": "1.7.4",
   "created_at": "1970-01-01T00:00:00Z",
   "source_fingerprint": {
-    "hash": "4dde0e257ba5ac0a0ad55c5be408409192726dd6029e431a68742a993f705d3c"
+    "hash": "e7aac12e5d350ee9d014b756c838017ddd0c17180b7baa33300f4a773c4c89b1"
   },
   "groups": [
     {

From b1dcba848f1307ada713223eccd5c0f3ad98cb39 Mon Sep 17 00:00:00 2001
From: tianzhou <t@bytebase.com>
Date: Thu, 26 Mar 2026 00:11:57 -0700
Subject: [PATCH 2/4] Document E'...' backslash-escaped quote limitation in
 string-literal parser

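Add doc comment and test case capturing the known limitation where E'...'
escape-string syntax causes the single-quote parser to mistrack boundaries.
The test pins both observed failure modes: a false-negative (a qualifier
after the string is left unstripped) and a false-positive (a schema prefix
inside the E-string is stripped).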

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 internal/postgres/desired_state.go      |  7 +++++++
 internal/postgres/desired_state_test.go | 12 ++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/internal/postgres/desired_state.go b/internal/postgres/desired_state.go
index bd20bc16..fff263a2 100644
--- a/internal/postgres/desired_state.go
+++ b/internal/postgres/desired_state.go
@@ -119,6 +119,13 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 
 // stripSchemaQualificationsPreservingStrings splits text on single-quoted string
 // literals, applies schema stripping only to non-string parts, and reassembles.
+//
+// Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s')
+// rather than doubled quotes ('it''s'). This parser only recognises the '' form.
+// With E'content\'', a backslash-escaped quote may cause the parser to mistrack
+// string boundaries, resulting in schema qualifiers after the string not being
+// stripped (false-negative). This is safe — it preserves the original SQL — and
+// E'...' strings are extremely rare in DDL schema definitions.
 func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string {
 	var result strings.Builder
 	result.Grow(len(text))
diff --git a/internal/postgres/desired_state_test.go b/internal/postgres/desired_state_test.go
index ee301e71..4149ce8c 100644
--- a/internal/postgres/desired_state_test.go
+++ b/internal/postgres/desired_state_test.go
@@ -238,6 +238,18 @@ func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) {
 			schema:   "public",
 			expected: "SELECT 'public.a', t, 'public.b';",
 		},
+		{
+			// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
+			// is not handled. The parser treats \' as ordinary char + string-closer,
+			// mistracking boundaries. Here it strips inside the string (a false-positive
+			// that alters the literal's content) and misses the identifier after it (a
+			// false-negative that leaves the qualifier in place). This test pins the
+			// current behavior, not the desired one. E'...' in DDL is extremely rare.
+			name:     "E-string with backslash-escaped quote (known limitation)",
+			sql:      "SELECT E'it\\'s public.test' FROM public.t;",
+			schema:   "public",
+			expected: "SELECT E'it\\'s test' FROM public.t;",
+		},
 	}
 
 	for _, tt := range tests {

From 75949fd81966aa51dce8334abd8b93b053dabc1a Mon Sep 17 00:00:00 2001
From: tianzhou <t@bytebase.com>
Date: Thu, 26 Mar 2026 01:23:46 -0700
Subject: [PATCH 3/4] fix: handle SQL comments in string-literal-aware schema
 stripping

Apostrophes in SQL comments (-- don't ...) could flip the string-tracking
state, causing schema qualifiers after the comment to not be stripped.
Also cache compiled regexes per schema name for better performance.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 internal/postgres/desired_state.go      | 171 ++++++++++++++----------
 internal/postgres/desired_state_test.go |  18 +++
 2 files changed, 120 insertions(+), 69 deletions(-)

diff --git a/internal/postgres/desired_state.go b/internal/postgres/desired_state.go
index fff263a2..a07c8c73 100644
--- a/internal/postgres/desired_state.go
+++ b/internal/postgres/desired_state.go
@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"regexp"
 	"strings"
+	"sync"
 	"time"
 )
 
@@ -118,7 +119,8 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 }
 
 // stripSchemaQualificationsPreservingStrings splits text on single-quoted string
-// literals, applies schema stripping only to non-string parts, and reassembles.
+// literals and SQL comments, applies schema stripping only to non-string,
+// non-comment parts, and reassembles.
 //
 // Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s')
 // rather than doubled quotes ('it''s'). This parser only recognises the '' form.
@@ -129,44 +131,78 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string {
 	var result strings.Builder
 	result.Grow(len(text))
-	inString := false
 
+	// i is the scan cursor; segStart marks the pending segment that flushCode (stripped) or flushLiteral (verbatim) writes out and advances past.
 	i := 0
 	segStart := 0
+
+	flushCode := func(end int) {
+		if end > segStart {
+			result.WriteString(stripSchemaQualificationsFromText(text[segStart:end], schemaName))
+		}
+		segStart = end
+	}
+	flushLiteral := func(end int) {
+		result.WriteString(text[segStart:end])
+		segStart = end
+	}
+
 	for i < len(text) {
 		ch := text[i]
+
+		// Start of single-quoted string literal
 		if ch == '\'' {
-			if !inString {
-				// End of non-string segment — strip schema qualifications from it
-				result.WriteString(stripSchemaQualificationsFromText(text[segStart:i], schemaName))
-				segStart = i
-				inString = true
-				i++
-			} else {
-				// Check for escaped quote ('')
-				if i+1 < len(text) && text[i+1] == '\'' {
-					i += 2 // skip ''
+			flushCode(i)
+			i++ // skip opening quote
+			for i < len(text) {
+				if text[i] == '\'' {
+					if i+1 < len(text) && text[i+1] == '\'' {
+						i += 2 // skip escaped ''
+					} else {
+						i++ // skip closing quote
+						break
+					}
 				} else {
-					// End of string literal — write it as-is
-					inString = false
 					i++
-					result.WriteString(text[segStart:i])
-					segStart = i
 				}
 			}
-		} else {
-			i++
+			flushLiteral(i)
+			continue
 		}
-	}
-	// Handle remaining text
-	if segStart < len(text) {
-		if inString {
-			// Unterminated string literal — write as-is
-			result.WriteString(text[segStart:])
-		} else {
-			result.WriteString(stripSchemaQualificationsFromText(text[segStart:], schemaName))
+
+		// Start of line comment (--)
+		if ch == '-' && i+1 < len(text) && text[i+1] == '-' {
+			flushCode(i)
+			i += 2
+			for i < len(text) && text[i] != '\n' {
+				i++
+			}
+			if i < len(text) {
+				i++ // skip the newline
+			}
+			flushLiteral(i)
+			continue
 		}
+
+		// Start of block comment (/* ... */)
+		if ch == '/' && i+1 < len(text) && text[i+1] == '*' {
+			flushCode(i)
+			i += 2
+			for i < len(text) {
+				if text[i] == '*' && i+1 < len(text) && text[i+1] == '/' {
+					i += 2
+					break
+				}
+				i++
+			}
+			flushLiteral(i)
+			continue
+		}
+
+		i++
 	}
+	// Remaining text is code
+	flushCode(i)
 	return result.String()
 }
 
@@ -221,76 +257,73 @@ func splitDollarQuotedSegments(sql string) []dollarQuotedSegment {
 	return segments
 }
 
-// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
-func stripSchemaQualificationsFromText(text string, schemaName string) string {
-	// Escape the schema name for use in regex
-	escapedSchema := regexp.QuoteMeta(schemaName)
-
-	// Pattern matches schema qualification and captures the object name
-	// We need to handle 4 cases:
-	// 1. unquoted_schema.unquoted_object  -> unquoted_object
-	// 2. unquoted_schema."quoted_object"  -> "quoted_object"
-	// 3. "quoted_schema".unquoted_object  -> unquoted_object
-	// 4. "quoted_schema"."quoted_object"  -> "quoted_object"
-	//
-	// Key: The dot must be outside quotes (a schema.object separator, not part of an identifier)
-	// Important: For unquoted schema patterns, we must ensure the schema name isn't inside a quoted identifier
-	// Example: Don't match 'public' in CREATE INDEX "public.idx" where the whole thing is a quoted identifier
-
-	// Pattern 1: quoted schema + dot + quoted object: "schema"."object"
-	// Example: "public"."table" -> "table"
-	pattern1 := fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)
-	re1 := regexp.MustCompile(pattern1)
+// schemaRegexes holds compiled regexes for a specific schema name, avoiding
+// recompilation on every call to stripSchemaQualificationsFromText.
+type schemaRegexes struct {
+	re1 *regexp.Regexp // "schema"."object"
+	re2 *regexp.Regexp // "schema".object
+	re3 *regexp.Regexp // schema."object"
+	re4 *regexp.Regexp // schema.object
+}
 
-	// Pattern 2: quoted schema + dot + unquoted object: "schema".object
-	// Example: "public".table -> table
-	pattern2 := fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
-	re2 := regexp.MustCompile(pattern2)
+var (
+	schemaRegexCache   = make(map[string]*schemaRegexes)
+	schemaRegexCacheMu sync.Mutex
+)
 
-	// Pattern 3: unquoted schema + dot + quoted object: schema."object"
-	// Example: public."table" -> "table"
-	// Use negative lookbehind to ensure schema isn't preceded by a quote
-	// and negative lookahead to ensure the dot after schema isn't inside quotes
-	pattern3 := fmt.Sprintf(`(?:^|[^"])%s\.(\"[^"]+\")`, escapedSchema)
-	re3 := regexp.MustCompile(pattern3)
+func getSchemaRegexes(schemaName string) *schemaRegexes {
+	schemaRegexCacheMu.Lock()
+	defer schemaRegexCacheMu.Unlock()
+	if cached, ok := schemaRegexCache[schemaName]; ok {
+		return cached
+	}
+	escapedSchema := regexp.QuoteMeta(schemaName)
+	sr := &schemaRegexes{
+		re1: regexp.MustCompile(fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)),
+		re2: regexp.MustCompile(fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
+		re3: regexp.MustCompile(fmt.Sprintf(`(?:^|[^"])%s\.(\"[^"]+\")`, escapedSchema)),
+		re4: regexp.MustCompile(fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
+	}
+	schemaRegexCache[schemaName] = sr
+	return sr
+}
 
-	// Pattern 4: unquoted schema + dot + unquoted object: schema.object
-	// Example: public.table -> table
-	// Use negative lookbehind to ensure schema isn't preceded by a quote
-	pattern4 := fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
-	re4 := regexp.MustCompile(pattern4)
+// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
+// It handles 4 cases:
+// 1. "quoted_schema"."quoted_object"  -> "quoted_object"
+// 2. "quoted_schema".unquoted_object  -> unquoted_object
+// 3. unquoted_schema."quoted_object"  -> "quoted_object"
+// 4. unquoted_schema.unquoted_object  -> unquoted_object
+func stripSchemaQualificationsFromText(text string, schemaName string) string {
+	sr := getSchemaRegexes(schemaName)
 
 	result := text
 	// Apply in order: quoted schema first to avoid double-matching
-	result = re1.ReplaceAllString(result, "$1")
-	result = re2.ReplaceAllString(result, "$1")
+	result = sr.re1.ReplaceAllString(result, "$1")
+	result = sr.re2.ReplaceAllString(result, "$1")
 	// For patterns 3 and 4, we need to preserve the character before the schema
-	result = re3.ReplaceAllStringFunc(result, func(match string) string {
+	result = sr.re3.ReplaceAllStringFunc(result, func(match string) string {
 		// If match starts with a non-quote character, preserve it
 		if len(match) > 0 && match[0] != '"' {
-			// Extract the quote identifier (everything after schema.)
 			parts := strings.SplitN(match, ".", 2)
 			if len(parts) == 2 {
 				return string(match[0]) + parts[1]
 			}
 		}
-		// Otherwise just return the captured quoted identifier
 		parts := strings.SplitN(match, ".", 2)
 		if len(parts) == 2 {
 			return parts[1]
 		}
 		return match
 	})
-	result = re4.ReplaceAllStringFunc(result, func(match string) string {
+	result = sr.re4.ReplaceAllStringFunc(result, func(match string) string {
 		// If match starts with a non-quote character, preserve it
 		if len(match) > 0 && match[0] != '"' {
-			// Extract the unquoted identifier (everything after schema.)
 			parts := strings.SplitN(match, ".", 2)
 			if len(parts) == 2 {
 				return string(match[0]) + parts[1]
 			}
 		}
-		// Otherwise just return the captured unquoted identifier
 		parts := strings.SplitN(match, ".", 2)
 		if len(parts) == 2 {
 			return parts[1]
diff --git a/internal/postgres/desired_state_test.go b/internal/postgres/desired_state_test.go
index 4149ce8c..8dabe69d 100644
--- a/internal/postgres/desired_state_test.go
+++ b/internal/postgres/desired_state_test.go
@@ -238,6 +238,24 @@ func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) {
 			schema:   "public",
 			expected: "SELECT 'public.a', t, 'public.b';",
 		},
+		{
+			name:     "handles apostrophe in line comment followed by schema-qualified identifier",
+			sql:      "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",
+			schema:   "public",
+			expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;",
+		},
+		{
+			name:     "handles block comment with apostrophe",
+			sql:      "/* it's public.t */ DROP TABLE public.t;",
+			schema:   "public",
+			expected: "/* it's public.t */ DROP TABLE t;",
+		},
+		{
+			name:     "handles block comment without apostrophe",
+			sql:      "/* drop public.t */ DROP TABLE public.t;",
+			schema:   "public",
+			expected: "/* drop public.t */ DROP TABLE t;",
+		},
 		{
 			// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
 			// is not handled. The parser treats \' as ordinary char + string-closer,

From f729fc907ca616670815ea802e89711c24a5dac3 Mon Sep 17 00:00:00 2001
From: tianzhou <t@bytebase.com>
Date: Thu, 26 Mar 2026 01:44:08 -0700
Subject: [PATCH 4/4] fix: use word boundaries in schema-stripping regexes and
 simplify replacements

- Use [^a-zA-Z0-9_$"] boundary instead of [^"] to prevent matching
  schema name as suffix of longer identifiers (e.g., schema "s" no
  longer matches "sales.total")
- Use capture groups with ReplaceAllString instead of ReplaceAllStringFunc
  to correctly handle start-of-string matches
- Clarify E-string limitation doc: both false-negative and false-positive
  are possible

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 internal/postgres/desired_state.go      | 54 +++++++++----------------
 internal/postgres/desired_state_test.go | 12 ++++++
 2 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/internal/postgres/desired_state.go b/internal/postgres/desired_state.go
index a07c8c73..6bb9fdde 100644
--- a/internal/postgres/desired_state.go
+++ b/internal/postgres/desired_state.go
@@ -125,9 +125,13 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 // Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s')
 // rather than doubled quotes ('it''s'). This parser only recognises the '' form.
 // With E'content\'', a backslash-escaped quote may cause the parser to mistrack
-// string boundaries, resulting in schema qualifiers after the string not being
-// stripped (false-negative). This is safe — it preserves the original SQL — and
-// E'...' strings are extremely rare in DDL schema definitions.
+// string boundaries, which can result in either:
+//   - false-negative: schema qualifiers after the string are not stripped, or
+//   - false-positive: schema prefixes inside the E-string are incorrectly stripped.
+//
+// Both cases change semantics only for E'...' strings, which are extremely rare
+// in DDL schema definitions. The false-negative case preserves valid SQL; the
+// false-positive case could alter string content but is unlikely in practice.
 func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string {
 	var result strings.Builder
 	result.Grow(len(text))
@@ -278,11 +282,18 @@ func getSchemaRegexes(schemaName string) *schemaRegexes {
 		return cached
 	}
 	escapedSchema := regexp.QuoteMeta(schemaName)
+	// Patterns 1-2: quoted schema ("schema".object / "schema"."object")
+	// The leading " already prevents suffix matching.
+	// Patterns 3-4: unquoted schema (schema.object / schema."object")
+	// Use a capture group for the optional non-identifier prefix so we can
+	// preserve it in replacement without the match[0] ambiguity at ^.
+	// The character class [^a-zA-Z0-9_$"] ensures the schema name isn't a
+	// suffix of a longer identifier (e.g., schema "s" won't match "sales").
 	sr := &schemaRegexes{
 		re1: regexp.MustCompile(fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)),
 		re2: regexp.MustCompile(fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
-		re3: regexp.MustCompile(fmt.Sprintf(`(?:^|[^"])%s\.(\"[^"]+\")`, escapedSchema)),
-		re4: regexp.MustCompile(fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
+		re3: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.(\"[^"]+\")`, escapedSchema)),
+		re4: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
 	}
 	schemaRegexCache[schemaName] = sr
 	return sr
@@ -301,35 +312,10 @@ func stripSchemaQualificationsFromText(text string, schemaName string) string {
 	// Apply in order: quoted schema first to avoid double-matching
 	result = sr.re1.ReplaceAllString(result, "$1")
 	result = sr.re2.ReplaceAllString(result, "$1")
-	// For patterns 3 and 4, we need to preserve the character before the schema
-	result = sr.re3.ReplaceAllStringFunc(result, func(match string) string {
-		// If match starts with a non-quote character, preserve it
-		if len(match) > 0 && match[0] != '"' {
-			parts := strings.SplitN(match, ".", 2)
-			if len(parts) == 2 {
-				return string(match[0]) + parts[1]
-			}
-		}
-		parts := strings.SplitN(match, ".", 2)
-		if len(parts) == 2 {
-			return parts[1]
-		}
-		return match
-	})
-	result = sr.re4.ReplaceAllStringFunc(result, func(match string) string {
-		// If match starts with a non-quote character, preserve it
-		if len(match) > 0 && match[0] != '"' {
-			parts := strings.SplitN(match, ".", 2)
-			if len(parts) == 2 {
-				return string(match[0]) + parts[1]
-			}
-		}
-		parts := strings.SplitN(match, ".", 2)
-		if len(parts) == 2 {
-			return parts[1]
-		}
-		return match
-	})
+	// For patterns 3 and 4, $1 is the prefix (boundary char or empty at ^),
+	// $2 is the object name — preserve the prefix and keep only the object.
+	result = sr.re3.ReplaceAllString(result, "${1}${2}")
+	result = sr.re4.ReplaceAllString(result, "${1}${2}")
 
 	return result
 }
diff --git a/internal/postgres/desired_state_test.go b/internal/postgres/desired_state_test.go
index 8dabe69d..09267318 100644
--- a/internal/postgres/desired_state_test.go
+++ b/internal/postgres/desired_state_test.go
@@ -238,6 +238,18 @@ func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) {
 			schema:   "public",
 			expected: "SELECT 'public.a', t, 'public.b';",
 		},
+		{
+			name:     "does not match schema as suffix of longer identifier",
+			sql:      "SELECT sales.total, s.items FROM s.orders;",
+			schema:   "s",
+			expected: "SELECT sales.total, items FROM orders;",
+		},
+		{
+			name:     "strips schema at start of string",
+			sql:      "public.t",
+			schema:   "public",
+			expected: "t",
+		},
 		{
 			name:     "handles apostrophe in line comment followed by schema-qualified identifier",
 			sql:      "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",