pgplex · tianzhou · Mar 26, 2026 · Mar 24, 2026 · Mar 26, 2026 · Mar 26, 2026
diff --git a/internal/postgres/desired_state.go b/internal/postgres/desired_state.go
@@ -9,6 +9,7 @@ import (
 	"fmt"
 	"regexp"
 	"strings"
+	"sync"
 	"time"
 )
 
@@ -92,7 +93,7 @@ func GenerateTempSchemaName() string {
 // Only qualifications matching the specified schemaName are stripped.
 // All other schema qualifications are preserved as intentional cross-schema references.
 func stripSchemaQualifications(sql string, schemaName string) string {
-	if schemaName == "" {
+	if schemaName == "" || !strings.Contains(sql, schemaName) {
 		return sql
 	}
 
@@ -108,12 +109,107 @@ func stripSchemaQualifications(sql string, schemaName string) string {
 			// Preserve dollar-quoted content as-is
 			result.WriteString(seg.text)
 		} else {
-			result.WriteString(stripSchemaQualificationsFromText(seg.text, schemaName))
+			// Further split on single-quoted string literals to avoid stripping
+			// schema prefixes from inside string constants (Issue #371).
+			// e.g., has_scope('s.manage') must NOT become has_scope('manage')
+			result.WriteString(stripSchemaQualificationsPreservingStrings(seg.text, schemaName))
 		}
 	}
 	return result.String()
 }
 
+// stripSchemaQualificationsPreservingStrings splits text on single-quoted string
+// literals and SQL comments, applies schema stripping only to non-string,
+// non-comment parts, and reassembles.
+//
+// Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s')
+// rather than doubled quotes ('it''s'). This parser only recognises the '' form.
+// With E'content\'', a backslash-escaped quote may cause the parser to mistrack
+// string boundaries, which can result in either:
+//   - false-negative: schema qualifiers after the string are not stripped, or
+//   - false-positive: schema prefixes inside the E-string are incorrectly stripped.
+//
+// Both cases change semantics only for E'...' strings, which are extremely rare
+// in DDL schema definitions. The false-negative case preserves valid SQL; the
+// false-positive case could alter string content but is unlikely in practice.
+func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string {
+	var result strings.Builder
+	result.Grow(len(text))
+
+	// flushCode writes text[segStart:end] through schema stripping and advances segStart.
+	i := 0
+	segStart := 0
+
+	flushCode := func(end int) {
+		if end > segStart {
+			result.WriteString(stripSchemaQualificationsFromText(text[segStart:end], schemaName))
+		}
+		segStart = end
+	}
+	flushLiteral := func(end int) {
+		result.WriteString(text[segStart:end])
+		segStart = end
+	}
+
+	for i < len(text) {
+		ch := text[i]
+
+		// Start of single-quoted string literal
+		if ch == '\'' {
+			flushCode(i)
+			i++ // skip opening quote
+			for i < len(text) {
+				if text[i] == '\'' {
+					if i+1 < len(text) && text[i+1] == '\'' {
+						i += 2 // skip escaped ''
-
-	for i < len(text) {
-		ch := text[i]
-
-		// Start of single-quoted string literal
-		if ch == '\'' {
-			flushCode(i)
-			i++ // skip opening quote
-			for i < len(text) {
-				if text[i] == '\'' {
-					if i+1 < len(text) && text[i+1] == '\'' {
-						i += 2 // skip escaped ''
+	isIdentChar := func(b byte) bool {
+		return (b >= 'a' && b <= 'z') ||
+			(b >= 'A' && b <= 'Z') ||
+			(b >= '0' && b <= '9') ||
+			b == '_'
+	}
+
+	for i < len(text) {
+		ch := text[i]
+
+		// Start of single-quoted string literal (standard or Postgres E'...' escape string)
+		if ch == '\'' {
+			// Detect Postgres escape string literal prefix E'...' or e'...'
+			eString := false
+			literalStart := i
+			if i > 0 && (text[i-1] == 'E' || text[i-1] == 'e') {
+				// Ensure the preceding 'E' is not part of a larger identifier
+				if i-1 == 0 || !isIdentChar(text[i-2]) {
+					eString = true
+					literalStart = i - 1
+				}
+			}
+
+			flushCode(literalStart)
+			i++ // skip opening quote
+			for i < len(text) {
+				if text[i] == '\'' {
+					if eString {
+						// In an E-string, a quote is escaped if preceded by an odd number of backslashes.
+						backslashes := 0
+						j := i - 1
+						for j >= 0 && text[j] == '\\' {
+							backslashes++
+							j--
+						}
+						if backslashes%2 == 1 {
+							// Escaped quote, keep scanning literal.
+							i++
+							continue
+						}
+					}
+					if i+1 < len(text) && text[i+1] == '\'' {
+						i += 2 // skip escaped '' (standard SQL escaping)
-
-	for i < len(text) {
-		ch := text[i]
-
-		// Start of single-quoted string literal
-		if ch == '\'' {
-			flushCode(i)
-			i++ // skip opening quote
-			for i < len(text) {
-				if text[i] == '\'' {
-					if i+1 < len(text) && text[i+1] == '\'' {
-						i += 2 // skip escaped ''
+	isIdentChar := func(b byte) bool {
+		return (b >= 'a' && b <= 'z') ||
+			(b >= 'A' && b <= 'Z') ||
+			(b >= '0' && b <= '9') ||
+			b == '_'
+	}
+
+	for i < len(text) {
+		ch := text[i]
+
+		// Start of single-quoted string literal (standard or Postgres E'...' escape string)
+		if ch == '\'' {
+			// Detect Postgres escape string literal prefix E'...' or e'...'
+			eString := false
+			literalStart := i
+			if i > 0 && (text[i-1] == 'E' || text[i-1] == 'e') {
+				// Ensure the preceding 'E' is not part of a larger identifier
+				if i-1 == 0 || !isIdentChar(text[i-2]) {
+					eString = true
+					literalStart = i - 1
+				}
+			}
+
+			flushCode(literalStart)
+			i++ // skip opening quote
+			for i < len(text) {
+				if text[i] == '\'' {
+					if eString {
+						// In an E-string, a quote is escaped if preceded by an odd number of backslashes.
+						backslashes := 0
+						j := i - 1
+						for j >= 0 && text[j] == '\\' {
+							backslashes++
+							j--
+						}
+						if backslashes%2 == 1 {
+							// Escaped quote, keep scanning literal.
+							i++
+							continue
+						}
+					}
+					if i+1 < len(text) && text[i+1] == '\'' {
+						i += 2 // skip escaped '' (standard SQL escaping)
+					} else {
+						i++ // skip closing quote
+						break
+					}
+				} else {
+					i++
+				}
+			}
+			flushLiteral(i)
+			continue
+		}
+
+		// Start of line comment (--)
+		if ch == '-' && i+1 < len(text) && text[i+1] == '-' {
+			flushCode(i)
+			i += 2
+			for i < len(text) && text[i] != '\n' {
+				i++
+			}
+			if i < len(text) {
+				i++ // skip the newline
+			}
+			flushLiteral(i)
+			continue
+		}
+
+		// Start of block comment (/* ... */)
+		if ch == '/' && i+1 < len(text) && text[i+1] == '*' {
+			flushCode(i)
+			i += 2
+			for i < len(text) {
+				if text[i] == '*' && i+1 < len(text) && text[i+1] == '/' {
+					i += 2
+					break
+				}
+				i++
+			}
+			flushLiteral(i)
+			continue
+		}
+
+		i++
+	}
+	// Remaining text is code
+	flushCode(i)
+	return result.String()
+}
+
 // dollarQuotedSegment represents a segment of SQL text, either inside or outside a dollar-quoted block.
 type dollarQuotedSegment struct {
 	text   string
@@ -165,82 +261,61 @@ func splitDollarQuotedSegments(sql string) []dollarQuotedSegment {
 	return segments
 }
 
-// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
-func stripSchemaQualificationsFromText(text string, schemaName string) string {
-	// Escape the schema name for use in regex
-	escapedSchema := regexp.QuoteMeta(schemaName)
-
-	// Pattern matches schema qualification and captures the object name
-	// We need to handle 4 cases:
-	// 1. unquoted_schema.unquoted_object  -> unquoted_object
-	// 2. unquoted_schema."quoted_object"  -> "quoted_object"
-	// 3. "quoted_schema".unquoted_object  -> unquoted_object
-	// 4. "quoted_schema"."quoted_object"  -> "quoted_object"
-	//
-	// Key: The dot must be outside quotes (a schema.object separator, not part of an identifier)
-	// Important: For unquoted schema patterns, we must ensure the schema name isn't inside a quoted identifier
-	// Example: Don't match 'public' in CREATE INDEX "public.idx" where the whole thing is a quoted identifier
-
-	// Pattern 1: quoted schema + dot + quoted object: "schema"."object"
-	// Example: "public"."table" -> "table"
-	pattern1 := fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)
-	re1 := regexp.MustCompile(pattern1)
+// schemaRegexes holds compiled regexes for a specific schema name, avoiding
+// recompilation on every call to stripSchemaQualificationsFromText.
+type schemaRegexes struct {
+	re1 *regexp.Regexp // "schema"."object"
+	re2 *regexp.Regexp // "schema".object
+	re3 *regexp.Regexp // schema."object"
+	re4 *regexp.Regexp // schema.object
+}
 
-	// Pattern 2: quoted schema + dot + unquoted object: "schema".object
-	// Example: "public".table -> table
-	pattern2 := fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
-	re2 := regexp.MustCompile(pattern2)
+var (
+	schemaRegexCache   = make(map[string]*schemaRegexes)
+	schemaRegexCacheMu sync.Mutex
+)
 
-	// Pattern 3: unquoted schema + dot + quoted object: schema."object"
-	// Example: public."table" -> "table"
-	// Use negative lookbehind to ensure schema isn't preceded by a quote
-	// and negative lookahead to ensure the dot after schema isn't inside quotes
-	pattern3 := fmt.Sprintf(`(?:^|[^"])%s\.(\"[^"]+\")`, escapedSchema)
-	re3 := regexp.MustCompile(pattern3)
+func getSchemaRegexes(schemaName string) *schemaRegexes {
+	schemaRegexCacheMu.Lock()
+	defer schemaRegexCacheMu.Unlock()
+	if cached, ok := schemaRegexCache[schemaName]; ok {
+		return cached
+	}
+	escapedSchema := regexp.QuoteMeta(schemaName)
+	// Patterns 1-2: quoted schema ("schema".object / "schema"."object")
+	// The leading " already prevents suffix matching.
+	// Patterns 3-4: unquoted schema (schema.object / schema."object")
+	// Use a capture group for the optional non-identifier prefix so we can
+	// preserve it in replacement without the match[0] ambiguity at ^.
+	// The character class [^a-zA-Z0-9_$"] ensures the schema name isn't a
+	// suffix of a longer identifier (e.g., schema "s" won't match "sales").
+	sr := &schemaRegexes{
+		re1: regexp.MustCompile(fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)),
+		re2: regexp.MustCompile(fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
+		re3: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.(\"[^"]+\")`, escapedSchema)),
+		re4: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
+	}
+	schemaRegexCache[schemaName] = sr
+	return sr
+}
 
-	// Pattern 4: unquoted schema + dot + unquoted object: schema.object
-	// Example: public.table -> table
-	// Use negative lookbehind to ensure schema isn't preceded by a quote
-	pattern4 := fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
-	re4 := regexp.MustCompile(pattern4)
+// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
+// It handles 4 cases:
+// 1. unquoted_schema.unquoted_object  -> unquoted_object
+// 2. unquoted_schema."quoted_object"  -> "quoted_object"
+// 3. "quoted_schema".unquoted_object  -> unquoted_object
+// 4. "quoted_schema"."quoted_object"  -> "quoted_object"
+func stripSchemaQualificationsFromText(text string, schemaName string) string {
+	sr := getSchemaRegexes(schemaName)
 
 	result := text
 	// Apply in order: quoted schema first to avoid double-matching
-	result = re1.ReplaceAllString(result, "$1")
-	result = re2.ReplaceAllString(result, "$1")
-	// For patterns 3 and 4, we need to preserve the character before the schema
-	result = re3.ReplaceAllStringFunc(result, func(match string) string {
-		// If match starts with a non-quote character, preserve it
-		if len(match) > 0 && match[0] != '"' {
-			// Extract the quote identifier (everything after schema.)
-			parts := strings.SplitN(match, ".", 2)
-			if len(parts) == 2 {
-				return string(match[0]) + parts[1]
-			}
-		}
-		// Otherwise just return the captured quoted identifier
-		parts := strings.SplitN(match, ".", 2)
-		if len(parts) == 2 {
-			return parts[1]
-		}
-		return match
-	})
-	result = re4.ReplaceAllStringFunc(result, func(match string) string {
-		// If match starts with a non-quote character, preserve it
-		if len(match) > 0 && match[0] != '"' {
-			// Extract the unquoted identifier (everything after schema.)
-			parts := strings.SplitN(match, ".", 2)
-			if len(parts) == 2 {
-				return string(match[0]) + parts[1]
-			}
-		}
-		// Otherwise just return the captured unquoted identifier
-		parts := strings.SplitN(match, ".", 2)
-		if len(parts) == 2 {
-			return parts[1]
-		}
-		return match
-	})
+	result = sr.re1.ReplaceAllString(result, "$1")
+	result = sr.re2.ReplaceAllString(result, "$1")
+	// For patterns 3 and 4, $1 is the prefix (boundary char or empty at ^),
+	// $2 is the object name — preserve the prefix and keep only the object.
+	result = sr.re3.ReplaceAllString(result, "${1}${2}")
+	result = sr.re4.ReplaceAllString(result, "${1}${2}")
 
 	return result
 }

diff --git a/internal/postgres/desired_state_test.go b/internal/postgres/desired_state_test.go
@@ -194,3 +194,100 @@ func TestReplaceSchemaInSearchPath(t *testing.T) {
 		})
 	}
 }
+
+func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) {
+	tests := []struct {
+		name     string
+		sql      string
+		schema   string
+		expected string
+	}{
+		{
+			name:     "strips schema from table reference",
+			sql:      "CREATE TABLE public.items (id int);",
+			schema:   "public",
+			expected: "CREATE TABLE items (id int);",
+		},
+		{
+			name:     "preserves schema prefix inside single-quoted string",
+			sql:      "CREATE POLICY p ON items USING (has_scope('public.manage'));",
+			schema:   "public",
+			expected: "CREATE POLICY p ON items USING (has_scope('public.manage'));",
+		},
+		{
+			name:     "preserves schema prefix inside string with short schema name",
+			sql:      "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
+			schema:   "s",
+			expected: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
+		},
+		{
+			name:     "strips schema from identifier but preserves string literal",
+			sql:      "CREATE POLICY p ON s.items USING (auth.has_scope('s.manage'));",
+			schema:   "s",
+			expected: "CREATE POLICY p ON items USING (auth.has_scope('s.manage'));",
+		},
+		{
+			name:     "preserves escaped quotes in string literals",
+			sql:      "SELECT 'it''s public.test' FROM public.t;",
+			schema:   "public",
+			expected: "SELECT 'it''s public.test' FROM t;",
+		},
+		{
+			name:     "handles multiple string literals",
+			sql:      "SELECT 'public.a', public.t, 'public.b';",
+			schema:   "public",
+			expected: "SELECT 'public.a', t, 'public.b';",
+		},
-		},
+		},
+		{
+			name: "handles apostrophe in comment followed by schema-qualified identifier",
+			sql:  "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",
+			schema:   "public",
+			expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;",
+		},
-		},
+		},
+		{
+			name: "handles apostrophe in comment followed by schema-qualified identifier",
+			sql:  "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",
+			schema:   "public",
+			expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;",
+		},
+		{
+			name:     "does not match schema as suffix of longer identifier",
+			sql:      "SELECT sales.total, s.items FROM s.orders;",
+			schema:   "s",
+			expected: "SELECT sales.total, items FROM orders;",
+		},
+		{
+			name:     "strips schema at start of string",
+			sql:      "public.t",
+			schema:   "public",
+			expected: "t",
+		},
+		{
+			name:     "handles apostrophe in line comment followed by schema-qualified identifier",
+			sql:      "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",
+			schema:   "public",
+			expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;",
+		},
+		{
+			name:     "handles block comment with apostrophe",
+			sql:      "/* it's public.t */ DROP TABLE public.t;",
+			schema:   "public",
+			expected: "/* it's public.t */ DROP TABLE t;",
+		},
+		{
+			name:     "handles block comment without apostrophe",
+			sql:      "/* drop public.t */ DROP TABLE public.t;",
+			schema:   "public",
+			expected: "/* drop public.t */ DROP TABLE t;",
+		},
+		{
+			// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
+			// is not handled. The parser treats \' as ordinary char + string-closer,
+			// mistracking boundaries. Here it strips inside the string (wrong) and
+			// misses the identifier after (also wrong). Both are safe: the SQL remains
+			// valid, and the unstripped qualifier just means the object is looked up
+			// in the original schema. E'...' in DDL is extremely rare.
+			name:     "E-string with backslash-escaped quote (known limitation)",
+			sql:      "SELECT E'it\\'s public.test' FROM public.t;",
+			schema:   "public",
+			expected: "SELECT E'it\\'s test' FROM public.t;",
-			// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
-			// is not handled. The parser treats \' as ordinary char + string-closer,
-			// mistracking boundaries. Here it strips inside the string (wrong) and
-			// misses the identifier after (also wrong). Both are safe: the SQL remains
-			// valid, and the unstripped qualifier just means the object is looked up
-			// in the original schema. E'...' in DDL is extremely rare.
-			name:     "E-string with backslash-escaped quote (known limitation)",
-			sql:      "SELECT E'it\\'s public.test' FROM public.t;",
-			schema:   "public",
-			expected: "SELECT E'it\\'s test' FROM public.t;",
+			// E'...' escape-string syntax with backslash-escaped quotes should be
+			// handled correctly: the string literal is preserved exactly, and schema
+			// qualifications are stripped only from identifiers outside of strings.
+			name:     "E-string with backslash-escaped quote",
+			sql:      "SELECT E'it\\'s public.test' FROM public.t;",
+			schema:   "public",
+			expected: "SELECT E'it\\'s public.test' FROM t;",
-			// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
-			// is not handled. The parser treats \' as ordinary char + string-closer,
-			// mistracking boundaries. Here it strips inside the string (wrong) and
-			// misses the identifier after (also wrong). Both are safe: the SQL remains
-			// valid, and the unstripped qualifier just means the object is looked up
-			// in the original schema. E'...' in DDL is extremely rare.
-			name:     "E-string with backslash-escaped quote (known limitation)",
-			sql:      "SELECT E'it\\'s public.test' FROM public.t;",
-			schema:   "public",
-			expected: "SELECT E'it\\'s test' FROM public.t;",
+			// E'...' escape-string syntax with backslash-escaped quotes should be
+			// handled correctly: the string literal is preserved exactly, and schema
+			// qualifications are stripped only from identifiers outside of strings.
+			name:     "E-string with backslash-escaped quote",
+			sql:      "SELECT E'it\\'s public.test' FROM public.t;",
+			schema:   "public",
+			expected: "SELECT E'it\\'s public.test' FROM t;",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := stripSchemaQualifications(tt.sql, tt.schema)
+			if result != tt.expected {
+				t.Errorf("stripSchemaQualifications(%q, %q)\n  got:  %q\n  want: %q", tt.sql, tt.schema, result, tt.expected)
+			}
+		})
+	}
+}
diff --git a/testdata/diff/create_policy/alter_policy_using/new.sql b/testdata/diff/create_policy/alter_policy_using/new.sql
@@ -1,3 +1,6 @@
+CREATE FUNCTION has_scope(p_scope text) RETURNS boolean
+LANGUAGE sql STABLE AS $$ SELECT p_scope IS NOT NULL $$;
+
 CREATE TABLE users (
     id SERIAL PRIMARY KEY,
     name VARCHAR(100) NOT NULL,
@@ -10,4 +13,11 @@ ALTER TABLE users ENABLE ROW LEVEL SECURITY;
 CREATE POLICY user_tenant_isolation ON users
     FOR ALL
     TO PUBLIC
-    USING (tenant_id = 2);
+    USING (tenant_id = 2);
+
+-- Policy with string literal containing schema prefix (Issue #371)
+-- This must NOT produce a false-positive diff
+CREATE POLICY scope_check ON users
+    FOR SELECT
+    TO PUBLIC
+    USING (has_scope('public.manage'));