Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 147 additions & 72 deletions internal/postgres/desired_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"fmt"
"regexp"
"strings"
"sync"
"time"
)

Expand Down Expand Up @@ -92,7 +93,7 @@ func GenerateTempSchemaName() string {
// Only qualifications matching the specified schemaName are stripped.
// All other schema qualifications are preserved as intentional cross-schema references.
func stripSchemaQualifications(sql string, schemaName string) string {
if schemaName == "" {
if schemaName == "" || !strings.Contains(sql, schemaName) {
return sql
}

Expand All @@ -108,12 +109,107 @@ func stripSchemaQualifications(sql string, schemaName string) string {
// Preserve dollar-quoted content as-is
result.WriteString(seg.text)
} else {
result.WriteString(stripSchemaQualificationsFromText(seg.text, schemaName))
// Further split on single-quoted string literals to avoid stripping
// schema prefixes from inside string constants (Issue #371).
// e.g., has_scope('s.manage') must NOT become has_scope('manage')
result.WriteString(stripSchemaQualificationsPreservingStrings(seg.text, schemaName))
}
}
return result.String()
}

// stripSchemaQualificationsPreservingStrings splits text on single-quoted string
// literals and SQL comments, applies schema stripping only to non-string,
// non-comment parts, and reassembles.
//
// Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s')
// rather than doubled quotes ('it''s'). This parser only recognises the '' form.
// With E'content\'', a backslash-escaped quote may cause the parser to mistrack
// string boundaries, which can result in either:
// - false-negative: schema qualifiers after the string are not stripped, or
// - false-positive: schema prefixes inside the E-string are incorrectly stripped.
//
// Both cases change semantics only for E'...' strings, which are extremely rare
// in DDL schema definitions. The false-negative case preserves valid SQL; the
// false-positive case could alter string content but is unlikely in practice.
func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string {
var result strings.Builder
result.Grow(len(text))

// flushCode writes text[segStart:end] through schema stripping and advances segStart.
i := 0
segStart := 0

flushCode := func(end int) {
if end > segStart {
result.WriteString(stripSchemaQualificationsFromText(text[segStart:end], schemaName))
}
segStart = end
}
flushLiteral := func(end int) {
result.WriteString(text[segStart:end])
segStart = end
}

for i < len(text) {
ch := text[i]

// Start of single-quoted string literal
if ch == '\'' {
flushCode(i)
i++ // skip opening quote
for i < len(text) {
if text[i] == '\'' {
if i+1 < len(text) && text[i+1] == '\'' {
i += 2 // skip escaped ''
Comment on lines +153 to +164
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stripSchemaQualificationsPreservingStrings does not handle Postgres escape-string literals (E'...') with backslash-escaped quotes. With input like E'it\'s public.test' FROM public.t;, the scanner will treat the \' as a string terminator, causing the later ' to be treated as a new (unterminated) string. This can produce invalid SQL and/or skip stripping after the string. Consider detecting E-strings and treating a quote as escaped when preceded by an odd number of backslashes (or avoiding splitting on ' when preceded by E and then using an E-string-aware scan).

Suggested change
for i < len(text) {
ch := text[i]
// Start of single-quoted string literal
if ch == '\'' {
flushCode(i)
i++ // skip opening quote
for i < len(text) {
if text[i] == '\'' {
if i+1 < len(text) && text[i+1] == '\'' {
i += 2 // skip escaped ''
isIdentChar := func(b byte) bool {
return (b >= 'a' && b <= 'z') ||
(b >= 'A' && b <= 'Z') ||
(b >= '0' && b <= '9') ||
b == '_'
}
for i < len(text) {
ch := text[i]
// Start of single-quoted string literal (standard or Postgres E'...' escape string)
if ch == '\'' {
// Detect Postgres escape string literal prefix E'...' or e'...'
eString := false
literalStart := i
if i > 0 && (text[i-1] == 'E' || text[i-1] == 'e') {
// Ensure the preceding 'E' is not part of a larger identifier
if i-1 == 0 || !isIdentChar(text[i-2]) {
eString = true
literalStart = i - 1
}
}
flushCode(literalStart)
i++ // skip opening quote
for i < len(text) {
if text[i] == '\'' {
if eString {
// In an E-string, a quote is escaped if preceded by an odd number of backslashes.
backslashes := 0
j := i - 1
for j >= 0 && text[j] == '\\' {
backslashes++
j--
}
if backslashes%2 == 1 {
// Escaped quote, keep scanning literal.
i++
continue
}
}
if i+1 < len(text) && text[i+1] == '\'' {
i += 2 // skip escaped '' (standard SQL escaping)

Copilot uses AI. Check for mistakes.
} else {
i++ // skip closing quote
break
}
} else {
i++
Comment on lines +135 to +170
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

stripSchemaQualificationsPreservingStrings toggles inString on every single quote without accounting for SQL comments (-- ... / /* ... */). A single apostrophe inside a comment (e.g., -- don't strip) can cause the rest of the file to be treated as an unterminated string, skipping schema stripping for subsequent statements and potentially executing schema-qualified DDL against the real schema instead of the temp schema. Consider extending the scanner to recognize and skip comment regions (or strip comments before scanning), and add a unit test that includes an apostrophe in a comment before a public.-qualified reference.

Copilot uses AI. Check for mistakes.
}
}
flushLiteral(i)
continue
}

// Start of line comment (--)
if ch == '-' && i+1 < len(text) && text[i+1] == '-' {
flushCode(i)
i += 2
for i < len(text) && text[i] != '\n' {
i++
}
if i < len(text) {
i++ // skip the newline
}
flushLiteral(i)
continue
}

// Start of block comment (/* ... */)
if ch == '/' && i+1 < len(text) && text[i+1] == '*' {
flushCode(i)
i += 2
for i < len(text) {
if text[i] == '*' && i+1 < len(text) && text[i+1] == '/' {
i += 2
break
}
i++
}
flushLiteral(i)
continue
}

i++
}
// Remaining text is code
flushCode(i)
return result.String()
}

// dollarQuotedSegment represents a segment of SQL text, either inside or outside a dollar-quoted block.
type dollarQuotedSegment struct {
text string
Expand Down Expand Up @@ -165,82 +261,61 @@ func splitDollarQuotedSegments(sql string) []dollarQuotedSegment {
return segments
}

// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
func stripSchemaQualificationsFromText(text string, schemaName string) string {
// Escape the schema name for use in regex
escapedSchema := regexp.QuoteMeta(schemaName)

// Pattern matches schema qualification and captures the object name
// We need to handle 4 cases:
// 1. unquoted_schema.unquoted_object -> unquoted_object
// 2. unquoted_schema."quoted_object" -> "quoted_object"
// 3. "quoted_schema".unquoted_object -> unquoted_object
// 4. "quoted_schema"."quoted_object" -> "quoted_object"
//
// Key: The dot must be outside quotes (a schema.object separator, not part of an identifier)
// Important: For unquoted schema patterns, we must ensure the schema name isn't inside a quoted identifier
// Example: Don't match 'public' in CREATE INDEX "public.idx" where the whole thing is a quoted identifier

// Pattern 1: quoted schema + dot + quoted object: "schema"."object"
// Example: "public"."table" -> "table"
pattern1 := fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)
re1 := regexp.MustCompile(pattern1)
// schemaRegexes holds compiled regexes for a specific schema name, avoiding
// recompilation on every call to stripSchemaQualificationsFromText.
type schemaRegexes struct {
re1 *regexp.Regexp // "schema"."object"
re2 *regexp.Regexp // "schema".object
re3 *regexp.Regexp // schema."object"
re4 *regexp.Regexp // schema.object
}

// Pattern 2: quoted schema + dot + unquoted object: "schema".object
// Example: "public".table -> table
pattern2 := fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
re2 := regexp.MustCompile(pattern2)
var (
schemaRegexCache = make(map[string]*schemaRegexes)
schemaRegexCacheMu sync.Mutex
)

// Pattern 3: unquoted schema + dot + quoted object: schema."object"
// Example: public."table" -> "table"
// Use negative lookbehind to ensure schema isn't preceded by a quote
// and negative lookahead to ensure the dot after schema isn't inside quotes
pattern3 := fmt.Sprintf(`(?:^|[^"])%s\.(\"[^"]+\")`, escapedSchema)
re3 := regexp.MustCompile(pattern3)
func getSchemaRegexes(schemaName string) *schemaRegexes {
schemaRegexCacheMu.Lock()
defer schemaRegexCacheMu.Unlock()
if cached, ok := schemaRegexCache[schemaName]; ok {
return cached
}
escapedSchema := regexp.QuoteMeta(schemaName)
// Patterns 1-2: quoted schema ("schema".object / "schema"."object")
// The leading " already prevents suffix matching.
// Patterns 3-4: unquoted schema (schema.object / schema."object")
// Use a capture group for the optional non-identifier prefix so we can
// preserve it in replacement without the match[0] ambiguity at ^.
// The character class [^a-zA-Z0-9_$"] ensures the schema name isn't a
// suffix of a longer identifier (e.g., schema "s" won't match "sales").
sr := &schemaRegexes{
re1: regexp.MustCompile(fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)),
re2: regexp.MustCompile(fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
re3: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.(\"[^"]+\")`, escapedSchema)),
re4: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
}
Comment on lines +293 to +297
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unquoted-schema regexes (re3/re4) can match the target schema name as a suffix of a longer identifier because the only prefix check is (?:^|[^"]). For short schema names (e.g., schemaName=s), a reference like sales.table can be partially matched and rewritten incorrectly (e.g., corrupting sales.table to saletable). Consider requiring a non-identifier boundary before the schema name (start or a char not in [a-zA-Z0-9_$"]) and preserving the delimiter via a capture group rather than consuming an arbitrary preceding character.

Copilot uses AI. Check for mistakes.
schemaRegexCache[schemaName] = sr
return sr
}

// Pattern 4: unquoted schema + dot + unquoted object: schema.object
// Example: public.table -> table
// Use negative lookbehind to ensure schema isn't preceded by a quote
pattern4 := fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
re4 := regexp.MustCompile(pattern4)
// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
// It handles 4 cases:
// 1. unquoted_schema.unquoted_object -> unquoted_object
// 2. unquoted_schema."quoted_object" -> "quoted_object"
// 3. "quoted_schema".unquoted_object -> unquoted_object
// 4. "quoted_schema"."quoted_object" -> "quoted_object"
func stripSchemaQualificationsFromText(text string, schemaName string) string {
sr := getSchemaRegexes(schemaName)

result := text
// Apply in order: quoted schema first to avoid double-matching
result = re1.ReplaceAllString(result, "$1")
result = re2.ReplaceAllString(result, "$1")
// For patterns 3 and 4, we need to preserve the character before the schema
result = re3.ReplaceAllStringFunc(result, func(match string) string {
// If match starts with a non-quote character, preserve it
if len(match) > 0 && match[0] != '"' {
// Extract the quote identifier (everything after schema.)
parts := strings.SplitN(match, ".", 2)
if len(parts) == 2 {
return string(match[0]) + parts[1]
}
}
// Otherwise just return the captured quoted identifier
parts := strings.SplitN(match, ".", 2)
if len(parts) == 2 {
return parts[1]
}
return match
})
result = re4.ReplaceAllStringFunc(result, func(match string) string {
// If match starts with a non-quote character, preserve it
if len(match) > 0 && match[0] != '"' {
// Extract the unquoted identifier (everything after schema.)
parts := strings.SplitN(match, ".", 2)
if len(parts) == 2 {
return string(match[0]) + parts[1]
}
}
// Otherwise just return the captured unquoted identifier
parts := strings.SplitN(match, ".", 2)
if len(parts) == 2 {
return parts[1]
}
return match
})
result = sr.re1.ReplaceAllString(result, "$1")
result = sr.re2.ReplaceAllString(result, "$1")
// For patterns 3 and 4, $1 is the prefix (boundary char or empty at ^),
// $2 is the object name — preserve the prefix and keep only the object.
result = sr.re3.ReplaceAllString(result, "${1}${2}")
result = sr.re4.ReplaceAllString(result, "${1}${2}")

return result
}
Expand Down
97 changes: 97 additions & 0 deletions internal/postgres/desired_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,100 @@ func TestReplaceSchemaInSearchPath(t *testing.T) {
})
}
}

func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) {
tests := []struct {
name string
sql string
schema string
expected string
}{
{
name: "strips schema from table reference",
sql: "CREATE TABLE public.items (id int);",
schema: "public",
expected: "CREATE TABLE items (id int);",
},
{
name: "preserves schema prefix inside single-quoted string",
sql: "CREATE POLICY p ON items USING (has_scope('public.manage'));",
schema: "public",
expected: "CREATE POLICY p ON items USING (has_scope('public.manage'));",
},
{
name: "preserves schema prefix inside string with short schema name",
sql: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
schema: "s",
expected: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
},
{
name: "strips schema from identifier but preserves string literal",
sql: "CREATE POLICY p ON s.items USING (auth.has_scope('s.manage'));",
schema: "s",
expected: "CREATE POLICY p ON items USING (auth.has_scope('s.manage'));",
},
{
name: "preserves escaped quotes in string literals",
sql: "SELECT 'it''s public.test' FROM public.t;",
schema: "public",
expected: "SELECT 'it''s public.test' FROM t;",
},
{
name: "handles multiple string literals",
sql: "SELECT 'public.a', public.t, 'public.b';",
schema: "public",
expected: "SELECT 'public.a', t, 'public.b';",
},
Copy link

Copilot AI Mar 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new unit tests cover basic string-literal preservation, but they don’t cover the failure mode where apostrophes appear inside SQL comments (e.g., -- don't ...) and interfere with the string-splitting state machine. Adding a test case with a comment containing a single quote followed by a schema-qualified identifier would help prevent regressions once comment-handling is fixed.

Suggested change
},
},
{
name: "handles apostrophe in comment followed by schema-qualified identifier",
sql: "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",
schema: "public",
expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;",
},

Copilot uses AI. Check for mistakes.
{
name: "does not match schema as suffix of longer identifier",
sql: "SELECT sales.total, s.items FROM s.orders;",
schema: "s",
expected: "SELECT sales.total, items FROM orders;",
},
{
name: "strips schema at start of string",
sql: "public.t",
schema: "public",
expected: "t",
},
{
name: "handles apostrophe in line comment followed by schema-qualified identifier",
sql: "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",
schema: "public",
expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;",
},
{
name: "handles block comment with apostrophe",
sql: "/* it's public.t */ DROP TABLE public.t;",
schema: "public",
expected: "/* it's public.t */ DROP TABLE t;",
},
{
name: "handles block comment without apostrophe",
sql: "/* drop public.t */ DROP TABLE public.t;",
schema: "public",
expected: "/* drop public.t */ DROP TABLE t;",
},
{
// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
// is not handled. The parser treats \' as ordinary char + string-closer,
// mistracking boundaries. Here it strips inside the string (wrong) and
// misses the identifier after (also wrong). Both are safe: the SQL remains
// valid, and the unstripped qualifier just means the object is looked up
// in the original schema. E'...' in DDL is extremely rare.
name: "E-string with backslash-escaped quote (known limitation)",
sql: "SELECT E'it\\'s public.test' FROM public.t;",
schema: "public",
expected: "SELECT E'it\\'s test' FROM public.t;",
Comment on lines +272 to +281
Copy link

Copilot AI Mar 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The “E-string with backslash-escaped quote” case assumes the output remains valid SQL (and expects a specific transformation), but the current splitter will treat \' as a string terminator and can end up producing an unterminated string literal / skipping subsequent stripping. Once E-string handling is fixed in stripSchemaQualificationsPreservingStrings, update this expected value to reflect the corrected behavior (and consider asserting validity/round-trip expectations for E-strings explicitly).

Suggested change
// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
// is not handled. The parser treats \' as ordinary char + string-closer,
// mistracking boundaries. Here it strips inside the string (wrong) and
// misses the identifier after (also wrong). Both are safe: the SQL remains
// valid, and the unstripped qualifier just means the object is looked up
// in the original schema. E'...' in DDL is extremely rare.
name: "E-string with backslash-escaped quote (known limitation)",
sql: "SELECT E'it\\'s public.test' FROM public.t;",
schema: "public",
expected: "SELECT E'it\\'s test' FROM public.t;",
// E'...' escape-string syntax with backslash-escaped quotes should be
// handled correctly: the string literal is preserved exactly, and schema
// qualifications are stripped only from identifiers outside of strings.
name: "E-string with backslash-escaped quote",
sql: "SELECT E'it\\'s public.test' FROM public.t;",
schema: "public",
expected: "SELECT E'it\\'s public.test' FROM t;",

Copilot uses AI. Check for mistakes.
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := stripSchemaQualifications(tt.sql, tt.schema)
if result != tt.expected {
t.Errorf("stripSchemaQualifications(%q, %q)\n got: %q\n want: %q", tt.sql, tt.schema, result, tt.expected)
}
})
}
}
12 changes: 11 additions & 1 deletion testdata/diff/create_policy/alter_policy_using/new.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
CREATE FUNCTION has_scope(p_scope text) RETURNS boolean
LANGUAGE sql STABLE AS $$ SELECT p_scope IS NOT NULL $$;

CREATE TABLE users (
id SERIAL PRIMARY KEY,
name VARCHAR(100) NOT NULL,
Expand All @@ -10,4 +13,11 @@ ALTER TABLE users ENABLE ROW LEVEL SECURITY;
CREATE POLICY user_tenant_isolation ON users
FOR ALL
TO PUBLIC
USING (tenant_id = 2);
USING (tenant_id = 2);

-- Policy with string literal containing schema prefix (Issue #371)
-- This must NOT produce a false-positive diff
CREATE POLICY scope_check ON users
FOR SELECT
TO PUBLIC
USING (has_scope('public.manage'));
Loading
Loading