-
Notifications
You must be signed in to change notification settings - Fork 37
fix: preserve string literals during schema qualification stripping (#371) #372
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
076989a
b1dcba8
75949fd
f729fc9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ import ( | |
| "fmt" | ||
| "regexp" | ||
| "strings" | ||
| "sync" | ||
| "time" | ||
| ) | ||
|
|
||
|
|
@@ -92,7 +93,7 @@ func GenerateTempSchemaName() string { | |
| // Only qualifications matching the specified schemaName are stripped. | ||
| // All other schema qualifications are preserved as intentional cross-schema references. | ||
| func stripSchemaQualifications(sql string, schemaName string) string { | ||
| if schemaName == "" { | ||
| if schemaName == "" || !strings.Contains(sql, schemaName) { | ||
| return sql | ||
| } | ||
|
|
||
|
|
@@ -108,12 +109,107 @@ func stripSchemaQualifications(sql string, schemaName string) string { | |
| // Preserve dollar-quoted content as-is | ||
| result.WriteString(seg.text) | ||
| } else { | ||
| result.WriteString(stripSchemaQualificationsFromText(seg.text, schemaName)) | ||
| // Further split on single-quoted string literals to avoid stripping | ||
| // schema prefixes from inside string constants (Issue #371). | ||
| // e.g., has_scope('s.manage') must NOT become has_scope('manage') | ||
| result.WriteString(stripSchemaQualificationsPreservingStrings(seg.text, schemaName)) | ||
| } | ||
| } | ||
| return result.String() | ||
| } | ||
|
|
||
| // stripSchemaQualificationsPreservingStrings splits text on single-quoted string | ||
| // literals and SQL comments, applies schema stripping only to non-string, | ||
| // non-comment parts, and reassembles. | ||
| // | ||
| // Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s') | ||
| // rather than doubled quotes ('it''s'). This parser only recognises the '' form. | ||
| // With E'content\'', a backslash-escaped quote may cause the parser to mistrack | ||
| // string boundaries, which can result in either: | ||
| // - false-negative: schema qualifiers after the string are not stripped, or | ||
| // - false-positive: schema prefixes inside the E-string are incorrectly stripped. | ||
| // | ||
| // Both cases change semantics only for E'...' strings, which are extremely rare | ||
| // in DDL schema definitions. The false-negative case preserves valid SQL; the | ||
| // false-positive case could alter string content but is unlikely in practice. | ||
| func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string { | ||
| var result strings.Builder | ||
| result.Grow(len(text)) | ||
|
|
||
| // flushCode writes text[segStart:end] through schema stripping and advances segStart. | ||
| i := 0 | ||
| segStart := 0 | ||
|
|
||
| flushCode := func(end int) { | ||
| if end > segStart { | ||
| result.WriteString(stripSchemaQualificationsFromText(text[segStart:end], schemaName)) | ||
| } | ||
| segStart = end | ||
| } | ||
| flushLiteral := func(end int) { | ||
| result.WriteString(text[segStart:end]) | ||
| segStart = end | ||
| } | ||
|
|
||
| for i < len(text) { | ||
| ch := text[i] | ||
|
|
||
| // Start of single-quoted string literal | ||
| if ch == '\'' { | ||
| flushCode(i) | ||
| i++ // skip opening quote | ||
| for i < len(text) { | ||
| if text[i] == '\'' { | ||
| if i+1 < len(text) && text[i+1] == '\'' { | ||
| i += 2 // skip escaped '' | ||
| } else { | ||
| i++ // skip closing quote | ||
| break | ||
| } | ||
| } else { | ||
| i++ | ||
|
Comment on lines
+135
to
+170
|
||
| } | ||
| } | ||
| flushLiteral(i) | ||
| continue | ||
| } | ||
|
|
||
| // Start of line comment (--) | ||
| if ch == '-' && i+1 < len(text) && text[i+1] == '-' { | ||
| flushCode(i) | ||
| i += 2 | ||
| for i < len(text) && text[i] != '\n' { | ||
| i++ | ||
| } | ||
| if i < len(text) { | ||
| i++ // skip the newline | ||
| } | ||
| flushLiteral(i) | ||
| continue | ||
| } | ||
|
|
||
| // Start of block comment (/* ... */) | ||
| if ch == '/' && i+1 < len(text) && text[i+1] == '*' { | ||
| flushCode(i) | ||
| i += 2 | ||
| for i < len(text) { | ||
| if text[i] == '*' && i+1 < len(text) && text[i+1] == '/' { | ||
| i += 2 | ||
| break | ||
| } | ||
| i++ | ||
| } | ||
| flushLiteral(i) | ||
| continue | ||
| } | ||
|
|
||
| i++ | ||
| } | ||
| // Remaining text is code | ||
| flushCode(i) | ||
| return result.String() | ||
| } | ||
|
|
||
| // dollarQuotedSegment represents a segment of SQL text, either inside or outside a dollar-quoted block. | ||
| type dollarQuotedSegment struct { | ||
| text string | ||
|
|
@@ -165,82 +261,61 @@ func splitDollarQuotedSegments(sql string) []dollarQuotedSegment { | |
| return segments | ||
| } | ||
|
|
||
| // stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment. | ||
| func stripSchemaQualificationsFromText(text string, schemaName string) string { | ||
| // Escape the schema name for use in regex | ||
| escapedSchema := regexp.QuoteMeta(schemaName) | ||
|
|
||
| // Pattern matches schema qualification and captures the object name | ||
| // We need to handle 4 cases: | ||
| // 1. unquoted_schema.unquoted_object -> unquoted_object | ||
| // 2. unquoted_schema."quoted_object" -> "quoted_object" | ||
| // 3. "quoted_schema".unquoted_object -> unquoted_object | ||
| // 4. "quoted_schema"."quoted_object" -> "quoted_object" | ||
| // | ||
| // Key: The dot must be outside quotes (a schema.object separator, not part of an identifier) | ||
| // Important: For unquoted schema patterns, we must ensure the schema name isn't inside a quoted identifier | ||
| // Example: Don't match 'public' in CREATE INDEX "public.idx" where the whole thing is a quoted identifier | ||
|
|
||
| // Pattern 1: quoted schema + dot + quoted object: "schema"."object" | ||
| // Example: "public"."table" -> "table" | ||
| pattern1 := fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema) | ||
| re1 := regexp.MustCompile(pattern1) | ||
| // schemaRegexes holds compiled regexes for a specific schema name, avoiding | ||
| // recompilation on every call to stripSchemaQualificationsFromText. | ||
| type schemaRegexes struct { | ||
| re1 *regexp.Regexp // "schema"."object" | ||
| re2 *regexp.Regexp // "schema".object | ||
| re3 *regexp.Regexp // schema."object" | ||
| re4 *regexp.Regexp // schema.object | ||
| } | ||
|
|
||
| // Pattern 2: quoted schema + dot + unquoted object: "schema".object | ||
| // Example: "public".table -> table | ||
| pattern2 := fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema) | ||
| re2 := regexp.MustCompile(pattern2) | ||
| var ( | ||
| schemaRegexCache = make(map[string]*schemaRegexes) | ||
| schemaRegexCacheMu sync.Mutex | ||
| ) | ||
|
|
||
| // Pattern 3: unquoted schema + dot + quoted object: schema."object" | ||
| // Example: public."table" -> "table" | ||
| // Use negative lookbehind to ensure schema isn't preceded by a quote | ||
| // and negative lookahead to ensure the dot after schema isn't inside quotes | ||
| pattern3 := fmt.Sprintf(`(?:^|[^"])%s\.(\"[^"]+\")`, escapedSchema) | ||
| re3 := regexp.MustCompile(pattern3) | ||
| func getSchemaRegexes(schemaName string) *schemaRegexes { | ||
| schemaRegexCacheMu.Lock() | ||
| defer schemaRegexCacheMu.Unlock() | ||
| if cached, ok := schemaRegexCache[schemaName]; ok { | ||
| return cached | ||
| } | ||
| escapedSchema := regexp.QuoteMeta(schemaName) | ||
| // Patterns 1-2: quoted schema ("schema".object / "schema"."object") | ||
| // The leading " already prevents suffix matching. | ||
| // Patterns 3-4: unquoted schema (schema.object / schema."object") | ||
| // Use a capture group for the optional non-identifier prefix so we can | ||
| // preserve it in replacement without the match[0] ambiguity at ^. | ||
| // The character class [^a-zA-Z0-9_$"] ensures the schema name isn't a | ||
| // suffix of a longer identifier (e.g., schema "s" won't match "sales"). | ||
| sr := &schemaRegexes{ | ||
| re1: regexp.MustCompile(fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)), | ||
| re2: regexp.MustCompile(fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)), | ||
| re3: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.(\"[^"]+\")`, escapedSchema)), | ||
| re4: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)), | ||
| } | ||
|
Comment on lines
+293
to
+297
|
||
| schemaRegexCache[schemaName] = sr | ||
| return sr | ||
| } | ||
|
|
||
| // Pattern 4: unquoted schema + dot + unquoted object: schema.object | ||
| // Example: public.table -> table | ||
| // Use negative lookbehind to ensure schema isn't preceded by a quote | ||
| pattern4 := fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema) | ||
| re4 := regexp.MustCompile(pattern4) | ||
| // stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment. | ||
| // It handles 4 cases: | ||
| // 1. unquoted_schema.unquoted_object -> unquoted_object | ||
| // 2. unquoted_schema."quoted_object" -> "quoted_object" | ||
| // 3. "quoted_schema".unquoted_object -> unquoted_object | ||
| // 4. "quoted_schema"."quoted_object" -> "quoted_object" | ||
| func stripSchemaQualificationsFromText(text string, schemaName string) string { | ||
| sr := getSchemaRegexes(schemaName) | ||
|
|
||
| result := text | ||
| // Apply in order: quoted schema first to avoid double-matching | ||
| result = re1.ReplaceAllString(result, "$1") | ||
| result = re2.ReplaceAllString(result, "$1") | ||
| // For patterns 3 and 4, we need to preserve the character before the schema | ||
| result = re3.ReplaceAllStringFunc(result, func(match string) string { | ||
| // If match starts with a non-quote character, preserve it | ||
| if len(match) > 0 && match[0] != '"' { | ||
| // Extract the quote identifier (everything after schema.) | ||
| parts := strings.SplitN(match, ".", 2) | ||
| if len(parts) == 2 { | ||
| return string(match[0]) + parts[1] | ||
| } | ||
| } | ||
| // Otherwise just return the captured quoted identifier | ||
| parts := strings.SplitN(match, ".", 2) | ||
| if len(parts) == 2 { | ||
| return parts[1] | ||
| } | ||
| return match | ||
| }) | ||
| result = re4.ReplaceAllStringFunc(result, func(match string) string { | ||
| // If match starts with a non-quote character, preserve it | ||
| if len(match) > 0 && match[0] != '"' { | ||
| // Extract the unquoted identifier (everything after schema.) | ||
| parts := strings.SplitN(match, ".", 2) | ||
| if len(parts) == 2 { | ||
| return string(match[0]) + parts[1] | ||
| } | ||
| } | ||
| // Otherwise just return the captured unquoted identifier | ||
| parts := strings.SplitN(match, ".", 2) | ||
| if len(parts) == 2 { | ||
| return parts[1] | ||
| } | ||
| return match | ||
| }) | ||
| result = sr.re1.ReplaceAllString(result, "$1") | ||
| result = sr.re2.ReplaceAllString(result, "$1") | ||
| // For patterns 3 and 4, $1 is the prefix (boundary char or empty at ^), | ||
| // $2 is the object name — preserve the prefix and keep only the object. | ||
| result = sr.re3.ReplaceAllString(result, "${1}${2}") | ||
| result = sr.re4.ReplaceAllString(result, "${1}${2}") | ||
|
|
||
| return result | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -194,3 +194,100 @@ func TestReplaceSchemaInSearchPath(t *testing.T) { | |||||||||||||||||||||||||||||||||||
| }) | ||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||
| func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) { | ||||||||||||||||||||||||||||||||||||
| tests := []struct { | ||||||||||||||||||||||||||||||||||||
| name string | ||||||||||||||||||||||||||||||||||||
| sql string | ||||||||||||||||||||||||||||||||||||
| schema string | ||||||||||||||||||||||||||||||||||||
| expected string | ||||||||||||||||||||||||||||||||||||
| }{ | ||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||
| name: "strips schema from table reference", | ||||||||||||||||||||||||||||||||||||
| sql: "CREATE TABLE public.items (id int);", | ||||||||||||||||||||||||||||||||||||
| schema: "public", | ||||||||||||||||||||||||||||||||||||
| expected: "CREATE TABLE items (id int);", | ||||||||||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||
| name: "preserves schema prefix inside single-quoted string", | ||||||||||||||||||||||||||||||||||||
| sql: "CREATE POLICY p ON items USING (has_scope('public.manage'));", | ||||||||||||||||||||||||||||||||||||
| schema: "public", | ||||||||||||||||||||||||||||||||||||
| expected: "CREATE POLICY p ON items USING (has_scope('public.manage'));", | ||||||||||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||
| name: "preserves schema prefix inside string with short schema name", | ||||||||||||||||||||||||||||||||||||
| sql: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));", | ||||||||||||||||||||||||||||||||||||
| schema: "s", | ||||||||||||||||||||||||||||||||||||
| expected: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));", | ||||||||||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||
| name: "strips schema from identifier but preserves string literal", | ||||||||||||||||||||||||||||||||||||
| sql: "CREATE POLICY p ON s.items USING (auth.has_scope('s.manage'));", | ||||||||||||||||||||||||||||||||||||
| schema: "s", | ||||||||||||||||||||||||||||||||||||
| expected: "CREATE POLICY p ON items USING (auth.has_scope('s.manage'));", | ||||||||||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||
| name: "preserves escaped quotes in string literals", | ||||||||||||||||||||||||||||||||||||
| sql: "SELECT 'it''s public.test' FROM public.t;", | ||||||||||||||||||||||||||||||||||||
| schema: "public", | ||||||||||||||||||||||||||||||||||||
| expected: "SELECT 'it''s public.test' FROM t;", | ||||||||||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||||||||||
| { | ||||||||||||||||||||||||||||||||||||
| name: "handles multiple string literals", | ||||||||||||||||||||||||||||||||||||
| sql: "SELECT 'public.a', public.t, 'public.b';", | ||||||||||||||||||||||||||||||||||||
| schema: "public", | ||||||||||||||||||||||||||||||||||||
| expected: "SELECT 'public.a', t, 'public.b';", | ||||||||||||||||||||||||||||||||||||
| }, | ||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||
| }, | |
| }, | |
| { | |
| name: "handles apostrophe in comment followed by schema-qualified identifier", | |
| sql: "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;", | |
| schema: "public", | |
| expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;", | |
| }, |
Copilot
AI
Mar 26, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The “E-string with backslash-escaped quote” case assumes the output remains valid SQL (and expects a specific transformation), but the current splitter will treat \' as a string terminator and can end up producing an unterminated string literal / skipping subsequent stripping. Once E-string handling is fixed in stripSchemaQualificationsPreservingStrings, update this expected value to reflect the corrected behavior (and consider asserting validity/round-trip expectations for E-strings explicitly).
| // Known limitation: E'...' escape-string syntax with backslash-escaped quotes | |
| // is not handled. The parser treats \' as ordinary char + string-closer, | |
| // mistracking boundaries. Here it strips inside the string (wrong) and | |
| // misses the identifier after (also wrong). Both are safe: the SQL remains | |
| // valid, and the unstripped qualifier just means the object is looked up | |
| // in the original schema. E'...' in DDL is extremely rare. | |
| name: "E-string with backslash-escaped quote (known limitation)", | |
| sql: "SELECT E'it\\'s public.test' FROM public.t;", | |
| schema: "public", | |
| expected: "SELECT E'it\\'s test' FROM public.t;", | |
| // E'...' escape-string syntax with backslash-escaped quotes should be | |
| // handled correctly: the string literal is preserved exactly, and schema | |
| // qualifications are stripped only from identifiers outside of strings. | |
| name: "E-string with backslash-escaped quote", | |
| sql: "SELECT E'it\\'s public.test' FROM public.t;", | |
| schema: "public", | |
| expected: "SELECT E'it\\'s public.test' FROM t;", |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
stripSchemaQualificationsPreservingStringsdoes not handle Postgres escape-string literals (E'...') with backslash-escaped quotes. With input likeE'it\'s public.test' FROM public.t;, the scanner will treat the\'as a string terminator, causing the later'to be treated as a new (unterminated) string. This can produce invalid SQL and/or skip stripping after the string. Consider detecting E-strings and treating a quote as escaped when preceded by an odd number of backslashes (or avoiding splitting on'when preceded byEand then using an E-string-aware scan).