-
Notifications
You must be signed in to change notification settings - Fork 4
feat: Add setup_statements parameter to execute_sql for session setup #96
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -102,6 +102,7 @@ def execute_sql_with_connection_json( | |
| audit_sql_comment="", | ||
| sql_cache_mode="cache_disabled", | ||
| return_variable_type="dataframe", | ||
| setup_statements=None, | ||
| ): | ||
| """ | ||
| Executes a SQL query using the given connection JSON (string). | ||
|
|
@@ -111,6 +112,11 @@ def execute_sql_with_connection_json( | |
| :param sql_alchemy_json: String containing JSON with the connection details. | ||
| Mandatory fields: url, params, param_style | ||
| :param sql_cache_mode: SQL caching setting for the query. Possible values: "cache_disabled", "always_write", "read_or_write" | ||
| :param setup_statements: Optional list of raw SQL statements to run on the | ||
| same connection right before *template*. Use for session setup such as | ||
| ``USE WAREHOUSE abc`` whose effect must be visible to the main query. | ||
| Statements are not Jinja-rendered, parameter-bound, or audit-commented; | ||
| they are executed in order via ``connection.exec_driver_sql``. | ||
| :return: Pandas dataframe with the result | ||
| """ | ||
|
|
||
|
|
@@ -221,6 +227,7 @@ class ExecuteSqlError(Exception): | |
| sql_cache_mode, | ||
| return_variable_type, | ||
| query_preview_source, | ||
| setup_statements=setup_statements, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -230,13 +237,15 @@ def execute_sql( | |
| audit_sql_comment="", | ||
| sql_cache_mode="cache_disabled", | ||
| return_variable_type="dataframe", | ||
| setup_statements=None, | ||
| ): | ||
| """ | ||
| Wrapper around execute_sql_with_connection_json which reads the connection JSON from | ||
| environment variable. | ||
| :param template: Templated SQL | ||
| :param sql_alchemy_json_env_var: Name of the environment variable containing the connection JSON | ||
| :param sql_cache_mode: SQL caching setting for the query. Possible values: "cache_disabled", "always_write", "read_or_write" | ||
| :param setup_statements: See ``execute_sql_with_connection_json``. | ||
| :return: Pandas dataframe with the result | ||
| """ | ||
|
|
||
|
|
@@ -260,6 +269,7 @@ class ExecuteSqlError(Exception): | |
| audit_sql_comment=audit_sql_comment, | ||
| sql_cache_mode=sql_cache_mode, | ||
| return_variable_type=return_variable_type, | ||
| setup_statements=setup_statements, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -406,9 +416,14 @@ def _execute_sql_with_caching( | |
| sql_cache_mode, | ||
| return_variable_type, | ||
| query_preview_source, | ||
| setup_statements=None, | ||
| ): | ||
| # duckdb SQL is not cached, so we can skip the logic below for duckdb | ||
| if requires_duckdb: | ||
| # DuckDB uses a process-wide singleton connection, so session state set | ||
| # by setup_statements naturally persists for the main query. | ||
| for stmt in setup_statements or []: | ||
| execute_duckdb_sql(stmt, {}) | ||
| dataframe = execute_duckdb_sql(query, bind_params) | ||
| # for Chained SQL we return the dataframe with the SQL source attached as DeepnoteQueryPreview object | ||
| if return_variable_type == "query_preview": | ||
|
|
@@ -447,6 +462,7 @@ def _execute_sql_with_caching( | |
| cache_upload_url, | ||
| return_variable_type, | ||
| query_preview_source, # The original query before any transformations such as appending a LIMIT clause | ||
| setup_statements=setup_statements, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -478,6 +494,7 @@ def _query_data_source( | |
| cache_upload_url, | ||
| return_variable_type, | ||
| query_preview_source, | ||
| setup_statements=None, | ||
| ): | ||
| sshEnabled = sql_alchemy_dict.get("ssh_options", {}).get("enabled", False) | ||
|
|
||
|
|
@@ -491,7 +508,9 @@ def _query_data_source( | |
| ) | ||
|
|
||
| try: | ||
| dataframe = _execute_sql_on_engine(engine, query, bind_params) | ||
| dataframe = _execute_sql_on_engine( | ||
| engine, query, bind_params, setup_statements=setup_statements | ||
| ) | ||
|
|
||
| if dataframe is None: | ||
| return None | ||
|
|
@@ -609,13 +628,19 @@ def _cancel_cursor(cursor: "DBAPICursor") -> None: | |
| pass # Best effort, ignore all errors | ||
|
|
||
|
|
||
| def _execute_sql_on_engine(engine, query, bind_params): | ||
| def _execute_sql_on_engine(engine, query, bind_params, setup_statements=None): | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Validate Line 669 accepts any iterable. If a caller passes a string, it runs one-character “statements” and fails with misleading SQL errors. Suggested fix def _execute_sql_on_engine(engine, query, bind_params, setup_statements=None):
@@
+ if setup_statements is None:
+ normalized_setup_statements: list[str] = []
+ elif isinstance(setup_statements, list) and all(
+ isinstance(stmt, str) for stmt in setup_statements
+ ):
+ normalized_setup_statements = setup_statements
+ else:
+ raise TypeError("setup_statements must be a list[str] or None")
+
with engine.begin() as connection:
@@
- for stmt in setup_statements or []:
+ for stmt in normalized_setup_statements:
connection.exec_driver_sql(stmt)Also applies to: 667-670 🧰 Tools🪛 Ruff (0.15.10)[warning] 631-631: Missing return type annotation for private function (ANN202) 🤖 Prompt for AI Agents |
||
| """Run *query* on *engine* and return a DataFrame. | ||
|
|
||
| Uses pandas.read_sql_query to execute the query with a SQLAlchemy connection. | ||
| For pandas 2.2+ and SQLAlchemy < 2.0, which requires a raw DB-API connection with a `.cursor()` attribute, | ||
| we use the underlying connection. | ||
|
|
||
| When *setup_statements* is provided, each statement is executed on the | ||
| same DBAPI connection right before the main query so any session state | ||
| it sets (e.g. Snowflake ``USE WAREHOUSE``) is in effect when the main | ||
| query runs. Setup statements are issued via ``connection.exec_driver_sql`` | ||
| and any failure aborts the main query. | ||
|
|
||
| On exceptions (including KeyboardInterrupt from cell cancellation), all cursors | ||
| created during execution are cancelled to stop running queries on the server. | ||
| """ | ||
|
|
@@ -639,6 +664,11 @@ def _execute_sql_on_engine(engine, query, bind_params): | |
| ) | ||
|
|
||
| with engine.begin() as connection: | ||
| # Run setup statements first on the same physical connection so any | ||
| # session state they set is visible to the main query below. | ||
| for stmt in setup_statements or []: | ||
| connection.exec_driver_sql(stmt) | ||
|
|
||
| # For pandas 2.2+ with SQLAlchemy < 2.0, use raw DBAPI connection | ||
| if needs_raw_connection: | ||
| tracking_connection = CursorTrackingDBAPIConnection(connection.connection) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🧩 Analysis chain
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 110
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 976
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 868
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 973
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 581
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 957
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 682
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 979
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 1272
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 928
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 1882
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 344
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 331
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 453
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 395
🏁 Script executed:
Repository: deepnote/deepnote-toolkit
Length of output: 1076
Add type hints to all function parameters and return annotations.
The five functions at lines 99, 234, 410, 490, and 631 lack any parameter or return type annotations. Replace
setup_statements=Nonewithsetup_statements: Optional[...]and add explicit return type annotations to each function. Per coding guidelines, useOptional[T]instead ofT = None, and include return types for all functions.🤖 Prompt for AI Agents