From 7625c0b22c52cbe63e70e2149ef69ef1d86994e4 Mon Sep 17 00:00:00 2001 From: Aarthy Adityan Date: Tue, 21 Apr 2026 14:14:17 -0400 Subject: [PATCH] refactor: make codebase compatible with Python 3.11 --- README.md | 4 +- docs/local_development.md | 2 +- pyproject.toml | 8 +- .../flavor/bigquery_flavor_service.py | 3 +- testgen/common/models/scores.py | 2 +- testgen/ui/queries/profiling_queries.py | 144 ++++++++++-------- testgen/ui/scripts/patch_streamlit.py | 2 +- testgen/ui/views/data_catalog.py | 5 +- .../dialogs/table_create_script_dialog.py | 3 +- testgen/ui/views/monitors_dashboard.py | 3 +- testgen/ui/views/profiling_results.py | 31 ++-- testgen/ui/views/score_details.py | 2 +- testgen/ui/views/score_explorer.py | 14 +- testgen/ui/views/test_results.py | 6 +- 14 files changed, 135 insertions(+), 94 deletions(-) diff --git a/README.md b/README.md index 4b6a5726..5383516d 100644 --- a/README.md +++ b/README.md @@ -84,14 +84,14 @@ As an alternative to the Docker Compose [installation with dk-installer (recomme | Software | Tested Versions | Command to check version | |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|------------------------------| -| [Python](https://www.python.org/downloads/)
- Most Linux and macOS systems have Python pre-installed.
- On Windows machines, you will need to download and install it. | 3.12 | `python3 --version` | +| [Python](https://www.python.org/downloads/)
- Most Linux and macOS systems have Python pre-installed.
- On Windows machines, you will need to download and install it. | 3.11, 3.12, 3.13 | `python3 --version` | | [PostgreSQL](https://www.postgresql.org/download/) | 14.1, 15.8, 16.4 | `psql --version`| ### Install the TestGen package We recommend using a Python virtual environment to avoid any dependency conflicts with other applications installed on your machine. The [venv](https://docs.python.org/3/library/venv.html#creating-virtual-environments) module, which is part of the Python standard library, or other third-party tools, like [virtualenv](https://virtualenv.pypa.io/en/latest/) or [conda](https://docs.conda.io/en/latest/), can be used. -Create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.12`). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference. +Create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.11`). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference. _On Linux/Mac_ ```shell diff --git a/docs/local_development.md b/docs/local_development.md index cff533ec..95e2948f 100644 --- a/docs/local_development.md +++ b/docs/local_development.md @@ -23,7 +23,7 @@ git clone https://github.com/YOUR-USERNAME/dataops-testgen We recommend using a Python virtual environment to avoid any dependency conflicts with other applications installed on your machine. The [venv](https://docs.python.org/3/library/venv.html#creating-virtual-environments) module, which is part of the Python standard library, or other third-party tools, like [virtualenv](https://virtualenv.pypa.io/en/latest/) or [conda](https://docs.conda.io/en/latest/), can be used. 
-From the root of your local repository, create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.12`). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference. +From the root of your local repository, create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.11`; we develop on 3.13). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference. _On Linux/Mac_ ```shell diff --git a/pyproject.toml b/pyproject.toml index 43851025..63406242 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,11 +21,13 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 5 - Production/Stable", "Operating System :: OS Independent", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Topic :: System :: Monitoring", ] keywords = [ "dataops", "data", "quality", "testing", "database", "profiling" ] -requires-python = ">=3.12" +requires-python = ">=3.11" dependencies = [ "PyYAML==6.0.3", @@ -170,7 +172,7 @@ filterwarnings = [ # for an explanation of their functionality. 
# WARNING: When changing mypy configurations, be sure to test them after removing your .mypy_cache [tool.mypy] -python_version = "3.13" +python_version = "3.11" check_untyped_defs = true disallow_untyped_decorators = true disallow_untyped_defs = true @@ -211,7 +213,7 @@ exclude = [ ] [tool.ruff] -target-version = "py310" +target-version = "py311" line-length = 120 indent-width = 4 include = [ diff --git a/testgen/common/database/flavor/bigquery_flavor_service.py b/testgen/common/database/flavor/bigquery_flavor_service.py index 47150a73..5facf1e2 100644 --- a/testgen/common/database/flavor/bigquery_flavor_service.py +++ b/testgen/common/database/flavor/bigquery_flavor_service.py @@ -14,7 +14,8 @@ def get_connection_string_head(self, params: ResolvedConnectionParams) -> str: return f"{self.url_scheme}://" def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str: - return f"{self.url_scheme}://{params.service_account_key["project_id"] if params.service_account_key else ""}" + project_id = params.service_account_key["project_id"] if params.service_account_key else "" + return f"{self.url_scheme}://{project_id}" def get_connect_args(self, params: ResolvedConnectionParams) -> dict: # noqa: ARG002 return {} diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py index 6eee93c3..617f3fdb 100644 --- a/testgen/common/models/scores.py +++ b/testgen/common/models/scores.py @@ -522,7 +522,7 @@ def get_as_sql( for _, field_filters in grouped_filters: field_filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in field_filters] filters_sql.append( - f"({" OR ".join(field_filters_sql)})" if len(field_filters_sql) > 1 else field_filters_sql[0] + f"({' OR '.join(field_filters_sql)})" if len(field_filters_sql) > 1 else field_filters_sql[0] ) else: filters_sql = [ f.get_as_sql(prefix=prefix, operand="AND") for f in self.filters ] diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py 
index a0cb7873..65de8ccc 100644 --- a/testgen/ui/queries/profiling_queries.py +++ b/testgen/ui/queries/profiling_queries.py @@ -180,8 +180,7 @@ def get_tables_by_condition( include_active_tests: bool = False, include_scores: bool = False, ) -> list[dict]: - query = f""" - {""" + active_tests_cte = """ WITH active_test_definitions AS ( SELECT test_defs.table_groups_id, @@ -201,30 +200,18 @@ def get_tables_by_condition( test_defs.schema_name, test_defs.table_name ) - """ if include_active_tests else ""} - SELECT - table_chars.table_id::VARCHAR AS id, - 'table' AS type, - table_chars.table_name, - table_chars.schema_name, - table_chars.table_groups_id::VARCHAR AS table_group_id, - -- Characteristics - functional_table_type, - approx_record_ct, - table_chars.record_ct, - table_chars.column_ct, - add_date, - last_refresh_date, - drop_date, - {f""" + """ if include_active_tests else "" + + table_tags_select = f""" -- Table Tags table_chars.description, table_chars.critical_data_element, {", ".join([ f"table_chars.{tag}" for tag in TAG_FIELDS ])}, -- Table Groups Tags {", ".join([ f"table_groups.{tag} AS table_group_{tag}" for tag in TAG_FIELDS if tag != "aggregation_level" ])}, - """ if include_tags else ""} - {""" + """ if include_tags else "" + + has_test_runs_select = """ -- Has Test Runs EXISTS( SELECT 1 @@ -232,16 +219,47 @@ def get_tables_by_condition( WHERE table_groups_id = table_chars.table_groups_id AND table_name = table_chars.table_name ) AS has_test_runs, - """ if include_has_test_runs else ""} - {""" + """ if include_has_test_runs else "" + + active_tests_select = """ -- Test Definition Count active_tests.count AS active_test_count, - """ if include_active_tests else ""} - {""" + """ if include_active_tests else "" + + scores_select = """ -- Scores table_chars.dq_score_profiling, table_chars.dq_score_testing, - """ if include_scores else ""} + """ if include_scores else "" + + active_tests_join = """ + LEFT JOIN active_test_definitions active_tests ON ( + 
table_chars.table_groups_id = active_tests.table_groups_id + AND table_chars.schema_name = active_tests.schema_name + AND table_chars.table_name = active_tests.table_name + ) + """ if include_active_tests else "" + + query = f""" + {active_tests_cte} + SELECT + table_chars.table_id::VARCHAR AS id, + 'table' AS type, + table_chars.table_name, + table_chars.schema_name, + table_chars.table_groups_id::VARCHAR AS table_group_id, + -- Characteristics + functional_table_type, + approx_record_ct, + table_chars.record_ct, + table_chars.column_ct, + add_date, + last_refresh_date, + drop_date, + {table_tags_select} + {has_test_runs_select} + {active_tests_select} + {scores_select} -- Profile Run table_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id, profiling_starttime AS profile_run_date, @@ -255,13 +273,7 @@ def get_tables_by_condition( LEFT JOIN table_groups ON ( table_chars.table_groups_id = table_groups.id ) - {""" - LEFT JOIN active_test_definitions active_tests ON ( - table_chars.table_groups_id = active_tests.table_groups_id - AND table_chars.schema_name = active_tests.schema_name - AND table_chars.table_name = active_tests.table_name - ) - """ if include_active_tests else ""} + {active_tests_join} {filter_condition} ORDER BY LOWER(table_chars.table_name); """ @@ -347,24 +359,7 @@ def get_columns_by_condition( include_active_tests: bool = False, include_scores: bool = False, ) -> list[dict]: - query = f""" - SELECT - column_chars.column_id::VARCHAR AS id, - 'column' AS type, - column_chars.column_name, - column_chars.table_name, - column_chars.schema_name, - column_chars.table_groups_id::VARCHAR AS table_group_id, - column_chars.ordinal_position, - -- Characteristics - column_chars.general_type, - column_chars.db_data_type, - column_chars.functional_data_type, - datatype_suggestion, - column_chars.add_date, - column_chars.last_mod_date, - column_chars.drop_date, - {f""" + column_tags_select = f""" -- Column Tags column_chars.description, 
column_chars.critical_data_element, @@ -376,13 +371,9 @@ def get_columns_by_condition( {", ".join([ f"table_chars.{tag} AS table_{tag}" for tag in TAG_FIELDS ])}, -- Table Groups Tags {", ".join([ f"table_groups.{tag} AS table_group_{tag}" for tag in TAG_FIELDS if tag != "aggregation_level" ])}, - """ if include_tags else ""} - -- Profile Run - column_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id, - run_date AS profile_run_date, - TRUE AS is_latest_profile, - query_error AS profiling_error, - {""" + """ if include_tags else "" + + has_test_runs_select = """ -- Has Test Runs EXISTS( SELECT 1 @@ -391,8 +382,9 @@ def get_columns_by_condition( AND table_name = column_chars.table_name AND column_names = column_chars.column_name ) AS has_test_runs, - """ if include_has_test_runs else ""} - {""" + """ if include_has_test_runs else "" + + active_tests_select = """ -- Test Definition Count ( SELECT COUNT(*) @@ -402,12 +394,40 @@ def get_columns_by_condition( AND column_name = column_chars.column_name AND test_active = 'Y' ) AS active_test_count, - """ if include_active_tests else ""} - {""" + """ if include_active_tests else "" + + scores_select = """ -- Scores column_chars.dq_score_profiling, column_chars.dq_score_testing, - """ if include_scores else ""} + """ if include_scores else "" + + query = f""" + SELECT + column_chars.column_id::VARCHAR AS id, + 'column' AS type, + column_chars.column_name, + column_chars.table_name, + column_chars.schema_name, + column_chars.table_groups_id::VARCHAR AS table_group_id, + column_chars.ordinal_position, + -- Characteristics + column_chars.general_type, + column_chars.db_data_type, + column_chars.functional_data_type, + datatype_suggestion, + column_chars.add_date, + column_chars.last_mod_date, + column_chars.drop_date, + {column_tags_select} + -- Profile Run + column_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id, + run_date AS profile_run_date, + TRUE AS is_latest_profile, + query_error AS 
profiling_error, + {has_test_runs_select} + {active_tests_select} + {scores_select} table_chars.approx_record_ct, table_groups.project_code, table_groups.connection_id::VARCHAR AS connection_id, diff --git a/testgen/ui/scripts/patch_streamlit.py b/testgen/ui/scripts/patch_streamlit.py index b9683003..37925626 100644 --- a/testgen/ui/scripts/patch_streamlit.py +++ b/testgen/ui/scripts/patch_streamlit.py @@ -79,7 +79,7 @@ def _create_tag(relative_filepath: str, html: BeautifulSoup) -> Tag | None: ), } - extension = f".{relative_filepath.split(".")[-1]}" + extension = f".{relative_filepath.split('.')[-1]}" if extension in tag_for_ext: return tag_for_ext[extension]() return None diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py index d89a4680..8fe773c3 100644 --- a/testgen/ui/views/data_catalog.py +++ b/testgen/ui/views/data_catalog.py @@ -251,12 +251,13 @@ def get_excel_report_data( data["excluded_data_element"] = data["excluded_data_element"].apply(lambda val: "Yes" if val else None) data["pii_flag"] = data["pii_flag"].apply(lambda val: "Yes" if val else None) data["top_freq_values"] = data["top_freq_values"].apply( - lambda val: "\n".join([f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ")]) + lambda val: "\n".join([f"{part.split(' | ')[1]} | {part.split(' | ')[0]}" for part in val[2:].split("\n| ")]) if not pd.isna(val) and val != PII_REDACTED else val ) + nl = "\n" # For Python 3.11 compatibility data["top_patterns"] = data["top_patterns"].apply( - lambda val: "".join([f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | "))]) + lambda val: "".join([f"{part}{nl if index % 2 else ' | '}" for index, part in enumerate(val.split(" | "))]) if not pd.isna(val) and val != PII_REDACTED else val ) diff --git a/testgen/ui/views/dialogs/table_create_script_dialog.py b/testgen/ui/views/dialogs/table_create_script_dialog.py index 1bcd386e..468a9754 100644 --- 
a/testgen/ui/views/dialogs/table_create_script_dialog.py +++ b/testgen/ui/views/dialogs/table_create_script_dialog.py @@ -30,7 +30,8 @@ def generate_create_script(table_name: str, data: list[dict]) -> str | None: separator = " " if index == len(table_data) - 1 else "," col_defs.append(f"{col['column_name']:<{max_name}} {(col_type):<{max_type}}{separator} {comment}") + col_defs_joined = "\n ".join(col_defs) return f""" CREATE TABLE {table_data[0]['schema_name']}.{table_data[0]['table_name']} ( - {"\n ".join(col_defs)} + {col_defs_joined} );""" diff --git a/testgen/ui/views/monitors_dashboard.py b/testgen/ui/views/monitors_dashboard.py index e19145a1..bcaedde6 100644 --- a/testgen/ui/views/monitors_dashboard.py +++ b/testgen/ui/views/monitors_dashboard.py @@ -483,9 +483,10 @@ def _monitor_changes_by_tables_query( {"OFFSET :offset" if offset else ""} """ + escaped_table_name_filter = table_name_filter.replace("_", "\\_") if table_name_filter else None params = { "table_group_id": table_group_id, - "table_name_filter": f"%{table_name_filter.replace('_', '\\_')}%" if table_name_filter else None, + "table_name_filter": f"%{escaped_table_name_filter}%" if escaped_table_name_filter else None, "sort_field": sort_field, "limit": limit, "offset": offset, diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py index a1529f95..62368aac 100644 --- a/testgen/ui/views/profiling_results.py +++ b/testgen/ui/views/profiling_results.py @@ -225,16 +225,27 @@ def get_excel_report_data( type_map = {"A": "Alpha", "B": "Boolean", "D": "Datetime", "N": "Numeric"} data["general_type"] = data["general_type"].apply(lambda val: type_map.get(val)) - data["top_freq_values"] = data["top_freq_values"].apply( - lambda val: "\n".join([ f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ") ]) - if val and val != PII_REDACTED - else val - ) - data["top_patterns"] = data["top_patterns"].apply( - lambda val: "".join([ f"{part}{'\n' if 
index % 2 else ' | '}" for index, part in enumerate(val.split(" | ")) ]) - if val and val != PII_REDACTED - else val - ) + def _format_top_freq_values(val): + if not val or val == PII_REDACTED: + return val + lines = [] + for part in val[2:].split("\n| "): + pieces = part.split(" | ") + lines.append(f"{pieces[1]} | {pieces[0]}") + return "\n".join(lines) + + def _format_top_patterns(val): + if not val or val == PII_REDACTED: + return val + parts = val.split(" | ") + formatted = [] + for index, part in enumerate(parts): + separator = "\n" if index % 2 else " | " + formatted.append(f"{part}{separator}") + return "".join(formatted) + + data["top_freq_values"] = data["top_freq_values"].apply(_format_top_freq_values) + data["top_patterns"] = data["top_patterns"].apply(_format_top_patterns) columns = { "table_name": {"header": "Table"}, diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py index edc8c33c..dd98f588 100644 --- a/testgen/ui/views/score_details.py +++ b/testgen/ui/views/score_details.py @@ -196,7 +196,7 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE: update_progress(1.0) buffer.seek(0) - file_name = f"testgen_{issue["issue_type"]}_issue_report_{issue_id}_{timestamp}.pdf" + file_name = f"testgen_{issue['issue_type']}_issue_report_{issue_id}_{timestamp}.pdf" return file_name, "application/pdf", buffer.read() diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py index 1e9352ce..4e383cf0 100644 --- a/testgen/ui/views/score_explorer.py +++ b/testgen/ui/views/score_explorer.py @@ -265,7 +265,7 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE: update_progress(1.0) buffer.seek(0) - file_name = f"testgen_{issue["issue_type"]}_issue_report_{issue_id}_{timestamp}.pdf" + file_name = f"testgen_{issue['issue_type']}_issue_report_{issue_id}_{timestamp}.pdf" return file_name, "application/pdf", buffer.read() @@ -282,15 +282,15 @@ def dialog_content() -> None: column_filters = 
get_column_filters(project_code) for column in column_filters: - table_group_selected = (f"table_groups_name={column["table_group"]}",) in selected_filters + table_group_selected = (f"table_groups_name={column['table_group']}",) in selected_filters table_selected = ( - f"table_groups_name={column["table_group"]}", - f"table_name={column["table"]}", + f"table_groups_name={column['table_group']}", + f"table_name={column['table']}", ) in selected_filters column_selected = ( - f"table_groups_name={column["table_group"]}", - f"table_name={column["table"]}", - f"column_name={column["name"]}", + f"table_groups_name={column['table_group']}", + f"table_name={column['table']}", + f"column_name={column['name']}", ) in selected_filters column["selected"] = table_group_selected or table_selected or column_selected diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py index ff8a1188..66c2aec0 100644 --- a/testgen/ui/views/test_results.py +++ b/testgen/ui/views/test_results.py @@ -811,7 +811,11 @@ def render_binary_chart(data: pd.DataFrame, **params: dict) -> None: history["test_start"] = history["test_date"].apply(datetime.fromisoformat) history["test_end"] = history["test_start"].apply(lambda start: start + timedelta(seconds=60)) history["formatted_test_date"] = history["test_date"].apply(lambda date_str: datetime.fromisoformat(date_str).strftime("%I:%M:%S %p, %d/%m/%Y")) - history["result_measure_with_status"] = history.apply(lambda row: f"{legend_labels[str(int(row['result_measure'])) if not pd.isnull(row['result_measure']) else "0"]} ({row['result_status']})", axis=1) + def _format_measure_with_status(row): + measure_key = str(int(row["result_measure"])) if not pd.isnull(row["result_measure"]) else "0" + return f"{legend_labels[measure_key]} ({row['result_status']})" + + history["result_measure_with_status"] = history.apply(_format_measure_with_status, axis=1) fig = px.timeline( history,