diff --git a/README.md b/README.md
index 4b6a5726..5383516d 100644
--- a/README.md
+++ b/README.md
@@ -84,14 +84,14 @@ As an alternative to the Docker Compose [installation with dk-installer (recomme
| Software | Tested Versions | Command to check version |
|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------|------------------------------|
-| [Python](https://www.python.org/downloads/)
- Most Linux and macOS systems have Python pre-installed.
- On Windows machines, you will need to download and install it. | 3.12 | `python3 --version` |
+| [Python](https://www.python.org/downloads/)
- Most Linux and macOS systems have Python pre-installed.
- On Windows machines, you will need to download and install it. | 3.11, 3.12, 3.13 | `python3 --version` |
| [PostgreSQL](https://www.postgresql.org/download/) | 14.1, 15.8, 16.4 | `psql --version`|
### Install the TestGen package
We recommend using a Python virtual environment to avoid any dependency conflicts with other applications installed on your machine. The [venv](https://docs.python.org/3/library/venv.html#creating-virtual-environments) module, which is part of the Python standard library, or other third-party tools, like [virtualenv](https://virtualenv.pypa.io/en/latest/) or [conda](https://docs.conda.io/en/latest/), can be used.
-Create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.12`). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference.
+Create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.11`). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference.
_On Linux/Mac_
```shell
diff --git a/docs/local_development.md b/docs/local_development.md
index cff533ec..95e2948f 100644
--- a/docs/local_development.md
+++ b/docs/local_development.md
@@ -23,7 +23,7 @@ git clone https://github.com/YOUR-USERNAME/dataops-testgen
We recommend using a Python virtual environment to avoid any dependency conflicts with other applications installed on your machine. The [venv](https://docs.python.org/3/library/venv.html#creating-virtual-environments) module, which is part of the Python standard library, or other third-party tools, like [virtualenv](https://virtualenv.pypa.io/en/latest/) or [conda](https://docs.conda.io/en/latest/), can be used.
-From the root of your local repository, create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.12`). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference.
+From the root of your local repository, create and activate a virtual environment with a TestGen-compatible version of Python (`>=3.11`; we develop on 3.13). The steps may vary based on your operating system and Python installation - the [Python packaging user guide](https://packaging.python.org/en/latest/tutorials/installing-packages/) is a useful reference.
_On Linux/Mac_
```shell
diff --git a/pyproject.toml b/pyproject.toml
index 43851025..63406242 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,11 +21,13 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 5 - Production/Stable",
"Operating System :: OS Independent",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: System :: Monitoring",
]
keywords = [ "dataops", "data", "quality", "testing", "database", "profiling" ]
-requires-python = ">=3.12"
+requires-python = ">=3.11"
dependencies = [
"PyYAML==6.0.3",
@@ -170,7 +172,7 @@ filterwarnings = [
# for an explanation of their functionality.
# WARNING: When changing mypy configurations, be sure to test them after removing your .mypy_cache
[tool.mypy]
-python_version = "3.13"
+python_version = "3.11"
check_untyped_defs = true
disallow_untyped_decorators = true
disallow_untyped_defs = true
@@ -211,7 +213,7 @@ exclude = [
]
[tool.ruff]
-target-version = "py310"
+target-version = "py311"
line-length = 120
indent-width = 4
include = [
diff --git a/testgen/common/database/flavor/bigquery_flavor_service.py b/testgen/common/database/flavor/bigquery_flavor_service.py
index 47150a73..5facf1e2 100644
--- a/testgen/common/database/flavor/bigquery_flavor_service.py
+++ b/testgen/common/database/flavor/bigquery_flavor_service.py
@@ -14,7 +14,8 @@ def get_connection_string_head(self, params: ResolvedConnectionParams) -> str:
return f"{self.url_scheme}://"
def get_connection_string_from_fields(self, params: ResolvedConnectionParams) -> str:
- return f"{self.url_scheme}://{params.service_account_key["project_id"] if params.service_account_key else ""}"
+ project_id = params.service_account_key["project_id"] if params.service_account_key else ""
+ return f"{self.url_scheme}://{project_id}"
def get_connect_args(self, params: ResolvedConnectionParams) -> dict: # noqa: ARG002
return {}
diff --git a/testgen/common/models/scores.py b/testgen/common/models/scores.py
index 6eee93c3..617f3fdb 100644
--- a/testgen/common/models/scores.py
+++ b/testgen/common/models/scores.py
@@ -522,7 +522,7 @@ def get_as_sql(
for _, field_filters in grouped_filters:
field_filters_sql = [f.get_as_sql(prefix=prefix, operand="AND") for f in field_filters]
filters_sql.append(
- f"({" OR ".join(field_filters_sql)})" if len(field_filters_sql) > 1 else field_filters_sql[0]
+ f"({' OR '.join(field_filters_sql)})" if len(field_filters_sql) > 1 else field_filters_sql[0]
)
else:
filters_sql = [ f.get_as_sql(prefix=prefix, operand="AND") for f in self.filters ]
diff --git a/testgen/ui/queries/profiling_queries.py b/testgen/ui/queries/profiling_queries.py
index a0cb7873..65de8ccc 100644
--- a/testgen/ui/queries/profiling_queries.py
+++ b/testgen/ui/queries/profiling_queries.py
@@ -180,8 +180,7 @@ def get_tables_by_condition(
include_active_tests: bool = False,
include_scores: bool = False,
) -> list[dict]:
- query = f"""
- {"""
+ active_tests_cte = """
WITH active_test_definitions AS (
SELECT
test_defs.table_groups_id,
@@ -201,30 +200,18 @@ def get_tables_by_condition(
test_defs.schema_name,
test_defs.table_name
)
- """ if include_active_tests else ""}
- SELECT
- table_chars.table_id::VARCHAR AS id,
- 'table' AS type,
- table_chars.table_name,
- table_chars.schema_name,
- table_chars.table_groups_id::VARCHAR AS table_group_id,
- -- Characteristics
- functional_table_type,
- approx_record_ct,
- table_chars.record_ct,
- table_chars.column_ct,
- add_date,
- last_refresh_date,
- drop_date,
- {f"""
+ """ if include_active_tests else ""
+
+ table_tags_select = f"""
-- Table Tags
table_chars.description,
table_chars.critical_data_element,
{", ".join([ f"table_chars.{tag}" for tag in TAG_FIELDS ])},
-- Table Groups Tags
{", ".join([ f"table_groups.{tag} AS table_group_{tag}" for tag in TAG_FIELDS if tag != "aggregation_level" ])},
- """ if include_tags else ""}
- {"""
+ """ if include_tags else ""
+
+ has_test_runs_select = """
-- Has Test Runs
EXISTS(
SELECT 1
@@ -232,16 +219,47 @@ def get_tables_by_condition(
WHERE table_groups_id = table_chars.table_groups_id
AND table_name = table_chars.table_name
) AS has_test_runs,
- """ if include_has_test_runs else ""}
- {"""
+ """ if include_has_test_runs else ""
+
+ active_tests_select = """
-- Test Definition Count
active_tests.count AS active_test_count,
- """ if include_active_tests else ""}
- {"""
+ """ if include_active_tests else ""
+
+ scores_select = """
-- Scores
table_chars.dq_score_profiling,
table_chars.dq_score_testing,
- """ if include_scores else ""}
+ """ if include_scores else ""
+
+ active_tests_join = """
+ LEFT JOIN active_test_definitions active_tests ON (
+ table_chars.table_groups_id = active_tests.table_groups_id
+ AND table_chars.schema_name = active_tests.schema_name
+ AND table_chars.table_name = active_tests.table_name
+ )
+ """ if include_active_tests else ""
+
+ query = f"""
+ {active_tests_cte}
+ SELECT
+ table_chars.table_id::VARCHAR AS id,
+ 'table' AS type,
+ table_chars.table_name,
+ table_chars.schema_name,
+ table_chars.table_groups_id::VARCHAR AS table_group_id,
+ -- Characteristics
+ functional_table_type,
+ approx_record_ct,
+ table_chars.record_ct,
+ table_chars.column_ct,
+ add_date,
+ last_refresh_date,
+ drop_date,
+ {table_tags_select}
+ {has_test_runs_select}
+ {active_tests_select}
+ {scores_select}
-- Profile Run
table_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id,
profiling_starttime AS profile_run_date,
@@ -255,13 +273,7 @@ def get_tables_by_condition(
LEFT JOIN table_groups ON (
table_chars.table_groups_id = table_groups.id
)
- {"""
- LEFT JOIN active_test_definitions active_tests ON (
- table_chars.table_groups_id = active_tests.table_groups_id
- AND table_chars.schema_name = active_tests.schema_name
- AND table_chars.table_name = active_tests.table_name
- )
- """ if include_active_tests else ""}
+ {active_tests_join}
{filter_condition}
ORDER BY LOWER(table_chars.table_name);
"""
@@ -347,24 +359,7 @@ def get_columns_by_condition(
include_active_tests: bool = False,
include_scores: bool = False,
) -> list[dict]:
- query = f"""
- SELECT
- column_chars.column_id::VARCHAR AS id,
- 'column' AS type,
- column_chars.column_name,
- column_chars.table_name,
- column_chars.schema_name,
- column_chars.table_groups_id::VARCHAR AS table_group_id,
- column_chars.ordinal_position,
- -- Characteristics
- column_chars.general_type,
- column_chars.db_data_type,
- column_chars.functional_data_type,
- datatype_suggestion,
- column_chars.add_date,
- column_chars.last_mod_date,
- column_chars.drop_date,
- {f"""
+ column_tags_select = f"""
-- Column Tags
column_chars.description,
column_chars.critical_data_element,
@@ -376,13 +371,9 @@ def get_columns_by_condition(
{", ".join([ f"table_chars.{tag} AS table_{tag}" for tag in TAG_FIELDS ])},
-- Table Groups Tags
{", ".join([ f"table_groups.{tag} AS table_group_{tag}" for tag in TAG_FIELDS if tag != "aggregation_level" ])},
- """ if include_tags else ""}
- -- Profile Run
- column_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id,
- run_date AS profile_run_date,
- TRUE AS is_latest_profile,
- query_error AS profiling_error,
- {"""
+ """ if include_tags else ""
+
+ has_test_runs_select = """
-- Has Test Runs
EXISTS(
SELECT 1
@@ -391,8 +382,9 @@ def get_columns_by_condition(
AND table_name = column_chars.table_name
AND column_names = column_chars.column_name
) AS has_test_runs,
- """ if include_has_test_runs else ""}
- {"""
+ """ if include_has_test_runs else ""
+
+ active_tests_select = """
-- Test Definition Count
(
SELECT COUNT(*)
@@ -402,12 +394,40 @@ def get_columns_by_condition(
AND column_name = column_chars.column_name
AND test_active = 'Y'
) AS active_test_count,
- """ if include_active_tests else ""}
- {"""
+ """ if include_active_tests else ""
+
+ scores_select = """
-- Scores
column_chars.dq_score_profiling,
column_chars.dq_score_testing,
- """ if include_scores else ""}
+ """ if include_scores else ""
+
+ query = f"""
+ SELECT
+ column_chars.column_id::VARCHAR AS id,
+ 'column' AS type,
+ column_chars.column_name,
+ column_chars.table_name,
+ column_chars.schema_name,
+ column_chars.table_groups_id::VARCHAR AS table_group_id,
+ column_chars.ordinal_position,
+ -- Characteristics
+ column_chars.general_type,
+ column_chars.db_data_type,
+ column_chars.functional_data_type,
+ datatype_suggestion,
+ column_chars.add_date,
+ column_chars.last_mod_date,
+ column_chars.drop_date,
+ {column_tags_select}
+ -- Profile Run
+ column_chars.last_complete_profile_run_id::VARCHAR AS profile_run_id,
+ run_date AS profile_run_date,
+ TRUE AS is_latest_profile,
+ query_error AS profiling_error,
+ {has_test_runs_select}
+ {active_tests_select}
+ {scores_select}
table_chars.approx_record_ct,
table_groups.project_code,
table_groups.connection_id::VARCHAR AS connection_id,
diff --git a/testgen/ui/scripts/patch_streamlit.py b/testgen/ui/scripts/patch_streamlit.py
index b9683003..37925626 100644
--- a/testgen/ui/scripts/patch_streamlit.py
+++ b/testgen/ui/scripts/patch_streamlit.py
@@ -79,7 +79,7 @@ def _create_tag(relative_filepath: str, html: BeautifulSoup) -> Tag | None:
),
}
- extension = f".{relative_filepath.split(".")[-1]}"
+ extension = f".{relative_filepath.split('.')[-1]}"
if extension in tag_for_ext:
return tag_for_ext[extension]()
return None
diff --git a/testgen/ui/views/data_catalog.py b/testgen/ui/views/data_catalog.py
index d89a4680..8fe773c3 100644
--- a/testgen/ui/views/data_catalog.py
+++ b/testgen/ui/views/data_catalog.py
@@ -251,12 +251,13 @@ def get_excel_report_data(
data["excluded_data_element"] = data["excluded_data_element"].apply(lambda val: "Yes" if val else None)
data["pii_flag"] = data["pii_flag"].apply(lambda val: "Yes" if val else None)
data["top_freq_values"] = data["top_freq_values"].apply(
- lambda val: "\n".join([f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ")])
+ lambda val: "\n".join([f"{part.split(' | ')[1]} | {part.split(' | ')[0]}" for part in val[2:].split("\n| ")])
if not pd.isna(val) and val != PII_REDACTED
else val
)
+    nl = "\n"  # Backslashes are not allowed inside f-string expressions before Python 3.12 (PEP 701)
data["top_patterns"] = data["top_patterns"].apply(
- lambda val: "".join([f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | "))])
+ lambda val: "".join([f"{part}{nl if index % 2 else ' | '}" for index, part in enumerate(val.split(" | "))])
if not pd.isna(val) and val != PII_REDACTED
else val
)
diff --git a/testgen/ui/views/dialogs/table_create_script_dialog.py b/testgen/ui/views/dialogs/table_create_script_dialog.py
index 1bcd386e..468a9754 100644
--- a/testgen/ui/views/dialogs/table_create_script_dialog.py
+++ b/testgen/ui/views/dialogs/table_create_script_dialog.py
@@ -30,7 +30,8 @@ def generate_create_script(table_name: str, data: list[dict]) -> str | None:
separator = " " if index == len(table_data) - 1 else ","
col_defs.append(f"{col['column_name']:<{max_name}} {(col_type):<{max_type}}{separator} {comment}")
+ col_defs_joined = "\n ".join(col_defs)
return f"""
CREATE TABLE {table_data[0]['schema_name']}.{table_data[0]['table_name']} (
- {"\n ".join(col_defs)}
+ {col_defs_joined}
);"""
diff --git a/testgen/ui/views/monitors_dashboard.py b/testgen/ui/views/monitors_dashboard.py
index e19145a1..bcaedde6 100644
--- a/testgen/ui/views/monitors_dashboard.py
+++ b/testgen/ui/views/monitors_dashboard.py
@@ -483,9 +483,10 @@ def _monitor_changes_by_tables_query(
{"OFFSET :offset" if offset else ""}
"""
+ escaped_table_name_filter = table_name_filter.replace("_", "\\_") if table_name_filter else None
params = {
"table_group_id": table_group_id,
- "table_name_filter": f"%{table_name_filter.replace('_', '\\_')}%" if table_name_filter else None,
+ "table_name_filter": f"%{escaped_table_name_filter}%" if escaped_table_name_filter else None,
"sort_field": sort_field,
"limit": limit,
"offset": offset,
diff --git a/testgen/ui/views/profiling_results.py b/testgen/ui/views/profiling_results.py
index a1529f95..62368aac 100644
--- a/testgen/ui/views/profiling_results.py
+++ b/testgen/ui/views/profiling_results.py
@@ -225,16 +225,27 @@ def get_excel_report_data(
type_map = {"A": "Alpha", "B": "Boolean", "D": "Datetime", "N": "Numeric"}
data["general_type"] = data["general_type"].apply(lambda val: type_map.get(val))
- data["top_freq_values"] = data["top_freq_values"].apply(
- lambda val: "\n".join([ f"{part.split(" | ")[1]} | {part.split(" | ")[0]}" for part in val[2:].split("\n| ") ])
- if val and val != PII_REDACTED
- else val
- )
- data["top_patterns"] = data["top_patterns"].apply(
- lambda val: "".join([ f"{part}{'\n' if index % 2 else ' | '}" for index, part in enumerate(val.split(" | ")) ])
- if val and val != PII_REDACTED
- else val
- )
+ def _format_top_freq_values(val):
+ if not val or val == PII_REDACTED:
+ return val
+ lines = []
+ for part in val[2:].split("\n| "):
+ left, right = part.split(" | ")
+ lines.append(f"{right} | {left}")
+ return "\n".join(lines)
+
+ def _format_top_patterns(val):
+ if not val or val == PII_REDACTED:
+ return val
+ parts = val.split(" | ")
+ formatted = []
+ for index, part in enumerate(parts):
+ separator = "\n" if index % 2 else " | "
+ formatted.append(f"{part}{separator}")
+ return "".join(formatted)
+
+ data["top_freq_values"] = data["top_freq_values"].apply(_format_top_freq_values)
+ data["top_patterns"] = data["top_patterns"].apply(_format_top_patterns)
columns = {
"table_name": {"header": "Table"},
diff --git a/testgen/ui/views/score_details.py b/testgen/ui/views/score_details.py
index edc8c33c..dd98f588 100644
--- a/testgen/ui/views/score_details.py
+++ b/testgen/ui/views/score_details.py
@@ -196,7 +196,7 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE:
update_progress(1.0)
buffer.seek(0)
- file_name = f"testgen_{issue["issue_type"]}_issue_report_{issue_id}_{timestamp}.pdf"
+ file_name = f"testgen_{issue['issue_type']}_issue_report_{issue_id}_{timestamp}.pdf"
return file_name, "application/pdf", buffer.read()
diff --git a/testgen/ui/views/score_explorer.py b/testgen/ui/views/score_explorer.py
index 1e9352ce..4e383cf0 100644
--- a/testgen/ui/views/score_explorer.py
+++ b/testgen/ui/views/score_explorer.py
@@ -265,7 +265,7 @@ def get_report_file_data(update_progress, issue) -> FILE_DATA_TYPE:
update_progress(1.0)
buffer.seek(0)
- file_name = f"testgen_{issue["issue_type"]}_issue_report_{issue_id}_{timestamp}.pdf"
+ file_name = f"testgen_{issue['issue_type']}_issue_report_{issue_id}_{timestamp}.pdf"
return file_name, "application/pdf", buffer.read()
@@ -282,15 +282,15 @@ def dialog_content() -> None:
column_filters = get_column_filters(project_code)
for column in column_filters:
- table_group_selected = (f"table_groups_name={column["table_group"]}",) in selected_filters
+ table_group_selected = (f"table_groups_name={column['table_group']}",) in selected_filters
table_selected = (
- f"table_groups_name={column["table_group"]}",
- f"table_name={column["table"]}",
+ f"table_groups_name={column['table_group']}",
+ f"table_name={column['table']}",
) in selected_filters
column_selected = (
- f"table_groups_name={column["table_group"]}",
- f"table_name={column["table"]}",
- f"column_name={column["name"]}",
+ f"table_groups_name={column['table_group']}",
+ f"table_name={column['table']}",
+ f"column_name={column['name']}",
) in selected_filters
column["selected"] = table_group_selected or table_selected or column_selected
diff --git a/testgen/ui/views/test_results.py b/testgen/ui/views/test_results.py
index ff8a1188..66c2aec0 100644
--- a/testgen/ui/views/test_results.py
+++ b/testgen/ui/views/test_results.py
@@ -811,7 +811,11 @@ def render_binary_chart(data: pd.DataFrame, **params: dict) -> None:
history["test_start"] = history["test_date"].apply(datetime.fromisoformat)
history["test_end"] = history["test_start"].apply(lambda start: start + timedelta(seconds=60))
history["formatted_test_date"] = history["test_date"].apply(lambda date_str: datetime.fromisoformat(date_str).strftime("%I:%M:%S %p, %d/%m/%Y"))
- history["result_measure_with_status"] = history.apply(lambda row: f"{legend_labels[str(int(row['result_measure'])) if not pd.isnull(row['result_measure']) else "0"]} ({row['result_status']})", axis=1)
+ def _format_measure_with_status(row):
+ measure_key = str(int(row["result_measure"])) if not pd.isnull(row["result_measure"]) else "0"
+ return f"{legend_labels[measure_key]} ({row['result_status']})"
+
+ history["result_measure_with_status"] = history.apply(_format_measure_with_status, axis=1)
fig = px.timeline(
history,