Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ source venv/bin/activate

_On Windows_
```powershell
py -3.12 -m venv venv
py -3.13 -m venv venv
venv\Scripts\activate
```

Expand Down
9 changes: 3 additions & 6 deletions deploy/install_linuxodbc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,16 @@ fi
openssl x509 -inform DER -in cert.crt -out /usr/local/share/ca-certificates/microsoft_tls_g2_ecc_ocsp_02.pem
update-ca-certificates

# Download the desired packages
# Download the ODBC driver (msodbcsql18) only — mssql-tools18 (sqlcmd, bcp, iusql)
# is not needed at runtime and triggers false-positive secret findings in security scans
curl -O https://download.microsoft.com/download/9dcab408-e0d4-4571-a81a-5a0951e3445f/msodbcsql18_18.6.1.1-1_$architecture.apk
curl -O https://download.microsoft.com/download/b60bb8b6-d398-4819-9950-2e30cf725fb0/mssql-tools18_18.6.1.1-1_$architecture.apk

# Verify signature, if 'gpg' is missing install it using 'apk add gnupg':
curl -O https://download.microsoft.com/download/9dcab408-e0d4-4571-a81a-5a0951e3445f/msodbcsql18_18.6.1.1-1_$architecture.sig
curl -O https://download.microsoft.com/download/b60bb8b6-d398-4819-9950-2e30cf725fb0/mssql-tools18_18.6.1.1-1_$architecture.sig

curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > microsoft.gpg
gpgv --keyring ./microsoft.gpg msodbcsql18_*.sig msodbcsql18_*.apk
gpgv --keyring ./microsoft.gpg mssql-tools18_*.sig mssql-tools18_*.apk

# Install the packages
# Install the ODBC driver
apk add --no-cache --allow-untrusted msodbcsql18_18.6.1.1-1_$architecture.apk
apk add --no-cache --allow-untrusted mssql-tools18_18.6.1.1-1_$architecture.apk
)
12 changes: 8 additions & 4 deletions deploy/testgen-base.dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.12-alpine3.23
FROM python:3.13-alpine3.23

ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8
Expand Down Expand Up @@ -47,12 +47,12 @@ RUN python3 -m pip install --no-cache-dir --upgrade pip==26.0
# We download the wheel for the correct arch, then extract it directly into site-packages
# (wheels are zip files). gcompat provides the glibc shim needed at runtime.
RUN ARCH=$(uname -m) && \
pip download --platform manylinux2014_${ARCH} --python-version 3.12 --only-binary :all: \
pip download --platform manylinux2014_${ARCH} --python-version 3.13 --only-binary :all: \
--no-deps -d /tmp/wheels hdbcli==2.25.31 && \
python3 -m zipfile -e /tmp/wheels/hdbcli-*.whl /dk/lib/python3.12/site-packages/ && \
python3 -m zipfile -e /tmp/wheels/hdbcli-*.whl /dk/lib/python3.13/site-packages/ && \
# Copy dist-info to system site-packages so pip sees hdbcli as installed during
# dependency resolution (sqlalchemy-hana transitively depends on hdbcli~=2.10)
cp -r /dk/lib/python3.12/site-packages/hdbcli-*.dist-info \
cp -r /dk/lib/python3.13/site-packages/hdbcli-*.dist-info \
"$(python3 -c 'import sysconfig; print(sysconfig.get_path("purelib"))')"/ && \
rm -rf /tmp/wheels

Expand All @@ -78,4 +78,8 @@ RUN apk del \
unixodbc-dev \
apache-arrow-dev

# Remove interactive ODBC tools — not needed at runtime, and iusql triggers
# false-positive secret detection in security scanners (SECRET-3010)
RUN rm -f /usr/bin/iusql /usr/bin/isql

RUN rm -rf /root/.cache/pip /tmp/dk/install_linuxodbc.sh
10 changes: 7 additions & 3 deletions deploy/testgen.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@ ARG TESTGEN_VERSION
ARG TESTGEN_DOCKER_HUB_REPO
ARG TESTGEN_SUPPORT_EMAIL

ENV PYTHONPATH=/dk/lib/python3.12/site-packages
ENV PYTHONPATH=/dk/lib/python3.13/site-packages
ENV PATH=$PATH:/dk/bin

RUN apk upgrade

# Remove interactive ODBC tools — not needed at runtime, and iusql triggers
# false-positive secret detection in security scanners (SECRET-3010)
RUN rm -f /usr/bin/iusql /usr/bin/isql

# Now install everything (hdbcli is pre-installed in the base image via manual wheel extraction)
COPY . /tmp/dk/
RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml /tmp/dk/testgen/pyproject.toml 2>/dev/null; \
Expand All @@ -20,7 +24,7 @@ RUN sed -i '/hdbcli/d' /tmp/dk/pyproject.toml /tmp/dk/testgen/pyproject.toml 2>/
# Generate third-party license notices from installed packages
RUN pip install --no-cache-dir pip-licenses \
&& SCRIPT=$(find /tmp/dk -name generate_third_party_notices.py | head -1) \
&& PYTHONPATH=/dk/lib/python3.12/site-packages python3 "$SCRIPT" --output /dk/THIRD-PARTY-NOTICES \
&& PYTHONPATH=/dk/lib/python3.13/site-packages python3 "$SCRIPT" --output /dk/THIRD-PARTY-NOTICES \
&& pip uninstall -y pip-licenses

RUN rm -Rf /tmp/dk /root/.cache/pip
Expand All @@ -31,7 +35,7 @@ RUN addgroup -S testgen && adduser -S testgen -G testgen

# Streamlit has to be able to write to these dirs
RUN mkdir /var/lib/testgen
RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.12/site-packages/streamlit/static /dk/lib/python3.12/site-packages/testgen/ui/components/frontend
RUN chown -R testgen:testgen /var/lib/testgen /dk/lib/python3.13/site-packages/streamlit/static /dk/lib/python3.13/site-packages/testgen/ui/components/frontend

ENV TESTGEN_VERSION=${TESTGEN_VERSION}
ENV TESTGEN_DOCKER_HUB_REPO=${TESTGEN_DOCKER_HUB_REPO}
Expand Down
4 changes: 2 additions & 2 deletions docs/local_development.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ From the root of your local repository, create and activate a virtual environmen

_On Linux/Mac_
```shell
python3.12 -m venv venv
python3.13 -m venv venv
source venv/bin/activate
```

_On Windows_
```powershell
py -3.12 -m venv venv
py -3.13 -m venv venv
venv\Scripts\activate
```

Expand Down
29 changes: 17 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,31 @@ classifiers = [
"License :: OSI Approved :: Apache Software License",
"Development Status :: 5 - Production/Stable",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: System :: Monitoring",
]
keywords = [ "dataops", "data", "quality", "testing", "database", "profiling" ]
requires-python = ">=3.12"

dependencies = [
"PyYAML==6.0.1",
"click==8.1.3",
"sqlalchemy==1.4.46",
"databricks-sql-connector==2.9.3",
"PyYAML==6.0.3",
"click==8.3.1",
"sqlalchemy==2.0.48",
"databricks-sql-connector==4.2.5",
"databricks-sqlalchemy==2.0.9",
"databricks-sdk>=0.20.0",
"snowflake-sqlalchemy==1.9.0",
"sqlalchemy-bigquery==1.14.1",
"sqlalchemy-bigquery==1.16.0",
"oracledb==3.4.0",
"hdbcli==2.25.31",
"sqlalchemy-hana==2.1.0",
"pyodbc==5.0.0",
"psycopg2-binary==2.9.9",
"sqlalchemy-hana==4.4.0",
"pyodbc==5.2.0",
"psycopg2-binary==2.9.11",
"pycryptodome==3.21",
"prettytable==3.7.0",
"requests_extensions==1.1.3",
"numpy==1.26.4",
"pandas==2.1.4",
"numpy==2.1.3",
"pandas==2.2.3",
"streamlit==1.55.0",
"streamlit-extras==0.3.0",
"streamlit-aggrid==0.3.4.post3",
Expand Down Expand Up @@ -87,6 +88,10 @@ dependencies = [
]

[project.optional-dependencies]
standalone = [
"pixeltable-pgserver>=0.5.1",
]

dev = [
"invoke==2.2.0",
"ruff==0.4.1",
Expand Down Expand Up @@ -165,7 +170,7 @@ filterwarnings = [
# for an explanation of their functionality.
# WARNING: When changing mypy configurations, be sure to test them after removing your .mypy_cache
[tool.mypy]
python_version = "3.12"
python_version = "3.13"
check_untyped_defs = true
disallow_untyped_decorators = true
disallow_untyped_defs = true
Expand Down
142 changes: 136 additions & 6 deletions testgen/__main__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import base64
import importlib
import logging
import os
import platform
import secrets
import signal
import subprocess
import sys
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta
from importlib.metadata import version as pkg_version
from pathlib import Path

import click
from click.core import Context
Expand Down Expand Up @@ -42,6 +48,13 @@
get_tg_schema,
version_service,
)
from testgen.common.standalone_postgres import (
STANDALONE_URI_ENV_VAR,
get_home_dir as get_testgen_home,
get_server_uri,
is_standalone_mode,
start_server as start_standalone_postgres,
)
from testgen.common.models import with_database_session
from testgen.common.models.profiling_run import ProfilingRun
from testgen.common.models.settings import PersistedSetting
Expand Down Expand Up @@ -99,19 +112,23 @@ def invoke(self, ctx: Context):
)
@click.pass_context
def cli(ctx: Context, verbose: bool):
if is_standalone_mode():
start_standalone_postgres()

if verbose:
configure_logging(level=logging.DEBUG)
else:
configure_logging(level=logging.INFO)

ctx.obj = Configuration(verbose=verbose)
status_ok, message = docker_service.check_basic_configuration()
if not status_ok:
click.secho(message, fg="red")
sys.exit(1)
if not is_standalone_mode() and ctx.invoked_subcommand != "standalone-setup":
status_ok, message = docker_service.check_basic_configuration()
if not status_ok:
click.secho(message, fg="red")
sys.exit(1)

if (
ctx.invoked_subcommand not in ["run-app", "ui", "setup-system-db", "upgrade-system-version", "quick-start"]
ctx.invoked_subcommand not in ["run-app", "ui", "setup-system-db", "upgrade-system-version", "quick-start", "standalone-setup"]
and not is_db_revision_up_to_date()
):
click.secho("The system database schema is outdated. Automatically running the following command:", fg="red")
Expand Down Expand Up @@ -472,6 +489,110 @@ def quick_start(
click.echo("Quick start has successfully finished.")


@cli.command("standalone-setup", help="Set up TestGen for standalone use with embedded PostgreSQL (no Docker required).")
@click.option("--username", prompt="Admin username", default="admin", help="Username for the TestGen web UI.")
@click.option(
    "--password", prompt="Admin password", hide_input=True, confirmation_prompt=True,
    default="testgen", help="Password for the TestGen web UI.",
)
def setup_standalone(username: str, password: str):
    """Write the standalone config file, start embedded PostgreSQL and initialize the database.

    Steps, in order:
      1. Validate the credentials and (if a config already exists) ask before overwriting.
      2. Generate encryption/JWT secrets and write ``config.env`` under the TestGen home dir.
      3. Reload ``settings`` so the freshly written file is picked up.
      4. Patch Streamlit, start the embedded PostgreSQL server and run the DB setup.
      5. Send a best-effort analytics event and print next-step instructions.

    Args:
        username: Username for the TestGen web UI; written to the config file.
        password: Password for the TestGen web UI; written to the config file.

    Raises:
        click.BadParameter: If ``username`` or ``password`` contains a line break,
            which would corrupt the line-oriented config file.
    """
    # Values are written verbatim as KEY=value lines; a line break inside a value
    # would silently inject additional settings into the file.
    for option_name, value in (("username", username), ("password", password)):
        if "\n" in value or "\r" in value:
            raise click.BadParameter("must not contain line breaks.", param_hint=f"--{option_name}")

    config_dir = get_testgen_home()
    config_path = config_dir / "config.env"

    if config_path.exists():
        if not click.confirm(f"Config already exists at {config_path}. Overwrite?"):
            click.echo("Aborted.")
            return

    # Generate secrets (same approach as dk-installer)
    def generate_secret(length: int = 12) -> str:
        """Return a random alphanumeric string of ``length`` characters."""
        alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
        return "".join(secrets.choice(alphabet) for _ in range(length))

    jwt_key = base64.b64encode(secrets.token_bytes(32)).decode()
    decrypt_salt = generate_secret()
    decrypt_password = generate_secret()
    log_dir = str(config_dir / "log")

    config_dir.mkdir(parents=True, exist_ok=True)

    config_lines = [
        "# TestGen standalone configuration",
        "# Generated by: testgen standalone-setup",
        "",
        "# Standalone mode (embedded PostgreSQL)",
        "TG_STANDALONE_MODE=yes",
        "",
        "# UI credentials",
        f"TESTGEN_USERNAME={username}",
        f"TESTGEN_PASSWORD={password}",
        "",
        "# Encryption keys",
        f"TG_DECRYPT_SALT={decrypt_salt}",
        f"TG_DECRYPT_PASSWORD={decrypt_password}",
        f"TG_JWT_HASHING_KEY={jwt_key}",
        "",
        "# Logging",
        f"TESTGEN_LOG_FILE_PATH={log_dir}",
        "",
        "# Analytics",
        "TG_ANALYTICS=yes",
        "",
        "# Trust target database certificates (for SQL Server, etc.)",
        "TG_TARGET_DB_TRUST_SERVER_CERTIFICATE=yes",
        "TG_EXPORT_TO_OBSERVABILITY_VERIFY_SSL=no",
    ]

    # The file stores the UI password and encryption keys in plain text, so make
    # it owner-only *before* any secret is written. touch() covers a new file,
    # chmod() covers an existing one being overwritten (and is not umask-limited).
    config_path.touch(mode=0o600, exist_ok=True)
    config_path.chmod(0o600)
    config_path.write_text("\n".join(config_lines) + "\n")
    click.echo(f"Config written to {config_path}")

    # Reload settings — the module was already evaluated at import time
    # before the config file existed. Reloading re-reads the new file
    # and re-evaluates all module-level variables.
    importlib.reload(settings)

    # Patch Streamlit to support editable-install component resolution
    click.echo("Patching Streamlit...")
    from testgen.ui.scripts.patch_streamlit import patch as patch_streamlit
    patch_streamlit(dev=True)

    # Start embedded PostgreSQL (standalone mode is now active via config)
    start_standalone_postgres()

    # Initialize the database
    click.echo("Initializing database...")
    run_launch_db_config(delete_db=False)

    # Send analytics event for pip install tracking
    try:
        from testgen.common.mixpanel_service import MixpanelService

        mp = MixpanelService()
        mp.send_event(
            "standalone_setup",
            username=username,
            install_type="standalone",
            version=pkg_version("dataops-testgen"),
            python_info=f"{platform.python_implementation()} {platform.python_version()}",
            **{"$os": platform.system()},
            os_version=platform.release(),
            os_arch=platform.machine(),
        )
    except Exception:
        # Analytics is best-effort: a tracking failure must never break setup,
        # but leave a trace so it can be diagnosed with --verbose.
        LOG.debug("Could not send standalone_setup analytics event", exc_info=True)

    click.echo("")
    click.echo(click.style("TestGen is ready!", fg="green", bold=True))
    click.echo("")
    click.echo("  To load demo data (optional):")
    click.echo("    testgen quick-start")
    click.echo("")
    click.echo("  Start the application:")
    click.echo("    testgen run-app")
    click.echo("")
    click.echo("  Then open http://localhost:8501 in your browser.")
    click.echo(f"  Log in with username: {username}")

@cli.command("setup-system-db", help="Use to initialize the TestGen system database.")
@click.option(
"--delete-db",
Expand Down Expand Up @@ -728,6 +849,15 @@ def init_ui():
init_ui()

app_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), "ui/app.py")

# In standalone mode, pass the pgserver URI to the Streamlit subprocess
# so it can connect without acquiring the pgserver file lock.
child_env = {**os.environ, "TG_JOB_SOURCE": "UI"}
if is_standalone_mode():
server_uri = get_server_uri()
if server_uri:
child_env = {**os.environ, "TG_JOB_SOURCE": "UI", STANDALONE_URI_ENV_VAR: server_uri}

process= subprocess.Popen(
[ # noqa: S607
"streamlit",
Expand All @@ -742,7 +872,7 @@ def init_ui():
"--",
f"{'--debug' if settings.IS_DEBUG else ''}",
],
env={**os.environ, "TG_JOB_SOURCE": "UI"}
env=child_env,
)
def term_ui(signum, _):
LOG.info(f"Sending termination signal {signum} to Testgen UI")
Expand Down
2 changes: 1 addition & 1 deletion testgen/commands/queries/refresh_data_chars_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def get_row_counts(self, table_names: Iterable[str]) -> list[tuple[str, None]]:
schema = self.table_group.table_group_schema
quote = self.flavor_service.quote_character
count_queries = [
f"SELECT '{table}', COUNT(*) FROM {quote}{schema}{quote}.{quote}{table}{quote}"
f"SELECT '{table}' AS table_name, COUNT(*) AS row_count FROM {quote}{schema}{quote}.{quote}{table}{quote}"
for table in table_names
]
chunked_queries = chunk_queries(count_queries, " UNION ALL ", self.connection.max_query_chars)
Expand Down
Loading
Loading