// ciRunID matches a well-formed CI run id: 1 to 16 ASCII digits and nothing else.
var ciRunID = regexp.MustCompile(`^[0-9]{1,16}$`)

// ciUniqueName embeds a CI run id into the random unique name as "ci<run-id>x", followed by
// as many leading characters of random as fit, so the result is exactly as long as random.
//
// Given a lowercase-alphanumeric random, the result stays purely lowercase-alphanumeric like
// the base32 name it replaces, so it remains valid everywhere $UNIQUE_NAME is used: app names
// (hyphens would be fine there, but underscores and uppercase are not) and Python and Unity
// Catalog identifiers (no hyphens). No punctuation separator works for all of them, so the
// run id (all digits) is terminated by the letter "x". Because "x" is not a digit, the sweep
// prefix "ci<run-id>x" of one run is never a prefix of the names of another run whose id
// merely starts with the same digits (for example "ci1x" versus "ci15x...").
//
// Length is preserved because "app-$UNIQUE_NAME" is exactly the 30-char app name maximum.
//
// random is returned unchanged when runID is absent or malformed (anything other than 1 to 16
// digits), or when the prefix is too long to leave at least 8 characters of random.
func ciUniqueName(runID, random string) string {
	const (
		namePrefix = "ci"
		// delimiter marks the end of the digits-only run id.
		delimiter = "x"
		// minRandomLen is the fewest random characters that must survive truncation.
		minRandomLen = 8
	)

	if !ciRunID.MatchString(runID) {
		return random
	}
	prefix := namePrefix + runID + delimiter
	// Drop trailing random characters to make room for the prefix; a negative value means
	// random is shorter than the prefix itself and is rejected by the same check.
	randLen := len(random) - len(prefix)
	if randLen < minRandomLen {
		return random
	}
	return prefix + random[:randLen]
}
+ uniqueName = ciUniqueName(os.Getenv("GITHUB_RUN_ID"), uniqueName) repls.Set(uniqueName, "[UNIQUE_NAME]") var tmpDir string diff --git a/acceptance/unique_name_test.go b/acceptance/unique_name_test.go new file mode 100644 index 00000000000..181c648b98b --- /dev/null +++ b/acceptance/unique_name_test.go @@ -0,0 +1,27 @@ +package acceptance_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCIUniqueName(t *testing.T) { + // 26 lowercase base32 characters, like the generated unique name. + random := "osr5mzrrvzb73juixjoviti24y" + + // Run id embedded, same length as input, lowercase-alphanumeric, sweepable prefix. + assert.Equal(t, "ci15799017600xosr5mzrrvzb7", ciUniqueName("15799017600", random)) + assert.Equal(t, "ci1xosr5mzrrvzb73juixjovit", ciUniqueName("1", random)) + + // No or invalid run id: unchanged. + assert.Equal(t, random, ciUniqueName("", random)) + assert.Equal(t, random, ciUniqueName("abc123", random)) + assert.Equal(t, random, ciUniqueName("123 456", random)) + + // 15-digit run id still leaves exactly the 8-char random minimum: prefixed. + assert.Equal(t, "ci123456789012345xosr5mzrr", ciUniqueName("123456789012345", random)) + + // 16-digit run id is too long to leave enough randomness: unchanged. + assert.Equal(t, random, ciUniqueName("1234567890123456", random)) +} diff --git a/tools/sweep_test_resources.py b/tools/sweep_test_resources.py new file mode 100755 index 00000000000..d17521871fa --- /dev/null +++ b/tools/sweep_test_resources.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +"""Sweep leaked acceptance-test resources by name prefix. + +Lists (and with --delete, deletes) warehouses, pipelines and jobs whose names +start with the given prefix, e.g. the per-run prefix "cix" that +the acceptance harness embeds into $UNIQUE_NAME on CI cloud runs. + +Authentication is taken from the environment (DATABRICKS_HOST, DATABRICKS_TOKEN +or any other auth supported by the databricks CLI). 
#!/usr/bin/env python3
"""Sweep leaked acceptance-test resources by name prefix.

Lists (and with --delete, deletes) warehouses, pipelines and jobs whose names
start with the given prefix, e.g. the per-run prefix "ci<run-id>x" that
the acceptance harness embeds into $UNIQUE_NAME on CI cloud runs.

Authentication is taken from the environment (DATABRICKS_HOST, DATABRICKS_TOKEN
or any other auth supported by the databricks CLI).

Usage:
    tools/sweep_test_resources.py ci15799017600x            # dry run: list only
    tools/sweep_test_resources.py ci15799017600x --delete   # delete matches

The exit status is 0 when every listing and every requested deletion succeeded,
and 1 otherwise.
"""

import argparse
import json
import subprocess
import sys


def run_json(*args):
    """Run `databricks <args> --output json` and return the parsed JSON output.

    Empty output and a JSON `null` are both returned as an empty list so that
    callers can always iterate over the result.

    Raises:
        subprocess.CalledProcessError: the CLI exited with a non-zero status.
        OSError: the CLI could not be started (e.g. it is not installed).
        json.JSONDecodeError: the CLI printed something that is not valid JSON.
    """
    out = subprocess.check_output(["databricks", *args, "--output", "json"], text=True)
    if not out.strip():
        return []
    return json.loads(out) or []


def sweep(kind, items, name_of, id_of, delete_args, prefix, delete):
    """List, and optionally delete, the items whose name starts with prefix.

    Each match is printed to stdout as "<kind>\\t<id>\\t<name>".

    Args:
        kind: human-readable resource kind, used only in output.
        items: iterable of listing entries (dicts parsed from CLI JSON output).
        name_of: callable returning an entry's name, or None when it has none.
        id_of: callable returning an entry's id, or None when it has none.
        delete_args: CLI arguments that delete one resource; the id is appended.
        prefix: name prefix selecting the resources to sweep.
        delete: when true, delete each match; otherwise only list it.

    Returns:
        The number of matches that could not be handled: deletions that failed
        plus matching entries that carry no id.
    """
    failures = 0
    for item in items:
        name = name_of(item) or ""
        if not name.startswith(prefix):
            continue
        res_id = id_of(item)
        if res_id is None:
            # Without an id there is nothing valid to hand to the delete command;
            # str(None) would otherwise be passed through as the literal "None".
            print(f"skipping {kind} {name!r}: listing entry has no id", file=sys.stderr)
            failures += 1
            continue
        res_id = str(res_id)
        print(f"{kind}\t{res_id}\t{name}")
        if delete:
            try:
                subprocess.check_call(["databricks", *delete_args, res_id])
            except (subprocess.CalledProcessError, OSError) as e:
                # Best effort: keep sweeping the remaining matches.
                print(f"failed to delete {kind} {res_id}: {e}", file=sys.stderr)
                failures += 1
    return failures


def main():
    """Parse the command line, sweep every resource kind, and return the exit status."""
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("prefix", help="resource name prefix, e.g. ci<run-id>x")
    parser.add_argument("--delete", action="store_true", help="delete matches (default: list only)")
    args = parser.parse_args()

    # An empty prefix would match every resource in the workspace.
    if not args.prefix:
        parser.error("prefix must not be empty")

    # (kind, list command, name getter, id getter, delete command)
    resources = [
        (
            "warehouse",
            ["warehouses", "list"],
            lambda w: w.get("name"),
            lambda w: w.get("id"),
            ["warehouses", "delete"],
        ),
        (
            "pipeline",
            ["pipelines", "list-pipelines"],
            lambda p: p.get("name"),
            lambda p: p.get("pipeline_id"),
            ["pipelines", "delete"],
        ),
        (
            "job",
            ["jobs", "list"],
            # "settings" may be present but null, so .get("settings", {}) is not enough.
            lambda j: (j.get("settings") or {}).get("name"),
            lambda j: j.get("job_id"),
            ["jobs", "delete"],
        ),
    ]

    failures = 0
    for kind, list_args, name_of, id_of, delete_args in resources:
        try:
            items = run_json(*list_args)
        except (subprocess.CalledProcessError, OSError, json.JSONDecodeError) as e:
            # A failed listing must not prevent the other resource kinds from being swept.
            print(f"failed to list {kind}s: {e}", file=sys.stderr)
            failures += 1
            continue
        failures += sweep(kind, items, name_of, id_of, delete_args, args.prefix, args.delete)
    return 1 if failures else 0


if __name__ == "__main__":
    sys.exit(main())