diff --git a/acceptance/bundle/resource_deps/model_id_ref/databricks.yml b/acceptance/bundle/resource_deps/model_id_ref/databricks.yml new file mode 100644 index 00000000000..7dedadc6432 --- /dev/null +++ b/acceptance/bundle/resource_deps/model_id_ref/databricks.yml @@ -0,0 +1,16 @@ +bundle: + name: test-bundle + +# A model's numeric ID is named registered_model_id in Terraform state and model_id in +# direct state. terraform_dabs_map bridges the two names, so a reference to +# registered_model_id resolves on both engines. +resources: + models: + my_model: + name: my-model + description: my model + + jobs: + consumer: + name: consumer + description: model id is ${resources.models.my_model.registered_model_id} diff --git a/acceptance/bundle/resource_deps/model_id_ref/out.test.toml b/acceptance/bundle/resource_deps/model_id_ref/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/resource_deps/model_id_ref/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/resource_deps/model_id_ref/output.txt b/acceptance/bundle/resource_deps/model_id_ref/output.txt new file mode 100644 index 00000000000..c10a3c79920 --- /dev/null +++ b/acceptance/bundle/resource_deps/model_id_ref/output.txt @@ -0,0 +1,26 @@ + +=== the consumer job implicitly depends on the model +>>> [CLI] bundle plan +create jobs.consumer +create models.my_model + +Plan: 2 to add, 0 to change, 0 to delete, 0 unchanged + +=== after deploy, registered_model_id is resolved to the model's numeric id +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> print_requests +{ + "description": "my model", + "name": "my-model", + "path": "/api/2.0/mlflow/registered-models/create" +} +{ + "description": "model id is [MY_MODEL_ID]", + "name": "consumer", + "path": "/api/2.2/jobs/create" +} diff --git a/acceptance/bundle/resource_deps/model_id_ref/script b/acceptance/bundle/resource_deps/model_id_ref/script new file mode 100644 index 00000000000..b678e97dc5c --- /dev/null +++ b/acceptance/bundle/resource_deps/model_id_ref/script @@ -0,0 +1,19 @@ +# Show the create requests with the model-id reference resolved in the job description. +print_requests() { + jq --sort-keys 'select(.method != "GET" and (.path | contains("/jobs/create") or contains("/mlflow/registered-models/create"))) | {path, name: (.body.name // .body.new_settings.name), description: (.body.description // .body.new_settings.description)}' < out.requests.txt + rm out.requests.txt +} + +title "the consumer job implicitly depends on the model" +trace $CLI bundle plan + +title "after deploy, registered_model_id is resolved to the model's numeric id" +trace $CLI bundle deploy + +# Bind the model's exact numeric id to [MY_MODEL_ID] so the resolved reference is matched +# precisely rather than by the broad [NUMID] pattern. The id comes from the API (independent +# of the deploy engine), so terraform and direct must both resolve to this exact value. +model_id=$($CLI model-registry get-model my-model | jq -r '.registered_model_databricks.id') +add_repl.py "$model_id" MY_MODEL_ID + +trace print_requests diff --git a/bundle/terraform_dabs_map/generate_test.go b/bundle/terraform_dabs_map/generate_test.go index 0a313c071c4..f576949b086 100644 --- a/bundle/terraform_dabs_map/generate_test.go +++ b/bundle/terraform_dabs_map/generate_test.go @@ -74,15 +74,25 @@ var tfKnownSegments = map[string]bool{ "provider_config": true, // Terraform provider metadata, not a DABs concept } +// manualRenames maps DABs group → DABs field path → TF field path for renames the +// heuristic matcher cannot derive. These are state-computed fields that live in the +// RemoteType (not the config struct), whose DABs and TF names are semantically equivalent +// but lexically unrelated, so neither exact nor stemmed matching can connect them. +var manualRenames = map[string]map[string]string{ + // models.model_id is the numeric model ID; TF names it registered_model_id. + "models": {"model_id": "registered_model_id"}, +} + type groupResult struct { - group string - tfType string - hasTFType bool - renames map[string]string // TF path → DABs path (renamed fields only) - unwraps []string // TF paths that are structural wrappers (Unwrap: true) - dabsOnly map[string]bool // DABs clean paths with no Terraform equivalent - tfOnly map[string]bool // TF clean paths with no DABs equivalent - matchCount int // used for stats output only, not written to generated.go + group string + tfType string + hasTFType bool + renames map[string]string // TF path → DABs path (renamed fields only) + unwraps []string // TF paths that are structural wrappers (Unwrap: true) + dabsOnly map[string]bool // DABs clean paths with no Terraform equivalent + tfOnly map[string]bool // TF clean paths with no DABs equivalent + matchCount int // used for stats output only, not written to generated.go + tfWrapperFirstSegs map[string]bool // first-level DABs field names that go under the wrapper (only set when unwraps is non-empty) } func buildAll() ([]groupResult, error) { @@ -248,6 +258,32 @@ func buildGroup(group string, adapter *dresources.Adapter) (groupResult, error) res.unwraps = append(res.unwraps, wrapper) } + // Collect first-level DABs field names that go under the wrapper for groups with wrappers. + if len(res.unwraps) > 0 { + res.tfWrapperFirstSegs = make(map[string]bool) + for _, wrapper := range res.unwraps { + prefix := wrapper + "." + for tf := range tfFields { + if matchedTF[tf] { + if after, ok := strings.CutPrefix(tf, prefix); ok { + res.tfWrapperFirstSegs[topSegment(after)] = true + } + } + } + } + } + + // Apply manual renames for fields the heuristic matcher cannot derive. These connect a + // state-computed DABs field to its differently-named TF counterpart; recording them as + // renames also marks the TF field matched so it does not surface as Terraform-only. + for dabsPath, tfPath := range manualRenames[group] { + if !tfFields[tfPath] { + return groupResult{}, fmt.Errorf("manual rename %s.%s: TF field %q not found", group, dabsPath, tfPath) + } + res.renames[tfPath] = dabsPath + matchedTF[tfPath] = true + } + // Step 4: remaining unmatched fields. for dabs := range dabsFields { if !matchedDABs[dabs] && !dabsKnownFields[topSegment(dabs)] { @@ -463,6 +499,22 @@ func renderSource(results []groupResult) ([]byte, error) { w("\t%q: %q,\n", r.group, wrapper) } } + w("}\n\n") + + w("// DABsToTerraformWrapperFields maps DABs group name → first-level DABs field names that\n") + w("// live under the TF wrapper. For wrapper groups, a DABs path is prefixed with the wrapper\n") + w("// in DABsPathToTerraform only when its first segment appears here.\n") + w("var DABsToTerraformWrapperFields = map[string]FieldSet{\n") + for _, r := range results { + if !r.hasTFType || len(r.tfWrapperFirstSegs) == 0 { + continue + } + w("\t%q: {\n", r.group) + for _, key := range slices.Sorted(maps.Keys(r.tfWrapperFirstSegs)) { + w("\t\t%q: {},\n", key) + } + w("\t},\n") + } w("}\n") return format.Source([]byte(b.String())) diff --git a/bundle/terraform_dabs_map/generated.go b/bundle/terraform_dabs_map/generated.go index f4c8ec8d7a2..dd974dd626e 100644 --- a/bundle/terraform_dabs_map/generated.go +++ b/bundle/terraform_dabs_map/generated.go @@ -15,7 +15,7 @@ package terraform_dabs_map // jobs / databricks_job: 10 dabs-only // jobs / databricks_job: 257 tf-only // model_serving_endpoints / databricks_model_serving: 2 tf-only -// models / databricks_mlflow_model: 1 tf-only +// models / databricks_mlflow_model: 1 renames // pipelines / databricks_pipeline: 3 renames // pipelines / databricks_pipeline: 7 dabs-only // pipelines / databricks_pipeline: 2 tf-only @@ -56,6 +56,9 @@ var TerraformToDABsFieldMap = map[string]RenameTree{ "library": {NewName: "libraries"}, }}, }, + "models": { + "registered_model_id": {NewName: "model_id"}, + }, "pipelines": { "cluster": {NewName: "clusters"}, "library": {NewName: "libraries"}, @@ -570,9 +573,6 @@ var TerraformOnlyFields = map[string]FieldSet{ "endpoint_url": {}, "serving_endpoint_id": {}, }, - "models": { - "registered_model_id": {}, - }, "pipelines": { "expected_last_modified": {}, "url": {}, @@ -618,6 +618,9 @@ var DABsToTerraformRenameMap = map[string]RenameTree{ "libraries": {NewName: "library"}, }}, }, + "models": { + "model_id": {NewName: "registered_model_id"}, + }, "pipelines": { "clusters": {NewName: "cluster"}, "libraries": {NewName: "library"}, @@ -637,3 +640,67 @@ var DABsToTerraformWrappers = map[string]string{ "postgres_roles": "spec", "postgres_synced_tables": "spec", } + +// DABsToTerraformWrapperFields maps DABs group name → first-level DABs field names that +// live under the TF wrapper. For wrapper groups, a DABs path is prefixed with the wrapper +// in DABsPathToTerraform only when its first segment appears here. +var DABsToTerraformWrapperFields = map[string]FieldSet{ + "postgres_branches": { + "expire_time": {}, + "is_protected": {}, + "no_expiry": {}, + "source_branch": {}, + "source_branch_lsn": {}, + "source_branch_time": {}, + "ttl": {}, + }, + "postgres_catalogs": { + "branch": {}, + "create_database_if_missing": {}, + "postgres_database": {}, + }, + "postgres_databases": { + "postgres_database": {}, + "role": {}, + }, + "postgres_endpoints": { + "autoscaling_limit_max_cu": {}, + "autoscaling_limit_min_cu": {}, + "disabled": {}, + "endpoint_type": {}, + "group": {}, + "no_suspension": {}, + "settings": {}, + "suspend_timeout_duration": {}, + }, + "postgres_projects": { + "budget_policy_id": {}, + "custom_tags": {}, + "default_branch": {}, + "default_endpoint_settings": {}, + "display_name": {}, + "enable_pg_native_login": {}, + "history_retention_duration": {}, + "pg_version": {}, + }, + "postgres_roles": { + "attributes": {}, + "auth_method": {}, + "identity_type": {}, + "membership_roles": {}, + "postgres_role": {}, + }, + "postgres_synced_tables": { + "accelerated_sync": {}, + "branch": {}, + "create_database_objects_if_missing": {}, + "existing_pipeline_id": {}, + "new_pipeline_spec": {}, + "postgres_database": {}, + "primary_key_columns": {}, + "scheduling_policy": {}, + "source_table_full_name": {}, + "timeseries_key": {}, + "type_overrides": {}, + }, +} diff --git a/bundle/terraform_dabs_map/translate.go b/bundle/terraform_dabs_map/translate.go index d3d0517cffb..5f725e7f4a0 100644 --- a/bundle/terraform_dabs_map/translate.go +++ b/bundle/terraform_dabs_map/translate.go @@ -9,12 +9,14 @@ import ( // DABsPathToTerraform translates a field path from DABs naming conventions // to Terraform naming conventions for the given resource group. // -// It is the inverse of TerraformPathToDABs. For groups whose TF schema wraps fields -// under a structural prefix (e.g. "spec"), that prefix is prepended to the result. -// Each field name segment is looked up in the DABsToTerraformRenameMap: when found the TF -// name is used and the tree descends for the remainder of the path. Array indices pass -// through unchanged without advancing the tree position. An unrecognised segment stops -// further renaming; remaining segments are kept as-is. Returns nil when path is nil. +// It is the inverse of TerraformPathToDABs. For groups whose TF schema wraps config fields +// under a structural prefix (e.g. "spec"), that prefix is prepended when the path's first +// segment is listed in DABsToTerraformWrapperFields. Root-level fields and unrecognised +// segments pass through without the wrapper. Each field name segment is looked up in the +// DABsToTerraformRenameMap: when found the TF name is used and the tree descends for the +// remainder of the path. Array indices pass through unchanged without advancing the tree +// position. An unrecognised segment stops further renaming; remaining segments are kept +// as-is. Returns nil when path is nil. // Returns an error when path is a known DABs-only field with no Terraform equivalent. // // The path must be relative to the resource root (e.g. "tasks", not @@ -28,10 +30,18 @@ func DABsPathToTerraform(group string, path *structpath.PathNode) (*structpath.P return nil, fmt.Errorf("%s: %q is a DABs-only field with no Terraform equivalent", group, path) } - // For groups with a TF wrapper (Unwrap inverse), prepend it as the first segment. + // For groups with a TF wrapper, prepend it only when the first segment is a known + // spec field. Unknown paths (root-level outputs, unrecognised segments) pass through unchanged. var result *structpath.PathNode if wrapper, ok := DABsToTerraformWrappers[group]; ok { - result = structpath.NewDotString(nil, wrapper) + segs := path.AsSlice() + if len(segs) > 0 { + if firstKey, ok := segs[0].StringKey(); ok { + if _, isWrapped := DABsToTerraformWrapperFields[group][firstKey]; isWrapped { + result = structpath.NewDotString(nil, wrapper) + } + } + } } tree := DABsToTerraformRenameMap[group] diff --git a/bundle/terraform_dabs_map/translate_test.go b/bundle/terraform_dabs_map/translate_test.go index c61858a5039..2f3ae5cd7ae 100644 --- a/bundle/terraform_dabs_map/translate_test.go +++ b/bundle/terraform_dabs_map/translate_test.go @@ -11,11 +11,10 @@ import ( func TestTerraformPathToDABs(t *testing.T) { tests := []struct { - group string - terrPath string - dabsPath string - noRoundtrip bool // if true, DABsPathToTerraform(dabsPath) != terrPath (non-invertible path) - expectErr bool // if true, translation must return an error (Terraform-only field) + group string + terrPath string + dabsPath string + expectErr bool // if true, translation must return an error (Terraform-only field) }{ // Top-level renames - jobs { @@ -140,19 +139,35 @@ func TestTerraformPathToDABs(t *testing.T) { dabsPath: "endpoint_type", }, - // TF-computed paths (status, timestamps) pass through unchanged but are not - // roundtrippable: DABsPathToTerraform would incorrectly prepend the spec wrapper. + // TF root-level paths (status, timestamps, IDs) pass through unchanged and + // round-trip correctly: DABsPathToTerraform recognises them as root fields. { - group: "postgres_projects", - terrPath: "status.display_name", - dabsPath: "status.display_name", - noRoundtrip: true, + group: "postgres_projects", + terrPath: "status.display_name", + dabsPath: "status.display_name", }, { - group: "postgres_projects", - terrPath: "create_time", - dabsPath: "create_time", - noRoundtrip: true, + group: "postgres_projects", + terrPath: "create_time", + dabsPath: "create_time", + }, + { + group: "postgres_projects", + terrPath: "project_id", + dabsPath: "project_id", + }, + { + group: "postgres_projects", + terrPath: "name", + dabsPath: "name", + }, + + // Manual rename: registered_model_id is a state-computed field whose DABs name + // (model_id) is lexically unrelated, so it is wired up via manualRenames in codegen. + { + group: "models", + terrPath: "registered_model_id", + dabsPath: "model_id", }, // Terraform-only fields: must return an error @@ -199,12 +214,20 @@ func TestTerraformPathToDABs(t *testing.T) { require.NotNil(t, result) assert.Equal(t, tt.dabsPath, result.String()) - if !tt.noRoundtrip { - back, err := terraform_dabs_map.DABsPathToTerraform(tt.group, result) - require.NoError(t, err) - require.NotNil(t, back) - assert.Equal(t, tt.terrPath, back.String(), "roundtrip DABsPathToTerraform(TerraformPathToDABs(terrPath))") - } + // Fixed-point: the DABs result is already in DABs format, so a second + // TerraformPathToDABs pass must leave it unchanged. + // DABsPathToTerraform does NOT have this property: spec fields like + // "display_name" map to "spec.display_name", and applying the function + // again would incorrectly prepend another "spec." prefix. + result2, err := terraform_dabs_map.TerraformPathToDABs(tt.group, result) + require.NoError(t, err) + require.NotNil(t, result2) + assert.Equal(t, result.String(), result2.String(), "TerraformPathToDABs(TerraformPathToDABs(terrPath)) == TerraformPathToDABs(terrPath)") + + back, err := terraform_dabs_map.DABsPathToTerraform(tt.group, result) + require.NoError(t, err) + require.NotNil(t, back) + assert.Equal(t, tt.terrPath, back.String(), "roundtrip DABsPathToTerraform(TerraformPathToDABs(terrPath))") }) } }