Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEXT_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* direct: Fix resolving a resource reference that is used more than once within the same field ([#5558](https://github.com/databricks/cli/pull/5558)).
* Bundle variable references now accept Unicode letters in path segments (e.g. `${var.变量}`). ([#5532](https://github.com/databricks/cli/pull/5532))
* Ignore remote changes for vector search direct_access_index_spec.schema_json to prevent drift when the backend normalizes the schema ([#5481](https://github.com/databricks/cli/pull/5481)).
* direct: Fix permanent drift on `permissions` when the parent resource is deleted and recreated out-of-band with the same name ([#5587](https://github.com/databricks/cli/pull/5587)).

### Dependency updates

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
bundle:
name: drift-mse-recreated-same-name-$UNIQUE_NAME

sync:
paths: []

resources:
model_serving_endpoints:
my_endpoint:
name: mse-endpoint-$UNIQUE_NAME
permissions:
- level: CAN_VIEW
user_name: deco-test-user@databricks.com

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@

=== Initial deployment
>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/drift-mse-recreated-same-name-[UNIQUE_NAME]/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

>>> [CLI] serving-endpoints get mse-endpoint-[UNIQUE_NAME]
{
"name": "mse-endpoint-[UNIQUE_NAME]",
"creator": "[USERNAME]"
}

=== Delete and recreate remotely with the same name
>>> [CLI] serving-endpoints delete mse-endpoint-[UNIQUE_NAME]

>>> [CLI] serving-endpoints create mse-endpoint-[UNIQUE_NAME] --no-wait
{
"name": "mse-endpoint-[UNIQUE_NAME]",
"creator": "[USERNAME]"
}

>>> [CLI] serving-endpoints get mse-endpoint-[UNIQUE_NAME]
{
"name": "mse-endpoint-[UNIQUE_NAME]",
"creator": "[USERNAME]"
}
Original endpoint id: [ORIGINAL_ENDPOINT_ID]
Remote recreated endpoint id: [REMOTE_RECREATED_ENDPOINT_ID]

=== Plan after out-of-band recreate
>>> [CLI] bundle plan
update model_serving_endpoints.my_endpoint.permissions

Plan: 0 to add, 1 to change, 0 to delete, 1 unchanged

>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/drift-mse-recreated-same-name-[UNIQUE_NAME]/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

>>> [CLI] serving-endpoints get mse-endpoint-[UNIQUE_NAME]
{
"name": "mse-endpoint-[UNIQUE_NAME]",
"creator": "[USERNAME]"
}

=== Verify no permanent drift after deploy
>>> [CLI] bundle plan
Plan: 0 to add, 0 to change, 0 to delete, 2 unchanged

>>> [CLI] bundle destroy --auto-approve
The following resources will be deleted:
delete resources.model_serving_endpoints.my_endpoint

All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/drift-mse-recreated-same-name-[UNIQUE_NAME]/default

Deleting files...
Destroy complete!
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Acceptance test: a model serving endpoint is deleted and recreated out-of-band
# under the same name. After one more deploy, `bundle plan` must report no changes.

# Render the bundle config from its template, substituting environment variables
# (the template references $UNIQUE_NAME).
envsubst < databricks.yml.tmpl > databricks.yml

# Runs on every exit path via the EXIT trap below: destroys the bundle and
# removes out.requests.txt.
cleanup() {
trace $CLI bundle destroy --auto-approve
rm -f out.requests.txt
}
trap cleanup EXIT

# Must match the endpoint name declared in databricks.yml.tmpl.
endpoint_name="mse-endpoint-${UNIQUE_NAME}"

title "Initial deployment"
trace $CLI bundle deploy

# Capture the id of the endpoint created by the bundle.
original_endpoint_id=$($CLI serving-endpoints get "${endpoint_name}" | jq -r '.id')
# NOTE(review): add_repl.py presumably registers an output replacement so this id
# is printed as [ORIGINAL_ENDPOINT_ID] in the recorded output -- confirm against the helper.
add_repl.py "$original_endpoint_id" "ORIGINAL_ENDPOINT_ID"
# Only name and creator are printed; the remaining fields are dropped by the jq filter.
trace $CLI serving-endpoints get "${endpoint_name}" | jq '{name, creator}'

title "Delete and recreate remotely with the same name"
# Bypass the bundle: delete the endpoint and create a new one with the same name.
trace $CLI serving-endpoints delete "${endpoint_name}"
trace $CLI serving-endpoints create "${endpoint_name}" --no-wait | jq '{name, creator}'

# Capture the id of the recreated endpoint.
remote_recreated_endpoint_id=$($CLI serving-endpoints get "${endpoint_name}" | jq -r '.id')
add_repl.py "$remote_recreated_endpoint_id" "REMOTE_RECREATED_ENDPOINT_ID"
trace $CLI serving-endpoints get "${endpoint_name}" | jq '{name, creator}'

printf "Original endpoint id: %s\n" "$original_endpoint_id"
printf "Remote recreated endpoint id: %s\n" "$remote_recreated_endpoint_id"

# Precondition for the scenario: the recreated endpoint must have a different id,
# otherwise there is nothing for the plan to detect. Fail the test early if not.
if [ "$original_endpoint_id" = "$remote_recreated_endpoint_id" ]; then
echo "Expected remote recreation to assign a different endpoint id" >&2
exit 1
fi

title "Plan after out-of-band recreate"
trace $CLI bundle plan

# Deploy again so the deployment state is brought in line with the recreated endpoint.
trace $CLI bundle deploy
trace $CLI serving-endpoints get "${endpoint_name}" | jq '{name, creator}'

title "Verify no permanent drift after deploy"
# NOTE(review): contains.py presumably fails the test when the expected substring
# is missing from the plan output -- confirm against the helper.
trace $CLI bundle plan | contains.py "Plan: 0 to add, 0 to change, 0 to delete"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Badness = "After deleting and recreating a model serving endpoint remotely with the same name but a different endpoint_id, bundle plan shows an update on permissions (instead of create as in V2) because the V1 permissions API does not delete ACLs immediately when the parent is gone. UpdateWithID persists the new object_id in deployment state so subsequent plans do not show permanent drift."

Local = true
Cloud = true
RequiresUnityCatalog = true
RecordRequests = false

EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
bundle:
name: registered-model-cascade-recreate

resources:
schemas:
parent_schema:
name: myschema
catalog_name: main
storage_root: dbfs:/parent_storage_root_v1
comment: parent schema

registered_models:
child_model:
name: mymodel
catalog_name: main
schema_name: ${resources.schemas.parent_schema.name}
comment: child model

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@

>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/registered-model-cascade-recreate/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

=== Change schema's storage_root to trigger schema recreate
>>> update_file.py databricks.yml dbfs:/parent_storage_root_v1 dbfs:/parent_storage_root_v2

=== Plan should recreate BOTH the schema and its dependent registered_model, but currently only the schema is recreated
>>> [CLI] bundle plan
recreate schemas.parent_schema

Plan: 1 to add, 0 to change, 1 to delete, 1 unchanged

>>> [CLI] bundle plan --output json
{
"plan_version": 2,
"cli_version": "[DEV_VERSION]",
"lineage": "[UUID]",
"serial": 1,
"plan": {
"resources.registered_models.child_model": {
"depends_on": [
{
"node": "resources.schemas.parent_schema",
"label": "${resources.schemas.parent_schema.name}"
}
],
"action": "skip",
"remote_state": {
"catalog_name": "main",
"comment": "child model",
"created_at": [UNIX_TIME_MILLIS][0],
"created_by": "[USERNAME]",
"full_name": "main.myschema.mymodel",
"metastore_id": "[UUID]",
"name": "mymodel",
"owner": "[USERNAME]",
"schema_name": "myschema",
"updated_at": [UNIX_TIME_MILLIS][0],
"updated_by": "[USERNAME]"
},
"changes": {
"created_at": {
"action": "skip",
"reason": "empty",
"remote": 0
},
"created_by": {
"action": "skip",
"reason": "empty",
"remote": ""
},
"full_name": {
"action": "skip",
"reason": "backend_default",
"remote": "main.myschema.mymodel"
},
"metastore_id": {
"action": "skip",
"reason": "backend_default",
"remote": "[UUID]"
},
"owner": {
"action": "skip",
"reason": "backend_default",
"remote": "[USERNAME]"
},
"updated_at": {
"action": "skip",
"reason": "empty",
"remote": 0
},
"updated_by": {
"action": "skip",
"reason": "empty",
"remote": ""
}
}
},
"resources.schemas.parent_schema": {
"action": "recreate",
"new_state": {
"value": {
"catalog_name": "main",
"comment": "parent schema",
"name": "myschema",
"storage_root": "dbfs:/parent_storage_root_v2"
}
},
"remote_state": {
"browse_only": false,
"catalog_name": "main",
"catalog_type": "MANAGED_CATALOG",
"comment": "parent schema",
"created_at": [UNIX_TIME_MILLIS][1],
"created_by": "[USERNAME]",
"effective_predictive_optimization_flag": {
"inherited_from_name": "[METASTORE_NAME]",
"inherited_from_type": "METASTORE",
"value": "ENABLE"
},
"enable_predictive_optimization": "INHERIT",
"full_name": "main.myschema",
"metastore_id": "[UUID]",
"name": "myschema",
"owner": "[USERNAME]",
"schema_id": "[UUID]",
"storage_root": "dbfs:/parent_storage_root_v1",
"updated_at": [UNIX_TIME_MILLIS][1],
"updated_by": "[USERNAME]"
},
"changes": {
"storage_root": {
"action": "recreate",
"reason": "immutable",
"old": "dbfs:/parent_storage_root_v1",
"new": "dbfs:/parent_storage_root_v2",
"remote": "dbfs:/parent_storage_root_v1"
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Acceptance test: changing an immutable field on a schema (storage_root) forces the
# schema to be recreated; the plan for the registered model that references it is recorded.

# Write a .gitignore that matches everything in this directory.
# NOTE(review): presumably keeps generated test files out of the bundle file sync -- confirm.
echo "*" > .gitignore
trace $CLI bundle deploy

title "Change schema's storage_root to trigger schema recreate"
# Rewrite databricks.yml in place, replacing the v1 storage root with v2.
trace update_file.py databricks.yml "dbfs:/parent_storage_root_v1" "dbfs:/parent_storage_root_v2"

title "Plan should recreate BOTH the schema and its dependent registered_model, but currently only the schema is recreated"
# Record the plan in both the human-readable and the JSON form.
trace $CLI bundle plan
trace $CLI bundle plan --output json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Local = true
Cloud = false
RecordRequests = false
EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"]

Badness = "Recreating a parent resource (schema) should propagate Recreate to dependents (registered_model) that reference it, but the planner does not. The dependent plans Skip while the parent is deleted underneath it. On a real workspace this either fails the parent delete (children still attached) or orphans the child. Tracked as a framework-level fix in bundle/direct/bundle_plan.go."

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not related to permissions, but this is also a recreate-cascade issue.

Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ Remote recreated endpoint UUID: [REMOTE_RECREATED_ENDPOINT_UUID]

=== Plan after out-of-band recreate
>>> [CLI] bundle plan
create vector_search_endpoints.my_endpoint.permissions
update vector_search_endpoints.my_endpoint.permissions

Plan: 1 to add, 0 to change, 0 to delete, 1 unchanged
Plan: 0 to add, 1 to change, 0 to delete, 1 unchanged

>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/drift-vs-endpoint-recreated-same-name-[UNIQUE_NAME]/default/files...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"label": "${resources.jobs.my_job.id}"
}
],
"action": "update",
"action": "update_id",
"new_state": {
"value": {
"object_id": "",
Expand Down Expand Up @@ -79,7 +79,8 @@
]
},
"object_id": {
"action": "update",
"action": "update_id",
"reason": "id_changes",
"old": "/jobs/123",
"new": "",
"remote": "/jobs/123"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Deployment complete!
}
},
"resources.jobs.my_job.permissions": {
"__id__": "/jobs/123",
"__id__": "/jobs/[NUMID]",
"state": {
"object_id": "/jobs/[NUMID]",
"__embed__": [
Expand Down
4 changes: 2 additions & 2 deletions bundle/direct/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,12 @@ func (d *DeploymentUnit) loadPersistedState(db *dstate.DeploymentState) (any, er
return state, nil
}

func (d *DeploymentUnit) refreshRemoteState(ctx context.Context, id string) error {
func (d *DeploymentUnit) refreshRemoteState(ctx context.Context, id string, newState any) error {
if d.RemoteState != nil {
return nil
}
remoteState, err := retryOnTransient(ctx, func() (any, error) {
return d.Adapter.DoRead(ctx, id)
return d.Adapter.DoRead(ctx, id, newState)
})
if err != nil {
return fmt.Errorf("failed to refresh remote state id=%s: %w", id, err)
Expand Down
5 changes: 4 additions & 1 deletion bundle/direct/bundle_apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa

// We don't keep NewState around for 'skip' nodes

var newState any
if action != deployplan.Skip {
if !b.resolveReferences(ctx, resourceKey, entry, errorPrefix, false) {
return false
Expand All @@ -123,6 +124,8 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa
return false
}

newState = sv.Value

if migrateMode {
// In migration mode we're reading resources in DAG order so that we have fully resolved config snapshots stored
id := b.StateDB.GetResourceID(resourceKey)
Expand Down Expand Up @@ -153,7 +156,7 @@ func (b *DeploymentBundle) Apply(ctx context.Context, client *databricks.Workspa
return false
}

err = d.refreshRemoteState(ctx, id)
err = d.refreshRemoteState(ctx, id, newState)
if err != nil {
logdiag.LogError(ctx, fmt.Errorf("%s: failed to read remote state: %w", errorPrefix, err))
return false
Expand Down
Loading