From da60eeb9a91e130cba0b902496ee5313286da186 Mon Sep 17 00:00:00 2001 From: Art Chen Date: Thu, 18 Jun 2026 10:53:44 -0700 Subject: [PATCH] notebook: never strip the .designer.ipynb extension Lakeflow Designer notebooks are stored in the workspace as DESIGNER_FILE objects with a compound ".designer.ipynb" extension. Several call sites stripped notebook extensions with path.Ext + TrimSuffix, which removed only the trailing ".ipynb" and left an orphaned ".designer", producing a path that no longer round-trips on deploy/import. Route these sites through the existing notebook.StripExtension helper, which preserves the ".designer.ipynb" suffix: - bundle/generate/downloader.go (the reported `bundle generate` bug). Also download Designer files as Jupyter notebooks, since get-status reports no export format for them. The export format is now selected generically via a new notebook.FixedExportFormat(objectType) lookup keyed on the object type rather than a designer-specific check. - libs/sync/snapshot_state.go, which otherwise tracked Designer notebooks at the wrong remote name, inconsistent with translate_paths. - cmd/workspace/workspace/import_dir.go (logged remote path). Co-authored-by: Isaac --- .../generate/designer_job/databricks.yml | 2 + .../bundle/generate/designer_job/out.job.yml | 8 ++ .../generate/designer_job/out.test.toml | 3 + .../designer_job/outnotebook.designer.ipynb | 52 +++++++++++ .../bundle/generate/designer_job/output.txt | 2 + .../bundle/generate/designer_job/script | 1 + .../bundle/generate/designer_job/test.toml | 86 +++++++++++++++++++ bundle/generate/downloader.go | 29 ++++--- bundle/generate/downloader_test.go | 46 ++++++++++ cmd/workspace/workspace/import_dir.go | 4 +- libs/notebook/ext.go | 18 ++++ libs/notebook/ext_test.go | 12 +++ libs/sync/snapshot_state.go | 6 +- libs/sync/snapshot_state_test.go | 20 +++-- .../sync-fileset/designer-nb.designer.ipynb | 21 +++++ 15 files changed, 281 insertions(+), 29 deletions(-) create mode 100644 acceptance/bundle/generate/designer_job/databricks.yml create mode 100644 acceptance/bundle/generate/designer_job/out.job.yml create mode 100644 acceptance/bundle/generate/designer_job/out.test.toml create mode 100644 acceptance/bundle/generate/designer_job/outnotebook.designer.ipynb create mode 100644 acceptance/bundle/generate/designer_job/output.txt create mode 100644 acceptance/bundle/generate/designer_job/script create mode 100644 acceptance/bundle/generate/designer_job/test.toml create mode 100644 libs/sync/testdata/sync-fileset/designer-nb.designer.ipynb diff --git a/acceptance/bundle/generate/designer_job/databricks.yml b/acceptance/bundle/generate/designer_job/databricks.yml new file mode 100644 index 00000000000..c613c584b4c --- /dev/null +++ b/acceptance/bundle/generate/designer_job/databricks.yml @@ -0,0 +1,2 @@ +bundle: + name: designer_job diff --git a/acceptance/bundle/generate/designer_job/out.job.yml b/acceptance/bundle/generate/designer_job/out.job.yml new file mode 100644 index 00000000000..26afb0e4911 --- /dev/null +++ b/acceptance/bundle/generate/designer_job/out.job.yml @@ -0,0 +1,8 @@ +resources: + jobs: + out: + name: designer_job + tasks: + - task_key: test_task + notebook_task: + notebook_path: outnotebook.designer.ipynb diff --git a/acceptance/bundle/generate/designer_job/out.test.toml b/acceptance/bundle/generate/designer_job/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/generate/designer_job/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/generate/designer_job/outnotebook.designer.ipynb b/acceptance/bundle/generate/designer_job/outnotebook.designer.ipynb new file mode 100644 index 00000000000..80786d9e2f0 --- /dev/null +++ b/acceptance/bundle/generate/designer_job/outnotebook.designer.ipynb @@ -0,0 +1,52 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "[UUID]", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%python\n", + "\n", + "print(\"Hello, World!\")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "1" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "test", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/acceptance/bundle/generate/designer_job/output.txt b/acceptance/bundle/generate/designer_job/output.txt new file mode 100644 index 00000000000..d0009533565 --- /dev/null +++ b/acceptance/bundle/generate/designer_job/output.txt @@ -0,0 +1,2 @@ +File successfully saved to outnotebook.designer.ipynb +Job configuration successfully saved to out.job.yml diff --git a/acceptance/bundle/generate/designer_job/script b/acceptance/bundle/generate/designer_job/script new file mode 100644 index 00000000000..16cfc1589ce --- /dev/null +++ b/acceptance/bundle/generate/designer_job/script @@ -0,0 +1 @@ +$CLI bundle generate job --existing-job-id 1234 --config-dir . --key out --force --source-dir . diff --git a/acceptance/bundle/generate/designer_job/test.toml b/acceptance/bundle/generate/designer_job/test.toml new file mode 100644 index 00000000000..adbcec08593 --- /dev/null +++ b/acceptance/bundle/generate/designer_job/test.toml @@ -0,0 +1,86 @@ +[[Server]] +Pattern = "GET /api/2.2/jobs/get" +Response.Body = ''' +{ + "job_id": 11223344, + "settings": { + "name": "designer_job", + "tasks": [ + { + "task_key": "test_task", + "notebook_task": { + "notebook_path": "/Workspace/Users/tester@databricks.com/outnotebook.designer.ipynb" + } + } + ] + } +} +''' + +# Lakeflow Designer files are stored as DESIGNER_FILE objects and get-status +# reports no repos_export_format for them. +[[Server]] +Pattern = "GET /api/2.0/workspace/get-status" +Response.Body = ''' +{ + "path": "/Workspace/Users/tester@databricks.com/outnotebook.designer.ipynb", + "object_type": "DESIGNER_FILE" +} +''' + +[[Server]] +Pattern = "GET /api/2.0/workspace/export" +Response.Body = ''' +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "7027244a-b958-4dca-aca6-57a2c638f368", + "showTitle": false, + "tableResultSettingsMap": {}, + "title": "" + } + }, + "outputs": [], + "source": [ + "%python\n", + "\n", + "print(\"Hello, World!\")" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "computePreferences": { + "hardware": { + "accelerator": null, + "gpuPoolId": null, + "memory": null + } + }, + "dashboards": [], + "environmentMetadata": { + "base_environment": "", + "environment_version": "1" + }, + "inputWidgetPreferences": null, + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "test", + "widgets": {} + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} +''' diff --git a/bundle/generate/downloader.go b/bundle/generate/downloader.go index 24503fe363d..87aa4ef5751 100644 --- a/bundle/generate/downloader.go +++ b/bundle/generate/downloader.go @@ -176,27 +176,30 @@ func (n *Downloader) markNotebookForDownload(ctx context.Context, notebookPath * } relPath := n.relativePath(*notebookPath) - // If the path has any extension, strip it - ext := path.Ext(relPath) - if ext != "" { - relPath = strings.TrimSuffix(relPath, ext) - } - ext = notebook.GetExtensionByLanguage(&workspace.ObjectInfo{ - Language: stat.Language, - ObjectType: stat.ObjectType, - }) + relPath = notebook.StripExtension(relPath) - if stat.ExportFormat == workspace.ExportFormatJupyter { - ext = ".ipynb" + format := stat.ExportFormat + if fixed, ok := notebook.FixedExportFormat(stat.ObjectType); ok { + // These object types carry their full extension in the workspace path + // (preserved above) and report no export format, so we use a fixed one. + format = fixed + } else { + ext := notebook.GetExtensionByLanguage(&workspace.ObjectInfo{ + Language: stat.Language, + ObjectType: stat.ObjectType, + }) + if format == workspace.ExportFormatJupyter { + ext = notebook.ExtensionJupyter + } + relPath += ext } - relPath = relPath + ext targetPath := filepath.Join(n.sourceDir, relPath) n.files[targetPath] = exportFile{ path: *notebookPath, - format: stat.ExportFormat, + format: format, } // Update the notebook path to be relative to the config dir diff --git a/bundle/generate/downloader_test.go b/bundle/generate/downloader_test.go index 5ce4889c5a2..3e96b2e1015 100644 --- a/bundle/generate/downloader_test.go +++ b/bundle/generate/downloader_test.go @@ -192,6 +192,52 @@ func notebookStatusHandler(t *testing.T) http.HandlerFunc { } } +// designerStatusHandler mimics get-status for a Lakeflow Designer file: object +// type DESIGNER_FILE and no export format reported. +func designerStatusHandler(t *testing.T) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/api/2.0/workspace/get-status" { + t.Fatalf("unexpected request path: %s", r.URL.Path) + } + resp := workspaceStatus{ + ObjectType: workspace.ObjectType("DESIGNER_FILE"), + } + w.Header().Set("Content-Type", "application/json") + require.NoError(t, json.NewEncoder(w).Encode(resp)) + } +} + +func TestDownloader_MarkTasksForDownload_DesignerNotebook(t *testing.T) { + ctx := t.Context() + w := newTestWorkspaceClient(t, designerStatusHandler(t)) + + dir := "base/dir" + sourceDir := filepath.Join(dir, "source") + configDir := filepath.Join(dir, "config") + downloader := NewDownloader(w, sourceDir, configDir) + + tasks := []jobs.Task{ + { + TaskKey: "designer_task", + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "/Users/user/project/My Pipeline.designer.ipynb", + }, + }, + } + + err := downloader.MarkTasksForDownload(ctx, tasks) + require.NoError(t, err) + + // The ".designer.ipynb" suffix must be preserved, not stripped to ".designer". + assert.Equal(t, "../source/My Pipeline.designer.ipynb", tasks[0].NotebookTask.NotebookPath) + require.Len(t, downloader.files, 1) + f := downloader.files[filepath.Join(sourceDir, "My Pipeline.designer.ipynb")] + assert.Equal(t, "/Users/user/project/My Pipeline.designer.ipynb", f.path) + // Designer files round-trip as Jupyter notebooks even though get-status + // reports no export format. + assert.Equal(t, workspace.ExportFormatJupyter, f.format) +} + func TestDownloader_MarkTasksForDownload_PreservesStructure(t *testing.T) { w := newTestWorkspaceClient(t, notebookStatusHandler(t)) diff --git a/cmd/workspace/workspace/import_dir.go b/cmd/workspace/workspace/import_dir.go index daaf3ef0dd8..86853faef76 100644 --- a/cmd/workspace/workspace/import_dir.go +++ b/cmd/workspace/workspace/import_dir.go @@ -7,7 +7,6 @@ import ( "os" "path" "path/filepath" - "strings" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/libs/cmdctx" @@ -96,8 +95,7 @@ func (opts importDirOptions) callback(ctx context.Context, workspaceFiler filer. return err } if isNotebook { - ext := path.Ext(remoteName) - remoteName = strings.TrimSuffix(remoteName, ext) + remoteName = notebook.StripExtension(remoteName) } // Open the local file diff --git a/libs/notebook/ext.go b/libs/notebook/ext.go index 49155aaa8eb..69d6eab6c92 100644 --- a/libs/notebook/ext.go +++ b/libs/notebook/ext.go @@ -20,6 +20,10 @@ const ( ExtensionDesigner string = ".designer.ipynb" ) +// ObjectTypeDesignerFile is the workspace object type for Lakeflow Designer +// notebooks. The SDK does not define a constant for it. +const ObjectTypeDesignerFile workspace.ObjectType = "DESIGNER_FILE" + // StripExtension returns the workspace path for a local notebook file. // Designer files keep their full ".designer.ipynb" suffix in the workspace; // other notebook types lose their extension on import. @@ -30,6 +34,20 @@ func StripExtension(name string) string { return strings.TrimSuffix(name, path.Ext(name)) } +// FixedExportFormat returns the export format an object type must be downloaded +// in when get-status reports none for it. Lakeflow Designer files (DESIGNER_FILE) +// carry their full extension in the workspace path and round-trip as Jupyter +// notebooks. The boolean is false for object types that report their own export +// format (e.g. regular notebooks). +func FixedExportFormat(objectType workspace.ObjectType) (workspace.ExportFormat, bool) { + switch objectType { + case ObjectTypeDesignerFile: + return workspace.ExportFormatJupyter, true + default: + return "", false + } +} + // Extensions lists all notebook file extensions. var Extensions = []string{ ExtensionPython, diff --git a/libs/notebook/ext_test.go b/libs/notebook/ext_test.go index c1eb47ccd20..7ccd0d33af8 100644 --- a/libs/notebook/ext_test.go +++ b/libs/notebook/ext_test.go @@ -3,6 +3,7 @@ package notebook import ( "testing" + "github.com/databricks/databricks-sdk-go/service/workspace" "github.com/stretchr/testify/assert" ) @@ -30,3 +31,14 @@ func TestStripExtension(t *testing.T) { assert.Equal(t, tc.want, StripExtension(tc.in), "input=%q", tc.in) } } + +func TestFixedExportFormat(t *testing.T) { + // Designer files report no export format and must round-trip as Jupyter. + format, ok := FixedExportFormat(ObjectTypeDesignerFile) + assert.True(t, ok) + assert.Equal(t, workspace.ExportFormatJupyter, format) + + // Regular notebooks report their own export format. + _, ok = FixedExportFormat(workspace.ObjectTypeNotebook) + assert.False(t, ok) +} diff --git a/libs/sync/snapshot_state.go b/libs/sync/snapshot_state.go index 38ee2880748..61b24310272 100644 --- a/libs/sync/snapshot_state.go +++ b/libs/sync/snapshot_state.go @@ -2,12 +2,11 @@ package sync import ( "fmt" - "path" "path/filepath" - "strings" "time" "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/notebook" ) // SnapshotState keeps track of files on the local filesystem and their corresponding @@ -57,8 +56,7 @@ func NewSnapshotState(localFiles []fileset.File) (*SnapshotState, error) { continue } if isNotebook { - ext := path.Ext(remoteName) - remoteName = strings.TrimSuffix(remoteName, ext) + remoteName = notebook.StripExtension(remoteName) } // Add the file to snapshot state diff --git a/libs/sync/snapshot_state_test.go b/libs/sync/snapshot_state_test.go index 248e5832ca7..2c5591c4572 100644 --- a/libs/sync/snapshot_state_test.go +++ b/libs/sync/snapshot_state_test.go @@ -17,19 +17,21 @@ func TestSnapshotState(t *testing.T) { require.NoError(t, err) // Assert initial contents of the fileset - assert.Len(t, files, 4) - assert.Equal(t, "invalid-nb.ipynb", files[0].Relative) - assert.Equal(t, "my-nb.py", files[1].Relative) - assert.Equal(t, "my-script.py", files[2].Relative) - assert.Equal(t, "valid-nb.ipynb", files[3].Relative) + assert.Len(t, files, 5) + assert.Equal(t, "designer-nb.designer.ipynb", files[0].Relative) + assert.Equal(t, "invalid-nb.ipynb", files[1].Relative) + assert.Equal(t, "my-nb.py", files[2].Relative) + assert.Equal(t, "my-script.py", files[3].Relative) + assert.Equal(t, "valid-nb.ipynb", files[4].Relative) // Assert snapshot state generated from the fileset. Note that the invalid notebook - // has been ignored. + // has been ignored. The Designer notebook keeps its full ".designer.ipynb" suffix + // in the remote name, while other notebooks have their extension stripped. s, err := NewSnapshotState(files) require.NoError(t, err) - assertKeysOfMap(t, s.LastModifiedTimes, []string{"valid-nb.ipynb", "my-nb.py", "my-script.py"}) - assertKeysOfMap(t, s.LocalToRemoteNames, []string{"valid-nb.ipynb", "my-nb.py", "my-script.py"}) - assertKeysOfMap(t, s.RemoteToLocalNames, []string{"valid-nb", "my-nb", "my-script.py"}) + assertKeysOfMap(t, s.LastModifiedTimes, []string{"designer-nb.designer.ipynb", "valid-nb.ipynb", "my-nb.py", "my-script.py"}) + assertKeysOfMap(t, s.LocalToRemoteNames, []string{"designer-nb.designer.ipynb", "valid-nb.ipynb", "my-nb.py", "my-script.py"}) + assertKeysOfMap(t, s.RemoteToLocalNames, []string{"designer-nb.designer.ipynb", "valid-nb", "my-nb", "my-script.py"}) assert.NoError(t, s.validate()) } diff --git a/libs/sync/testdata/sync-fileset/designer-nb.designer.ipynb b/libs/sync/testdata/sync-fileset/designer-nb.designer.ipynb new file mode 100644 index 00000000000..6254635ef32 --- /dev/null +++ b/libs/sync/testdata/sync-fileset/designer-nb.designer.ipynb @@ -0,0 +1,21 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"designer\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +}