Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,10 @@ mattn/go-isatty - https://github.com/mattn/go-isatty
Copyright (c) Yasuhiro MATSUMOTO <mattn.jp@gmail.com>
License - https://github.com/mattn/go-isatty/blob/master/LICENSE

muesli/termenv - https://github.com/muesli/termenv
Copyright (c) 2019 Christian Muehlhaeuser
License - https://github.com/muesli/termenv/blob/master/LICENSE

sabhiram/go-gitignore - https://github.com/sabhiram/go-gitignore
Copyright (c) 2015 Shaba Abhiram
License - https://github.com/sabhiram/go-gitignore/blob/master/LICENSE
Expand Down
47 changes: 36 additions & 11 deletions acceptance/experimental/air/get/output.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,42 @@

=== get (text)
>>> [CLI] experimental air get run 123
Job Link: [DATABRICKS_URL]/jobs/runs/123?o=[NUMID]

Run ID: 123
Status: SUCCESS
Submitted: 2023-11-14 22:13 UTC
Retries: 0
Duration: 12s
Experiment: my-exp
MLflow Run: my-run
User: user@example.com
Accelerators: 8x H100

╭─ Configuration ────────────────────────────────────────────────╮
│ │
│ experiment_name: my-exp │
│ compute: │
│ accelerator_type: a10 │
│ num_accelerators: 1 │
│ command: | │
│ for i in $(seq 1 10); do │
│ echo "step $i" │
│ done │
│ │
╰────────────────────────────────────────────────────────────────╯

╭─ Training Progress ────────────────────────────────────────────╮
│ │
│ ██████████████████████████████████ 100% 10/10 steps · 12s │
│ │
╰────────────────────────────────────────────────────────────────╯

╭─ Metadata ─────────────────────────────────────────────────────╮
│ │
│ Run ID 123 │
│ Status ● SUCCESS │
│ Submitted 2023-11-14 22:13 UTC │
│ Retries 0 │
│ Max Retries 3 │
│ Duration 12s │
│ Experiment my-exp │
│ MLflow Run my-run │
│ User user@example.com │
│ Accelerators 1x A10 │
│ Environment ml-runtime-gpu:1.0 │
│ │
╰────────────────────────────────────────────────────────────────╯


=== get (json)
>>> [CLI] experimental air get run 123 -o json
Expand Down
10 changes: 7 additions & 3 deletions acceptance/experimental/air/get/test.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,12 @@ Response.Body = '''
{
"task_key": "train",
"attempt_number": 0,
"max_retries": 3,
"gen_ai_compute_task": {
"mlflow_experiment_name": "/Users/user@example.com/my-exp",
"compute": {"gpu_type": "GPU_8xH100", "num_gpus": 8}
"compute": {"gpu_type": "GPU_1xA10", "num_gpus": 1},
"dl_runtime_image": "ml-runtime-gpu:1.0",
"yaml_parameters": "experiment_name: my-exp\ncompute:\n accelerator_type: a10\n num_accelerators: 1\ncommand: |\n for i in $(seq 1 10); do\n echo \"step $i\"\n done\n"
}
}
]
Expand All @@ -39,9 +42,10 @@ Response.Body = '''
{"gen_ai_compute_output": {"run_info": {"mlflow_experiment_id": "exp1", "mlflow_run_id": "run1"}}}
'''

# The MLflow Run cell shows the run's name, fetched from the MLflow REST API.
# The MLflow Run cell shows the run's name; the progress bar reads the highest
# logged step (metrics) against the max_steps param.
[[Server]]
Pattern = "GET /api/2.0/mlflow/runs/get"
Response.Body = '''
{"run": {"info": {"run_name": "my-run"}}}
{"run": {"info": {"run_name": "my-run"}, "data": {"metrics": [{"step": 10}], "params": [{"key": "max_steps", "value": "10"}]}}}
'''
27 changes: 27 additions & 0 deletions experimental/air/cmd/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io"
"math"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -263,3 +264,29 @@ func gpuDisplayName(gpuType string) string {
}
return gpuType
}

// environment reports the runtime image (DlRuntimeImage) of the run's first
// task. It yields "" when the run has no tasks or when that first task is not a
// GenAI-compute task.
func environment(run *jobs.Run) string {
	if len(run.Tasks) > 0 {
		// Only the first task is consulted.
		if genAI := run.Tasks[0].GenAiComputeTask; genAI != nil {
			return genAI.DlRuntimeImage
		}
	}
	return ""
}

// maxRetries renders the retry limit configured on the run's last task as a
// display string. A run without tasks yields "0"; a negative limit (the
// backend's -1 sentinel) yields "unlimited"; any other value is printed as a
// decimal count.
func maxRetries(run *jobs.Run) string {
	tasks := run.Tasks
	if len(tasks) == 0 {
		return "0"
	}
	// The last entry in Tasks carries the limit we display.
	if limit := tasks[len(tasks)-1].MaxRetries; limit >= 0 {
		return strconv.Itoa(limit)
	}
	return "unlimited"
}
122 changes: 28 additions & 94 deletions experimental/air/cmd/get.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
package aircmd

import (
"context"
"errors"
"fmt"
"io"
"strconv"

"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdctx"
"github.com/databricks/cli/libs/flags"
"github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/spf13/cobra"
Expand All @@ -30,26 +26,27 @@ type getData struct {
DashboardURL string `json:"dashboard_url"`
MLflowURL *string `json:"mlflow_url"`

// The fields below are pre-rendered for the text table and excluded from
// JSON (matching `air get run --json`). The table always shows every row,
// with "N/A" for missing values, in the same order as the Python CLI. The
// Run ID, Experiment, and MLflow Run cells carry terminal hyperlinks when
// stdout is a terminal, so the URLs don't appear as bare text.
RunIDDisplay string `json:"-"`
// The fields below are pre-rendered text-view cells, excluded from JSON
// (matching `air get run --json`). Each shows "N/A" when its value is
// missing. The styled single-run renderer (render.go) consumes them; the
// Run ID, Status, and MLflow Run cells it draws are styled and hyperlinked
// there rather than stored here.
SubmittedDisplay string `json:"-"`
DurationDisplay string `json:"-"`
ExperimentDisplay string `json:"-"`
MLflowDisplay string `json:"-"`
UserDisplay string `json:"-"`
AcceleratorsDisplay string `json:"-"`
EnvironmentDisplay string `json:"-"`
MaxRetriesDisplay string `json:"-"`
// Sweep replaces the single-run view for foreach runs.
Sweep *sweepInfo `json:"-"`
}

// getTemplate is the text-mode layout. It reads from the JSON envelope, so
// every field is reached through ".Data".
const getTemplate = `{{- if .Data.Sweep -}}
Sweep Run ID: {{.Data.RunID}}
// getTemplate is the text-mode layout for a sweep (foreach) run. Single runs are
// drawn by the styled renderer in render.go and never reach this template; it is
// used only when .Data.Sweep is set. It reads from the JSON envelope, so every
// field is reached through ".Data".
const getTemplate = `Sweep Run ID: {{.Data.RunID}}
Status: {{.Data.Status}}
Total: {{.Data.Sweep.Total}}
Completed: {{.Data.Sweep.Completed}}
Expand All @@ -64,17 +61,6 @@ Sweep Tasks:
{{printf " %-24s %-14s %-12s %s" .TaskKey .RunID .Status .Experiment}}
{{- end}}
{{- end}}
{{- else -}}
Run ID: {{.Data.RunIDDisplay}}
Status: {{.Data.Status}}
Submitted: {{.Data.SubmittedDisplay}}
Retries: {{.Data.AttemptNumber}}
Duration: {{.Data.DurationDisplay}}
Experiment: {{.Data.ExperimentDisplay}}
MLflow Run: {{.Data.MLflowDisplay}}
User: {{.Data.UserDisplay}}
Accelerators: {{.Data.AcceleratorsDisplay}}
{{- end}}
`

// newGetCommand is the `get` parent group. Subcommands name the resource to
Expand Down Expand Up @@ -146,31 +132,28 @@ func newGetRunCommand() *cobra.Command {
data.Sweep = buildSweepInfo(ctx, w, task)
}

if root.OutputType(cmd) == flags.OutputText {
out := cmd.OutOrStdout()
addTextLinks(ctx, out, w, &data, ids)
if root.OutputType(cmd) != flags.OutputText {
return renderEnvelope(ctx, data)
}

// Lead with the job run link (hyperlinked, falling back to the bare
// URL off a terminal), then a gap before the training config and the
// status table, mirroring the Python CLI's header.
out := cmd.OutOrStdout()
if data.Sweep != nil {
// A sweep has no single status, config, or timing, so lead with the
// job run link and render the foreach summary table (getTemplate).
fmt.Fprintf(out, "Job Link: %s\n\n", hyperlink(ctx, out, data.DashboardURL, data.DashboardURL))

// Text mode shows the training-config YAML before the status,
// mirroring `air get run`. JSON output omits it.
if path := yamlConfigPath(run); path != "" {
printConfigYAML(ctx, out, w, path)
}
return renderEnvelope(ctx, data)
}
return renderEnvelope(ctx, data)

renderRunText(ctx, out, w, run, &data, ids)
return nil
}

return cmd
}

// buildGetData extracts the fields we display from a run. The text-table cells
// are pre-rendered here with their "N/A" fallbacks; the Run ID, Experiment, and
// MLflow Run cells are finalized later by addTextLinks once the dashboard and
// MLflow identifiers are known.
// buildGetData extracts the fields we display from a run. The text-view cells
// are pre-rendered here with their "N/A" fallbacks; the styled renderer adds the
// hyperlinks and colors once the dashboard and MLflow identifiers are known.
func buildGetData(run *jobs.Run) getData {
data := getData{
RunID: strconv.FormatInt(run.RunId, 10),
Expand All @@ -180,7 +163,6 @@ func buildGetData(run *jobs.Run) getData {
AttemptNumber: latestAttemptNumber(run),
ExperimentName: experimentName(run),
}
data.RunIDDisplay = data.RunID
data.SubmittedDisplay = submittedDisplay(run)
data.DurationDisplay = na
if data.DurationSeconds != nil {
Expand All @@ -190,57 +172,9 @@ func buildGetData(run *jobs.Run) getData {
if data.ExperimentName != nil {
data.ExperimentDisplay = *data.ExperimentName
}
data.MLflowDisplay = na
data.UserDisplay = orNA(run.CreatorUserName)
data.AcceleratorsDisplay = orNA(accelerators(run))
data.EnvironmentDisplay = orNA(environment(run))
data.MaxRetriesDisplay = maxRetries(run)
return data
}

// addTextLinks fills in the hyperlinked text-mode cells on data. The Run ID
// cell always becomes a link to data.DashboardURL. When MLflow identifiers are
// available (ids != nil), the Experiment cell (if an experiment name is set) and
// the MLflow Run cell are linked to their MLflow pages as well; otherwise those
// cells are left untouched. How a link degrades off a terminal is decided by
// hyperlink, not here.
func addTextLinks(ctx context.Context, out io.Writer, w *databricks.WorkspaceClient, data *getData, ids *mlflowIdentifiers) {
	data.RunIDDisplay = hyperlink(ctx, out, data.RunID, data.DashboardURL)

	if ids != nil {
		if name := data.ExperimentName; name != nil {
			experimentURL := mlflowExperimentURL(w.Config.Host, ids)
			data.ExperimentDisplay = hyperlink(ctx, out, *name, experimentURL)
		}

		// Resolve the label before the URL, matching the original evaluation order.
		runLabel := mlflowRunLabel(ctx, w, ids.RunID)
		runURL := mlflowRunURL(w.Config.Host, ids)
		data.MLflowDisplay = hyperlink(ctx, out, runLabel, runURL)
	}
}

// yamlConfigPath reports the training-config YAML path (YamlParametersFilePath)
// recorded on the run's first task. It yields "" when the run has no tasks or
// when that first task is not a GenAI-compute task.
func yamlConfigPath(run *jobs.Run) string {
	tasks := run.Tasks
	if len(tasks) == 0 || tasks[0].GenAiComputeTask == nil {
		return ""
	}
	return tasks[0].GenAiComputeTask.YamlParametersFilePath
}

// printConfigYAML fetches the training-config YAML stored at path in the
// workspace and prints it to out under a "Training Configuration:" heading,
// followed by a blank line. The content is passed through
// reformatYAMLForDisplay before printing.
//
// The operation is best-effort: if the download or the read fails, a warning is
// logged and the function returns without writing anything to out, so the
// calling command is not failed.
func printConfigYAML(ctx context.Context, out io.Writer, w *databricks.WorkspaceClient, path string) {
	src, downloadErr := w.Workspace.Download(ctx, path)
	if downloadErr != nil {
		log.Warnf(ctx, "air get: could not download training config %s: %v", path, downloadErr)
		return
	}
	// Release the download stream once we are done with it.
	defer src.Close()

	raw, readErr := io.ReadAll(src)
	if readErr != nil {
		log.Warnf(ctx, "air get: could not read training config %s: %v", path, readErr)
		return
	}

	fmt.Fprintln(out, "Training Configuration:")
	fmt.Fprintln(out, reformatYAMLForDisplay(raw))
	// Trailing blank line separates the config from whatever is printed next.
	fmt.Fprintln(out)
}
Loading
Loading