diff --git a/experimental/air/cmd/runconfig.go b/experimental/air/cmd/runconfig.go new file mode 100644 index 0000000000..c0678c17cb --- /dev/null +++ b/experimental/air/cmd/runconfig.go @@ -0,0 +1,446 @@ +package aircmd + +import ( + "errors" + "fmt" + "regexp" + "slices" + "strings" + + "go.yaml.in/yaml/v3" +) + +// This file ports the run YAML schema and its structural validation from the +// Python CLI's sdk/config.py. "Structural" means types, required fields, and +// format/cross-field rules that need no workspace access. Online checks (compute +// pool resolution, GPU availability) and git/filesystem checks run at launch time +// and are intentionally not ported here. +// +// Divergences from the Python schema, both consequences of the training-service- +// only port: +// - compute.node_pool_id / compute.pool_name are dropped (see compute.go). +// - the top-level `priority` field is dropped: it is a node-pool queue-ordering +// knob with no meaning for serverless (poolless) workloads. + +// taskKeyRe (REGEX_TASK_KEY_CHARS in the Python CLI) matches ASCII alphanumerics, +// hyphens, and underscores only (no periods). The class is spelled out instead of +// using \w because Python's \w also matches Unicode letters, which the ASCII-only +// Jobs API task_key rejects; Go's RE2 \w is already ASCII-only ([0-9A-Za-z_]), so +// the explicit class here simply mirrors the Python pattern. +var taskKeyRe = regexp.MustCompile(`^[A-Za-z0-9_-]+$`) + +// gitRefRe guards branch/remote names against command injection. Only safe ref +// characters are allowed. +var gitRefRe = regexp.MustCompile(`^[\w./-]+$`) + +// runConfig is the top-level run YAML schema: experiment_name + compute / +// environment / code_source plus the command and run options. 
+type runConfig struct { + ExperimentName string `yaml:"experiment_name"` + Compute *computeConfig `yaml:"compute"` + Environment *environmentConfig `yaml:"environment"` + Command *string `yaml:"command"` + EnvVariables map[string]string `yaml:"env_variables"` + Secrets map[string]string `yaml:"secrets"` + CodeSource *codeSourceConfig `yaml:"code_source"` + // MaxRetries defaults to 3 when unset; default-filling is a normalization + // concern handled at launch, so a nil pointer is left as-is here. + MaxRetries *int `yaml:"max_retries"` + TimeoutMinutes *int `yaml:"timeout_minutes"` + IdempotencyToken *string `yaml:"idempotency_token"` + Parameters map[string]any `yaml:"parameters"` + MLflowRunName *string `yaml:"mlflow_run_name"` + MLflowExperimentDirectory *string `yaml:"mlflow_experiment_directory"` + Permissions []permission `yaml:"permissions"` + UsagePolicyName *string `yaml:"usage_policy_name"` +} + +// validate runs structural validation over the whole config, returning the first +// failure. Fields are checked in declaration order to keep error output stable. +func (c *runConfig) validate() error { + if err := validateExperimentName(c.ExperimentName); err != nil { + return err + } + + if c.Compute == nil { + return errors.New("compute: section is required") + } + if err := c.Compute.validate(); err != nil { + return err + } + + if c.Environment != nil { + if err := c.Environment.validate(); err != nil { + return err + } + } + + // command is optional in the type system but required in practice, matching + // the Python validate_script_fields model validator. 
+ if c.Command == nil { + return errors.New("command is required") + } + if err := validateCommand(*c.Command); err != nil { + return err + } + + if err := validateSecretRefs(c.Secrets); err != nil { + return err + } + + if c.CodeSource != nil { + if err := c.CodeSource.validate(); err != nil { + return err + } + } + + if c.MaxRetries != nil && *c.MaxRetries < 0 { + return fmt.Errorf("max_retries must be >= 0, got %d", *c.MaxRetries) + } + + if c.TimeoutMinutes != nil && *c.TimeoutMinutes < 1 { + return fmt.Errorf("timeout_minutes must be >= 1, got %d", *c.TimeoutMinutes) + } + + if c.IdempotencyToken != nil { + v := strings.TrimSpace(*c.IdempotencyToken) + if v == "" { + return errors.New("idempotency_token cannot be empty") + } + if len(v) > 64 { + return errors.New("idempotency_token must be 64 characters or less") + } + } + + if c.MLflowRunName != nil { + v := strings.TrimSpace(*c.MLflowRunName) + if v == "" { + return errors.New("mlflow_run_name cannot be empty") + } + if !taskKeyRe.MatchString(v) { + return fmt.Errorf("invalid mlflow_run_name %q: only alphanumeric characters, hyphens, and underscores are allowed", v) + } + } + + if c.MLflowExperimentDirectory != nil { + v := strings.TrimSpace(*c.MLflowExperimentDirectory) + if v == "" { + return errors.New("mlflow_experiment_directory cannot be empty") + } + // MLflow experiments live under the workspace tree. + if !strings.HasPrefix(v, "/Workspace") { + return fmt.Errorf("mlflow_experiment_directory must start with '/Workspace', got: %s", v) + } + } + + for i := range c.Permissions { + if err := c.Permissions[i].validate(); err != nil { + return err + } + } + + if c.UsagePolicyName != nil { + v := strings.TrimSpace(*c.UsagePolicyName) + if v == "" { + return errors.New("usage_policy_name must not be empty") + } + // 127 matches the server-side max_length on the policy name filter. 
+ if len(v) > 127 { + return fmt.Errorf("usage_policy_name must be at most 127 characters, got %d", len(v)) + } + } + + return nil +} + +// validateExperimentName enforces the Databricks Jobs API task_key constraints: +// the experiment_name becomes a task key, which caps at 100 characters and allows +// only alphanumerics, hyphens, and underscores. +func validateExperimentName(v string) error { + if v == "" { + return errors.New("experiment_name cannot be empty") + } + if len(v) > 100 { + return fmt.Errorf("experiment_name must be 100 characters or less (got %d); this is the Jobs API task_key length limit", len(v)) + } + if !taskKeyRe.MatchString(v) { + return fmt.Errorf("invalid experiment_name %q: only alphanumeric characters, hyphens (-), and underscores (_) are allowed", v) + } + return nil +} + +// validateCommand enforces command is non-empty and within the line-count cap. +func validateCommand(v string) error { + if strings.TrimSpace(v) == "" { + return errors.New("command cannot be empty") + } + lineCount := strings.Count(v, "\n") + 1 + if lineCount > 1000 { + return fmt.Errorf("command is too long (%d lines); maximum is 1000 lines — move complex logic into a script in your code_source", lineCount) + } + return nil +} + +// validateSecretRefs checks that secret references use the "scope/key" format. +func validateSecretRefs(secrets map[string]string) error { + for varName, ref := range secrets { + parts := strings.Split(ref, "/") + if len(parts) != 2 { + return fmt.Errorf("invalid secret reference %q for variable %q: expected format 'scope/key' (e.g., my_scope/hf_token)", ref, varName) + } + if parts[0] == "" || parts[1] == "" { + return fmt.Errorf("invalid secret reference %q for variable %q: scope and key cannot be empty", ref, varName) + } + } + return nil +} + +// environmentConfig is the `environment` block: dependencies and/or a custom +// docker image. 
+type environmentConfig struct { + Dependencies dependencies `yaml:"dependencies"` + Version stringOrInt `yaml:"version"` + DockerImage *dockerImageConfig `yaml:"docker_image"` +} + +func (e *environmentConfig) validate() error { + // docker_image is exclusive with dependencies/version: the image already pins + // the full runtime. + if e.DockerImage != nil { + var conflicting []string + if e.Dependencies.set { + conflicting = append(conflicting, "dependencies") + } + if e.Version.set { + conflicting = append(conflicting, "version") + } + if len(conflicting) > 0 { + return fmt.Errorf("when 'docker_image' is specified under 'environment', these fields are not allowed: %s", strings.Join(conflicting, ", ")) + } + return e.DockerImage.validate() + } + + // version pins the client image version, which is only meaningful for an + // inline (list) dependency set — a requirements.yaml file carries its own. + if e.Version.set { + if e.Dependencies.set && !e.Dependencies.isList { + return errors.New("'environment.version' is only valid with inline dependencies (a list); when 'dependencies' points to a requirements.yaml file, set the version inside that file") + } + if !e.Dependencies.set { + return errors.New("'environment.version' requires inline 'dependencies' (a list of packages)") + } + } + + return nil +} + +// dependencies is environment.dependencies, which is polymorphic: a string is a +// path to a requirements.yaml file; a list is an inline package list. 
+type dependencies struct { + set bool + isList bool + path string + list []string +} + +func (d *dependencies) UnmarshalYAML(node *yaml.Node) error { + switch node.Kind { + case yaml.ScalarNode: + d.set, d.isList = true, false + return node.Decode(&d.path) + case yaml.SequenceNode: + d.set, d.isList = true, true + return node.Decode(&d.list) + default: + return errors.New("environment.dependencies must be a string path or a list of packages") + } +} + +// stringOrInt holds a scalar that may be a string or an integer in YAML +// (environment.version). The raw text is kept; integer-format validation is a +// launch-time concern. +type stringOrInt struct { + set bool + raw string +} + +func (s *stringOrInt) UnmarshalYAML(node *yaml.Node) error { + if node.Kind != yaml.ScalarNode { + return errors.New("environment.version must be a string or integer") + } + s.set = true + s.raw = node.Value + return nil +} + +// dockerImageConfig is environment.docker_image. +type dockerImageConfig struct { + URL string `yaml:"url"` +} + +func (d *dockerImageConfig) validate() error { + if strings.TrimSpace(d.URL) == "" { + return errors.New("docker_image.url cannot be empty") + } + return nil +} + +// codeSourceConfig is the `code_source` block. Only the "snapshot" type exists. +type codeSourceConfig struct { + Type string `yaml:"type"` + Snapshot *snapshotSourceConfig `yaml:"snapshot"` +} + +func (c *codeSourceConfig) validate() error { + if c.Type != "snapshot" { + return fmt.Errorf("code_source.type must be 'snapshot', got %q", c.Type) + } + if c.Snapshot == nil { + return errors.New("code_source.type='snapshot' requires a snapshot configuration") + } + return c.Snapshot.validate() +} + +// snapshotSourceConfig describes a local directory to tar and upload. 
+type snapshotSourceConfig struct { + RootPath string `yaml:"root_path"` + RemoteVolume *string `yaml:"remote_volume"` + Git *gitRef `yaml:"git"` + IncludePaths []string `yaml:"include_paths"` +} + +func (s *snapshotSourceConfig) validate() error { + if strings.TrimSpace(s.RootPath) == "" { + return errors.New("code_source.snapshot.root_path cannot be empty") + } + + if s.RemoteVolume != nil && !strings.HasPrefix(*s.RemoteVolume, "/Volumes/") { + return errors.New("code_source.snapshot.remote_volume must start with '/Volumes/'") + } + + // A non-nil but empty include_paths is an explicit mistake (omit it instead). + if s.IncludePaths != nil && len(s.IncludePaths) == 0 { + return errors.New("code_source.snapshot.include_paths cannot be an empty list; either omit it or provide paths") + } + for _, p := range s.IncludePaths { + p = strings.TrimSpace(p) + if p == "" { + return errors.New("code_source.snapshot.include_paths entry cannot be empty") + } + if strings.HasPrefix(p, "/") { + return fmt.Errorf("code_source.snapshot.include_paths must be relative paths, got: %s", p) + } + // No parent traversal: snapshots must stay within root_path. + if slices.Contains(strings.Split(p, "/"), "..") { + return fmt.Errorf("code_source.snapshot.include_paths cannot contain '..' traversal, got: %s", p) + } + } + + if s.Git != nil { + return s.Git.validate() + } + return nil +} + +// gitRef pins a snapshot to a specific git ref. branch and commit are mutually +// exclusive; remote is only meaningful with branch. 
+type gitRef struct { + Branch *string `yaml:"branch"` + Commit *string `yaml:"commit"` + Remote gitRemote `yaml:"remote"` +} + +func (g *gitRef) validate() error { + if g.Branch != nil && !gitRefRe.MatchString(*g.Branch) { + return fmt.Errorf("invalid git.branch format %q: only alphanumeric characters, hyphens, dots, slashes, and underscores are allowed", *g.Branch) + } + if g.Remote.isString { + if g.Remote.name == "" { + return errors.New("git.remote string cannot be empty; use 'true' to auto-detect") + } + if !gitRefRe.MatchString(g.Remote.name) { + return fmt.Errorf("invalid git.remote name %q: only alphanumeric characters, hyphens, dots, slashes, and underscores are allowed", g.Remote.name) + } + } + + if g.Branch == nil && g.Commit == nil { + return errors.New("git: must specify either 'branch' or 'commit'") + } + if g.Branch != nil && g.Commit != nil { + return errors.New("git: 'branch' and 'commit' are mutually exclusive — specify only one") + } + if g.Remote.truthy() && g.Branch == nil { + return errors.New("git.remote requires git.branch (only valid with branch refs)") + } + return nil +} + +// gitRemote is git.remote: false (default, use local HEAD), true (auto-detect the +// remote), or a remote name string. +type gitRemote struct { + set bool + isString bool + name string + enabled bool +} + +func (r *gitRemote) UnmarshalYAML(node *yaml.Node) error { + if node.Kind != yaml.ScalarNode { + return errors.New("git.remote must be a boolean or a remote name string") + } + r.set = true + if node.Tag == "!!bool" { + return node.Decode(&r.enabled) + } + r.isString = true + r.name = node.Value + return nil +} + +// truthy reports whether remote requests a remote fetch (mirrors Python's +// truthiness of the bool|str union). +func (r *gitRemote) truthy() bool { + if r.isString { + return r.name != "" + } + return r.enabled +} + +// permission is a DABs-compatible permission grant: exactly one principal plus a +// level. 
type permission struct {
	UserName             *string `yaml:"user_name"`
	GroupName            *string `yaml:"group_name"`
	ServicePrincipalName *string `yaml:"service_principal_name"`
	// Level is a databricks PermissionLevel (e.g. CAN_VIEW, CAN_MANAGE). Enum
	// membership is validated server-side; here we only require it to be set.
	Level string `yaml:"level"`
}

// validate requires exactly one non-nil principal field whose value is not
// blank, plus a non-blank level. It returns the first violation, or nil.
func (p *permission) validate() error {
	candidates := [...]struct {
		field string
		value *string
	}{
		{"user_name", p.UserName},
		{"group_name", p.GroupName},
		{"service_principal_name", p.ServicePrincipalName},
	}

	// Count the principals that were provided and remember the last one seen;
	// it is only used when it is also the only one.
	var (
		provided int
		field    string
		value    *string
	)
	for _, cand := range candidates {
		if cand.value == nil {
			continue
		}
		provided++
		field, value = cand.field, cand.value
	}

	if provided == 0 {
		return errors.New("permissions: one of 'user_name', 'group_name', or 'service_principal_name' must be specified")
	}
	if provided > 1 {
		return errors.New("permissions: only one of 'user_name', 'group_name', or 'service_principal_name' can be specified")
	}
	if strings.TrimSpace(*value) == "" {
		return fmt.Errorf("permissions: '%s' cannot be empty", field)
	}

	if strings.TrimSpace(p.Level) == "" {
		return errors.New("permissions: 'level' is required")
	}
	return nil
}

// ---------------------------------------------------------------------------
// Patch boundary, preserved verbatim from the source patch (start of
// experimental/air/cmd/runconfig_load.go):
//
// diff --git a/experimental/air/cmd/runconfig_load.go b/experimental/air/cmd/runconfig_load.go
// new file mode 100644
// index 0000000000..4cdbd28308
// --- /dev/null
// +++ b/experimental/air/cmd/runconfig_load.go
// @@ -0,0 +1,40 @@
//
// package aircmd
//
// import (
// 	"errors"
// 	"fmt"
// 	"io"
// 	"os"
//
// 	"go.yaml.in/yaml/v3"
// )
// ---------------------------------------------------------------------------

// loadRunConfig reads a run YAML config file, decodes it into the schema, and
// runs structural validation. Unknown keys are rejected (KnownFields), mirroring
// the Python schema's extra="forbid".
//
// The `_bases_` composition feature and CLI `--override` handling are not yet
// ported; a config using `_bases_` is currently rejected as an unknown field.
+func loadRunConfig(path string) (*runConfig, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + + dec := yaml.NewDecoder(f) + dec.KnownFields(true) + + var cfg runConfig + if err := dec.Decode(&cfg); err != nil { + if errors.Is(err, io.EOF) { + return nil, fmt.Errorf("config %s is empty", path) + } + return nil, fmt.Errorf("invalid config %s: %w", path, err) + } + + if err := cfg.validate(); err != nil { + return nil, err + } + return &cfg, nil +} diff --git a/experimental/air/cmd/runconfig_test.go b/experimental/air/cmd/runconfig_test.go new file mode 100644 index 0000000000..06501e6ea6 --- /dev/null +++ b/experimental/air/cmd/runconfig_test.go @@ -0,0 +1,407 @@ +package aircmd + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// writeConfig writes content to a temp YAML file and returns its path. +func writeConfig(t *testing.T, content string) string { + t.Helper() + dir := t.TempDir() + path := filepath.Join(dir, "config.yaml") + require.NoError(t, os.WriteFile(path, []byte(content), 0o600)) + return path +} + +// minimalConfig is the smallest valid config: the three required pieces. 
+const minimalConfig = ` +experiment_name: my-run +command: python train.py +compute: + accelerator_type: GPU_1xH100 + num_accelerators: 1 +` + +func TestLoadRunConfig_Minimal(t *testing.T) { + cfg, err := loadRunConfig(writeConfig(t, minimalConfig)) + require.NoError(t, err) + assert.Equal(t, "my-run", cfg.ExperimentName) + require.NotNil(t, cfg.Command) + assert.Equal(t, "python train.py", *cfg.Command) + require.NotNil(t, cfg.Compute) + assert.Equal(t, "GPU_1xH100", cfg.Compute.AcceleratorType) + assert.Equal(t, 1, cfg.Compute.NumAccelerators) +} + +func TestLoadRunConfig_FullFeatured(t *testing.T) { + cfg, err := loadRunConfig(writeConfig(t, ` +experiment_name: full_run +command: | + python train.py + echo done +compute: + accelerator_type: GPU_8xH100 + num_accelerators: 16 +environment: + dependencies: + - torch==2.3.0 + - numpy + version: 5 +env_variables: + FOO: bar +secrets: + HF_TOKEN: my_scope/hf_token +code_source: + type: snapshot + snapshot: + root_path: project_root/src + remote_volume: /Volumes/main/default/code + git: + branch: main + remote: origin + include_paths: + - src + - configs/train.yaml +max_retries: 5 +timeout_minutes: 120 +idempotency_token: abc-123 +mlflow_run_name: full_run_v2 +mlflow_experiment_directory: /Workspace/Users/me/exp +usage_policy_name: my-policy +permissions: + - group_name: users + level: CAN_VIEW + - user_name: alice@example.com + level: CAN_MANAGE +`)) + require.NoError(t, err) + assert.Equal(t, gpuType8xH100, gpuType(cfg.Compute.AcceleratorType)) + require.NotNil(t, cfg.Environment) + assert.True(t, cfg.Environment.Dependencies.isList) + assert.Equal(t, []string{"torch==2.3.0", "numpy"}, cfg.Environment.Dependencies.list) + assert.True(t, cfg.Environment.Version.set) + assert.Equal(t, "5", cfg.Environment.Version.raw) + require.NotNil(t, cfg.CodeSource) + require.NotNil(t, cfg.CodeSource.Snapshot) + require.NotNil(t, cfg.CodeSource.Snapshot.Git) + require.NotNil(t, cfg.CodeSource.Snapshot.Git.Branch) + assert.Equal(t, 
"main", *cfg.CodeSource.Snapshot.Git.Branch) + assert.True(t, cfg.CodeSource.Snapshot.Git.Remote.isString) + assert.Equal(t, "origin", cfg.CodeSource.Snapshot.Git.Remote.name) + assert.Len(t, cfg.Permissions, 2) +} + +// TestLoadRunConfig_PolymorphicFields exercises the str|list, str|int, and +// bool|str unions decoded by custom UnmarshalYAML. +func TestLoadRunConfig_PolymorphicFields(t *testing.T) { + t.Run("dependencies as string path", func(t *testing.T) { + cfg, err := loadRunConfig(writeConfig(t, minimalConfig+` +environment: + dependencies: requirements.yaml +`)) + require.NoError(t, err) + assert.True(t, cfg.Environment.Dependencies.set) + assert.False(t, cfg.Environment.Dependencies.isList) + assert.Equal(t, "requirements.yaml", cfg.Environment.Dependencies.path) + }) + + t.Run("git remote as bool true", func(t *testing.T) { + cfg, err := loadRunConfig(writeConfig(t, minimalConfig+` +code_source: + type: snapshot + snapshot: + root_path: . + git: + branch: main + remote: true +`)) + require.NoError(t, err) + r := cfg.CodeSource.Snapshot.Git.Remote + assert.False(t, r.isString) + assert.True(t, r.enabled) + assert.True(t, r.truthy()) + }) + + t.Run("git remote defaults to false when unset", func(t *testing.T) { + cfg, err := loadRunConfig(writeConfig(t, minimalConfig+` +code_source: + type: snapshot + snapshot: + root_path: . + git: + commit: deadbeef +`)) + require.NoError(t, err) + assert.False(t, cfg.CodeSource.Snapshot.Git.Remote.truthy()) + }) +} + +func TestLoadRunConfig_UnknownFieldRejected(t *testing.T) { + tests := []struct { + name string + extra string + errFrag string + }{ + {"top-level typo", "extra_field: nope\n", "extra_field"}, + // priority was intentionally dropped from the schema (pool-only concept). + {"dropped priority field", "priority: 100\n", "priority"}, + // _bases_ composition is not yet ported, so it surfaces as unknown. 
+ {"unported _bases_", "_bases_: [base.yaml]\n", "_bases_"}, + {"nested typo", "environment:\n bogus: 1\n", "bogus"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := loadRunConfig(writeConfig(t, minimalConfig+tt.extra)) + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errFrag) + }) + } +} + +func TestLoadRunConfig_Errors(t *testing.T) { + tests := []struct { + name string + yaml string + errFrag string + }{ + { + "missing experiment_name", + "command: x\ncompute:\n accelerator_type: GPU_1xH100\n num_accelerators: 1\n", + "experiment_name cannot be empty", + }, + { + "experiment_name bad chars", + "experiment_name: my.run\ncommand: x\ncompute:\n accelerator_type: GPU_1xH100\n num_accelerators: 1\n", + "invalid experiment_name", + }, + { + "missing compute", + "experiment_name: r\ncommand: x\n", + "compute: section is required", + }, + { + "missing command", + "experiment_name: r\ncompute:\n accelerator_type: GPU_1xH100\n num_accelerators: 1\n", + "command is required", + }, + { + "bad gpu type", + "experiment_name: r\ncommand: x\ncompute:\n accelerator_type: a100\n num_accelerators: 1\n", + "invalid GPU type", + }, + { + "num_accelerators not a multiple", + "experiment_name: r\ncommand: x\ncompute:\n accelerator_type: GPU_8xH100\n num_accelerators: 3\n", + "must be a multiple of 8", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := loadRunConfig(writeConfig(t, tt.yaml)) + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errFrag) + }) + } +} + +// TestRunConfigValidate_FieldRules unit-tests validation rules directly, away +// from YAML decoding, to keep each rule's failure mode explicit. 
+func TestRunConfigValidate_FieldRules(t *testing.T) { + str := func(s string) *string { return &s } + intp := func(i int) *int { return &i } + base := func() *runConfig { + return &runConfig{ + ExperimentName: "r", + Command: str("x"), + Compute: &computeConfig{AcceleratorType: "GPU_1xH100", NumAccelerators: 1}, + } + } + + tests := []struct { + name string + mutate func(c *runConfig) + errFrag string + }{ + {"ok baseline", func(c *runConfig) {}, ""}, + {"empty command", func(c *runConfig) { c.Command = str(" ") }, "command cannot be empty"}, + {"negative max_retries", func(c *runConfig) { c.MaxRetries = intp(-1) }, "max_retries must be >= 0"}, + {"zero timeout", func(c *runConfig) { c.TimeoutMinutes = intp(0) }, "timeout_minutes must be >= 1"}, + {"empty idempotency", func(c *runConfig) { c.IdempotencyToken = str(" ") }, "idempotency_token cannot be empty"}, + {"long idempotency", func(c *runConfig) { c.IdempotencyToken = str(string(make([]byte, 65))) }, "64 characters or less"}, + {"bad mlflow_run_name", func(c *runConfig) { c.MLflowRunName = str("bad name") }, "invalid mlflow_run_name"}, + {"bad experiment dir", func(c *runConfig) { c.MLflowExperimentDirectory = str("/Users/me") }, "must start with '/Workspace'"}, + {"empty usage policy", func(c *runConfig) { c.UsagePolicyName = str(" ") }, "usage_policy_name must not be empty"}, + {"bad secret ref", func(c *runConfig) { c.Secrets = map[string]string{"T": "noslash"} }, "expected format 'scope/key'"}, + {"empty secret scope", func(c *runConfig) { c.Secrets = map[string]string{"T": "/key"} }, "scope and key cannot be empty"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := base() + tt.mutate(c) + err := c.validate() + if tt.errFrag == "" { + assert.NoError(t, err) + return + } + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errFrag) + }) + } +} + +func TestEnvironmentConfigValidate(t *testing.T) { + tests := []struct { + name string + env environmentConfig + errFrag 
string + }{ + { + "docker image alone ok", + environmentConfig{DockerImage: &dockerImageConfig{URL: "org/repo:tag"}}, + "", + }, + { + "docker image with deps conflicts", + environmentConfig{ + DockerImage: &dockerImageConfig{URL: "org/repo:tag"}, + Dependencies: dependencies{set: true, isList: true, list: []string{"torch"}}, + }, + "not allowed: dependencies", + }, + { + "empty docker url", + environmentConfig{DockerImage: &dockerImageConfig{URL: " "}}, + "docker_image.url cannot be empty", + }, + { + "version with file deps", + environmentConfig{ + Version: stringOrInt{set: true, raw: "5"}, + Dependencies: dependencies{set: true, isList: false, path: "req.yaml"}, + }, + "only valid with inline dependencies", + }, + { + "version without deps", + environmentConfig{Version: stringOrInt{set: true, raw: "5"}}, + "requires inline 'dependencies'", + }, + { + "version with inline deps ok", + environmentConfig{ + Version: stringOrInt{set: true, raw: "5"}, + Dependencies: dependencies{set: true, isList: true, list: []string{"torch"}}, + }, + "", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.env.validate() + if tt.errFrag == "" { + assert.NoError(t, err) + return + } + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errFrag) + }) + } +} + +func TestGitRefValidate(t *testing.T) { + str := func(s string) *string { return &s } + tests := []struct { + name string + ref gitRef + errFrag string + }{ + {"branch only ok", gitRef{Branch: str("main")}, ""}, + {"commit only ok", gitRef{Commit: str("abc123")}, ""}, + {"branch with remote ok", gitRef{Branch: str("main"), Remote: gitRemote{set: true, enabled: true}}, ""}, + {"neither branch nor commit", gitRef{}, "must specify either 'branch' or 'commit'"}, + {"both branch and commit", gitRef{Branch: str("main"), Commit: str("abc")}, "mutually exclusive"}, + {"remote without branch", gitRef{Commit: str("abc"), Remote: gitRemote{set: true, isString: true, name: "origin"}}, "requires 
git.branch"}, + {"bad branch chars", gitRef{Branch: str("bad branch")}, "invalid git.branch"}, + {"empty remote string", gitRef{Branch: str("main"), Remote: gitRemote{set: true, isString: true, name: ""}}, "cannot be empty"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.ref.validate() + if tt.errFrag == "" { + assert.NoError(t, err) + return + } + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errFrag) + }) + } +} + +func TestSnapshotSourceConfigValidate(t *testing.T) { + tests := []struct { + name string + snap snapshotSourceConfig + errFrag string + }{ + {"ok", snapshotSourceConfig{RootPath: "src"}, ""}, + {"empty root_path", snapshotSourceConfig{RootPath: " "}, "root_path cannot be empty"}, + {"bad volume", snapshotSourceConfig{RootPath: "src", RemoteVolume: new("/mnt/x")}, "must start with '/Volumes/'"}, + {"empty include list", snapshotSourceConfig{RootPath: "src", IncludePaths: []string{}}, "cannot be an empty list"}, + {"absolute include", snapshotSourceConfig{RootPath: "src", IncludePaths: []string{"/etc"}}, "must be relative"}, + {"traversal include", snapshotSourceConfig{RootPath: "src", IncludePaths: []string{"../x"}}, "'..' 
traversal"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.snap.validate() + if tt.errFrag == "" { + assert.NoError(t, err) + return + } + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errFrag) + }) + } +} + +func TestPermissionValidate(t *testing.T) { + str := func(s string) *string { return &s } + tests := []struct { + name string + perm permission + errFrag string + }{ + {"ok user", permission{UserName: str("alice@example.com"), Level: "CAN_VIEW"}, ""}, + {"no principal", permission{Level: "CAN_VIEW"}, "must be specified"}, + {"two principals", permission{UserName: str("a"), GroupName: str("g"), Level: "CAN_VIEW"}, "only one of"}, + {"empty principal", permission{UserName: str(" "), Level: "CAN_VIEW"}, "cannot be empty"}, + {"missing level", permission{GroupName: str("users")}, "'level' is required"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.perm.validate() + if tt.errFrag == "" { + assert.NoError(t, err) + return + } + require.Error(t, err) + assert.Contains(t, err.Error(), tt.errFrag) + }) + } +} + +func TestLoadRunConfig_FileErrors(t *testing.T) { + t.Run("missing file", func(t *testing.T) { + _, err := loadRunConfig(filepath.Join(t.TempDir(), "nope.yaml")) + assert.Error(t, err) + }) + t.Run("empty file", func(t *testing.T) { + _, err := loadRunConfig(writeConfig(t, "")) + require.Error(t, err) + assert.Contains(t, err.Error(), "is empty") + }) +}