diff --git a/experimental/air/cmd/runconfig.go b/experimental/air/cmd/runconfig.go
new file mode 100644
index 0000000000..c0678c17cb
--- /dev/null
+++ b/experimental/air/cmd/runconfig.go
@@ -0,0 +1,446 @@
+package aircmd
+
+import (
+	"errors"
+	"fmt"
+	"regexp"
+	"slices"
+	"strings"
+
+	"go.yaml.in/yaml/v3"
+)
+
+// This file ports the run YAML schema and its structural validation from the
+// Python CLI's sdk/config.py. "Structural" means types, required fields, and
+// format/cross-field rules that need no workspace access. Online checks (compute
+// pool resolution, GPU availability) and git/filesystem checks run at launch time
+// and are intentionally not ported here.
+//
+// Divergences from the Python schema, both consequences of the training-service-
+// only port:
+//   - compute.node_pool_id / compute.pool_name are dropped (see compute.go).
+//   - the top-level `priority` field is dropped: it is a node-pool queue-ordering
+//     knob with no meaning for serverless (poolless) workloads.
+
+// REGEX_TASK_KEY_CHARS: ASCII alphanumeric, hyphen, underscore only (no periods).
+// The class is spelled out rather than written as \w to make the ASCII-only Jobs
+// API task_key rule explicit; in Go's RE2 syntax \w is [0-9A-Za-z_], also ASCII.
+var taskKeyRe = regexp.MustCompile(`^[A-Za-z0-9_-]+$`)
+
+// gitRefRe guards branch/remote names against command injection: word characters,
+// '.', '/', and '-' only. NOTE(review): a leading '-' still matches — confirm callers.
+var gitRefRe = regexp.MustCompile(`^[\w./-]+$`)
+
+// runConfig is the top-level run YAML schema: experiment_name, compute,
+// environment, and code_source, plus the command and the per-run options.
+type runConfig struct {
+	ExperimentName string             `yaml:"experiment_name"`
+	Compute        *computeConfig     `yaml:"compute"`
+	Environment    *environmentConfig `yaml:"environment"`
+	Command        *string            `yaml:"command"`
+	EnvVariables   map[string]string  `yaml:"env_variables"`
+	Secrets        map[string]string  `yaml:"secrets"`
+	CodeSource     *codeSourceConfig  `yaml:"code_source"`
+	// MaxRetries defaults to 3 when unset; default-filling is a normalization
+	// concern handled at launch, so validate leaves a nil pointer untouched.
+	MaxRetries                *int           `yaml:"max_retries"`
+	TimeoutMinutes            *int           `yaml:"timeout_minutes"`
+	IdempotencyToken          *string        `yaml:"idempotency_token"`
+	Parameters                map[string]any `yaml:"parameters"`
+	MLflowRunName             *string        `yaml:"mlflow_run_name"`
+	MLflowExperimentDirectory *string        `yaml:"mlflow_experiment_directory"`
+	Permissions               []permission   `yaml:"permissions"`
+	UsagePolicyName           *string        `yaml:"usage_policy_name"`
+}
+
+// validate runs structural validation over the whole config and returns the first
+// failure (nil if none). Fields are checked in declaration order for stable output.
+func (c *runConfig) validate() error {
+	if err := validateExperimentName(c.ExperimentName); err != nil {
+		return err
+	}
+
+	if c.Compute == nil {
+		return errors.New("compute: section is required")
+	}
+	if err := c.Compute.validate(); err != nil {
+		return err
+	}
+
+	if c.Environment != nil {
+		if err := c.Environment.validate(); err != nil {
+			return err
+		}
+	}
+
+	// command is optional in the type system but required in practice, matching
+	// the Python validate_script_fields model validator.
+	if c.Command == nil {
+		return errors.New("command is required")
+	}
+	if err := validateCommand(*c.Command); err != nil {
+		return err
+	}
+
+	if err := validateSecretRefs(c.Secrets); err != nil {
+		return err
+	}
+
+	if c.CodeSource != nil {
+		if err := c.CodeSource.validate(); err != nil {
+			return err
+		}
+	}
+
+	if c.MaxRetries != nil && *c.MaxRetries < 0 {
+		return fmt.Errorf("max_retries must be >= 0, got %d", *c.MaxRetries)
+	}
+
+	if c.TimeoutMinutes != nil && *c.TimeoutMinutes < 1 {
+		return fmt.Errorf("timeout_minutes must be >= 1, got %d", *c.TimeoutMinutes)
+	}
+
+	if c.IdempotencyToken != nil {
+		// The cap is stated in characters, so count runes rather than bytes.
+		switch v := strings.TrimSpace(*c.IdempotencyToken); {
+		case v == "":
+			return errors.New("idempotency_token cannot be empty")
+		case len([]rune(v)) > 64:
+			return errors.New("idempotency_token must be 64 characters or less")
+		}
+	}
+
+	if c.MLflowRunName != nil {
+		v := strings.TrimSpace(*c.MLflowRunName)
+		if v == "" {
+			return errors.New("mlflow_run_name cannot be empty")
+		}
+		if !taskKeyRe.MatchString(v) {
+			return fmt.Errorf("invalid mlflow_run_name %q: only alphanumeric characters, hyphens, and underscores are allowed", v)
+		}
+	}
+
+	if c.MLflowExperimentDirectory != nil {
+		v := strings.TrimSpace(*c.MLflowExperimentDirectory)
+		if v == "" {
+			return errors.New("mlflow_experiment_directory cannot be empty")
+		}
+		// MLflow experiments live under the workspace tree.
+		if !strings.HasPrefix(v, "/Workspace") {
+			return fmt.Errorf("mlflow_experiment_directory must start with '/Workspace', got: %s", v)
+		}
+	}
+
+	for i := range c.Permissions {
+		if err := c.Permissions[i].validate(); err != nil {
+			return err
+		}
+	}
+
+	if c.UsagePolicyName != nil {
+		v := strings.TrimSpace(*c.UsagePolicyName)
+		if v == "" {
+			return errors.New("usage_policy_name must not be empty")
+		}
+		// 127 matches the server-side max_length on the policy name filter (in runes).
+		if n := len([]rune(v)); n > 127 {
+			return fmt.Errorf("usage_policy_name must be at most 127 characters, got %d", n)
+		}
+	}
+
+	return nil
+}
+
+// validateExperimentName enforces the Databricks Jobs API task_key constraints,
+// because the experiment_name becomes a task key: v must be non-empty, at most
+// 100 bytes long, and made up only of ASCII alphanumerics, hyphens, and
+// underscores. It returns nil when v passes all three checks.
+func validateExperimentName(v string) error {
+	switch n := len(v); {
+	case n == 0:
+		return errors.New("experiment_name cannot be empty")
+	case n > 100:
+		return fmt.Errorf("experiment_name must be 100 characters or less (got %d); this is the Jobs API task_key length limit", n)
+	case !taskKeyRe.MatchString(v):
+		return fmt.Errorf("invalid experiment_name %q: only alphanumeric characters, hyphens (-), and underscores (_) are allowed", v)
+	}
+	return nil
+}
+
+// validateCommand rejects a command that is blank after trimming whitespace or
+// that spans more than 1000 lines (newline count plus one); otherwise nil.
+func validateCommand(v string) error {
+	if len(strings.TrimSpace(v)) == 0 {
+		return errors.New("command cannot be empty")
+	}
+	if lines := 1 + strings.Count(v, "\n"); lines > 1000 {
+		return fmt.Errorf("command is too long (%d lines); maximum is 1000 lines — move complex logic into a script in your code_source", lines)
+	}
+	return nil
+}
+
+// validateSecretRefs checks that every secret reference uses the "scope/key"
+// format: exactly one '/' with a non-empty scope and a non-empty key.
+//
+// Variables are checked in sorted name order so that, when several references
+// are invalid, the reported failure is the same on every run (Go map iteration
+// order is randomized). It returns nil when secrets is empty or all are valid.
+func validateSecretRefs(secrets map[string]string) error {
+	varNames := make([]string, 0, len(secrets))
+	for varName := range secrets {
+		varNames = append(varNames, varName)
+	}
+	slices.Sort(varNames)
+
+	for _, varName := range varNames {
+		ref := secrets[varName]
+		parts := strings.Split(ref, "/")
+		if len(parts) != 2 {
+			return fmt.Errorf("invalid secret reference %q for variable %q: expected format 'scope/key' (e.g., my_scope/hf_token)", ref, varName)
+		}
+		if parts[0] == "" || parts[1] == "" {
+			return fmt.Errorf("invalid secret reference %q for variable %q: scope and key cannot be empty", ref, varName)
+		}
+	}
+	return nil
+}
+
+// environmentConfig is the `environment` block: either dependencies (optionally
+// with a version) or a custom docker image; validate rejects mixing the two.
+type environmentConfig struct {
+	Dependencies dependencies       `yaml:"dependencies"`
+	Version      stringOrInt        `yaml:"version"`
+	DockerImage  *dockerImageConfig `yaml:"docker_image"`
+}
+
+func (e *environmentConfig) validate() error {
+	deps, ver := e.Dependencies, e.Version
+
+	if e.DockerImage == nil {
+		// Without an image, the only rule is that version (the client image
+		// version pin) accompanies an inline list of dependencies: a
+		// requirements.yaml file carries its own version.
+		switch {
+		case !ver.set, deps.set && deps.isList:
+			return nil
+		case deps.set:
+			return errors.New("'environment.version' is only valid with inline dependencies (a list); when 'dependencies' points to a requirements.yaml file, set the version inside that file")
+		default:
+			return errors.New("'environment.version' requires inline 'dependencies' (a list of packages)")
+		}
+	}
+
+	// docker_image already pins the full runtime, so it excludes both fields.
+	var clashes []string
+	if deps.set {
+		clashes = append(clashes, "dependencies")
+	}
+	if ver.set {
+		clashes = append(clashes, "version")
+	}
+	if len(clashes) != 0 {
+		return fmt.Errorf("when 'docker_image' is specified under 'environment', these fields are not allowed: %s", strings.Join(clashes, ", "))
+	}
+	return e.DockerImage.validate()
+}
+
+// dependencies is environment.dependencies, which is polymorphic: a string is a
+// path to a requirements.yaml file; a list is an inline package list.
+type dependencies struct {
+	set    bool     // true once a scalar or sequence was decoded
+	isList bool     // true for the inline list form
+	path   string   // requirements.yaml path (string form)
+	list   []string // inline packages (list form)
+}
+
+// UnmarshalYAML decodes a scalar into path and a sequence into list; other kinds fail.
+func (d *dependencies) UnmarshalYAML(node *yaml.Node) error {
+	if node.Kind == yaml.ScalarNode {
+		d.set, d.isList = true, false
+		return node.Decode(&d.path)
+	}
+	if node.Kind == yaml.SequenceNode {
+		d.set, d.isList = true, true
+		return node.Decode(&d.list)
+	}
+	return errors.New("environment.dependencies must be a string path or a list of packages")
+}
+
+// stringOrInt holds a scalar that may be a string or an integer in YAML
+// (environment.version). Only the scalar's text is kept; integer-format
+// validation is a launch-time concern.
+type stringOrInt struct {
+	set bool   // true once a scalar was decoded
+	raw string // node.Value of that scalar
+}
+
+// UnmarshalYAML keeps the text of any scalar node and rejects every other node kind.
+func (s *stringOrInt) UnmarshalYAML(node *yaml.Node) error {
+	if node.Kind == yaml.ScalarNode {
+		s.set, s.raw = true, node.Value
+		return nil
+	}
+	return errors.New("environment.version must be a string or integer")
+}
+
+// dockerImageConfig is environment.docker_image; its validate requires a non-blank url.
+type dockerImageConfig struct {
+	URL string `yaml:"url"`
+}
+
+func (d *dockerImageConfig) validate() error { // url must hold a non-whitespace character
+	if strings.TrimSpace(d.URL) != "" {
+		return nil
+	}
+	return errors.New("docker_image.url cannot be empty")
+}
+
+// codeSourceConfig is the `code_source` block; "snapshot" is the only accepted type.
+type codeSourceConfig struct {
+	Type     string                `yaml:"type"`
+	Snapshot *snapshotSourceConfig `yaml:"snapshot"`
+}
+
+func (c *codeSourceConfig) validate() error { // type check, then presence, then delegate
+	switch {
+	case c.Type != "snapshot":
+		return fmt.Errorf("code_source.type must be 'snapshot', got %q", c.Type)
+	case c.Snapshot == nil:
+		return errors.New("code_source.type='snapshot' requires a snapshot configuration")
+	}
+	return c.Snapshot.validate()
+}
+
+// snapshotSourceConfig describes a local directory (root_path) to tar and upload.
+type snapshotSourceConfig struct {
+	RootPath     string   `yaml:"root_path"`
+	RemoteVolume *string  `yaml:"remote_volume"`
+	Git          *gitRef  `yaml:"git"`
+	IncludePaths []string `yaml:"include_paths"`
+}
+
+func (s *snapshotSourceConfig) validate() error {
+	if len(strings.TrimSpace(s.RootPath)) == 0 {
+		return errors.New("code_source.snapshot.root_path cannot be empty")
+	}
+
+	if vol := s.RemoteVolume; vol != nil && !strings.HasPrefix(*vol, "/Volumes/") {
+		return errors.New("code_source.snapshot.remote_volume must start with '/Volumes/'")
+	}
+
+	// Omitting include_paths (nil) is fine; an explicitly empty list is an error.
+	if s.IncludePaths != nil && len(s.IncludePaths) == 0 {
+		return errors.New("code_source.snapshot.include_paths cannot be an empty list; either omit it or provide paths")
+	}
+	// Each entry is checked after trimming its surrounding whitespace.
+	for _, raw := range s.IncludePaths {
+		entry := strings.TrimSpace(raw)
+		switch {
+		case entry == "":
+			return errors.New("code_source.snapshot.include_paths entry cannot be empty")
+		case strings.HasPrefix(entry, "/"):
+			return fmt.Errorf("code_source.snapshot.include_paths must be relative paths, got: %s", entry)
+		case slices.Contains(strings.Split(entry, "/"), ".."):
+			// A ".." segment would step outside root_path.
+			return fmt.Errorf("code_source.snapshot.include_paths cannot contain '..' traversal, got: %s", entry)
+		}
+	}
+
+	if s.Git == nil {
+		return nil
+	}
+	return s.Git.validate()
+}
+
+// gitRef pins a snapshot to a specific git ref. Exactly one of branch and commit
+// must be set; remote is only accepted together with branch.
+type gitRef struct {
+	Branch *string   `yaml:"branch"`
+	Commit *string   `yaml:"commit"`
+	Remote gitRemote `yaml:"remote"`
+}
+
+func (g *gitRef) validate() error {
+	// Checks run top to bottom. A leading '-' matches gitRefRe but could be taken
+	// for a command-line option if the value reaches git, so it is rejected as
+	// well. NOTE(review): commit has no format check here — confirm that is intended.
+	switch {
+	case g.Branch != nil && !gitRefRe.MatchString(*g.Branch):
+		return fmt.Errorf("invalid git.branch format %q: only alphanumeric characters, hyphens, dots, slashes, and underscores are allowed", *g.Branch)
+	case g.Branch != nil && strings.HasPrefix(*g.Branch, "-"):
+		return fmt.Errorf("invalid git.branch format %q: must not start with '-'", *g.Branch)
+	case g.Remote.isString && g.Remote.name == "":
+		return errors.New("git.remote string cannot be empty; use 'true' to auto-detect")
+	case g.Remote.isString && !gitRefRe.MatchString(g.Remote.name):
+		return fmt.Errorf("invalid git.remote name %q: only alphanumeric characters, hyphens, dots, slashes, and underscores are allowed", g.Remote.name)
+	case g.Remote.isString && strings.HasPrefix(g.Remote.name, "-"):
+		return fmt.Errorf("invalid git.remote name %q: must not start with '-'", g.Remote.name)
+	case g.Branch == nil && g.Commit == nil:
+		return errors.New("git: must specify either 'branch' or 'commit'")
+	case g.Branch != nil && g.Commit != nil:
+		return errors.New("git: 'branch' and 'commit' are mutually exclusive — specify only one")
+	case g.Remote.truthy() && g.Branch == nil:
+		return errors.New("git.remote requires git.branch (only valid with branch refs)")
+	}
+	return nil
+}
+
+// gitRemote is git.remote: false (default, use local HEAD), true (auto-detect the
+// remote), or a remote name string.
+type gitRemote struct {
+	set      bool   // true once a scalar was decoded
+	isString bool   // true when the scalar was not tagged !!bool
+	name     string // scalar text for the string form
+	enabled  bool   // decoded value for the boolean form
+}
+
+// UnmarshalYAML accepts a scalar only: a !!bool goes to enabled, anything else to name.
+func (r *gitRemote) UnmarshalYAML(node *yaml.Node) error {
+	if node.Kind != yaml.ScalarNode {
+		return errors.New("git.remote must be a boolean or a remote name string")
+	}
+	r.set = true
+	if node.Tag != "!!bool" {
+		r.isString, r.name = true, node.Value
+		return nil
+	}
+	return node.Decode(&r.enabled)
+}
+
+// truthy reports whether remote asks for a remote fetch, following Python's
+// truthiness of the bool|str union: a non-empty name, or a true boolean.
+func (r *gitRemote) truthy() bool {
+	if !r.isString {
+		return r.enabled
+	}
+	return r.name != ""
+}
+
+// permission is a DABs-compatible permission grant: exactly one principal
+// (user_name, group_name, or service_principal_name) plus a level.
+type permission struct {
+	UserName             *string `yaml:"user_name"`
+	GroupName            *string `yaml:"group_name"`
+	ServicePrincipalName *string `yaml:"service_principal_name"`
+	// Level is a databricks PermissionLevel (e.g. CAN_VIEW, CAN_MANAGE). Enum
+	// membership is validated server-side; validate only requires it to be non-blank.
+	Level string `yaml:"level"`
+}
+
+func (p *permission) validate() error {
+	// keys[i] names the YAML key for vals[i]. A principal counts as given when
+	// its pointer is non-nil, even if the string it points to is blank; the
+	// blank case is reported separately below.
+	keys := []string{"user_name", "group_name", "service_principal_name"}
+	vals := []*string{p.UserName, p.GroupName, p.ServicePrincipalName}
+
+	given, last := 0, -1
+	for i, v := range vals {
+		if v != nil {
+			given, last = given+1, i
+		}
+	}
+
+	// Principal-count errors come first, then the blank principal, then level.
+	switch {
+	case given == 0:
+		return errors.New("permissions: one of 'user_name', 'group_name', or 'service_principal_name' must be specified")
+	case given > 1:
+		return errors.New("permissions: only one of 'user_name', 'group_name', or 'service_principal_name' can be specified")
+	case strings.TrimSpace(*vals[last]) == "":
+		// Exactly one principal was given here, so last is its index.
+		return fmt.Errorf("permissions: '%s' cannot be empty", keys[last])
+	case strings.TrimSpace(p.Level) == "":
+		// Only presence is checked; enum membership is left to the server.
+		return errors.New("permissions: 'level' is required")
+	}
+	return nil
+}
diff --git a/experimental/air/cmd/runconfig_load.go b/experimental/air/cmd/runconfig_load.go
new file mode 100644
index 0000000000..4cdbd28308
--- /dev/null
+++ b/experimental/air/cmd/runconfig_load.go
@@ -0,0 +1,40 @@
+package aircmd
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+
+	"go.yaml.in/yaml/v3"
+)
+
+// loadRunConfig reads the run YAML config at path, decodes it into the schema, and
+// runs structural validation. Unknown keys are rejected (KnownFields), mirroring
+// the Python schema's extra="forbid"; `_bases_` composition and CLI `--override`
+// are not yet ported, so `_bases_` is currently rejected as an unknown field.
+// It returns the validated config, or an error: the os.Open error as-is, or a
+// decode/validation failure prefixed with the config path.
+func loadRunConfig(path string) (*runConfig, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	dec := yaml.NewDecoder(f)
+	dec.KnownFields(true)
+
+	var cfg runConfig
+	if err := dec.Decode(&cfg); err != nil {
+		if errors.Is(err, io.EOF) {
+			return nil, fmt.Errorf("config %s is empty", path)
+		}
+		return nil, fmt.Errorf("invalid config %s: %w", path, err)
+	}
+
+	if err := cfg.validate(); err != nil {
+		return nil, fmt.Errorf("invalid config %s: %w", path, err)
+	}
+	return &cfg, nil
+}
diff --git a/experimental/air/cmd/runconfig_test.go b/experimental/air/cmd/runconfig_test.go
new file mode 100644
index 0000000000..06501e6ea6
--- /dev/null
+++ b/experimental/air/cmd/runconfig_test.go
@@ -0,0 +1,407 @@
+package aircmd
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// writeConfig stores content as config.yaml in a fresh temp dir and returns its path.
+func writeConfig(t *testing.T, content string) string {
+	t.Helper()
+	path := filepath.Join(t.TempDir(), "config.yaml")
+	err := os.WriteFile(path, []byte(content), 0o600)
+	require.NoError(t, err)
+	return path
+}
+
+// minimalConfig is the smallest valid config: experiment_name, command, and compute.
+const minimalConfig = `
+experiment_name: my-run
+command: python train.py
+compute:
+  accelerator_type: GPU_1xH100
+  num_accelerators: 1
+`
+
+func TestLoadRunConfig_Minimal(t *testing.T) { // minimalConfig loads and keeps its values
+	cfg, err := loadRunConfig(writeConfig(t, minimalConfig))
+	require.NoError(t, err)
+	assert.Equal(t, "my-run", cfg.ExperimentName)
+	require.NotNil(t, cfg.Command)
+	assert.Equal(t, "python train.py", *cfg.Command)
+	require.NotNil(t, cfg.Compute)
+	assert.Equal(t, "GPU_1xH100", cfg.Compute.AcceleratorType)
+	assert.Equal(t, 1, cfg.Compute.NumAccelerators)
+}
+
+func TestLoadRunConfig_FullFeatured(t *testing.T) { // a config setting most schema fields loads cleanly
+	cfg, err := loadRunConfig(writeConfig(t, `
+experiment_name: full_run
+command: |
+  python train.py
+  echo done
+compute:
+  accelerator_type: GPU_8xH100
+  num_accelerators: 16
+environment:
+  dependencies:
+    - torch==2.3.0
+    - numpy
+  version: 5
+env_variables:
+  FOO: bar
+secrets:
+  HF_TOKEN: my_scope/hf_token
+code_source:
+  type: snapshot
+  snapshot:
+    root_path: project_root/src
+    remote_volume: /Volumes/main/default/code
+    git:
+      branch: main
+      remote: origin
+    include_paths:
+      - src
+      - configs/train.yaml
+max_retries: 5
+timeout_minutes: 120
+idempotency_token: abc-123
+mlflow_run_name: full_run_v2
+mlflow_experiment_directory: /Workspace/Users/me/exp
+usage_policy_name: my-policy
+permissions:
+  - group_name: users
+    level: CAN_VIEW
+  - user_name: alice@example.com
+    level: CAN_MANAGE
+`))
+	require.NoError(t, err)
+	assert.Equal(t, gpuType8xH100, gpuType(cfg.Compute.AcceleratorType))
+	require.NotNil(t, cfg.Environment)
+	assert.True(t, cfg.Environment.Dependencies.isList)
+	assert.Equal(t, []string{"torch==2.3.0", "numpy"}, cfg.Environment.Dependencies.list)
+	assert.True(t, cfg.Environment.Version.set)
+	assert.Equal(t, "5", cfg.Environment.Version.raw) // integer scalar is kept as its text
+	require.NotNil(t, cfg.CodeSource)
+	require.NotNil(t, cfg.CodeSource.Snapshot)
+	require.NotNil(t, cfg.CodeSource.Snapshot.Git)
+	require.NotNil(t, cfg.CodeSource.Snapshot.Git.Branch)
+	assert.Equal(t, "main", *cfg.CodeSource.Snapshot.Git.Branch)
+	assert.True(t, cfg.CodeSource.Snapshot.Git.Remote.isString)
+	assert.Equal(t, "origin", cfg.CodeSource.Snapshot.Git.Remote.name)
+	assert.Len(t, cfg.Permissions, 2)
+}
+
+// TestLoadRunConfig_PolymorphicFields exercises the str|list (dependencies) and
+// bool|str (git.remote) unions decoded by custom UnmarshalYAML.
+func TestLoadRunConfig_PolymorphicFields(t *testing.T) {
+	t.Run("dependencies as string path", func(t *testing.T) {
+		cfg, err := loadRunConfig(writeConfig(t, minimalConfig+`
+environment:
+  dependencies: requirements.yaml
+`))
+		require.NoError(t, err)
+		assert.True(t, cfg.Environment.Dependencies.set)
+		assert.False(t, cfg.Environment.Dependencies.isList)
+		assert.Equal(t, "requirements.yaml", cfg.Environment.Dependencies.path)
+	})
+
+	t.Run("git remote as bool true", func(t *testing.T) {
+		cfg, err := loadRunConfig(writeConfig(t, minimalConfig+`
+code_source:
+  type: snapshot
+  snapshot:
+    root_path: .
+    git:
+      branch: main
+      remote: true
+`))
+		require.NoError(t, err)
+		r := cfg.CodeSource.Snapshot.Git.Remote
+		assert.False(t, r.isString)
+		assert.True(t, r.enabled)
+		assert.True(t, r.truthy())
+	})
+
+	t.Run("git remote defaults to false when unset", func(t *testing.T) {
+		cfg, err := loadRunConfig(writeConfig(t, minimalConfig+`
+code_source:
+  type: snapshot
+  snapshot:
+    root_path: .
+    git:
+      commit: deadbeef
+`))
+		require.NoError(t, err)
+		assert.False(t, cfg.CodeSource.Snapshot.Git.Remote.truthy())
+	})
+}
+
+func TestLoadRunConfig_UnknownFieldRejected(t *testing.T) { // each extra is appended to minimalConfig
+	tests := []struct {
+		name    string
+		extra   string
+		errFrag string
+	}{
+		{"top-level typo", "extra_field: nope\n", "extra_field"},
+		// priority was intentionally dropped from the schema (pool-only concept).
+		{"dropped priority field", "priority: 100\n", "priority"},
+		// _bases_ composition is not yet ported, so it surfaces as unknown.
+		{"unported _bases_", "_bases_: [base.yaml]\n", "_bases_"},
+		{"nested typo", "environment:\n  bogus: 1\n", "bogus"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			_, err := loadRunConfig(writeConfig(t, minimalConfig+tt.extra))
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.errFrag)
+		})
+	}
+}
+
+func TestLoadRunConfig_Errors(t *testing.T) { // each case is a complete YAML document that must fail
+	tests := []struct {
+		name    string
+		yaml    string
+		errFrag string
+	}{
+		{
+			"missing experiment_name",
+			"command: x\ncompute:\n  accelerator_type: GPU_1xH100\n  num_accelerators: 1\n",
+			"experiment_name cannot be empty",
+		},
+		{
+			"experiment_name bad chars",
+			"experiment_name: my.run\ncommand: x\ncompute:\n  accelerator_type: GPU_1xH100\n  num_accelerators: 1\n",
+			"invalid experiment_name",
+		},
+		{
+			"missing compute",
+			"experiment_name: r\ncommand: x\n",
+			"compute: section is required",
+		},
+		{
+			"missing command",
+			"experiment_name: r\ncompute:\n  accelerator_type: GPU_1xH100\n  num_accelerators: 1\n",
+			"command is required",
+		},
+		{
+			"bad gpu type",
+			"experiment_name: r\ncommand: x\ncompute:\n  accelerator_type: a100\n  num_accelerators: 1\n",
+			"invalid GPU type",
+		},
+		{
+			"num_accelerators not a multiple",
+			"experiment_name: r\ncommand: x\ncompute:\n  accelerator_type: GPU_8xH100\n  num_accelerators: 3\n",
+			"must be a multiple of 8",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			_, err := loadRunConfig(writeConfig(t, tt.yaml))
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.errFrag)
+		})
+	}
+}
+
+// TestRunConfigValidate_FieldRules unit-tests validation rules directly, away
+// from YAML decoding: each case mutates a valid base config in exactly one way.
+func TestRunConfigValidate_FieldRules(t *testing.T) {
+	str := func(s string) *string { return &s }
+	intp := func(i int) *int { return &i }
+	base := func() *runConfig {
+		return &runConfig{
+			ExperimentName: "r",
+			Command:        str("x"),
+			Compute:        &computeConfig{AcceleratorType: "GPU_1xH100", NumAccelerators: 1},
+		}
+	}
+
+	tests := []struct {
+		name    string
+		mutate  func(c *runConfig)
+		errFrag string
+	}{
+		{"ok baseline", func(c *runConfig) {}, ""},
+		{"empty command", func(c *runConfig) { c.Command = str("   ") }, "command cannot be empty"},
+		{"negative max_retries", func(c *runConfig) { c.MaxRetries = intp(-1) }, "max_retries must be >= 0"},
+		{"zero timeout", func(c *runConfig) { c.TimeoutMinutes = intp(0) }, "timeout_minutes must be >= 1"},
+		{"empty idempotency", func(c *runConfig) { c.IdempotencyToken = str("  ") }, "idempotency_token cannot be empty"},
+		{"long idempotency", func(c *runConfig) { c.IdempotencyToken = str(string(make([]byte, 65))) }, "64 characters or less"},
+		{"bad mlflow_run_name", func(c *runConfig) { c.MLflowRunName = str("bad name") }, "invalid mlflow_run_name"},
+		{"bad experiment dir", func(c *runConfig) { c.MLflowExperimentDirectory = str("/Users/me") }, "must start with '/Workspace'"},
+		{"empty usage policy", func(c *runConfig) { c.UsagePolicyName = str(" ") }, "usage_policy_name must not be empty"},
+		{"bad secret ref", func(c *runConfig) { c.Secrets = map[string]string{"T": "noslash"} }, "expected format 'scope/key'"},
+		{"empty secret scope", func(c *runConfig) { c.Secrets = map[string]string{"T": "/key"} }, "scope and key cannot be empty"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			c := base()
+			tt.mutate(c)
+			err := c.validate()
+			if tt.errFrag == "" { // an empty fragment marks a case that must pass
+				assert.NoError(t, err)
+				return
+			}
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.errFrag)
+		})
+	}
+}
+
+func TestEnvironmentConfigValidate(t *testing.T) { // docker_image exclusivity and version/dependencies pairing
+	tests := []struct {
+		name    string
+		env     environmentConfig
+		errFrag string
+	}{
+		{
+			"docker image alone ok",
+			environmentConfig{DockerImage: &dockerImageConfig{URL: "org/repo:tag"}},
+			"",
+		},
+		{
+			"docker image with deps conflicts",
+			environmentConfig{
+				DockerImage:  &dockerImageConfig{URL: "org/repo:tag"},
+				Dependencies: dependencies{set: true, isList: true, list: []string{"torch"}},
+			},
+			"not allowed: dependencies",
+		},
+		{
+			"empty docker url",
+			environmentConfig{DockerImage: &dockerImageConfig{URL: "  "}},
+			"docker_image.url cannot be empty",
+		},
+		{
+			"version with file deps",
+			environmentConfig{
+				Version:      stringOrInt{set: true, raw: "5"},
+				Dependencies: dependencies{set: true, isList: false, path: "req.yaml"},
+			},
+			"only valid with inline dependencies",
+		},
+		{
+			"version without deps",
+			environmentConfig{Version: stringOrInt{set: true, raw: "5"}},
+			"requires inline 'dependencies'",
+		},
+		{
+			"version with inline deps ok",
+			environmentConfig{
+				Version:      stringOrInt{set: true, raw: "5"},
+				Dependencies: dependencies{set: true, isList: true, list: []string{"torch"}},
+			},
+			"",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.env.validate()
+			if tt.errFrag == "" { // an empty fragment marks a case that must pass
+				assert.NoError(t, err)
+				return
+			}
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.errFrag)
+		})
+	}
+}
+
+func TestGitRefValidate(t *testing.T) { // branch/commit exclusivity, remote rules, and name format
+	str := func(s string) *string { return &s }
+	tests := []struct {
+		name    string
+		ref     gitRef
+		errFrag string
+	}{
+		{"branch only ok", gitRef{Branch: str("main")}, ""},
+		{"commit only ok", gitRef{Commit: str("abc123")}, ""},
+		{"branch with remote ok", gitRef{Branch: str("main"), Remote: gitRemote{set: true, enabled: true}}, ""},
+		{"neither branch nor commit", gitRef{}, "must specify either 'branch' or 'commit'"},
+		{"both branch and commit", gitRef{Branch: str("main"), Commit: str("abc")}, "mutually exclusive"},
+		{"remote without branch", gitRef{Commit: str("abc"), Remote: gitRemote{set: true, isString: true, name: "origin"}}, "requires git.branch"},
+		{"bad branch chars", gitRef{Branch: str("bad branch")}, "invalid git.branch"},
+		{"empty remote string", gitRef{Branch: str("main"), Remote: gitRemote{set: true, isString: true, name: ""}}, "cannot be empty"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.ref.validate()
+			if tt.errFrag == "" { // an empty fragment marks a case that must pass
+				assert.NoError(t, err)
+				return
+			}
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.errFrag)
+		})
+	}
+}
+
+func TestSnapshotSourceConfigValidate(t *testing.T) {
+	str := func(s string) *string { return &s } // same helper as sibling tests; new(expr) needs Go 1.26
+	tests := []struct {
+		name    string
+		snap    snapshotSourceConfig
+		errFrag string
+	}{
+		{"ok", snapshotSourceConfig{RootPath: "src"}, ""},
+		{"empty root_path", snapshotSourceConfig{RootPath: "  "}, "root_path cannot be empty"},
+		{"bad volume", snapshotSourceConfig{RootPath: "src", RemoteVolume: str("/mnt/x")}, "must start with '/Volumes/'"},
+		{"empty include list", snapshotSourceConfig{RootPath: "src", IncludePaths: []string{}}, "cannot be an empty list"},
+		{"absolute include", snapshotSourceConfig{RootPath: "src", IncludePaths: []string{"/etc"}}, "must be relative"},
+		{"traversal include", snapshotSourceConfig{RootPath: "src", IncludePaths: []string{"../x"}}, "'..' traversal"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.snap.validate()
+			if tt.errFrag == "" { // an empty fragment marks a case that must pass
+				assert.NoError(t, err)
+				return
+			}
+			require.ErrorContains(t, err, tt.errFrag)
+		})
+	}
+}
+
+func TestPermissionValidate(t *testing.T) { // exactly one non-blank principal plus a level
+	str := func(s string) *string { return &s }
+	tests := []struct {
+		name    string
+		perm    permission
+		errFrag string
+	}{
+		{"ok user", permission{UserName: str("alice@example.com"), Level: "CAN_VIEW"}, ""},
+		{"no principal", permission{Level: "CAN_VIEW"}, "must be specified"},
+		{"two principals", permission{UserName: str("a"), GroupName: str("g"), Level: "CAN_VIEW"}, "only one of"},
+		{"empty principal", permission{UserName: str("  "), Level: "CAN_VIEW"}, "cannot be empty"},
+		{"missing level", permission{GroupName: str("users")}, "'level' is required"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			err := tt.perm.validate()
+			if tt.errFrag == "" { // an empty fragment marks a case that must pass
+				assert.NoError(t, err)
+				return
+			}
+			require.Error(t, err)
+			assert.Contains(t, err.Error(), tt.errFrag)
+		})
+	}
+}
+
+func TestLoadRunConfig_FileErrors(t *testing.T) { // failures that occur before validation runs
+	t.Run("missing file", func(t *testing.T) {
+		_, err := loadRunConfig(filepath.Join(t.TempDir(), "nope.yaml"))
+		assert.Error(t, err)
+	})
+	t.Run("empty file", func(t *testing.T) {
+		_, err := loadRunConfig(writeConfig(t, "")) // zero documents: Decode reports io.EOF
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "is empty")
+	})
+}