diff --git a/cmd/stepsecurity-dev-machine-guard-task/main.go b/cmd/stepsecurity-dev-machine-guard-task/main.go index 96ca252..5c02763 100644 --- a/cmd/stepsecurity-dev-machine-guard-task/main.go +++ b/cmd/stepsecurity-dev-machine-guard-task/main.go @@ -1,58 +1,85 @@ //go:build windows // Command stepsecurity-dev-machine-guard-task is a GUI-subsystem -// launcher that invokes the console-subsystem agent under +// launcher that invokes a console-subsystem child under // CREATE_NO_WINDOW. Built with `-ldflags "-H windowsgui"`. // // Why a separate binary: Windows allocates a console for any -// console-subsystem process whose parent has none. Task Scheduler -// under /ru INTERACTIVE is such a parent, so the agent itself would -// always flash a window. The only fully-reliable suppression is for -// the parent CreateProcess call to pass CREATE_NO_WINDOW, and the -// only way to be that parent without flashing our own console is to -// be GUI-subsystem. The agent stays console-subsystem so interactive -// CLI use (install, configure, manual scans) still works normally. +// console-subsystem process whose parent has none. Task Scheduler under +// /ru INTERACTIVE is such a parent, so a console-subsystem child +// invoked directly would always flash a window. The only fully reliable +// suppression is for the parent CreateProcess call to pass +// CREATE_NO_WINDOW, and the only way to be that parent without flashing +// our own console is to be GUI-subsystem. // -// Layout: both binaries sit in the same directory. The scheduled task -// points at this launcher; arguments forward unchanged. +// Two operating modes (target-resolution lives in internal/launcher so +// it can be unit-tested cross-platform): // -// Lifecycle: the agent is assigned to a Job Object with -// JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE so it dies when the launcher -// does — including under Stop-ScheduledTask, which only terminates -// the registered action's PID. +// - Default. Invoked without --exec, the launcher spawns its sibling +// stepsecurity-dev-machine-guard.exe and forwards argv unchanged. +// This is what the MSI install layout's scheduled-task action uses. +// +// - --exec mode. Invoked as `task.exe --exec [args...]`, the +// launcher spawns (exec.LookPath resolved) with the remaining +// args. Used by the PowerShell loader's scheduled task to wrap +// `powershell.exe -File loader.ps1 send-telemetry` in the same +// no-console envelope the MSI flow uses for the agent. +// +// The agent (and any --exec target) stays console-subsystem so +// interactive CLI use continues to work normally. +// +// Lifecycle: the child is assigned to a Job Object with +// JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE so it dies when the launcher does +// — including under Stop-ScheduledTask, which only terminates the +// registered action's PID. package main import ( "errors" + "fmt" "os" "os/exec" - "path/filepath" "syscall" "unsafe" + "github.com/step-security/dev-machine-guard/internal/launcher" "golang.org/x/sys/windows" ) -const ( - createNoWindow uint32 = 0x08000000 - agentBinary = "stepsecurity-dev-machine-guard.exe" -) +const createNoWindow uint32 = 0x08000000 func main() { - os.Exit(run()) + os.Exit(run(os.Args[1:])) } -func run() int { - me, err := os.Executable() +// run is split out so the entrypoint stays one line. argv is the slice +// after the program name (os.Args[1:] in main); accepting it explicitly +// keeps the windows-only test path open if we add one later. +func run(argv []string) int { + target, childArgs, err := launcher.ResolveTarget(argv) if err != nil { - return 1 - } - agent := filepath.Join(filepath.Dir(me), agentBinary) - if _, err := os.Stat(agent); err != nil { + // Two distinct failure shapes, matched to the legacy contract: + // + // - Default mode (no --exec): the launcher silently exits 1. + // This preserves byte-for-byte compatibility with MSI installs + // that the pre-1.11.5 launcher served. Task Scheduler records + // "LastTaskResult=1" — same value MSI deployments have always + // observed when the sibling agent is absent. A behavioral + // change here would shift downstream dashboards/alerts that + // key on the result code. + // + // - --exec mode: the caller asked for a feature; surface the + // concrete misuse (missing target, unresolved PATH, etc.) on + // stderr with a distinct exit code so dispatch failures are + // diagnosable. + if len(argv) > 0 && argv[0] == launcher.ExecFlag { + fmt.Fprintln(os.Stderr, err) + return 2 + } return 1 } - cmd := exec.Command(agent, os.Args[1:]...) + cmd := exec.Command(target, childArgs...) cmd.SysProcAttr = &syscall.SysProcAttr{ HideWindow: true, CreationFlags: createNoWindow, @@ -62,10 +89,11 @@ func run() int { return 1 } - // Best-effort: bind the agent to a kill-on-close job. The job + // Best-effort: bind the child to a kill-on-close job. The job // handle stays open in this process; the kernel closes it on our // exit, which fires the kill. Failure here only weakens lifecycle - // (orphan possible on forced termination), not the scan itself. + // (orphan possible on forced termination), not the work the child + // was started to do. if job, jerr := newKillOnCloseJob(); jerr == nil { if h, oerr := windows.OpenProcess( windows.PROCESS_SET_QUOTA|windows.PROCESS_TERMINATE, diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go new file mode 100644 index 0000000..7c9f77a --- /dev/null +++ b/internal/launcher/launcher.go @@ -0,0 +1,82 @@ +// Package launcher resolves the child process the GUI-subsystem +// launcher (cmd/stepsecurity-dev-machine-guard-task) should spawn for a +// given invocation. The launcher itself lives in cmd/ and is Windows-only +// because it depends on Job Objects and CREATE_NO_WINDOW; the resolution +// logic is platform-agnostic and lives here so it can be unit-tested on +// the macOS CI runners that drive the rest of the test suite. +package launcher + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" +) + +const ( + // AgentBinary is the default child the launcher invokes when no + // --exec flag is present. Sits next to the launcher in the install + // directory (MSI: C:\Program Files\StepSecurity\, PowerShell: + // %USERPROFILE%\.stepsecurity\bin\). + AgentBinary = "stepsecurity-dev-machine-guard.exe" + + // ExecFlag opts into the generic-target launch mode used by the + // PowerShell loader. See ResolveTarget for the contract. + ExecFlag = "--exec" +) + +// ResolveTarget returns the child executable's absolute path and the +// argv slice to forward to it, derived from the launcher's own argv +// (i.e. os.Args[1:], not including the program name). +// +// Two modes, dispatched on the first argument: +// +// - "--exec" mode. argv begins with --exec; the next element is the +// child binary (resolved via exec.LookPath so bare basenames like +// "powershell.exe" are accepted alongside fully-qualified paths), +// and the rest of argv is forwarded to it. Used by the PowerShell +// loader's scheduled task to wrap `powershell.exe -File loader.ps1 +// send-telemetry` under the launcher's no-console envelope. +// +// - Default (legacy / MSI) mode. argv does not begin with --exec; the +// child is the sibling AgentBinary in the launcher's own directory, +// and all of argv is forwarded to it. Preserves byte-for-byte +// compatibility with the launcher's pre-1.11.5 behaviour, which is +// what the MSI install layout's scheduled task action uses. +// +// Errors (all returned from this function; the caller in cmd/.../main.go +// is responsible for mapping them to exit codes + stderr output per the +// contract below): +// +// - --exec without a target: malformed task action. +// - --exec target not on PATH: visible at install time rather than +// silently exiting. +// - Default mode with no sibling agent: most commonly indicates the +// launcher was deployed without its companion. +// +// The launcher entrypoint distinguishes the two error contexts: +// --exec errors are written to stderr and the process exits 2, while +// default-mode errors stay silent and exit 1 (preserving byte-for-byte +// compatibility with the pre-1.11.5 launcher that MSI installs rely on). +func ResolveTarget(argv []string) (string, []string, error) { + if len(argv) > 0 && argv[0] == ExecFlag { + if len(argv) < 2 { + return "", nil, fmt.Errorf("%s requires a target executable", ExecFlag) + } + target, err := exec.LookPath(argv[1]) + if err != nil { + return "", nil, fmt.Errorf("%s: cannot resolve %q: %w", ExecFlag, argv[1], err) + } + return target, argv[2:], nil + } + + me, err := os.Executable() + if err != nil { + return "", nil, err + } + agent := filepath.Join(filepath.Dir(me), AgentBinary) + if _, err := os.Stat(agent); err != nil { + return "", nil, err + } + return agent, argv, nil +} diff --git a/internal/launcher/launcher_test.go b/internal/launcher/launcher_test.go new file mode 100644 index 0000000..8b10796 --- /dev/null +++ b/internal/launcher/launcher_test.go @@ -0,0 +1,179 @@ +package launcher + +import ( + "errors" + "io/fs" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" +) + +// resolvableTarget picks a binary we know is on PATH for the host running +// the test, so the exec.LookPath inside ResolveTarget actually succeeds. +// macOS CI doesn't have powershell.exe; Windows local devs don't have +// /bin/sh. Pick per-OS so the cross-platform tests stay honest about +// covering the resolve path end-to-end rather than just the argv parsing. +func resolvableTarget(t *testing.T) (basename, wantSuffix string) { + t.Helper() + if runtime.GOOS == "windows" { + // cmd.exe ships on every Windows host the test would ever run on. + return "cmd.exe", "cmd.exe" + } + return "sh", "/sh" +} + +func TestResolveTarget_ExecMode_ForwardsArgs(t *testing.T) { + basename, wantSuffix := resolvableTarget(t) + target, args, err := ResolveTarget([]string{"--exec", basename, "-c", "echo hi"}) + if err != nil { + t.Fatalf("ResolveTarget returned error: %v", err) + } + if !strings.HasSuffix(target, wantSuffix) { + t.Errorf("target = %q, want suffix %q", target, wantSuffix) + } + if len(args) != 2 || args[0] != "-c" || args[1] != "echo hi" { + t.Errorf("args = %v, want [-c, \"echo hi\"]", args) + } +} + +// --exec with no further args is the misuse case: a customer-supplied +// task definition that includes the flag but no target. Must error +// rather than fall through to the default agent-sibling path — that +// would silently swallow a malformed task action. +func TestResolveTarget_ExecMode_MissingTarget(t *testing.T) { + _, _, err := ResolveTarget([]string{"--exec"}) + if err == nil { + t.Fatal("expected error for --exec with no target, got nil") + } + if !strings.Contains(err.Error(), ExecFlag) { + t.Errorf("error %q does not mention %q", err, ExecFlag) + } +} + +// An --exec target that doesn't resolve through LookPath must produce a +// distinct error rather than silently exiting (which Task Scheduler +// would record as a generic non-zero exit, indistinguishable from a +// transient failure). exec.LookPath is wrapped, so the inner error is +// preserved via errors.Is for diagnostic chains. +func TestResolveTarget_ExecMode_TargetNotFound(t *testing.T) { + bogus := "this-binary-definitely-does-not-exist-on-PATH-xyzzy.exe" + _, _, err := ResolveTarget([]string{"--exec", bogus}) + if err == nil { + t.Fatalf("expected error resolving %q, got nil", bogus) + } + if !errors.Is(err, exec.ErrNotFound) { + t.Errorf("expected error chain to include exec.ErrNotFound, got %v", err) + } +} + +// --exec with no further child args means "spawn target with empty argv" +// — useful for the trivial case (launcher.exe --exec foo.exe) where the +// child doesn't need flags. Forwards an empty slice, not a nil slice +// (callers passing this directly into exec.Command must get a usable +// value). +func TestResolveTarget_ExecMode_NoChildArgs(t *testing.T) { + basename, _ := resolvableTarget(t) + _, args, err := ResolveTarget([]string{"--exec", basename}) + if err != nil { + t.Fatalf("ResolveTarget returned error: %v", err) + } + if args == nil { + t.Error("args is nil; expected empty (but non-nil) slice") + } + if len(args) != 0 { + t.Errorf("args = %v, want []", args) + } +} + +// Default mode (no --exec) computes the sibling agent path relative to +// os.Executable(). During `go test` os.Executable() returns the compiled +// test binary, so we can stage a fake agent next to it and assert +// ResolveTarget picks it up. This is the lever the MSI install relies +// on — the launcher's directory dictates where the agent comes from. +func TestResolveTarget_DefaultMode_FindsSibling(t *testing.T) { + exePath, err := os.Executable() + if err != nil { + t.Skipf("os.Executable not available on this platform: %v", err) + } + siblingDir := filepath.Dir(exePath) + siblingPath := filepath.Join(siblingDir, AgentBinary) + + // Don't clobber a real agent that might be sitting next to the test + // binary in a developer's checkout. Three cases to handle correctly: + // + // - File doesn't exist (ErrNotExist) — seed the fake, delete on + // cleanup. Normal CI path. + // - File exists and is readable — capture bytes + mode, seed the + // fake, restore both on cleanup. + // - File exists but can't be read (permission, locked) — we'd + // destroy the developer's checkout if we overwrote it. Skip + // the test instead. + prev, readErr := os.ReadFile(siblingPath) + var prevMode os.FileMode = 0o644 + if readErr == nil { + // Capture the original mode so the file is restored byte-for- + // byte AND mode-for-mode. Without this, a real Authenticode- + // signed .exe with the executable bit set on POSIX (rare on + // CI but possible in a dual-platform checkout) would come back + // non-executable. + if info, statErr := os.Stat(siblingPath); statErr == nil { + prevMode = info.Mode().Perm() + } + } else if !errors.Is(readErr, fs.ErrNotExist) { + t.Skipf("cannot read existing sibling %q without risking developer state: %v", siblingPath, readErr) + } + + if err := os.WriteFile(siblingPath, []byte("fake-agent"), 0o644); err != nil { + t.Fatalf("seeding fake agent at %q: %v", siblingPath, err) + } + t.Cleanup(func() { + if readErr == nil { + // Restore both bytes and mode. + _ = os.WriteFile(siblingPath, prev, prevMode) + } else { + // Original was absent; remove our seed. + _ = os.Remove(siblingPath) + } + }) + + target, args, err := ResolveTarget([]string{"send-telemetry", "--install-dir=C:\\fake"}) + if err != nil { + t.Fatalf("ResolveTarget returned error: %v", err) + } + if target != siblingPath { + t.Errorf("target = %q, want %q", target, siblingPath) + } + if len(args) != 2 || args[0] != "send-telemetry" || args[1] != "--install-dir=C:\\fake" { + t.Errorf("args = %v, did not pass through unchanged", args) + } +} + +// When the sibling agent isn't on disk, ResolveTarget must return an +// error rather than a non-existent path. The launcher exit-1's on this; +// surfacing it as an explicit error here means callers (and tests) can +// distinguish "no agent installed" from other failure modes. +func TestResolveTarget_DefaultMode_NoSibling(t *testing.T) { + exePath, err := os.Executable() + if err != nil { + t.Skipf("os.Executable not available on this platform: %v", err) + } + siblingPath := filepath.Join(filepath.Dir(exePath), AgentBinary) + if _, statErr := os.Stat(siblingPath); statErr == nil { + // A real or stale sibling agent is in the way of this assertion. + // Move it aside for the test and restore on cleanup so we don't + // leave a half-broken dev checkout behind. + shadow := siblingPath + ".test-shadow" + if renameErr := os.Rename(siblingPath, shadow); renameErr != nil { + t.Skipf("could not move existing sibling agent %q out of the way: %v", siblingPath, renameErr) + } + t.Cleanup(func() { _ = os.Rename(shadow, siblingPath) }) + } + + _, _, err = ResolveTarget(nil) + if err == nil { + t.Fatal("expected error when sibling agent is absent, got nil") + } +}