From faab363e89c0c390f87af8974ac86ed211e5bf20 Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Wed, 27 May 2026 01:39:31 +0530 Subject: [PATCH 1/2] feat(windows): add --exec mode to GUI launcher MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The launcher was hardcoded to spawn the sibling agent .exe, which is exactly what the MSI install layout needs but doesn't work for the PowerShell loader. The PS task action has to run `powershell.exe -File loader.ps1 send-telemetry` on every tick (loader owns auto-update), and powershell.exe is console-subsystem — Task Scheduler firing it directly allocates a console and flashes a window before -WindowStyle Hidden takes effect. Same root cause PR #104 fixed for the agent, just at the powershell layer. This adds an --exec mode so the PS loader's scheduled task can wrap powershell.exe in the launcher's no-console envelope: task.exe --exec powershell.exe -ExecutionPolicy Bypass ... When --exec is absent the launcher falls through to the existing sibling-agent behaviour, so MSI installs see no change. Target resolution lives in internal/launcher so it gets unit-test coverage on the macOS CI runner (the launcher binary itself stays Windows-only). --- .../main.go | 75 +++++---- internal/launcher/launcher.go | 78 +++++++++ internal/launcher/launcher_test.go | 156 ++++++++++++++++++ 3 files changed, 279 insertions(+), 30 deletions(-) create mode 100644 internal/launcher/launcher.go create mode 100644 internal/launcher/launcher_test.go diff --git a/cmd/stepsecurity-dev-machine-guard-task/main.go b/cmd/stepsecurity-dev-machine-guard-task/main.go index 96ca252..559ffef 100644 --- a/cmd/stepsecurity-dev-machine-guard-task/main.go +++ b/cmd/stepsecurity-dev-machine-guard-task/main.go @@ -1,58 +1,72 @@ //go:build windows // Command stepsecurity-dev-machine-guard-task is a GUI-subsystem -// launcher that invokes the console-subsystem agent under +// launcher that invokes a console-subsystem child under // CREATE_NO_WINDOW. Built with `-ldflags "-H windowsgui"`. // // Why a separate binary: Windows allocates a console for any -// console-subsystem process whose parent has none. Task Scheduler -// under /ru INTERACTIVE is such a parent, so the agent itself would -// always flash a window. The only fully-reliable suppression is for -// the parent CreateProcess call to pass CREATE_NO_WINDOW, and the -// only way to be that parent without flashing our own console is to -// be GUI-subsystem. The agent stays console-subsystem so interactive -// CLI use (install, configure, manual scans) still works normally. +// console-subsystem process whose parent has none. Task Scheduler under +// /ru INTERACTIVE is such a parent, so a console-subsystem child +// invoked directly would always flash a window. The only fully reliable +// suppression is for the parent CreateProcess call to pass +// CREATE_NO_WINDOW, and the only way to be that parent without flashing +// our own console is to be GUI-subsystem. // -// Layout: both binaries sit in the same directory. The scheduled task -// points at this launcher; arguments forward unchanged. +// Two operating modes (target-resolution lives in internal/launcher so +// it can be unit-tested cross-platform): // -// Lifecycle: the agent is assigned to a Job Object with -// JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE so it dies when the launcher -// does — including under Stop-ScheduledTask, which only terminates -// the registered action's PID. +// - Default. Invoked without --exec, the launcher spawns its sibling +// stepsecurity-dev-machine-guard.exe and forwards argv unchanged. +// This is what the MSI install layout's scheduled-task action uses. +// +// - --exec mode. Invoked as `task.exe --exec [args...]`, the +// launcher spawns (exec.LookPath resolved) with the remaining +// args. Used by the PowerShell loader's scheduled task to wrap +// `powershell.exe -File loader.ps1 send-telemetry` in the same +// no-console envelope the MSI flow uses for the agent. +// +// The agent (and any --exec target) stays console-subsystem so +// interactive CLI use continues to work normally. +// +// Lifecycle: the child is assigned to a Job Object with +// JOB_OBJECT_LIMIT_KILL_ON_JOB_CLOSE so it dies when the launcher does +// — including under Stop-ScheduledTask, which only terminates the +// registered action's PID. package main import ( "errors" + "fmt" "os" "os/exec" - "path/filepath" "syscall" "unsafe" + "github.com/step-security/dev-machine-guard/internal/launcher" "golang.org/x/sys/windows" ) -const ( - createNoWindow uint32 = 0x08000000 - agentBinary = "stepsecurity-dev-machine-guard.exe" -) +const createNoWindow uint32 = 0x08000000 func main() { - os.Exit(run()) + os.Exit(run(os.Args[1:])) } -func run() int { - me, err := os.Executable() +// run is split out so the entrypoint stays one line. argv is the slice +// after the program name (os.Args[1:] in main); accepting it explicitly +// keeps the windows-only test path open if we add one later. +func run(argv []string) int { + target, childArgs, err := launcher.ResolveTarget(argv) if err != nil { - return 1 - } - agent := filepath.Join(filepath.Dir(me), agentBinary) - if _, err := os.Stat(agent); err != nil { - return 1 + // stderr is normally unreachable for a GUI-subsystem process + // launched by Task Scheduler, but it's visible to interactive + // invocations and to test runs — both worth surfacing the + // concrete misuse string for. + fmt.Fprintln(os.Stderr, err) + return 2 } - cmd := exec.Command(agent, os.Args[1:]...) + cmd := exec.Command(target, childArgs...) cmd.SysProcAttr = &syscall.SysProcAttr{ HideWindow: true, CreationFlags: createNoWindow, @@ -62,10 +76,11 @@ func run() int { return 1 } - // Best-effort: bind the agent to a kill-on-close job. The job + // Best-effort: bind the child to a kill-on-close job. The job // handle stays open in this process; the kernel closes it on our // exit, which fires the kill. Failure here only weakens lifecycle - // (orphan possible on forced termination), not the scan itself. + // (orphan possible on forced termination), not the work the child + // was started to do. if job, jerr := newKillOnCloseJob(); jerr == nil { if h, oerr := windows.OpenProcess( windows.PROCESS_SET_QUOTA|windows.PROCESS_TERMINATE, diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go new file mode 100644 index 0000000..2794b5d --- /dev/null +++ b/internal/launcher/launcher.go @@ -0,0 +1,78 @@ +// Package launcher resolves the child process the GUI-subsystem +// launcher (cmd/stepsecurity-dev-machine-guard-task) should spawn for a +// given invocation. The launcher itself lives in cmd/ and is Windows-only +// because it depends on Job Objects and CREATE_NO_WINDOW; the resolution +// logic is platform-agnostic and lives here so it can be unit-tested on +// the macOS CI runners that drive the rest of the test suite. +package launcher + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" +) + +const ( + // AgentBinary is the default child the launcher invokes when no + // --exec flag is present. Sits next to the launcher in the install + // directory (MSI: C:\Program Files\StepSecurity\, PowerShell: + // %USERPROFILE%\.stepsecurity\bin\). + AgentBinary = "stepsecurity-dev-machine-guard.exe" + + // ExecFlag opts into the generic-target launch mode used by the + // PowerShell loader. See ResolveTarget for the contract. + ExecFlag = "--exec" +) + +// ResolveTarget returns the child executable's absolute path and the +// argv slice to forward to it, derived from the launcher's own argv +// (i.e. os.Args[1:], not including the program name). +// +// Two modes, dispatched on the first argument: +// +// - "--exec" mode. argv begins with --exec; the next element is the +// child binary (resolved via exec.LookPath so bare basenames like +// "powershell.exe" are accepted alongside fully-qualified paths), +// and the rest of argv is forwarded to it. Used by the PowerShell +// loader's scheduled task to wrap `powershell.exe -File loader.ps1 +// send-telemetry` under the launcher's no-console envelope. +// +// - Default (legacy / MSI) mode. argv does not begin with --exec; the +// child is the sibling AgentBinary in the launcher's own directory, +// and all of argv is forwarded to it. Preserves byte-for-byte +// compatibility with the launcher's pre-1.11.5 behaviour, which is +// what the MSI install layout's scheduled task action uses. +// +// Errors: +// +// - --exec without a target: malformed task action; surfaced so Task +// Scheduler's "last result" column reflects the misconfiguration. +// - --exec target not on PATH: same idea — visible at install time +// rather than silently exiting 1. +// - Default mode with no sibling agent: most commonly indicates the +// launcher was deployed without its companion; matches the previous +// behaviour (return error → exit 1 → no console output) so existing +// MSI installs see no behaviour change. +func ResolveTarget(argv []string) (string, []string, error) { + if len(argv) > 0 && argv[0] == ExecFlag { + if len(argv) < 2 { + return "", nil, fmt.Errorf("%s requires a target executable", ExecFlag) + } + target, err := exec.LookPath(argv[1]) + if err != nil { + return "", nil, fmt.Errorf("%s: cannot resolve %q: %w", ExecFlag, argv[1], err) + } + return target, argv[2:], nil + } + + me, err := os.Executable() + if err != nil { + return "", nil, err + } + agent := filepath.Join(filepath.Dir(me), AgentBinary) + if _, err := os.Stat(agent); err != nil { + return "", nil, err + } + return agent, argv, nil +} diff --git a/internal/launcher/launcher_test.go b/internal/launcher/launcher_test.go new file mode 100644 index 0000000..1f6f59a --- /dev/null +++ b/internal/launcher/launcher_test.go @@ -0,0 +1,156 @@ +package launcher + +import ( + "errors" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" +) + +// resolvableTarget picks a binary we know is on PATH for the host running +// the test, so the exec.LookPath inside ResolveTarget actually succeeds. +// macOS CI doesn't have powershell.exe; Windows local devs don't have +// /bin/sh. Pick per-OS so the cross-platform tests stay honest about +// covering the resolve path end-to-end rather than just the argv parsing. +func resolvableTarget(t *testing.T) (basename, wantSuffix string) { + t.Helper() + if runtime.GOOS == "windows" { + // cmd.exe ships on every Windows host the test would ever run on. + return "cmd.exe", "cmd.exe" + } + return "sh", "/sh" +} + +func TestResolveTarget_ExecMode_ForwardsArgs(t *testing.T) { + basename, wantSuffix := resolvableTarget(t) + target, args, err := ResolveTarget([]string{"--exec", basename, "-c", "echo hi"}) + if err != nil { + t.Fatalf("ResolveTarget returned error: %v", err) + } + if !strings.HasSuffix(target, wantSuffix) { + t.Errorf("target = %q, want suffix %q", target, wantSuffix) + } + if len(args) != 2 || args[0] != "-c" || args[1] != "echo hi" { + t.Errorf("args = %v, want [-c, \"echo hi\"]", args) + } +} + +// --exec with no further args is the misuse case: a customer-supplied +// task definition that includes the flag but no target. Must error +// rather than fall through to the default agent-sibling path — that +// would silently swallow a malformed task action. +func TestResolveTarget_ExecMode_MissingTarget(t *testing.T) { + _, _, err := ResolveTarget([]string{"--exec"}) + if err == nil { + t.Fatal("expected error for --exec with no target, got nil") + } + if !strings.Contains(err.Error(), ExecFlag) { + t.Errorf("error %q does not mention %q", err, ExecFlag) + } +} + +// An --exec target that doesn't resolve through LookPath must produce a +// distinct error rather than silently exiting (which Task Scheduler +// would record as a generic non-zero exit, indistinguishable from a +// transient failure). exec.LookPath is wrapped, so the inner error is +// preserved via errors.Is for diagnostic chains. +func TestResolveTarget_ExecMode_TargetNotFound(t *testing.T) { + bogus := "this-binary-definitely-does-not-exist-on-PATH-xyzzy.exe" + _, _, err := ResolveTarget([]string{"--exec", bogus}) + if err == nil { + t.Fatalf("expected error resolving %q, got nil", bogus) + } + if !errors.Is(err, exec.ErrNotFound) { + t.Errorf("expected error chain to include exec.ErrNotFound, got %v", err) + } +} + +// --exec with no further child args means "spawn target with empty argv" +// — useful for the trivial case (launcher.exe --exec foo.exe) where the +// child doesn't need flags. Forwards an empty slice, not a nil slice +// (callers passing this directly into exec.Command must get a usable +// value). +func TestResolveTarget_ExecMode_NoChildArgs(t *testing.T) { + basename, _ := resolvableTarget(t) + _, args, err := ResolveTarget([]string{"--exec", basename}) + if err != nil { + t.Fatalf("ResolveTarget returned error: %v", err) + } + if args == nil { + t.Error("args is nil; expected empty (but non-nil) slice") + } + if len(args) != 0 { + t.Errorf("args = %v, want []", args) + } +} + +// Default mode (no --exec) computes the sibling agent path relative to +// os.Executable(). During `go test` os.Executable() returns the compiled +// test binary, so we can stage a fake agent next to it and assert +// ResolveTarget picks it up. This is the lever the MSI install relies +// on — the launcher's directory dictates where the agent comes from. +func TestResolveTarget_DefaultMode_FindsSibling(t *testing.T) { + exePath, err := os.Executable() + if err != nil { + t.Skipf("os.Executable not available on this platform: %v", err) + } + siblingDir := filepath.Dir(exePath) + siblingPath := filepath.Join(siblingDir, AgentBinary) + + // Don't clobber a real agent that might be sitting next to the test + // binary in a developer's checkout — restore whatever was there. + prev, hadPrev := os.ReadFile(siblingPath) + _ = prev + + if err := os.WriteFile(siblingPath, []byte("fake-agent"), 0o644); err != nil { + t.Fatalf("seeding fake agent at %q: %v", siblingPath, err) + } + t.Cleanup(func() { + if hadPrev == nil { + _ = os.WriteFile(siblingPath, prev, 0o644) + } else { + _ = os.Remove(siblingPath) + } + }) + + target, args, err := ResolveTarget([]string{"send-telemetry", "--install-dir=C:\\fake"}) + if err != nil { + t.Fatalf("ResolveTarget returned error: %v", err) + } + if target != siblingPath { + t.Errorf("target = %q, want %q", target, siblingPath) + } + if len(args) != 2 || args[0] != "send-telemetry" || args[1] != "--install-dir=C:\\fake" { + t.Errorf("args = %v, did not pass through unchanged", args) + } +} + +// When the sibling agent isn't on disk, ResolveTarget must return an +// error rather than a non-existent path. The launcher exit-1's on this; +// surfacing it as an explicit error here means callers (and tests) can +// distinguish "no agent installed" from other failure modes. +func TestResolveTarget_DefaultMode_NoSibling(t *testing.T) { + exePath, err := os.Executable() + if err != nil { + t.Skipf("os.Executable not available on this platform: %v", err) + } + siblingPath := filepath.Join(filepath.Dir(exePath), AgentBinary) + if _, statErr := os.Stat(siblingPath); statErr == nil { + // A real or stale sibling agent is in the way of this assertion. + // Move it aside for the test and restore on cleanup so we don't + // leave a half-broken dev checkout behind. + shadow := siblingPath + ".test-shadow" + if renameErr := os.Rename(siblingPath, shadow); renameErr != nil { + t.Skipf("could not move existing sibling agent %q out of the way: %v", siblingPath, renameErr) + } + t.Cleanup(func() { _ = os.Rename(shadow, siblingPath) }) + } + + _, _, err = ResolveTarget(nil) + if err == nil { + t.Fatal("expected error when sibling agent is absent, got nil") + } +} From 4f2f6c3fe6fe59ecab7146b9bb051fd8226a87b3 Mon Sep 17 00:00:00 2001 From: Swarit Pandey Date: Wed, 27 May 2026 12:55:38 +0530 Subject: [PATCH 2/2] fix(launcher): preserve legacy exit-1-silent on default-mode failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three coordinated fixes from Copilot review on #113: - main.go: previous patch unified all ResolveTarget errors to "stderr + exit 2". For the default (sibling-agent) mode, that's a behavior change MSI installs would inherit — Task Scheduler's LastTaskResult column flips from 1 to 2 when the sibling is absent, which would silently break any downstream alerting that keys on the result code. Split: --exec errors still get stderr + exit 2 (caller asked for a feature, give them diagnostics), default-mode errors stay silent with exit 1 (pre-1.11.5 contract preserved). - launcher.go: docstring now reflects the actual split — the "matches the previous behaviour" line was stale once main.go did the unified mapping above; moved the contract into the docstring with a clear note about which mode produces which exit code. - launcher_test.go: TestResolveTarget_DefaultMode_FindsSibling's cleanup was buggy. The original code always overwrote siblingPath in cleanup even if the initial os.ReadFile failed for reasons other than non-existence (locked file, perm error), and it restored with a hardcoded 0o644 mode. Now: capture the original mode via os.Stat, treat any non-IsNotExist read error as a Skip (so we don't destroy real developer state we can't restore), restore both bytes AND mode on cleanup. --- .../main.go | 25 ++++++++++---- internal/launcher/launcher.go | 20 ++++++----- internal/launcher/launcher_test.go | 33 ++++++++++++++++--- 3 files changed, 59 insertions(+), 19 deletions(-) diff --git a/cmd/stepsecurity-dev-machine-guard-task/main.go b/cmd/stepsecurity-dev-machine-guard-task/main.go index 559ffef..5c02763 100644 --- a/cmd/stepsecurity-dev-machine-guard-task/main.go +++ b/cmd/stepsecurity-dev-machine-guard-task/main.go @@ -58,12 +58,25 @@ func main() { func run(argv []string) int { target, childArgs, err := launcher.ResolveTarget(argv) if err != nil { - // stderr is normally unreachable for a GUI-subsystem process - // launched by Task Scheduler, but it's visible to interactive - // invocations and to test runs — both worth surfacing the - // concrete misuse string for. - fmt.Fprintln(os.Stderr, err) - return 2 + // Two distinct failure shapes, matched to the legacy contract: + // + // - Default mode (no --exec): the launcher silently exits 1. + // This preserves byte-for-byte compatibility with MSI installs + // that the pre-1.11.5 launcher served. Task Scheduler records + // "LastTaskResult=1" — same value MSI deployments have always + // observed when the sibling agent is absent. A behavioral + // change here would shift downstream dashboards/alerts that + // key on the result code. + // + // - --exec mode: the caller asked for a feature; surface the + // concrete misuse (missing target, unresolved PATH, etc.) on + // stderr with a distinct exit code so dispatch failures are + // diagnosable. + if len(argv) > 0 && argv[0] == launcher.ExecFlag { + fmt.Fprintln(os.Stderr, err) + return 2 + } + return 1 } cmd := exec.Command(target, childArgs...) diff --git a/internal/launcher/launcher.go b/internal/launcher/launcher.go index 2794b5d..7c9f77a 100644 --- a/internal/launcher/launcher.go +++ b/internal/launcher/launcher.go @@ -44,16 +44,20 @@ const ( // compatibility with the launcher's pre-1.11.5 behaviour, which is // what the MSI install layout's scheduled task action uses. // -// Errors: +// Errors (all returned from this function; the caller in cmd/.../main.go +// is responsible for mapping them to exit codes + stderr output per the +// contract below): // -// - --exec without a target: malformed task action; surfaced so Task -// Scheduler's "last result" column reflects the misconfiguration. -// - --exec target not on PATH: same idea — visible at install time -// rather than silently exiting 1. +// - --exec without a target: malformed task action. +// - --exec target not on PATH: visible at install time rather than +// silently exiting. // - Default mode with no sibling agent: most commonly indicates the -// launcher was deployed without its companion; matches the previous -// behaviour (return error → exit 1 → no console output) so existing -// MSI installs see no behaviour change. +// launcher was deployed without its companion. +// +// The launcher entrypoint distinguishes the two error contexts: +// --exec errors are written to stderr and the process exits 2, while +// default-mode errors stay silent and exit 1 (preserving byte-for-byte +// compatibility with the pre-1.11.5 launcher that MSI installs rely on). func ResolveTarget(argv []string) (string, []string, error) { if len(argv) > 0 && argv[0] == ExecFlag { if len(argv) < 2 { diff --git a/internal/launcher/launcher_test.go b/internal/launcher/launcher_test.go index 1f6f59a..8b10796 100644 --- a/internal/launcher/launcher_test.go +++ b/internal/launcher/launcher_test.go @@ -2,6 +2,7 @@ package launcher import ( "errors" + "io/fs" "os" "os/exec" "path/filepath" @@ -101,17 +102,39 @@ func TestResolveTarget_DefaultMode_FindsSibling(t *testing.T) { siblingPath := filepath.Join(siblingDir, AgentBinary) // Don't clobber a real agent that might be sitting next to the test - // binary in a developer's checkout — restore whatever was there. - prev, hadPrev := os.ReadFile(siblingPath) - _ = prev + // binary in a developer's checkout. Three cases to handle correctly: + // + // - File doesn't exist (ErrNotExist) — seed the fake, delete on + // cleanup. Normal CI path. + // - File exists and is readable — capture bytes + mode, seed the + // fake, restore both on cleanup. + // - File exists but can't be read (permission, locked) — we'd + // destroy the developer's checkout if we overwrote it. Skip + // the test instead. + prev, readErr := os.ReadFile(siblingPath) + var prevMode os.FileMode = 0o644 + if readErr == nil { + // Capture the original mode so the file is restored byte-for- + // byte AND mode-for-mode. Without this, a real Authenticode- + // signed .exe with the executable bit set on POSIX (rare on + // CI but possible in a dual-platform checkout) would come back + // non-executable. + if info, statErr := os.Stat(siblingPath); statErr == nil { + prevMode = info.Mode().Perm() + } + } else if !errors.Is(readErr, fs.ErrNotExist) { + t.Skipf("cannot read existing sibling %q without risking developer state: %v", siblingPath, readErr) + } if err := os.WriteFile(siblingPath, []byte("fake-agent"), 0o644); err != nil { t.Fatalf("seeding fake agent at %q: %v", siblingPath, err) } t.Cleanup(func() { - if hadPrev == nil { - _ = os.WriteFile(siblingPath, prev, 0o644) + if readErr == nil { + // Restore both bytes and mode. + _ = os.WriteFile(siblingPath, prev, prevMode) } else { + // Original was absent; remove our seed. _ = os.Remove(siblingPath) } })