Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions docs/skills/skills-cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,59 @@ forge init my-agent --from-skills
forge build
```

## Security Audit

`forge skills audit` scores each skill in the project across four categories — egress, binary, env, script — and runs a `SecurityPolicy` check for hard violations. By default it uses the analyzer's `DefaultPolicy`. A custom policy YAML can be supplied with `--policy`.

```bash
# Audit skills in the default flat layout (skills/SKILL.md).
forge skills audit

# Audit subdirectory-style skills (skills/<name>/SKILL.md).
forge skills audit --dir skills

# JSON output for tooling.
forge skills audit --dir skills --format json

# Load a custom policy that adjusts both scoring and policy checks.
forge skills audit --dir skills --policy policy.yaml
```

### Policy YAML

```yaml
# policy.yaml
script_policy: allow # allow | warn (default) | deny
max_risk_score: 75 # PolicyViolation if exceeded

# Scoring overrides — reduce points for items the operator has accepted.
# Every affected RiskFactor's description carries "(via policy)" or
# "(acknowledged by policy)" so the override stays auditable.
trusted_domains:
- internal.example.com # +2 instead of +10 (unknown)
acknowledged_bins:
- python # +3 instead of +15 (high-risk)
acknowledged_env:
- DB_PASSWORD # +5 instead of +10 (sensitive)
```

Scoring overrides only down-weight builtin classifications — they cannot escalate a standard binary, env var, or domain. A binary not in the builtin high-risk set stays at +3 even if listed in `acknowledged_bins`.

### Audit output

Each `RiskFactor` records the override in its description, so policy-driven downgrades are visible in both text and JSON output:

```
risky-skill Risk: medium (30/100)
Factors:
egress +2 trusted domain (via policy): internal.example.com
binary +3 high-risk binary (acknowledged by policy): python
env +5 sensitive variable (acknowledged by policy): DB_PASSWORD
script +20 has executable script
```

Tooling can match on the substrings `(via policy)` or `(acknowledged by policy)` to flag policy-driven downgrades for review.

## Skill Builder (Web UI)

The [Web Dashboard](dashboard.md#skill-builder) includes an AI-powered Skill Builder that generates valid SKILL.md files and helper scripts through a conversational interface. It uses the agent's own LLM provider and includes server-side validation before saving to the agent's `skills/` directory. On save, the builder automatically parses the skill's requirements and:
Expand Down
9 changes: 9 additions & 0 deletions forge-cli/cmd/skills.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ var skillsTrustReportCmd = &cobra.Command{
var auditFormat string
var auditEmbedded bool
var auditDir string
var auditPolicyPath string
var signKeyPath string

func init() {
Expand All @@ -91,6 +92,7 @@ func init() {
skillsAuditCmd.Flags().StringVar(&auditFormat, "format", "text", "Output format: text or json")
skillsAuditCmd.Flags().BoolVar(&auditEmbedded, "embedded", false, "Audit embedded skills from the binary")
skillsAuditCmd.Flags().StringVar(&auditDir, "dir", "", "Audit skills from a directory of SKILL.md subdirectories")
skillsAuditCmd.Flags().StringVar(&auditPolicyPath, "policy", "", "Path to a YAML security policy file (overrides DefaultPolicy for both scoring and policy checks)")
skillsSignCmd.Flags().StringVar(&signKeyPath, "key", "", "Path to Ed25519 private key")
_ = skillsSignCmd.MarkFlagRequired("key")
}
Expand Down Expand Up @@ -375,6 +377,13 @@ func loadSecretPlaceholders(path string) map[string]bool {

func runSkillsAudit(cmd *cobra.Command, args []string) error {
policy := analyzer.DefaultPolicy()
if auditPolicyPath != "" {
loaded, err := analyzer.LoadPolicyFromFile(auditPolicyPath)
if err != nil {
return err
}
policy = loaded
}
var report *analyzer.AuditReport

switch {
Expand Down
2 changes: 1 addition & 1 deletion forge-skills/analyzer/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func CheckPolicy(sd *contract.SkillDescriptor, hasScript bool, policy SecurityPo

// Rule 6: MaxRiskScore
if policy.MaxRiskScore > 0 {
assessment := AnalyzeSkillDescriptor(sd, hasScript)
assessment := AnalyzeSkillDescriptor(sd, hasScript, policy)
if assessment.Score.Value > policy.MaxRiskScore {
violations = append(violations, PolicyViolation{
Rule: "max_risk_score",
Expand Down
23 changes: 23 additions & 0 deletions forge-skills/analyzer/policy_loader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package analyzer

import (
"fmt"
"os"

"gopkg.in/yaml.v3"
)

// LoadPolicyFromFile reads a YAML SecurityPolicy from path. Unspecified fields
// take their zero value, which means no override is applied — a minimal policy
// file can omit any rule it doesn't intend to change.
func LoadPolicyFromFile(path string) (SecurityPolicy, error) {
var p SecurityPolicy
data, err := os.ReadFile(path)
if err != nil {
return p, fmt.Errorf("reading policy file %q: %w", path, err)
}
if err := yaml.Unmarshal(data, &p); err != nil {
return p, fmt.Errorf("parsing policy file %q: %w", path, err)
}
return p, nil
}
95 changes: 95 additions & 0 deletions forge-skills/analyzer/policy_loader_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package analyzer

import (
"os"
"path/filepath"
"slices"
"testing"
)

func TestLoadPolicyFromFile(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "policy.yaml")
yaml := `
script_policy: allow
max_risk_score: 90
trusted_domains:
- internal.example.com
- corp.example.com
acknowledged_bins:
- python
- python3
acknowledged_env:
- DB_PASSWORD
`
if err := os.WriteFile(path, []byte(yaml), 0o644); err != nil {
t.Fatalf("writing policy file: %v", err)
}

p, err := LoadPolicyFromFile(path)
if err != nil {
t.Fatalf("LoadPolicyFromFile: %v", err)
}

if p.ScriptPolicy != "allow" {
t.Errorf("ScriptPolicy = %q, want %q", p.ScriptPolicy, "allow")
}
if p.MaxRiskScore != 90 {
t.Errorf("MaxRiskScore = %d, want 90", p.MaxRiskScore)
}
if !slices.Equal(p.TrustedDomains, []string{"internal.example.com", "corp.example.com"}) {
t.Errorf("TrustedDomains = %v", p.TrustedDomains)
}
if !slices.Equal(p.AcknowledgedBins, []string{"python", "python3"}) {
t.Errorf("AcknowledgedBins = %v", p.AcknowledgedBins)
}
if !slices.Equal(p.AcknowledgedEnv, []string{"DB_PASSWORD"}) {
t.Errorf("AcknowledgedEnv = %v", p.AcknowledgedEnv)
}
}

// TestLoadPolicyFromFile_PolicyAffectsScoring is an end-to-end check that a
// policy loaded from YAML actually changes the assessment — the regression
// the existing wiring gap created and #49 closes.
func TestLoadPolicyFromFile_PolicyAffectsScoring(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "policy.yaml")
yaml := "trusted_domains: [internal.example.com]\nacknowledged_bins: [python]\n"
if err := os.WriteFile(path, []byte(yaml), 0o644); err != nil {
t.Fatalf("writing policy file: %v", err)
}

policy, err := LoadPolicyFromFile(path)
if err != nil {
t.Fatalf("LoadPolicyFromFile: %v", err)
}

factors := scoreEgress([]string{"internal.example.com"}, policy)
if len(factors) != 1 || factors[0].Points != 2 {
t.Errorf("scoreEgress with loaded TrustedDomains: factors=%v", factors)
}

factors = scoreBinaries([]string{"python"}, policy)
if len(factors) != 1 || factors[0].Points != 3 {
t.Errorf("scoreBinaries with loaded AcknowledgedBins: factors=%v", factors)
}
}

func TestLoadPolicyFromFile_MissingFile(t *testing.T) {
_, err := LoadPolicyFromFile(filepath.Join(t.TempDir(), "missing.yaml"))
if err == nil {
t.Fatal("expected error for missing file")
}
}

func TestLoadPolicyFromFile_InvalidYAML(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "bad.yaml")
if err := os.WriteFile(path, []byte("this: is: not: valid: yaml: ["), 0o644); err != nil {
t.Fatalf("writing bad file: %v", err)
}
_, err := LoadPolicyFromFile(path)
if err == nil {
t.Fatal("expected parse error for invalid YAML")
}
}
2 changes: 1 addition & 1 deletion forge-skills/analyzer/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func GenerateReportFromEntries(entries []contract.SkillEntry, hasScript func(str
entry := &entries[i]
hs := hasScript != nil && hasScript(entry.Name)

assessment := AnalyzeSkillEntry(entry, hs)
assessment := AnalyzeSkillEntry(entry, hs, policy)

// Run policy checks
violations := CheckPolicyFromEntry(entry, hs, policy)
Expand Down
87 changes: 61 additions & 26 deletions forge-skills/analyzer/scoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,15 @@ var sensitiveEnvPatterns = []string{
"CREDENTIALS",
}

// AnalyzeSkillDescriptor scores a SkillDescriptor for security risk.
func AnalyzeSkillDescriptor(sd *contract.SkillDescriptor, hasScript bool) SkillRiskAssessment {
// AnalyzeSkillDescriptor scores a SkillDescriptor for security risk under the
// given policy. A zero-value SecurityPolicy{} preserves the historical default
// scoring (no overrides applied).
func AnalyzeSkillDescriptor(sd *contract.SkillDescriptor, hasScript bool, policy SecurityPolicy) SkillRiskAssessment {
var factors []RiskFactor

factors = append(factors, scoreEgress(sd.EgressDomains, nil)...)
factors = append(factors, scoreBinaries(sd.RequiredBins)...)
factors = append(factors, scoreEnv(sd.RequiredEnv, sd.OneOfEnv, sd.OptionalEnv)...)
factors = append(factors, scoreEgress(sd.EgressDomains, policy)...)
factors = append(factors, scoreBinaries(sd.RequiredBins, policy)...)
factors = append(factors, scoreEnv(sd.RequiredEnv, sd.OneOfEnv, sd.OptionalEnv, policy)...)
if hasScript {
factors = append(factors, scoreScript()...)
}
Expand All @@ -66,8 +68,10 @@ func AnalyzeSkillDescriptor(sd *contract.SkillDescriptor, hasScript bool) SkillR
}
}

// AnalyzeSkillEntry scores a SkillEntry for security risk.
func AnalyzeSkillEntry(entry *contract.SkillEntry, hasScript bool) SkillRiskAssessment {
// AnalyzeSkillEntry scores a SkillEntry for security risk under the given
// policy. A zero-value SecurityPolicy{} preserves the historical default
// scoring (no overrides applied).
func AnalyzeSkillEntry(entry *contract.SkillEntry, hasScript bool, policy SecurityPolicy) SkillRiskAssessment {
var factors []RiskFactor
var egressDomains []string
var bins []string
Expand Down Expand Up @@ -97,9 +101,9 @@ func AnalyzeSkillEntry(entry *contract.SkillEntry, hasScript bool) SkillRiskAsse
}
}

factors = append(factors, scoreEgress(egressDomains, nil)...)
factors = append(factors, scoreBinaries(bins)...)
factors = append(factors, scoreEnv(reqEnv, oneOfEnv, optEnv)...)
factors = append(factors, scoreEgress(egressDomains, policy)...)
factors = append(factors, scoreBinaries(bins, policy)...)
factors = append(factors, scoreEnv(reqEnv, oneOfEnv, optEnv, policy)...)
if hasScript {
factors = append(factors, scoreScript()...)
}
Expand All @@ -113,25 +117,32 @@ func AnalyzeSkillEntry(entry *contract.SkillEntry, hasScript bool) SkillRiskAsse
}
}

func scoreEgress(domains []string, extraTrusted []string) []RiskFactor {
func scoreEgress(domains []string, policy SecurityPolicy) []RiskFactor {
var factors []RiskFactor

trusted := make(map[string]bool)
for k, v := range trustedDomains {
trusted[k] = v
}
for _, d := range extraTrusted {
trusted[d] = true
// Union builtin trusted domains with policy.TrustedDomains. We track the
// policy-trusted set separately so the RiskFactor description can record
// that the trust came from a policy override.
policyTrusted := make(map[string]bool, len(policy.TrustedDomains))
for _, d := range policy.TrustedDomains {
policyTrusted[d] = true
}

for _, domain := range domains {
if trusted[domain] {
switch {
case trustedDomains[domain]:
factors = append(factors, RiskFactor{
Category: "egress",
Description: fmt.Sprintf("trusted domain: %s", domain),
Points: 2,
})
} else {
case policyTrusted[domain]:
factors = append(factors, RiskFactor{
Category: "egress",
Description: fmt.Sprintf("trusted domain (via policy): %s", domain),
Points: 2,
})
default:
factors = append(factors, RiskFactor{
Category: "egress",
Description: fmt.Sprintf("unknown domain: %s", domain),
Expand All @@ -151,16 +162,28 @@ func scoreEgress(domains []string, extraTrusted []string) []RiskFactor {
return factors
}

func scoreBinaries(bins []string) []RiskFactor {
func scoreBinaries(bins []string, policy SecurityPolicy) []RiskFactor {
acknowledged := make(map[string]bool, len(policy.AcknowledgedBins))
for _, b := range policy.AcknowledgedBins {
acknowledged[b] = true
}

var factors []RiskFactor
for _, bin := range bins {
if highRiskBinaries[bin] {
switch {
case highRiskBinaries[bin] && acknowledged[bin]:
factors = append(factors, RiskFactor{
Category: "binary",
Description: fmt.Sprintf("high-risk binary (acknowledged by policy): %s", bin),
Points: 3,
})
case highRiskBinaries[bin]:
factors = append(factors, RiskFactor{
Category: "binary",
Description: fmt.Sprintf("high-risk binary: %s", bin),
Points: 15,
})
} else {
default:
factors = append(factors, RiskFactor{
Category: "binary",
Description: fmt.Sprintf("standard binary: %s", bin),
Expand All @@ -171,21 +194,33 @@ func scoreBinaries(bins []string) []RiskFactor {
return factors
}

func scoreEnv(reqEnv, oneOfEnv, optEnv []string) []RiskFactor {
var factors []RiskFactor
func scoreEnv(reqEnv, oneOfEnv, optEnv []string, policy SecurityPolicy) []RiskFactor {
acknowledged := make(map[string]bool, len(policy.AcknowledgedEnv))
for _, e := range policy.AcknowledgedEnv {
acknowledged[e] = true
}

allEnv := make([]string, 0, len(reqEnv)+len(oneOfEnv)+len(optEnv))
allEnv = append(allEnv, reqEnv...)
allEnv = append(allEnv, oneOfEnv...)
allEnv = append(allEnv, optEnv...)

var factors []RiskFactor
for _, env := range allEnv {
if isSensitiveEnv(env) {
switch {
case isSensitiveEnv(env) && acknowledged[env]:
factors = append(factors, RiskFactor{
Category: "env",
Description: fmt.Sprintf("sensitive variable (acknowledged by policy): %s", env),
Points: 5,
})
case isSensitiveEnv(env):
factors = append(factors, RiskFactor{
Category: "env",
Description: fmt.Sprintf("sensitive variable: %s", env),
Points: 10,
})
} else {
default:
factors = append(factors, RiskFactor{
Category: "env",
Description: fmt.Sprintf("API key: %s", env),
Expand Down
Loading
Loading