Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
5983fe3
Add automated security audit workflow
PascalThuet May 2, 2026
c8f7c15
Address security audit review feedback
PascalThuet May 5, 2026
9069cf1
Add security workflow regression tests
PascalThuet May 5, 2026
6c4fa71
Address follow-up security workflow review
PascalThuet May 5, 2026
337e081
Use compile for security audit requirements
PascalThuet May 5, 2026
e73feb8
Address latest security workflow review
PascalThuet May 5, 2026
163b93a
Address latest security audit review
PascalThuet May 5, 2026
c802a49
Harden security-sensitive repository surfaces
PascalThuet May 5, 2026
279035e
Address remaining security review feedback
PascalThuet May 6, 2026
851ba8a
ci(security): tighten PR checks for security regressions
PascalThuet May 14, 2026
c929c79
ci(security): address review feedback
PascalThuet May 15, 2026
cc2a473
ci(security): tidy follow-up details
PascalThuet May 15, 2026
8530e84
ci(security): apply self-review follow-ups
PascalThuet May 15, 2026
9224927
ci(security): apply review #2 follow-ups
PascalThuet May 15, 2026
8af64bf
ci(security): apply review #3 follow-ups
PascalThuet May 15, 2026
bbf2d99
test(upgrade): polish TestBoundedRead readability
PascalThuet May 16, 2026
0eb0009
ci(security): address Copilot review #4300554119
PascalThuet May 16, 2026
a71f7b0
ci(security): refresh audit baselines
PascalThuet May 16, 2026
f5356bb
fix: address copilot security review follow-up
PascalThuet May 21, 2026
8a60c13
fix: wrap unsafe zip extraction errors
PascalThuet May 27, 2026
56999d5
fix: redact secrets baseline hash logs
PascalThuet May 29, 2026
a4b5e00
fix: keep secrets baseline hashes out of repr
PascalThuet May 29, 2026
9248db8
fix: address Copilot review on bounded reads and redirect-safety
PascalThuet May 30, 2026
7bb977a
fix: address follow-up Copilot review (error typing, docs, tests)
PascalThuet May 30, 2026
de28301
fix(security): bound inline ZIP manifest read; guard ADO token redirects
PascalThuet Jun 6, 2026
c3f4d2f
fix(security): pin tight read bounds on JSON responses; cap actual ZI…
PascalThuet Jun 8, 2026
6430259
fix: align checkout pins and centralize loopback predicate
PascalThuet Jun 10, 2026
28adfc0
fix: pre-empt review feedback on pins, predicate reuse, and baseline …
PascalThuet Jun 10, 2026
7e21a20
fix: error messages and docstring name the exact loopback hosts
PascalThuet Jun 11, 2026
befeebe
docs(http): clarify redirect scheme guard is unconditional
PascalThuet Jun 11, 2026
123f813
harden: reject hostless URLs in is_https_or_localhost_http
PascalThuet Jun 12, 2026
774da95
fix(workflows): reject hostless catalog URLs during fetch
PascalThuet Jun 16, 2026
c6dd574
docs(cli): clarify host requirement for URL validation
PascalThuet Jun 17, 2026
1f1b20c
fix: stabilize security rebase follow-ups
PascalThuet Jun 17, 2026
e3f0153
fix: address security audit follow-ups
PascalThuet Jun 18, 2026
ae66617
fix: enforce strict redirects for catalog downloads
PascalThuet Jun 18, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions .github/bandit-baseline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
{
"results": [
{
"code": "168 return opener.open(req, timeout=timeout)\n169 return urllib.request.urlopen(req, timeout=timeout) # noqa: S310\n",
"col_offset": 11,
"end_col_offset": 55,
"filename": "src/specify_cli/authentication/http.py",
"issue_confidence": "HIGH",
"issue_cwe": {
"id": 22,
"link": "https://cwe.mitre.org/data/definitions/22.html"
},
"issue_severity": "MEDIUM",
"issue_text": "Audit url open for permitted schemes. Allowing use of file:/ or custom schemes is often unexpected.",
"line_number": 169,
"line_range": [
169
],
"more_info": "https://bandit.readthedocs.io/en/1.9.4/blacklists/blacklist_calls.html#b310-urllib-urlopen",
"test_id": "B310",
"test_name": "blacklist"
},
{
"code": "34 run_cmd,\n35 shell=True,\n36 capture_output=True,\n37 text=True,\n38 cwd=cwd,\n39 timeout=300,\n40 )\n41 output = {\n42 \"exit_code\": proc.returncode,\n43 \"stdout\": proc.stdout,\n",
"col_offset": 19,
"end_col_offset": 13,
"filename": "src/specify_cli/workflows/steps/shell/__init__.py",
"issue_confidence": "HIGH",
"issue_cwe": {
"id": 78,
"link": "https://cwe.mitre.org/data/definitions/78.html"
},
"issue_severity": "HIGH",
"issue_text": "subprocess call with shell=True identified, security issue.",
"line_number": 35,
"line_range": [
33,
34,
35,
36,
37,
38,
39,
40
],
"more_info": "https://bandit.readthedocs.io/en/1.9.4/plugins/b602_subprocess_popen_with_shell_equals_true.html",
"test_id": "B602",
"test_name": "subprocess_popen_with_shell_equals_true"
}
]
}
213 changes: 213 additions & 0 deletions .github/scripts/check_bandit_baseline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
"""Fail if new entries appear in the Bandit baseline without acknowledgement.

The bandit baseline whitelists known findings so they don't fail CI. If a
contributor adds a new entry, silent whitelisting becomes invisible in
review. This script compares the set of result *identities* in the
baseline at the PR head against the baseline at its base; if any new
identity appears, the PR must carry the label ``security-baseline-change``
to confirm the addition is intentional.

We compare identities (filename + line + test_id + issue_severity +
issue_confidence + hash-of-code-snippet) rather than raw counts so a PR
cannot remove one existing entry and add a different new one to keep the
count constant — which would silently whitelist a new finding.

When the baseline file does not exist at the base ref, this is the PR
that introduces it; we treat all entries as the starting baseline and
do not require the label.

For the head side we read the working tree directly (the CI runner is
checked out at the PR head, so the working-tree file IS the head state).
Reading via ``git show <head_ref>:`` would fail-open on unfetched refs
or detached checkouts — for a security gate we want fail-closed.

Required environment variables:
- ``BANDIT_BASELINE_BASE``: git ref of the PR base
- ``BANDIT_BASELINE_LABELS``: comma-separated PR labels

Outside of PR events, all inputs may be empty and the script no-ops.
"""

from __future__ import annotations

import hashlib
import json
import os
import re
import subprocess
import sys
from pathlib import Path

REPO_ROOT = Path(__file__).resolve().parents[2]
BASELINE_PATH = ".github/bandit-baseline.json"
ACK_LABEL = "security-baseline-change"


def _git_ok(*args: str) -> bool:
"""True if the git command exits 0 (output discarded)."""
return (
subprocess.run(
["git", *args],
cwd=REPO_ROOT,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
).returncode
== 0
)


def _read_baseline_at(ref: str) -> tuple[dict, bool]:
"""Return (baseline_json, file_existed_at_ref).

Used for the base side. The head side reads the working tree to avoid
silently fail-opening on an unfetched/invalid head ref.

Only a missing *path* at a resolvable ref counts as "did not exist";
an unresolvable ref or a failing ``git show`` aborts instead, so a
transient git failure cannot silently disable the gate.
"""
if not ref:
return {"results": []}, False
if not _git_ok("rev-parse", "--verify", "--quiet", f"{ref}^{{commit}}"):
raise SystemExit(
f"Base ref {ref!r} cannot be resolved (unfetched or invalid). "
f"Refusing to fail-open on a security gate."
)
if not _git_ok("cat-file", "-e", f"{ref}:{BASELINE_PATH}"):
return {"results": []}, False
try:
blob = subprocess.run(
["git", "show", f"{ref}:{BASELINE_PATH}"],
check=True,
cwd=REPO_ROOT,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
).stdout
except subprocess.CalledProcessError as exc:
raise SystemExit(
f"Could not read baseline at {ref!r}: {exc.stderr.strip()}. "
f"Refusing to fail-open on a security gate."
)
try:
return json.loads(blob), True
except json.JSONDecodeError:
print(f"Could not parse baseline at {ref}; treating as empty.", file=sys.stderr)
return {"results": []}, True


def _read_baseline_from_worktree() -> tuple[dict, bool]:
"""Return (baseline_json, file_exists_on_disk).

The CI runner is checked out at the PR head, so the working-tree
file IS the head state. Reading it directly sidesteps spurious
``git show`` failures that would otherwise let an unreadable head
silently pass the gate.

Asymmetric with the base reader: a corrupt JSON on disk is the
proposed PR state — we fail-closed there rather than treating
it as an empty baseline (which would silently drop the gate).
"""
path = REPO_ROOT / BASELINE_PATH
if not path.exists():
return {"results": []}, False
try:
return json.loads(path.read_text(encoding="utf-8")), True
except json.JSONDecodeError as exc:
raise SystemExit(
f"Working-tree baseline at {BASELINE_PATH} is corrupt: {exc}. "
f"Refusing to fail-open on a security gate."
)


_WHITESPACE_RE = re.compile(r"\s+")


def _identity(result: dict) -> str:
"""Stable identity for a baseline entry.

Combines location, test, severity, confidence, and a hash of the
pinned code snippet (whitespace-normalized) so reformatting changes
or upstream bandit-output tweaks don't register as new findings,
but a different finding at the same line does.
"""
code = result.get("code", "") or ""
normalized = _WHITESPACE_RE.sub(" ", code).strip()
code_hash = hashlib.sha256(normalized.encode("utf-8")).hexdigest()[:16]
return "|".join(
[
str(result.get("filename", "")),
str(result.get("line_number", "")),
str(result.get("test_id", "")),
str(result.get("issue_severity", "")),
str(result.get("issue_confidence", "")),
code_hash,
]
)


def main() -> int:
base_ref = os.environ.get("BANDIT_BASELINE_BASE", "").strip()

if not base_ref or set(base_ref) <= {"0"}:
Comment thread
PascalThuet marked this conversation as resolved.
print("No PR base ref; baseline diff check skipped.")
return 0

base_baseline, base_existed = _read_baseline_at(base_ref)
head_baseline, head_existed = _read_baseline_from_worktree()

Comment thread
PascalThuet marked this conversation as resolved.
if not base_existed:
print(
"Baseline file not present at base ref; treating this PR as the "
"introduction of the baseline. No acknowledgement required."
)
return 0

if not head_existed:
# Fail-closed: the file existed at base but is missing in the
# working tree. Either the PR deleted it (suspicious — the gate
# would no longer protect anything) or the workspace is incomplete.
print(
f"Baseline file {BASELINE_PATH} existed at the base ref but is "
f"missing in the working tree. Refusing to fail-open on a "
f"security gate.",
file=sys.stderr,
)
return 1

base_ids = {_identity(r) for r in base_baseline.get("results", [])}
head_ids = {_identity(r) for r in head_baseline.get("results", [])}

new_ids = head_ids - base_ids
if not new_ids:
print(
f"Bandit baseline entries: {len(base_ids)} -> {len(head_ids)} "
f"(no new identities)."
)
return 0

labels = {
label.strip()
for label in os.environ.get("BANDIT_BASELINE_LABELS", "").split(",")
if label.strip()
}
if ACK_LABEL in labels:
print(
f"Bandit baseline gained {len(new_ids)} new identities; "
f"acknowledged via label '{ACK_LABEL}'."
)
return 0

print(
f"Bandit baseline gained {len(new_ids)} new identities. "
f"Add label '{ACK_LABEL}' to the PR to acknowledge that the new "
f"whitelist entries are intentional.",
file=sys.stderr,
)
for identity in sorted(new_ids):
print(f" + {identity}", file=sys.stderr)
return 1


if __name__ == "__main__":
raise SystemExit(main())
Loading