sysprog21 · jserv · May 14, 2026 · May 14, 2026
diff --git a/Makefile b/Makefile
@@ -171,6 +171,19 @@ $(BUILD_DIR)/test-fork-lowbase: tests/test-fork-lowbase.c | $(BUILD_DIR)
 	$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -static -O2 -no-pie \
 		-Wl,-Ttext-segment=0x200000 -o $@ $<
 
+# test-lowbase-mem variants are non-PIE ET_EXEC builds of a single source,
+# linked below ELF_DEFAULT_BASE so mprotect/munmap exercise the old
+# low-address reject window at two offsets. One static pattern rule builds
+# both: the pattern stem ($*) is the hexadecimal text-segment base, so each
+# base is spelled exactly once, in the target list below.
+LOWBASE_MEM_BINS := $(addprefix $(BUILD_DIR)/test-lowbase-mem-,200000 300000)
+
+$(LOWBASE_MEM_BINS): $(BUILD_DIR)/test-lowbase-mem-%: \
+    tests/test-lowbase-mem.c | $(BUILD_DIR)
+	@echo "  CROSS   $< (low-base ET_EXEC @0x$*)"
+	$(Q)$(CROSS_COMPILE)gcc -D_GNU_SOURCE -static -O2 -no-pie \
+		-Wl,-Ttext-segment=0x$* -o $@ $<
+
 endif
 
 include mk/tests.mk

diff --git a/mk/config.mk b/mk/config.mk
@@ -16,16 +16,18 @@ endif
 
 # Exclude native macOS test files from cross-compilation
 NATIVE_TESTS := tests/test-multi-vcpu.c tests/test-rwx.c
+SPECIAL_TEST_SRCS := tests/test-lowbase-mem.c
+SPECIAL_TEST_BINS := $(BUILD_DIR)/test-lowbase-mem-200000 $(BUILD_DIR)/test-lowbase-mem-300000
 
 ifdef GUEST_TEST_BINARIES
   TEST_DIR  := $(GUEST_TEST_BINARIES)/bin
   TEST_DEPS :=
   TEST_HELLO_DEP :=
 else
   TEST_DIR  := $(BUILD_DIR)
-  TEST_C_SRCS := $(filter-out $(NATIVE_TESTS),$(wildcard tests/*.c))
+  TEST_C_SRCS := $(filter-out $(NATIVE_TESTS) $(SPECIAL_TEST_SRCS),$(wildcard tests/*.c))
   TEST_C_BINS := $(patsubst tests/%.c,$(BUILD_DIR)/%,$(TEST_C_SRCS))
-  TEST_DEPS := $(BUILD_DIR)/test-hello $(TEST_C_BINS)
+  TEST_DEPS := $(BUILD_DIR)/test-hello $(TEST_C_BINS) $(SPECIAL_TEST_BINS)
   TEST_HELLO_DEP := $(BUILD_DIR)/test-hello
 endif
 

diff --git a/mk/tests.mk b/mk/tests.mk
@@ -1,6 +1,6 @@
 # Test targets
 
-.PHONY: test-hello test-all check test-gdbstub test-coreutils test-busybox \
+.PHONY: test-hello test-all check check-syscall-coverage test-gdbstub test-coreutils test-busybox \
         test-static-bins \
         test-dynamic test-dynamic-coreutils test-glibc-dynamic \
         test-glibc-coreutils test-perf \
@@ -15,8 +15,12 @@ test-hello: $(ELFUSE_BIN) $(TEST_HELLO_DEP)
 	@printf "$(BLUE)▸ Running$(RESET) test-hello\n"
 	$(ELFUSE_BIN) $(TEST_DIR)/test-hello
 
+## Verify every dispatch.tbl syscall is referenced by tests/ (or exempted)
+check-syscall-coverage:
+	@python3 scripts/check-syscall-coverage.py
+
 ## Run the unit test suite plus busybox applet validation
-check: $(ELFUSE_BIN) $(TEST_DEPS)
+check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage
 	@bash tests/driver.sh -e $(ELFUSE_BIN) -d $(TEST_DIR) -v
 	@printf "\n$(BLUE)━━━ proctitle low-stack regression ━━━$(RESET)\n"
 	@$(MAKE) --no-print-directory test-proctitle-low-stack
@@ -124,7 +128,7 @@ test-coreutils: $(ELFUSE_BIN)
 		exit 1; \
 	fi
 	@if [ "$(COREUTILS_BIN)" = "$(FIXTURES_DIR)/aarch64-musl/dyn-bin" ]; then \
-		bash tests/test-coreutils-smoke.sh $(ELFUSE_BIN) $(COREUTILS_BIN) $(SYSROOT_DIR); \
+		COREUTILS_PROFILE=smoke bash tests/test-coreutils.sh $(ELFUSE_BIN) $(COREUTILS_BIN) $(SYSROOT_DIR); \
 	elif [ -n "$(SYSROOT_DIR)" ] && [ -d "$(SYSROOT_DIR)" ]; then \
 		bash tests/test-coreutils.sh $(ELFUSE_BIN) $(COREUTILS_BIN) $(SYSROOT_DIR); \
 	else \
@@ -270,7 +274,7 @@ test-dynamic-coreutils: $(ELFUSE_BIN)
 		exit 1; \
 	fi
 	@if [ "$(DYNAMIC_COREUTILS_BIN)" = "$(FIXTURES_DIR)/aarch64-musl/dyn-bin" ]; then \
-		bash tests/test-coreutils-smoke.sh $(ELFUSE_BIN) $(DYNAMIC_COREUTILS_BIN) $(SYSROOT_DIR); \
+		COREUTILS_PROFILE=smoke bash tests/test-dynamic-coreutils.sh $(ELFUSE_BIN) $(SYSROOT_DIR) $(DYNAMIC_COREUTILS_BIN); \
 	else \
 		bash tests/test-dynamic-coreutils.sh $(ELFUSE_BIN) $(SYSROOT_DIR) $(DYNAMIC_COREUTILS_BIN); \
 	fi

diff --git a/scripts/check-syscall-coverage.py b/scripts/check-syscall-coverage.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+"""Best-effort syscall coverage audit for dispatch.tbl against tests/."""
+
+from __future__ import annotations
+
+import pathlib
+import re
+import sys
+
+ROOT = pathlib.Path(__file__).resolve().parent.parent  # two levels above this script
+DISPATCH = ROOT / "src" / "syscall" / "dispatch.tbl"  # table whose entries are audited
+TESTS = ROOT / "tests"  # tree scanned for references
+
+ENTRY_RE = re.compile(r"^(SYS_[A-Za-z0-9_]+)\s+(sc_[A-Za-z0-9_]+)\s+([01])$")  # only group 1 is read
+
+ALIASES: dict[str, set[str]] = {  # dispatch.tbl name -> other names whose use also counts
+    "faccessat": {"faccessat2"},
+    "renameat": {"renameat2"},
+    # Linux syscall name vs. libc wrapper name. For the entries below, the
+    # key is the dispatch.tbl name (64-bit aarch64) and the value holds the
+    # libc function names that route through that syscall.
+    "pread64": {"pread"},
+    "pwrite64": {"pwrite"},
+    "epoll_pwait": {"epoll_wait"},
+    "eventfd2": {"eventfd"},
+    "rt_sigaction": {"sigaction"},
+    "rt_sigprocmask": {"sigprocmask"},
+    "signalfd4": {"signalfd"},
+}
+
+INDIRECT_COVERAGE: dict[str, str] = {  # syscall name -> reason it is exempt from the direct-reference check
+    "getxattr": "Covered indirectly through xattr plumbing and O_PATH rejection paths.",
+    "lgetxattr": "Symlink xattr semantics are filesystem-sensitive; audit via fs-xattr code and negative-path tests.",
+    "lsetxattr": "Symlink xattr semantics are filesystem-sensitive; audit via fs-xattr code and negative-path tests.",
+    "listxattr": "Covered indirectly through xattr plumbing; success-path coverage is filesystem-dependent.",
+    "llistxattr": "Symlink xattr list semantics are filesystem-sensitive; retained as indirect coverage.",
+    "flistxattr": "Covered indirectly through xattr plumbing and fd-based xattr checks.",
+    "fgetxattr": "Covered indirectly through xattr plumbing and fd-based xattr checks.",
+    "lremovexattr": "Symlink xattr semantics are filesystem-sensitive; retained as indirect coverage.",
+    "rt_sigsuspend": "Signal suspension is exercised by higher-level signal tests; direct raw coverage is timing-sensitive.",
+    "rt_sigpending": "Signal pending state is exercised indirectly by the signal suite.",
+    "ptrace": "Covered by debugger integration via tests/test-gdbstub.sh.",
+    "chroot": "Exercised only by the dynamic coreutils shell suite via the chroot(8) applet; the syscall itself has no dedicated C test (requires elevated privilege).",
+    "truncate": "Only ftruncate(2) is exercised directly; path-based truncate is exercised by coreutils 'truncate' applet in shell suites.",
+    "rt_sigreturn": "Kernel-only return-from-handler trampoline; invoked implicitly by every signal handler exit. No userspace callers.",
+    "exit_group": "Invoked implicitly by glibc/musl _exit() and exit(); every test process exits through this syscall.",
+    "get_robust_list": "Pthread-internal: glibc may set/get a robust-list pointer transparently during thread setup; rarely called directly by application code.",
+    "set_robust_list": "Pthread-internal: glibc and musl issue set_robust_list during thread bring-up via a path that the audit corpus does not call directly.",
+    "readlinkat": "Exercised indirectly through libc readlink() and the proc/openat symlink-resolution paths in test-procfs-exec; no direct readlinkat() call in C tests.",
+    "faccessat": "Exercised indirectly through libc access() and the coreutils suite (test, ls, cp); faccessat2 has no direct call-shape match either.",
+}
+
+
+def load_dispatch_names() -> list[str]:
+    """Return the syscall name of every dispatch.tbl row matching ENTRY_RE,
+    in file order, with the leading "SYS_" removed."""
+    rows = DISPATCH.read_text(encoding="utf-8").splitlines()
+    hits = (ENTRY_RE.match(row.strip()) for row in rows)
+    # [4:] drops the four characters of "SYS_"; non-matching rows are skipped.
+    return [hit.group(1)[4:] for hit in hits if hit]
+
+
+C_SUFFIXES = (".c", ".h")
+
+# One scanner for literals and comments: a literal match is kept verbatim,
+# so comment markers inside "..." or '...' never start a comment.
+_C_TOKEN = re.compile(
+    r'"(?:\\.|[^"\\\n])*"|\'(?:\\.|[^\'\\\n])*\'|/\*.*?\*/|//[^\n]*', re.DOTALL
+)
+
+
+def strip_c_comments(text: str) -> str:
+    """Replace each C block/line comment with one space, keeping string and
+    char literals, so "// TODO: test sync(2)" cannot falsely cover a syscall
+    and a literal such as "/tmp/*" cannot swallow the code that follows it."""
+    return _C_TOKEN.sub(lambda m: m[0] if m[0][0] in "\"'" else " ", text)
+
+
+def load_test_corpora() -> tuple[str, str]:
+    """Read every file under tests/ and return (c_corpus, other_corpus).
+
+    Files whose suffix is in C_SUFFIXES are passed through
+    strip_c_comments() and joined into c_corpus; every other file is
+    joined verbatim into other_corpus. The split keeps shell scripts
+    that run coreutils applets ("run sync 0", "run kill ...") from
+    covering the like-named syscalls.
+    """
+    buckets: dict[bool, list[str]] = {True: [], False: []}
+    files = (p for p in sorted(TESTS.rglob("*")) if p.is_file())
+    for src in files:
+        # Undecodable bytes are dropped rather than aborting the scan.
+        body = src.read_text(encoding="utf-8", errors="ignore")
+        is_c = src.suffix in C_SUFFIXES
+        buckets[is_c].append(strip_c_comments(body) if is_c else body)
+
+    return "\n".join(buckets[True]), "\n".join(buckets[False])
+
+
+def has_direct_reference(name: str, c_corpus: str, other_corpus: str) -> bool:
+    """Return True when the test corpora reference syscall `name` directly.
+
+    C corpus: a call shape ("name(") or a syscall-number macro (SYS_name,
+    __NR_name) counts. That accepts libc wrappers (open(...), read(...))
+    and direct syscall(SYS_*, ...) uses, while rejecting bare words in
+    TEST() labels and messages such as FAIL("child sync recv").
+    Non-C corpus (shell, Python): only the syscall-number macros count,
+    because coreutils applet names (sync, kill, chroot, chmod) share
+    words with syscalls.
+    """
+
+    # Syscall-number macros are accepted in both corpora.
+    macro = rf"\bSYS_{name}\b|\b__NR_{name}\b"
+    # The call shape is honoured only in C sources.
+    call_or_macro = rf"\b{name}\s*\(|{macro}"
+
+    if re.search(call_or_macro, c_corpus) is not None:
+        return True
+    return re.search(macro, other_corpus) is not None
+
+
+def main() -> int:
+    """Audit dispatch.tbl against tests/; return 0 on PASS, 1 on failure.
+    An empty dispatch.tbl parse is a failure, not a vacuous PASS."""
+    names = load_dispatch_names()
+    if not names:
+        print(f"No syscall entries parsed from {DISPATCH}", file=sys.stderr)
+        return 1
+    c_corpus, other_corpus = load_test_corpora()
+
+    def covered(name: str) -> bool:
+        # Direct reference, reference to an alias, or a documented exemption.
+        candidates = [name, *sorted(ALIASES.get(name, set()))]
+        if any(has_direct_reference(c, c_corpus, other_corpus) for c in candidates):
+            return True
+        return name in INDIRECT_COVERAGE
+
+    missing = [name for name in names if not covered(name)]
+    if missing:
+        print("Uncovered syscalls in dispatch.tbl:", file=sys.stderr)
+        for name in missing:
+            print(f"  - {name}", file=sys.stderr)
+        return 1
+    print("syscall coverage audit: PASS")
+    for name, reason in sorted(INDIRECT_COVERAGE.items()):
+        print(f"  indirect {name}: {reason}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/src/syscall/abi.h b/src/syscall/abi.h
@@ -638,6 +638,10 @@ typedef struct {
 #define LINUX_F_SEAL_WRITE 0x0008
 #define LINUX_F_SEAL_FUTURE_WRITE 0x0010
 
+/* memfd_create flags (MFD_*). */
+#define LINUX_MFD_CLOEXEC 0x0001U
+#define LINUX_MFD_ALLOW_SEALING 0x0002U
+
 /* fcntl sealing commands */
 #define LINUX_F_ADD_SEALS 1033
 #define LINUX_F_GET_SEALS 1034

diff --git a/src/syscall/syscall.c b/src/syscall/syscall.c
@@ -1276,13 +1276,20 @@ static int64_t sc_memfd_create(guest_t *g,
                                uint64_t x5,
                                bool verbose)
 {
-    (void) g;
-    (void) x0;
     (void) x2;
     (void) x3;
     (void) x4;
     (void) x5;
     (void) verbose;
+    if (!x0)
+        return -LINUX_EFAULT;
+
+    const unsigned int flags = (unsigned int) x1;
+
+    char first = '\0';
+    if (guest_read_small(g, x0, &first, sizeof(first)) < 0)
+        return -LINUX_EFAULT;
+
     char template[] = "/tmp/elfuse-memfd-XXXXXX";
     int fd = mkstemp(template);
     if (fd < 0)
@@ -1293,9 +1300,10 @@ static int64_t sc_memfd_create(guest_t *g,
         close(fd);
         return linux_errno();
     }
-    if ((int) x1 & 1)
+    if (flags & LINUX_MFD_CLOEXEC)
         fd_table[gfd].linux_flags |= LINUX_O_CLOEXEC;
-    fd_table[gfd].seals = ((int) x1 & 2) ? 0 : LINUX_F_SEAL_SEAL;
+    fd_table[gfd].seals =
+        (flags & LINUX_MFD_ALLOW_SEALING) ? 0 : LINUX_F_SEAL_SEAL;
     return gfd;
 }
 

diff --git a/tests/driver.sh b/tests/driver.sh
@@ -27,6 +27,7 @@ FILTER=""
 LIST_ONLY=0
 VERBOSE=0
 TAP=0
+ALLOW_MISSING_BINARIES="${ALLOW_MISSING_BINARIES:-auto}"
 
 usage()
 {
@@ -91,6 +92,32 @@ case "$TESTDIR" in
     *) TESTDIR_ABS="$REPO_ROOT/$TESTDIR" ;;
 esac
 
+# Canonicalize before the auto-policy comparison so that equivalent paths
+# (./build, symlinked build dir, trailing-slash) still resolve to the
+# default-strict branch instead of silently flipping into allow-missing
+# mode. If the dir does not exist yet, fall back to the raw string; the
+# per-test "not built" check still fires later.
+canonicalize()
+{
+    if [ ! -d "$1" ]; then
+        printf '%s' "$1" # not a directory (yet): echo it back unchanged
+        return
+    fi
+    (cd "$1" && pwd -P) # subshell leaves the caller's cwd alone
+}
+
+if [ "$ALLOW_MISSING_BINARIES" = "auto" ]; then
+    testdir_canon=$(canonicalize "$TESTDIR_ABS")
+    build_canon=$(canonicalize "$REPO_ROOT/build")
+    bin_canon=$(canonicalize "$REPO_ROOT/build/bin")
+    # auto policy: a missing binary is a failure (0) when the test dir is
+    # the repo's own build/ or build/bin, and a skip (1) for any other dir.
+    case "$testdir_canon" in
+        "$build_canon" | "$bin_canon") ALLOW_MISSING_BINARIES=0 ;;
+        *) ALLOW_MISSING_BINARIES=1 ;;
+    esac
+fi
+
 if [ ! -f "$TEST_LIST" ]; then
     echo "error: $TEST_LIST not found" >&2
     exit 1
@@ -271,16 +298,30 @@ for i in "${filtered_idx[@]}"; do
     done
 
     if [ ! -f "$binary" ]; then
+        if [ "$ALLOW_MISSING_BINARIES" -eq 1 ]; then
+            if [ "$TAP" -eq 1 ]; then
+                echo "ok $test_num - $name # SKIP binary not found"
+            else
+                if [ "$section" != "$prev_section" ]; then
+                    printf "%s\n" "$section"
+                    prev_section="$section"
+                fi
+                report_case skip "$name" ""
+            fi
+            skip=$((skip + 1))
+            continue
+        fi
+
         if [ "$TAP" -eq 1 ]; then
-            echo "ok $test_num - $name # SKIP binary not found"
+            echo "not ok $test_num - $name # missing binary: $binary"
         else
             if [ "$section" != "$prev_section" ]; then
                 printf "%s\n" "$section"
                 prev_section="$section"
             fi
-            report_case skip "$name" ""
+            report_case fail "$name" " (missing binary)"
         fi
-        skip=$((skip + 1))
+        fail=$((fail + 1))
         continue
     fi