Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,13 @@ $(BUILD_DIR)/test-proctitle-host: $(BUILD_DIR)/test-proctitle-host.o \
@echo " LD $@"
$(Q)$(CC) $(CFLAGS) -o $@ $^

# Link rule for the shebang parser host test. $^ expands to the two object
# prerequisites (test-shebang-host.o and core/elf.o); the order-only
# prerequisite $(BUILD_DIR) after the '|' is not part of $^.
## Build the shebang parsing host test (native macOS binary)
$(BUILD_DIR)/test-shebang-host: $(BUILD_DIR)/test-shebang-host.o \
$(BUILD_DIR)/core/elf.o | $(BUILD_DIR)
@echo " LD $@"
$(Q)$(CC) $(CFLAGS) -o $@ $^


# Guest test binaries (cross-compiled, aarch64-linux)
# Only used when GUEST_TEST_BINARIES is not set.

Expand Down
7 changes: 7 additions & 0 deletions mk/tests.mk
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ check: $(ELFUSE_BIN) $(TEST_DEPS) check-syscall-coverage \
@$(BUILD_DIR)/test-fork-ipc-protocol-host
@printf "\n$(BLUE)━━━ identity override unit test ━━━$(RESET)\n"
@$(BUILD_DIR)/test-identity-override-host
@printf "\n$(BLUE)━━━ shebang parser unit test ━━━$(RESET)\n"
@$(MAKE) --no-print-directory test-shebang-host
@printf "\n$(BLUE)━━━ proctitle argv-tail regression ━━━$(RESET)\n"
@$(MAKE) --no-print-directory test-proctitle-host
@printf "\n$(BLUE)━━━ proctitle low-stack regression ━━━$(RESET)\n"
Expand Down Expand Up @@ -584,3 +586,8 @@ test-fork-ipc-protocol-host: $(BUILD_DIR)/test-fork-ipc-protocol-host
## Run the deterministic argv-tail overshoot guard test
test-proctitle-host: $(BUILD_DIR)/test-proctitle-host
$(BUILD_DIR)/test-proctitle-host

# Shebang parser unit test
# The test binary is a prerequisite of this target, so make brings it up to
# date before the recipe runs it. The `check` target reaches this rule through
# a recursive $(MAKE) invocation.
## Run shebang parsing unit tests
test-shebang-host: $(BUILD_DIR)/test-shebang-host
$(BUILD_DIR)/test-shebang-host
86 changes: 86 additions & 0 deletions src/core/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <errno.h>

#include "core/elf.h"
#include "debug/log.h"
Expand Down Expand Up @@ -427,3 +428,88 @@ void elf_resolve_interp(const char *sysroot,
/* Strategy 3: use interp_path as-is */
str_copy_trunc(out, interp_path, out_sz);
}

/* Probe host_path for a "#!" script header and, if one is present, parse it.
 *
 * Detection and parsing are combined on purpose: the file is opened and read
 * exactly once, and a file that does not start with "#!" is reported with a
 * return value of 0 rather than an error.
 *
 * Only the first line is considered. It ends at the first '\n' or '\r', or at
 * end of file. Leading and trailing spaces/tabs are ignored. The first
 * whitespace-delimited word is the interpreter path; whatever follows it
 * (interior whitespace included) is passed on as a single argument.
 *
 * Returns:
 *   1  shebang parsed; interp_out holds the interpreter path and arg_out the
 *      optional argument (empty string when there is none)
 *   0  the file is not a shebang script
 *  <0  negative errno from open()/read(), or -ENOEXEC when the first line is
 *      empty, is not terminated within the 511 bytes examined, or when the
 *      interpreter or argument does not fit in interp_sz / arg_sz bytes
 */
int elf_read_shebang(const char *host_path,
                     char *interp_out,
                     size_t interp_sz,
                     char *arg_out,
                     size_t arg_sz)
{
    int fd = open(host_path, O_RDONLY);
    if (fd < 0) {
        return -errno;
    }

    /* One byte is kept back for the NUL terminator, so at most 511 bytes of
     * the file are examined. This is still more generous than Linux's
     * BINPRM_BUF_SIZE of 256.
     */
    char buf[512];
    ssize_t nread = read(fd, buf, sizeof(buf) - 1);
    close_keep_errno(fd); /* errno from a failed read() must survive close */

    if (nread < 0) {
        return -errno;
    }
    if (nread < 2 || buf[0] != '#' || buf[1] != '!') {
        return 0; /* Not a shebang script */
    }
    buf[nread] = '\0';

    /* Cut the buffer at the end of the first line. A full buffer without any
     * line ending means the line continues past what was read, so the
     * interpreter/argument could be truncated: reject instead of guessing.
     */
    char *eol = strpbrk(buf + 2, "\r\n");
    if (eol) {
        *eol = '\0';
    } else if (nread == (ssize_t) (sizeof(buf) - 1)) {
        return -ENOEXEC; /* Shebang line too long */
    }

    /* Skip blanks between "#!" and the interpreter path */
    char *interp = buf + 2;
    interp += strspn(interp, " \t");

    /* Drop trailing blanks. No '\r' or '\n' can be left at this point
     * because the line was already cut at the first one.
     */
    size_t len = strlen(interp);
    while (len > 0 && (interp[len - 1] == ' ' || interp[len - 1] == '\t')) {
        interp[--len] = '\0';
    }
    if (len == 0) {
        return -ENOEXEC; /* Empty shebang interpreter */
    }

    /* Split off the single optional argument. The line ends in a non-blank
     * character, so once the blanks after the separator are skipped the
     * argument is guaranteed to be non-empty and already right-trimmed.
     */
    const char *arg = "";
    char *sep = strpbrk(interp, " \t");
    if (sep) {
        *sep++ = '\0';
        arg = sep + strspn(sep, " \t");
    }

    if (str_copy_trunc(interp_out, interp, interp_sz) >= interp_sz) {
        return -ENOEXEC; /* Buffer too small */
    }
    if (str_copy_trunc(arg_out, arg, arg_sz) >= arg_sz) {
        return -ENOEXEC; /* Buffer too small */
    }

    return 1; /* Successfully parsed shebang */
}
20 changes: 20 additions & 0 deletions src/core/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,26 @@ void elf_resolve_interp(const char *sysroot,
char *out,
size_t out_sz);

/* Probe host_path for a "#!" script header and, when one is present, parse
 * it. Detection and parsing happen in a single call: a file that does not
 * begin with "#!" yields a return value of 0 rather than an error.
 *
 * On success the interpreter path is written to interp_out and the optional
 * argument to arg_out. Everything that follows the interpreter on the first
 * line is treated as ONE argument (surrounding spaces/tabs trimmed, interior
 * whitespace kept); arg_out is set to the empty string when there is no
 * argument. Both outputs are only meaningful when 1 is returned.
 *
 * The first line may end with LF (\n), CRLF (\r\n), or CR (\r). Only the
 * first 511 bytes of the file are examined; if those bytes fill the buffer
 * without containing a line ending, -ENOEXEC is returned.
 *
 * Returns:
 *   1  a shebang line was successfully parsed
 *   0  the file is not a shebang script
 *  <0  negative errno: failures opening or reading the file (e.g. -ENOENT),
 *      or -ENOEXEC for an empty interpreter, an over-long first line, or an
 *      interpreter/argument that does not fit in interp_sz / arg_sz bytes
 */
int elf_read_shebang(const char *host_path,
char *interp_out,
size_t interp_sz,
char *arg_out,
size_t arg_sz);

/* Translate ELF program-header flags (PF_R=4, PF_W=2, PF_X=1) into the
* R=1/W=2/X=4 bitset shared by both MEM_PERM_R/W/X (page-table permissions) and
* LINUX_PROT_READ/WRITE/EXEC (mmap prot bits).
Expand Down
123 changes: 116 additions & 7 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -453,16 +453,125 @@ int main(int argc, char **argv)
}

proc_set_sysroot(sysroot);
if (resolve_guest_elf_host_path(elf_path, elf_host_path,
sizeof(elf_host_path),
&elf_host_temp) < 0) {
log_error("failed to resolve ELF path %s: %s", elf_path,
strerror(errno));

int shebang_depth = 0;
const int max_shebang_depth = 5;

while (shebang_depth < max_shebang_depth) {
if (resolve_guest_elf_host_path(elf_path, elf_host_path,
sizeof(elf_host_path),
&elf_host_temp) < 0) {
log_error("failed to resolve ELF path %s: %s", elf_path,
strerror(errno));
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

/* Check if the file starts with "#!" */
char interp[LINUX_PATH_MAX];
char arg[LINUX_PATH_MAX];
int rc = elf_read_shebang(elf_host_path, interp, sizeof(interp), arg,
sizeof(arg));
if (rc == 0) {
/* Not a shebang script, proceed to boot */
break;
}

if (rc < 0) {
log_error("empty or invalid shebang interpreter in %s", elf_path);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

shebang_depth++;

/* Prepend interpreter (and argument if present) to guest_argv */
bool has_arg = (arg[0] != '\0');
int add_count = has_arg ? 2 : 1;
int new_argc = guest_argc + add_count;
const char **new_argv =
(const char **) calloc((size_t) new_argc, sizeof(char *));
if (!new_argv) {
log_error("out of memory");
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

new_argv[0] = strdup(interp);
if (!new_argv[0]) {
log_error("out of memory");
free((void *) new_argv);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}
if (has_arg) {
new_argv[1] = strdup(arg);
if (!new_argv[1]) {
log_error("out of memory");
free((void *) new_argv[0]);
free((void *) new_argv);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL,
guest_argv, guest_argc, elf_path,
sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}
}

/* Transfer ownership of the previous guest_argv elements */
for (int i = 0; i < guest_argc; i++) {
new_argv[i + add_count] = guest_argv[i];
}

free((void *) guest_argv);
guest_argv = new_argv;
guest_argc = new_argc;

/* Update elf_path to point to the interpreter path */
char *new_elf_path = strdup(interp);
if (!new_elf_path) {
log_error("out of memory");
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}
free(elf_path);
elf_path = new_elf_path;

/* Clean up any materialized temp file before resolving the next path */
if (elf_host_temp) {
unlink(elf_host_path);
elf_host_temp = false;
}
}

if (shebang_depth >= max_shebang_depth) {
log_error("too many levels of shebang recursion (max %d) resolving %s",
max_shebang_depth, argv[arg_start]);
cleanup_main_resources(&g, guest_initialized, &sysroot_mount,
have_host_cwd ? host_cwd : NULL, guest_argv,
guest_argc, elf_path, sysroot_path);
if (elf_host_temp)
unlink(elf_host_path);
return 1;
}

Expand Down
Loading
Loading