diff --git a/.github/ubsan-suppressions.txt b/.github/ubsan-suppressions.txt new file mode 100644 index 000000000..6a22b08a4 --- /dev/null +++ b/.github/ubsan-suppressions.txt @@ -0,0 +1,23 @@ +# UBSan runtime suppressions for rsync. +# +# These entries suppress intentional, architecture-safe UB that we do NOT want +# to fix in source. Every other UBSan check class runs FULLY. +# +# Format: CHECK_NAME:FUNCTION_PATTERN (one suppression per line) +# "alignment" fires when a misaligned pointer dereference is detected. +# +# byteorder.h -- deliberate unaligned 32/64-bit access on the !CAREFUL_ALIGNMENT +# (x86/amd64) fast path. The union-pun approach is intentional; the header's own +# comment documents this. CAREFUL_ALIGNMENT=1 selects a byte-shuffle fallback for +# strict-alignment architectures; the CI runner is x86-64 so the fast path is +# compiled. Suppressed at runtime rather than disabled class-wide so that any NEW +# misaligned access outside byteorder.h is still caught. +alignment:IVALu +alignment:SIVALu +alignment:IVAL64 +alignment:SIVAL64 +# log.c log_delete() -- intentional pool-style allocation: new_array0(char,...) + +# pointer-arithmetic offset then cast to struct file_struct*. Same x86 alignment- +# tolerance assumption as byteorder.h; not a memory-safety bug. +alignment:log_delete +alignment:log_formatted diff --git a/.github/workflows/analysis-codeql.yml b/.github/workflows/analysis-codeql.yml new file mode 100644 index 000000000..76bf1199b --- /dev/null +++ b/.github/workflows/analysis-codeql.yml @@ -0,0 +1,51 @@ +name: CodeQL (cpp) + +# GitHub CodeQL static analysis for C/C++. Uses a MANUAL build (not autobuild) +# because rsync needs ./prepare-source to generate sources before configure/make, +# which autobuild can miss. +# +# GATING: NON-gating by default -- CodeQL surfaces results in the repository's +# Security tab and as PR code-scanning annotations rather than failing this job. 
+# (Branch-protection "code scanning results" rules are the proper place to gate +# on CodeQL, configured in repo settings, not here.) CI-ONLY: no source/flag +# change; the build uses --disable-md2man like every other CI job. + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + schedule: + - cron: '17 5 * * 1' + workflow_dispatch: + +jobs: + analyze: + name: CodeQL analyze (cpp) + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: prep + run: | + sudo apt-get update + sudo apt-get install -y acl libacl1-dev attr libattr1-dev \ + liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: cpp + - name: Manual build + run: | + ./prepare-source + ./configure --disable-md2man + make + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:cpp" diff --git a/.github/workflows/analysis-cppcheck.yml b/.github/workflows/analysis-cppcheck.yml new file mode 100644 index 000000000..0c925fa96 --- /dev/null +++ b/.github/workflows/analysis-cppcheck.yml @@ -0,0 +1,65 @@ +name: Static analysis (cppcheck) + +# cppcheck baseline scan. GATING: NON-gating -- `--error-exitcode=0` means +# cppcheck never fails the job; it produces a baseline report artifact + a +# job-summary count. No local baseline (cppcheck is not in the dev shell), so +# this stays non-gating until a CI baseline is triaged. CI-ONLY: no source/flag +# change, cppcheck only reads the sources. 
+ +on: + push: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/analysis-cppcheck.yml' + pull_request: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/analysis-cppcheck.yml' + schedule: + - cron: '17 3 * * *' + workflow_dispatch: + +jobs: + cppcheck: + runs-on: ubuntu-latest + name: cppcheck + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: prep + run: | + sudo apt-get update + sudo apt-get install -y cppcheck + - name: prepare-source + # Generate config/derived sources so cppcheck sees the real tree. + run: ./prepare-source || true + - name: cppcheck + run: | + set -o pipefail + cppcheck --enable=warning,portability --error-exitcode=0 \ + --inline-suppr --std=c11 \ + --suppress=missingInclude --suppress=missingIncludeSystem \ + -i zlib -i popt \ + . 2>&1 | tee cppcheck.log + - name: report + if: always() + run: | + { + echo "## cppcheck (baseline, non-gating)" + n=$(grep -c "): " cppcheck.log || true) + echo "Findings: ${n:-0}" + echo '```' + grep -oE '\[[a-zA-Z]+\]$' cppcheck.log | sort | uniq -c | sort -rn | head -20 || true + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + - name: upload report + if: always() + uses: actions/upload-artifact@v4 + with: + retention-days: 45 + name: cppcheck-log + path: cppcheck.log + if-no-files-found: ignore diff --git a/.github/workflows/analysis-gcc-fanalyzer.yml b/.github/workflows/analysis-gcc-fanalyzer.yml new file mode 100644 index 000000000..9ee5affe8 --- /dev/null +++ b/.github/workflows/analysis-gcc-fanalyzer.yml @@ -0,0 +1,79 @@ +name: Static analysis (gcc -fanalyzer) + +# GCC's built-in static analyzer, enabled purely via env CFLAGS passthrough +# (configure.ac preserves CFLAGS -- the same mechanism --enable-coverage uses). +# +# GATING: NON-gating. 
On HEAD this is clean (0 warnings locally with gcc 15), +# but -fanalyzer's findings vary across gcc versions and configure paths and are +# prone to false positives, so a finding produces an artifact + job-summary note +# rather than failing the build. The build step is continue-on-error. +# +# CI-ONLY: no default flags change; -fanalyzer is injected only into this job's +# environment. No source or Makefile change. + +on: + push: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/analysis-gcc-fanalyzer.yml' + pull_request: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/analysis-gcc-fanalyzer.yml' + schedule: + - cron: '17 6 * * *' + workflow_dispatch: + +jobs: + fanalyzer: + runs-on: ubuntu-latest + name: gcc -fanalyzer + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: prep + run: | + sudo apt-get update + sudo apt-get install -y gcc acl libacl1-dev attr libattr1-dev \ + liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + - name: prepare-source + run: ./prepare-source + - name: configure + # -fanalyzer needs an optimized build to be effective; -O2 also matches the + # flags that surface the most analyzer paths. + env: + CFLAGS: "-fanalyzer -O2" + run: ./configure --disable-md2man + - name: make + id: build + continue-on-error: true + run: | + set -o pipefail + make 2>&1 | tee fanalyzer.log + # Fail this (non-gating) step if any analyzer warning was emitted, so the + # report step can flag it; continue-on-error keeps the workflow green. + ! 
grep -q "warning:" fanalyzer.log + - name: report + if: always() + run: | + { + echo "## gcc -fanalyzer" + n=$(grep -c "warning:" fanalyzer.log || true) + echo "Analyzer warnings: ${n:-0}" + if [ "${n:-0}" != "0" ]; then + echo '```' + grep -oE '\[-W[a-z0-9-]+\]' fanalyzer.log | sort | uniq -c | sort -rn + echo '```' + fi + } >> "$GITHUB_STEP_SUMMARY" + - name: upload log + if: always() + uses: actions/upload-artifact@v4 + with: + retention-days: 45 + name: fanalyzer-log + path: fanalyzer.log + if-no-files-found: ignore diff --git a/.github/workflows/analysis-scan-build.yml b/.github/workflows/analysis-scan-build.yml new file mode 100644 index 000000000..b7bea7249 --- /dev/null +++ b/.github/workflows/analysis-scan-build.yml @@ -0,0 +1,70 @@ +name: Static analysis (clang scan-build) + +# Clang static analyzer over a full ./configure && make build. +# +# GATING: NON-gating. scan-build's analyzer is prone to false positives and no +# local baseline could be established (the nix-packaged scan-build cannot find +# ccc-analyzer). The job runs `scan-build --status-bugs make`, which exits +# non-zero when bugs are found, but the step is wrapped in continue-on-error so +# a finding does NOT fail the workflow -- it surfaces as an uploaded HTML report +# + a job-summary note. Promote to gating only after a clean CI baseline exists. +# +# CI-ONLY: this changes no default build flags and no source. The analyzer wraps +# the normal configure/make; exotic-platform builds are unaffected. 
+ +on: + push: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/analysis-scan-build.yml' + pull_request: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/analysis-scan-build.yml' + schedule: + - cron: '17 7 * * *' + workflow_dispatch: + +jobs: + scan-build: + runs-on: ubuntu-latest + name: clang scan-build + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: prep + run: | + sudo apt-get update + sudo apt-get install -y clang clang-tools acl libacl1-dev attr libattr1-dev \ + liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + - name: prepare-source + run: ./prepare-source + - name: scan-build configure + run: scan-build -o scan-build-report ./configure --disable-md2man + - name: scan-build make + id: scan + continue-on-error: true + run: scan-build -o scan-build-report --status-bugs make + - name: report + if: always() + run: | + { + echo "## clang scan-build" + if [ "${{ steps.scan.outcome }}" = "success" ]; then + echo "No analyzer bugs reported." + else + echo "scan-build reported findings (non-gating baseline). See the" + echo "**scan-build-report** artifact for the HTML report." + fi + } >> "$GITHUB_STEP_SUMMARY" + - name: upload report + if: always() + uses: actions/upload-artifact@v4 + with: + retention-days: 45 + name: scan-build-report + path: scan-build-report + if-no-files-found: ignore diff --git a/.github/workflows/hardened-build.yml b/.github/workflows/hardened-build.yml new file mode 100644 index 000000000..740652170 --- /dev/null +++ b/.github/workflows/hardened-build.yml @@ -0,0 +1,75 @@ +name: Hardened build (-Werror) + +# Compile the whole tree under hardened, warning-fatal flags to surface latent +# warnings. +# +# GATING: GATING. Verified clean on HEAD locally -- 58 translation units, 0 +# warnings/errors -- so a NEW warning under these flags MUST fail CI. 
-Werror is +# fatal ONLY in this job; it does not affect default builds. +# +# CI-ONLY: flags are injected via env CFLAGS passthrough. No default-flag change, +# no source/Makefile change -- exotic platforms are untouched. +# +# IMPORTANT mechanic: -Werror is applied at MAKE time, NOT at configure time. +# autoconf feature-detection probes legitimately emit warnings; with -Werror in +# CFLAGS during ./configure those probes "fail", so configure mis-detects e.g. +# struct addrinfo / sockaddr_storage as absent and emits colliding fallback +# definitions (redefinition errors in lib/addrinfo.h). So: configure with the +# hardened flags MINUS -Werror, then `make CFLAGS="$MAKE_CFLAGS"` (adds -Werror). +# The make override must re-add -DHAVE_CONFIG_H because overriding CFLAGS on the +# make line replaces autoconf's substituted @CFLAGS@ (which carried it); the +# .c.o rule keeps -I./-I$(srcdir) but not the substituted CFLAGS. +# _FORTIFY_SOURCE=2 requires optimization, so -O2 is included. + +on: + push: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/hardened-build.yml' + pull_request: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/hardened-build.yml' + schedule: + - cron: '17 2 * * *' + workflow_dispatch: + +jobs: + hardened: + runs-on: ubuntu-latest + name: hardened -Werror (${{ matrix.cc }}) + strategy: + fail-fast: false + matrix: + cc: [ gcc, clang ] + env: + # Flags WITHOUT -Werror, used for ./configure so feature probes pass. + CONFIGURE_CFLAGS: >- + -Wall -Wextra -Wformat -Wformat-security + -D_FORTIFY_SOURCE=2 -fstack-protector-strong -O2 + # Full hardened set WITH -Werror, applied at make time only. 
+ MAKE_CFLAGS: >- + -Wall -Wextra -Werror -Wformat -Wformat-security + -D_FORTIFY_SOURCE=2 -fstack-protector-strong -O2 -DHAVE_CONFIG_H + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: prep + run: | + sudo apt-get update + sudo apt-get install -y ${{ matrix.cc }} acl libacl1-dev attr libattr1-dev \ + liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + - name: prepare-source + run: ./prepare-source + - name: configure (hardened, no -Werror) + env: + CC: ${{ matrix.cc }} + CFLAGS: ${{ env.CONFIGURE_CFLAGS }} + run: ./configure --disable-md2man + - name: make (-Werror) + run: make CFLAGS="${MAKE_CFLAGS}" + - name: info + run: ./rsync --version diff --git a/.github/workflows/sanitizers-asan-ubsan.yml b/.github/workflows/sanitizers-asan-ubsan.yml new file mode 100644 index 000000000..1a99c9e1a --- /dev/null +++ b/.github/workflows/sanitizers-asan-ubsan.yml @@ -0,0 +1,92 @@ +name: Sanitizers (ASan + UBSan) + +# Address + Undefined-Behavior sanitizer build, exercised by the full test +# suite, the TCP-bound equivalence matrix, and the WS2 fuzz regression. +# +# GATING: GATING. Verified clean on HEAD locally: +# make check -> 99 passed, 6 skipped, 0 failed +# make check-equiv -> 1 passed (real loopback rsyncd, --use-tcp) +# fuzz regression -> 829041 runs, 0 crashes +# A new ASan/UBSan error (heap/stack overflow, use-after-free, UB) MUST fail CI. +# +# UBSan runs FULL (all check classes enabled, including pointer-overflow, +# nonnull-attribute, null, alignment). The two zero-count edge cases in xattrs.c +# are FIXED in source so UBSan is silent there. The deliberate unaligned-access +# sites (byteorder.h IVALu/SIVALu/IVAL64/SIVAL64 on !CAREFUL_ALIGNMENT x86, and +# log_delete/log_formatted pool-allocated file_struct) are suppressed at runtime +# via .github/ubsan-suppressions.txt -- this is a narrow per-function suppression, +# not a class-wide -fno-sanitize, so NEW misaligned accesses elsewhere are caught. 
+# ASan itself is left fully enabled. +# +# CI-ONLY: sanitizers are injected via env CFLAGS passthrough (the mechanism +# configure.ac already supports for --enable-coverage). No default build flags +# and no source/Makefile change -- exotic-platform builds are untouched. + +on: + push: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/sanitizers-asan-ubsan.yml' + pull_request: + branches: [ master ] + paths-ignore: + - '.github/workflows/*.yml' + - '!.github/workflows/sanitizers-asan-ubsan.yml' + schedule: + - cron: '17 4 * * *' + workflow_dispatch: + +env: + # Halt on the first sanitizer error and dump a stack so the failing step is + # actionable. detect_leaks=0: rsync's short-lived processes intentionally let + # the OS reclaim some allocations; LeakSanitizer noise is out of scope here. + # suppressions= silences the intentional unaligned-access sites in byteorder.h + # and log.c; all other UBSan checks remain fully active. The path is absolute + # so it resolves correctly when rsync child processes cd into test scratch dirs. + UBSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/ubsan-suppressions.txt:halt_on_error=1:print_stacktrace=1 + ASAN_OPTIONS: halt_on_error=1:abort_on_error=1:detect_leaks=0 + +jobs: + asan-ubsan: + runs-on: ubuntu-latest + name: ASan + UBSan + check + check-equiv + fuzz + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: prep + run: | + sudo apt-get update + sudo apt-get install -y clang acl libacl1-dev attr libattr1-dev \ + liblz4-dev libzstd-dev libxxhash-dev python3-cmarkgfm openssl + - name: prepare-source + run: ./prepare-source + - name: configure (sanitizers) + env: + CC: clang + # fuzzer-no-link instruments the rsync wire-parser objects for the fuzz + # regression below while staying a normal (non-libFuzzer) rsync binary. 
+ CFLAGS: >- + -g -O1 -fno-omit-frame-pointer + -fsanitize=address,undefined,fuzzer-no-link + run: ./configure --disable-md2man + - name: make + run: make + - name: info + run: ./rsync --version + # `make check` runs the suite over the secure stdio-pipe transport. Tests that + # need privileges or absent features SKIP cleanly (exit 77) and do NOT fail the + # job -- the runner distinguishes skip (77) from fail. No sudo here so the + # ASAN/UBSAN env propagates to the test rsync processes unchanged. + - name: make check (ASan/UBSan) + run: make check + # The equivalence matrix with a GUARANTEED real loopback rsyncd (--use-tcp). + - name: make check-equiv (ASan/UBSan, TCP) + run: make check-equiv + # WS2 fuzz regression: bounded corpus replay + short top-up under the same + # sanitizers; exits non-zero on any crash. + - name: fuzz regression (ASan/UBSan) + env: + FUZZ_MAX_TIME: "30" + run: make -C fuzz regression diff --git a/Makefile.in b/Makefile.in index bde2c5897..437ec7fde 100644 --- a/Makefile.in +++ b/Makefile.in @@ -363,6 +363,16 @@ check29: all $(CHECK_PROGS) $(CHECK_SYMLINKS) check30: all $(CHECK_PROGS) $(CHECK_SYMLINKS) $(srcdir)/runtests.py --rsync-bin=`pwd`/rsync$(EXEEXT) -j $(CHECK_J) --protocol=30 +# Transport-equivalence matrix with a GUARANTEED real bound socket. +# Unlike plain `make check` (which runs the *-equiv tests with the TCP-daemon +# leg skipped via require_tcp/SkipLeg), this runs them with --use-tcp so the +# daemon legs bind a real 127.0.0.1 rsyncd -- socket-bound coverage is not +# optional here. Scoped to the *-equiv tests so the loopback ports are claimed +# for the equivalence matrix only. +.PHONY: check-equiv +check-equiv: all $(CHECK_PROGS) $(CHECK_SYMLINKS) + $(srcdir)/runtests.py --rsync-bin=`pwd`/rsync$(EXEEXT) -j $(CHECK_J) --use-tcp '*-equiv' + # Whole-suite gcov coverage report (HTML, with branch + decision coverage). # Requires a build configured with --enable-coverage and the `gcovr` tool # (pip install gcovr). 
Runs the suite in parallel (COVERAGE_J, default CHECK_J): diff --git a/configure.ac b/configure.ac index 4faab5fcb..967dc14f4 100644 --- a/configure.ac +++ b/configure.ac @@ -96,6 +96,24 @@ if test x"$enable_coverage" = x"yes"; then [Flush gcov counters at exit_cleanup: rsync's children exit via _exit(), which bypasses the gcov atexit handler, so without this no .gcda is written for the receiver/generator/daemon-worker processes.]) fi +dnl Hardened build. Appends defensive compiler flags (FORTIFY_SOURCE, stack +dnl protector, format-string warnings). Defaults OFF: a plain ./configure is +dnl unchanged, so exotic platforms are unaffected. NOTE: -Werror is deliberately +dnl NOT added here -- these flags run BEFORE autoconf's feature-detection probes +dnl (struct addrinfo, sockaddr_storage, ...), and -Werror would make a probe's +dnl benign warning look like a failure, causing mis-detection and colliding +dnl fallback definitions. _FORTIFY_SOURCE=2 needs optimization, so -O2 is added +dnl only when the user has not already requested an optimization level. +AC_ARG_ENABLE(hardened, + AS_HELP_STRING([--enable-hardened],[build with hardening flags (FORTIFY_SOURCE, stack protector, format-security)])) +if test x"$enable_hardened" = x"yes"; then + case " $CFLAGS " in + *\ -O*) ;; dnl user already set an -O level; respect it + *) CFLAGS="$CFLAGS -O2" ;; + esac + CFLAGS="$CFLAGS -Wall -Wextra -Wformat -Wformat-security -D_FORTIFY_SOURCE=2 -fstack-protector-strong" +fi + dnl openat2(RESOLVE_BENEATH) is used on Linux 5.6+ for the secure resolver. 
dnl --disable-openat2 forces the portable per-component O_NOFOLLOW fallback to dnl run as the primary resolver on ordinary Linux, so that tier is exercised diff --git a/flist.c b/flist.c index 2ec07f54a..70580c781 100644 --- a/flist.c +++ b/flist.c @@ -3432,3 +3432,50 @@ struct file_list *get_dirlist(char *dirname, int dlen, int flags) return dirlist; } + +#ifdef RSYNC_FUZZ_FLIST +/* Fuzzing hook (compiled ONLY when RSYNC_FUZZ_FLIST is defined; the normal + * rsync build never sees this). It exposes the file-internal static function + * recv_file_entry() to fuzz/fuzz_flist.c, replicating exactly the per-entry + * work the real caller (recv_file_list, lines ~2625-2658) does around it: + * flist_expand(flist, 1); file = recv_file_entry(f, flist, flags); + * flist->files[flist->used++] = file; + * It deliberately omits the heavy, non-target tail of recv_file_list + * (flist_sort_and_clean / recv_id_list / fsort) so the fuzzer stays focused on + * the wire parser and its stateful lastname[] reconstruction. The harness owns + * setjmp; an over-range guard's overflow_exit/exit_cleanup longjmps out. */ +struct file_list *fuzz_flist_new(void) /* release the result with fuzz_flist_free() */ +{ + struct file_list *flist = new0(struct file_list); + flist->file_pool = pool_create(NORMAL_EXTENT, 0, _out_of_memory, POOL_INTERN); + if (!flist->file_pool) /* pool_create() returned NULL */ + out_of_memory("fuzz_flist_new"); + flist->ndx_start = 0; + flist->pool_boundary = pool_boundary(flist->file_pool, 0); + /* recv_file_entry consults first_flist/cur_flist indirectly via globals + * in some paths; mirror flist_new's first-list bookkeeping. 
*/ + first_flist = cur_flist = flist->prev = flist; /* sole list: first, current, and its own prev */ + return flist; +} + +struct file_struct *fuzz_recv_file_entry(int f, struct file_list *flist, int xflags) +{ + struct file_struct *file; + flist_expand(flist, 1); /* same expand-then-parse order as recv_file_list */ + file = recv_file_entry(f, flist, xflags); + if (file) /* append only a non-NULL result */ + flist->files[flist->used++] = file; + return file; +} + +void fuzz_flist_free(struct file_list *flist) +{ + if (!flist) /* NULL is accepted and ignored */ + return; + if (flist->file_pool) + pool_destroy(flist->file_pool); + free(flist->files); + free(flist); + first_flist = cur_flist = NULL; /* clear the globals fuzz_flist_new() pointed at this list */ +} +#endif /* RSYNC_FUZZ_FLIST */ diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 000000000..57d10fdb2 --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,25 @@ +# Build artifacts (harness binaries live at the fuzz/ root; anchor with a +# leading slash so the names do not also match corpus/HARNESS/ directories) +*.o +/fuzz_io +/fuzz_flist +/fuzz_token +/fuzz_deflated_token +/fuzz_xattrs +/fuzz_recv_discard + +# libFuzzer crash / leak reproducers (a real find should be MINIMIZED and +# committed under corpus/HARNESS/ with a descriptive name, not left here) +crash-* +leak-* +timeout-* +oom-* + +# libFuzzer-generated corpus entries (40-hex-char names). The committed seed +# corpus uses descriptive names; generated growth is not tracked. +corpus/*/[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]* + +# Transient fuzzer working corpora (deep-run mutation pools); only the +# descriptive seeds under corpus/HARNESS/ are tracked. +/corpus_flist/ +/corpus_xattrs/ diff --git a/fuzz/Makefile b/fuzz/Makefile new file mode 100644 index 000000000..3261f48eb --- /dev/null +++ b/fuzz/Makefile @@ -0,0 +1,166 @@ +# fuzz/Makefile - libFuzzer harnesses for rsync's UNTRUSTED-PEER wire parsers. +# +# This is the ONLY build-system addition in the fuzzing workstream. The +# top-level Makefile.in is untouched. The wire-parser objects (io.o, token.o, +# ...) 
are produced by the normal rsync build, which must have been configured +# with the campaign sanitizer CFLAGS so those objects are instrumented: +# +# CFLAGS="-g -O1 -fsanitize=fuzzer-no-link,address,undefined -fno-omit-frame-pointer" \ +# CC=clang ./configure --disable-md2man +# make io.o token.o # (configure preserves env CFLAGS) +# +# Then build the harnesses: +# +# make -C fuzz # builds every harness +# make -C fuzz fuzz_io # one harness +# +# Run regression mode (bounded, exits non-zero on crash): +# +# make -C fuzz regression +# ./fuzz/run-regression.sh # equivalent standalone runner +# +# Inside the nix shell, prefix with: +# nix develop path:$HOME/git/rsync --command bash -c 'make -C fuzz ...' +# Use clang unless CC was set explicitly ("CC ?= clang" is a no-op: make predefines CC=cc). +CC := $(if $(filter default undefined,$(origin CC)),clang,$(CC)) +RSYNC_DIR := .. + +# Harnesses LINK with -fsanitize=fuzzer (provides main + the coverage runtime); +# address,undefined match the instrumentation baked into the rsync objects. +FUZZ_SAN := -fsanitize=fuzzer,address,undefined +# Compiling our own harness/stub TUs: instrument them too, but no libFuzzer main. +HARNESS_SAN := -fsanitize=fuzzer-no-link,address,undefined -fno-omit-frame-pointer + +CPPFLAGS := -I$(RSYNC_DIR) -I$(RSYNC_DIR)/zlib -DHAVE_CONFIG_H +HARNESS_CFLAGS := -g -O1 -std=gnu23 $(HARNESS_SAN) $(CPPFLAGS) + +# Each harness links the named rsync object(s) + the shared stubs. +# io.o is enough for fuzz_io; token.o additionally needs io.o + zlib + (because +# token.o references them even on the CPRES_NONE path) liblz4 / libzstd. +IO_OBJS := $(RSYNC_DIR)/io.o +# rsync bundles its own zlib objects (used by recv_deflated_token); link them +# even though fuzz_token currently drives only the CPRES_NONE/simple path, so +# the object resolves all references. 
+ZLIB_OBJS := $(RSYNC_DIR)/zlib/deflate.o $(RSYNC_DIR)/zlib/inffast.o \ + $(RSYNC_DIR)/zlib/inflate.o $(RSYNC_DIR)/zlib/inftrees.o \ + $(RSYNC_DIR)/zlib/trees.o $(RSYNC_DIR)/zlib/zutil.o \ + $(RSYNC_DIR)/zlib/adler32.o $(RSYNC_DIR)/zlib/compress.o \ + $(RSYNC_DIR)/zlib/crc32.o +TOKEN_OBJS := $(RSYNC_DIR)/token.o $(RSYNC_DIR)/io.o $(ZLIB_OBJS) +TOKEN_LIBS := -llz4 -lzstd + +# fuzz_recv_discard drives the receiver discard-path NULL-deref (static-findings +# REAL #1). It needs the real read_sum_head/recv_token (io.o + token.o + zlib) +# plus the real full_fname (util1.o), where the NULL deref actually lands. +RECV_DISCARD_OBJS := $(RSYNC_DIR)/util1.o $(TOKEN_OBJS) +RECV_DISCARD_LIBS := -llz4 -lzstd + +# fuzz_deflated_token: drives recv_deflated_token (zlib/CPRES_ZLIB path), which +# is a file-static. token_fuzz.o is the REAL token.c recompiled with the in-file +# fuzz hook enabled (-DRSYNC_FUZZ_TOKEN) - same parser/inflate/bounds code, plus +# the thin reset+entry wrappers. Links the REAL io.o + REAL bundled zlib. +DEFL_TOKEN_OBJS := $(RSYNC_DIR)/token_fuzz.o $(RSYNC_DIR)/io.o $(ZLIB_OBJS) +DEFL_TOKEN_LIBS := -llz4 -lzstd + +# fuzz_flist: links the REAL flist.o (compiled -DRSYNC_FUZZ_FLIST as flist_fuzz.o) +# plus its whole reachable call graph. The fuzz wrappers live inside flist.c +# under #ifdef RSYNC_FUZZ_FLIST, so we build a dedicated flist_fuzz.o. +# The whole reachable call graph of recv_file_entry / receive_xattr, linked +# REAL & instrumented. REAL_CORE excludes xattrs.o so each harness can pick the +# plain (fuzz_flist) or -DRSYNC_FUZZ_XATTRS (fuzz_xattrs) build of it. 
+REAL_CORE := $(RSYNC_DIR)/io.o $(RSYNC_DIR)/util1.o $(RSYNC_DIR)/util2.o \ + $(RSYNC_DIR)/uidlist.o $(RSYNC_DIR)/exclude.o \ + $(RSYNC_DIR)/hashtable.o $(RSYNC_DIR)/checksum.o \ + $(RSYNC_DIR)/syscall.o $(RSYNC_DIR)/acls.o \ + $(RSYNC_DIR)/fileio.o \ + $(RSYNC_DIR)/lib/wildmatch.o $(RSYNC_DIR)/lib/compat.o \ + $(RSYNC_DIR)/lib/snprintf.o $(RSYNC_DIR)/lib/mdfour.o \ + $(RSYNC_DIR)/lib/md5.o $(RSYNC_DIR)/lib/permstring.o \ + $(RSYNC_DIR)/lib/pool_alloc.o $(RSYNC_DIR)/lib/sysacls.o \ + $(RSYNC_DIR)/lib/sysxattrs.o $(RSYNC_DIR)/chmod.o +FLIST_OBJS := $(RSYNC_DIR)/flist_fuzz.o globals.o $(RSYNC_DIR)/xattrs.o $(REAL_CORE) +FLIST_LIBS := -lcrypto -lxxhash -lacl + +# fuzz_xattrs: receive_xattr lives in xattrs.o; storage tail pulls checksum.o + +# hashtable.o + acls/exclude. Built as xattrs_fuzz.o (#ifdef RSYNC_FUZZ_XATTRS). +# receive_xattr's storage tail (rsync_xal_store -> hashtable + checksum) and +# f_name reach into flist.o, so we link flist_fuzz.o (which also supplies the +# globals flist.c owns) alongside the same real core. 
+XATTRS_OBJS := $(RSYNC_DIR)/xattrs_fuzz.o $(RSYNC_DIR)/flist_fuzz.o globals.o $(REAL_CORE) +XATTRS_LIBS := -lcrypto -lxxhash -lacl + +HARNESSES := fuzz_io fuzz_token fuzz_recv_discard fuzz_deflated_token fuzz_flist fuzz_xattrs + +.PHONY: all clean regression +all: $(HARNESSES) + +stubs.o: stubs.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +globals.o: globals.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +fuzz_io.o: fuzz_io.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +fuzz_io: fuzz_io.o stubs.o $(IO_OBJS) + $(CC) $(FUZZ_SAN) $^ -o $@ + +fuzz_token.o: fuzz_token.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +fuzz_token: fuzz_token.o stubs.o $(TOKEN_OBJS) + $(CC) $(FUZZ_SAN) $^ -o $@ $(TOKEN_LIBS) + +fuzz_recv_discard.o: fuzz_recv_discard.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +recv_discard_stubs.o: recv_discard_stubs.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +fuzz_recv_discard: fuzz_recv_discard.o stubs.o $(RECV_DISCARD_OBJS) recv_discard_stubs.o + # util1.o provides real definitions of a few helpers (flist_ndx_push/pop, + # glob_expand*) that stubs.c also stubs; none are on the full_fname path, so + # allow the duplicates (linker keeps the first / stubs.c version). + $(CC) $(FUZZ_SAN) -Wl,--allow-multiple-definition $^ -o $@ $(RECV_DISCARD_LIBS) + +$(RSYNC_DIR)/token_fuzz.o: $(RSYNC_DIR)/token.c + $(CC) $(RSYNC_CFLAGS) -DRSYNC_FUZZ_TOKEN -c $< -o $@ + +fuzz_deflated_token.o: fuzz_deflated_token.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +fuzz_deflated_token: fuzz_deflated_token.o stubs.o $(DEFL_TOKEN_OBJS) + $(CC) $(FUZZ_SAN) $^ -o $@ $(DEFL_TOKEN_LIBS) + +# flist_fuzz.o / xattrs_fuzz.o are the REAL flist.c / xattrs.c recompiled with +# the in-file fuzz hook enabled (#ifdef RSYNC_FUZZ_*). Same sanitizer CFLAGS as +# the rest of the instrumented rsync objects; -DRSYNC_FUZZ_* only adds the thin +# wrappers, it does not alter parser code. 
+RSYNC_CFLAGS := -std=gnu23 -I$(RSYNC_DIR) -I$(RSYNC_DIR)/zlib -g -O1 \ + -fsanitize=fuzzer-no-link,address,undefined -fno-omit-frame-pointer \ + -DHAVE_CONFIG_H -Wall -W + +$(RSYNC_DIR)/flist_fuzz.o: $(RSYNC_DIR)/flist.c + $(CC) $(RSYNC_CFLAGS) -DRSYNC_FUZZ_FLIST -c $< -o $@ + +$(RSYNC_DIR)/xattrs_fuzz.o: $(RSYNC_DIR)/xattrs.c + $(CC) $(RSYNC_CFLAGS) -DRSYNC_FUZZ_XATTRS -c $< -o $@ + +fuzz_flist.o: fuzz_flist.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +fuzz_flist: fuzz_flist.o stubs.o $(FLIST_OBJS) + $(CC) $(FUZZ_SAN) $^ -o $@ $(FLIST_LIBS) + +fuzz_xattrs.o: fuzz_xattrs.c + $(CC) $(HARNESS_CFLAGS) -c $< -o $@ + +fuzz_xattrs: fuzz_xattrs.o stubs.o $(XATTRS_OBJS) + $(CC) $(FUZZ_SAN) $^ -o $@ $(XATTRS_LIBS) + +regression: all + ./run-regression.sh + +clean: + rm -f $(HARNESSES) *.o crash-* leak-* timeout-* oom-* diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 000000000..0f2d7907d --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,159 @@ +# Fuzzing harnesses (Workstream 2) + +libFuzzer harnesses for rsync's **UNTRUSTED-PEER wire parsers**, built so the +existing `read_*_bounded` / `MAX_WIRE_*` guards are actually exercised under +ASan + UBSan. A guard that correctly rejects a hostile value unwinds cleanly; a +genuine out-of-bounds access trips the sanitizer *before* any guard runs — so a +crash here means a real parser bug, not a harness artifact. 
+ +## Targets + +| Harness | Surface | Status | +|----------------|-------------------------------------------|--------| +| `fuzz_io` | `io.c` primitives: `read_sum_head`, `read_varint`/`read_varlong`/`read_longint`/`read_vstring`, `read_int_bounded`/`read_varint_bounded`/`read_varint_size` | **working** | +| `fuzz_token` | `token.c` `recv_token` → `simple_recv_token` (CPRES_NONE literal-token path; the `i > CHUNK_SIZE` guard) | **working** | +| `fuzz_recv_discard` | `receiver.c` discard-path NULL deref: `read_sum_head` + match-token → `full_fname(NULL)` (static-findings REAL #1, util1.c:1282) | **working — bug FIXED, now a clean-gate regression** | +| `fuzz_deflated_token` | `token.c` `recv_deflated_token` (zlib/CPRES_ZLIB inflate path) | **working** | +| `fuzz_flist` | `flist.c` `recv_file_entry` | **working (live harness)** | +| `fuzz_xattrs` | `xattrs.c` `receive_xattr` | **working (live harness)** | + +> **`fuzz_recv_discard` is a REGRESSION harness for a now-FIXED bug.** On the +> original (vulnerable) code its committed seed +> `corpus/fuzz_recv_discard/match_nobasis.bin` triggered an ASan/UBSan NULL +> dereference in `full_fname` (util1.c:1282) when a block-match token arrived on +> the discard path (`fname == NULL`). The receiver fix (handle a block-match +> token on the discard path by absorbing it benignly — it is normal protocol, +> not malformed — and restrict the `mapbuf == NULL` protocol error to real-output +> transfers where `fd != -1` and `fname` is non-NULL) makes this path unwind +> cleanly. The harness is therefore now part of the default `run-regression.sh` +> clean-gate target list; it must stay green. + +The `fuzz_flist` / `fuzz_xattrs` harnesses are LIVE: they link the REAL +`flist.c` / `xattrs.c` recompiled with an in-file `#ifdef RSYNC_FUZZ_*` hook +(`flist_fuzz.o` / `xattrs_fuzz.o`) plus their whole reachable call graph, and +`fuzz_deflated_token` drives the real `recv_deflated_token` via `token_fuzz.o`. 
+
+## Toolchain
+
+clang (libFuzzer) + ASan/UBSan + compiler-rt, gcc, make, python3 are all
+provided by the project nix shell:
+
+```sh
+nix develop path:$HOME/git/rsync --command bash -c '<command>'
+```
+
+clang is **not** committed into the repo (the fork stays upstreamable).
+
+## Build
+
+The wire-parser objects are produced by the *normal* rsync build, configured
+with the campaign sanitizer CFLAGS so those objects are instrumented (configure
+preserves env `CFLAGS` — **no `Makefile.in` change is needed**):
+
+```sh
+nix develop path:$HOME/git/rsync --command bash -c '
+  ./prepare-source &&
+  CFLAGS="-g -O1 -fsanitize=fuzzer-no-link,address,undefined -fno-omit-frame-pointer" \
+  CC=clang ./configure --disable-md2man &&
+  make io.o token.o \
+    zlib/inflate.o zlib/inftrees.o zlib/inffast.o zlib/zutil.o \
+    zlib/adler32.o zlib/deflate.o zlib/trees.o zlib/compress.o zlib/crc32.o &&
+  make -C fuzz'
+```
+
+`make -C fuzz` builds every working harness (one `-fsanitize=fuzzer` link rule
+each — the **only** build-system addition in this workstream). `make -C fuzz
+fuzz_io` builds just one.
+
+## Run / regression mode (what WS3 CI calls)
+
+```sh
+nix develop path:$HOME/git/rsync --command ./fuzz/run-regression.sh
+```
+
+`run-regression.sh` (equivalently `make -C fuzz regression`) builds each working
+harness, deterministically replays its committed seed corpus (`-runs=0`), then
+does a **bounded** top-up fuzz run (`-max_total_time`, default 30s) seeded from
+that corpus. It **exits non-zero on any crash**. Knobs: `FUZZ_MAX_TIME`,
+`FUZZ_TARGETS`.
+
+## Linking / stubbing strategy
+
+rsync is not a library, so each harness links a curated set of rsync `.o` files plus `fuzz/stubs.c`. `fuzz_io`, `fuzz_token` and `fuzz_recv_discard` link *unmodified* objects;
+the live harnesses (`fuzz_flist`, `fuzz_xattrs`, `fuzz_deflated_token`) recompile their target source with an in-file `#ifdef RSYNC_FUZZ_*` hook that only adds thin entry-point wrappers.
+
+- **`fuzz_io`** = `fuzz_io.o` + `stubs.o` + `io.o`.
+- **`fuzz_token`** = `fuzz_token.o` + `stubs.o` + `token.o` + `io.o` + + rsync's bundled `zlib/*.o` + `-llz4 -lzstd` (token.o references LZ4/ZSTD + symbols even though the CPRES_NONE path never calls them; the system libs + resolve them). + +`fuzz/stubs.c` supplies everything the objects reference that we do not want to +drag in: + +- **`_exit_cleanup` / `_out_of_memory` / `_overflow_exit` → `longjmp`** back to + the harness. This is the load-bearing shim: a wire-range guard calls + `exit_cleanup(RERR_*)` on a malformed value; we turn that into a clean unwind + so a *correctly rejected* input is not a crash. A real memory bug trips + ASan/UBSan *before* the guard, preserving oracle fidelity. +- **Logging no-ops**: `rprintf`, `rsyserr`, `rwrite`, `who_am_i`, `do_big_num`. +- **A self-contained `my_alloc`** (honours `max_alloc`, returns NULL on the + `file==NULL` over-limit path that `EXPAND_ITEM_LIST` relies on) so ASan tracks + every wire-driven allocation. +- **Real zero-filled `info_levels` / `debug_levels`** arrays (the `INFO_GTE` / + `DEBUG_GTE` macros index them directly; NULL would crash). +- **Default-valued globals** referenced by the objects (`stats`, `am_*`, + `io_error`, `do_compression`, `module_id`, …) and **no-op shims** for + functions only reached on code paths the parsers never enter + (`match_hard_links`, `successful_send`, `glob_expand`, `recv_file_list`, …). + +To rediscover the exact symbol set after a rebase: +`nm -u io.o | sed 's/.* U //' | sort` (and likewise for `token.o`); anything not +in libc is either a default global or a no-op shim in `stubs.c`. + +## Global-init contract + +Per `reference.md` Part 3.5, the harness controls the minimal global state so a +crash is a real parser bug: + +- `protocol_version` — `fuzz_io` derives it per-input from byte 0 (cycling + 20/26/29/30/31) to cover the `proto<27`/`<30`/`>=30` branches of + `read_sum_head` and the `varint30` width choices; `fuzz_token` pins 30. 
+- `xfer_sum_len` — `fuzz_io` derives it per-input (4..32); bounds `s2length` and
+  feeds the multiply-overflow guard in `read_sum_head`.
+- `do_compression = CPRES_NONE` in `fuzz_token` (selects `simple_recv_token`).
+- The iobuf is left at its default (`.in_fd = -1`). That is the whole trick:
+  `read_buf(f, …)` takes its `f != iobuf.in_fd` fast path straight to
+  `safe_read(f, …)`, so the readers pull bytes directly from a fd we control —
+  no multiplexing, no msg framing — with the bytes 100% attacker-chosen.
+
+The fuzz buffer reaches the readers via a **pipe**: write all bytes, close the
+write end; reads drain the buffer then hit EOF, which `read_buf` turns into
+`whine_about_eof → exit_cleanup → longjmp`. "Ran out of bytes" is therefore a
+clean unwind, never a crash.
+
+## Seed corpus
+
+`corpus/<harness>/` holds descriptively-named seeds (hand-built valid, boundary,
+and over-range cases). libFuzzer-generated entries (40-hex-char names) are
+git-ignored; a genuine crash repro should be minimized and committed under
+`corpus/<harness>/` with a descriptive name.
+
+- **`fuzz_io`** (byte 0 selects protocol/sum_len): valid sum_head (proto 30 and
+  proto 20), empty (count 0), `blength` exactly at `MAX_BLOCK_SIZE` and one
+  over, negative count, huge count (multiply-overflow guard), `s2length` over
+  `xfer_sum_len`, `remainder > blength`, a mixed varint+vstring stream, and an
+  all-`0xff` stress case (drives the `read_longint` 64-bit sentinel).
+- **`fuzz_token`**: a small literal token, a zero/negative (match) token, a
+  literal at exactly `CHUNK_SIZE`, one over `CHUNK_SIZE` (the guard), a huge
+  length, and a truncated literal (EOF unwind).
+
+Lengths/encodings are little-endian `read_int` and follow the exact wire layout
+of `read_sum_head` / `simple_recv_token`.
+
+## Verification result
+
+`fuzz_io` and `fuzz_token` both **build and run clean** under ASan+UBSan over
+their seed corpora plus millions of generated inputs.
No crashes were found in +the io.c or token.c (simple path) wire guards — i.e. the existing bounds hold up +under fuzzing. diff --git a/fuzz/corpus/fuzz_deflated_token/01_end b/fuzz/corpus/fuzz_deflated_token/01_end new file mode 100644 index 000000000..f76dd238a Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/01_end differ diff --git a/fuzz/corpus/fuzz_deflated_token/02_literal_small b/fuzz/corpus/fuzz_deflated_token/02_literal_small new file mode 100644 index 000000000..a5f9f2545 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/02_literal_small differ diff --git a/fuzz/corpus/fuzz_deflated_token/03_literal_4k b/fuzz/corpus/fuzz_deflated_token/03_literal_4k new file mode 100644 index 000000000..373f39843 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/03_literal_4k differ diff --git a/fuzz/corpus/fuzz_deflated_token/04_literal_maxish b/fuzz/corpus/fuzz_deflated_token/04_literal_maxish new file mode 100644 index 000000000..69518414c Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/04_literal_maxish differ diff --git a/fuzz/corpus/fuzz_deflated_token/05_tok_rel b/fuzz/corpus/fuzz_deflated_token/05_tok_rel new file mode 100644 index 000000000..438e74a03 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/05_tok_rel differ diff --git a/fuzz/corpus/fuzz_deflated_token/06_tokrun_rel b/fuzz/corpus/fuzz_deflated_token/06_tokrun_rel new file mode 100644 index 000000000..b01049c52 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/06_tokrun_rel differ diff --git a/fuzz/corpus/fuzz_deflated_token/07_tok_long b/fuzz/corpus/fuzz_deflated_token/07_tok_long new file mode 100644 index 000000000..ad8c6e393 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/07_tok_long differ diff --git a/fuzz/corpus/fuzz_deflated_token/08_tokrun_long b/fuzz/corpus/fuzz_deflated_token/08_tokrun_long new file mode 100644 index 000000000..d9075d1e5 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/08_tokrun_long 
differ diff --git a/fuzz/corpus/fuzz_deflated_token/09_mixed b/fuzz/corpus/fuzz_deflated_token/09_mixed new file mode 100644 index 000000000..1663e5089 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/09_mixed differ diff --git a/fuzz/corpus/fuzz_deflated_token/10_tok_max_index b/fuzz/corpus/fuzz_deflated_token/10_tok_max_index new file mode 100644 index 000000000..adb201d9e Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/10_tok_max_index differ diff --git a/fuzz/corpus/fuzz_deflated_token/11_tokrun_boundary b/fuzz/corpus/fuzz_deflated_token/11_tokrun_boundary new file mode 100644 index 000000000..1f6cc022d Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/11_tokrun_boundary differ diff --git a/fuzz/corpus/fuzz_deflated_token/12_rel_chain b/fuzz/corpus/fuzz_deflated_token/12_rel_chain new file mode 100644 index 000000000..2ac65e644 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/12_rel_chain differ diff --git a/fuzz/corpus/fuzz_deflated_token/13_trunc_hdr b/fuzz/corpus/fuzz_deflated_token/13_trunc_hdr new file mode 100644 index 000000000..62e8bdd53 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/13_trunc_hdr differ diff --git a/fuzz/corpus/fuzz_deflated_token/14_trunc_payload b/fuzz/corpus/fuzz_deflated_token/14_trunc_payload new file mode 100644 index 000000000..b4fd24e44 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/14_trunc_payload differ diff --git a/fuzz/corpus/fuzz_deflated_token/15_maxlen_hdr b/fuzz/corpus/fuzz_deflated_token/15_maxlen_hdr new file mode 100644 index 000000000..2c83d30e5 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/15_maxlen_hdr differ diff --git a/fuzz/corpus/fuzz_deflated_token/16_bad_deflate b/fuzz/corpus/fuzz_deflated_token/16_bad_deflate new file mode 100644 index 000000000..fecdb1fee Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/16_bad_deflate differ diff --git a/fuzz/corpus/fuzz_deflated_token/17_zero_frame 
b/fuzz/corpus/fuzz_deflated_token/17_zero_frame new file mode 100644 index 000000000..56f17f2d3 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/17_zero_frame differ diff --git a/fuzz/corpus/fuzz_deflated_token/18_neg_tok b/fuzz/corpus/fuzz_deflated_token/18_neg_tok new file mode 100644 index 000000000..a7a631859 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/18_neg_tok differ diff --git a/fuzz/corpus/fuzz_deflated_token/19_zero_run b/fuzz/corpus/fuzz_deflated_token/19_zero_run new file mode 100644 index 000000000..87afde16a Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/19_zero_run differ diff --git a/fuzz/corpus/fuzz_deflated_token/20_multi_noend b/fuzz/corpus/fuzz_deflated_token/20_multi_noend new file mode 100644 index 000000000..aea9ad65f Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/20_multi_noend differ diff --git a/fuzz/corpus/fuzz_deflated_token/21_garbage b/fuzz/corpus/fuzz_deflated_token/21_garbage new file mode 100644 index 000000000..5d983463e Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/21_garbage differ diff --git a/fuzz/corpus/fuzz_deflated_token/22_inflated_syncpoint b/fuzz/corpus/fuzz_deflated_token/22_inflated_syncpoint new file mode 100644 index 000000000..1e5c466b7 Binary files /dev/null and b/fuzz/corpus/fuzz_deflated_token/22_inflated_syncpoint differ diff --git a/fuzz/corpus/fuzz_flist/seed_basic b/fuzz/corpus/fuzz_flist/seed_basic new file mode 100644 index 000000000..783b5ab1e Binary files /dev/null and b/fuzz/corpus/fuzz_flist/seed_basic differ diff --git a/fuzz/corpus/fuzz_flist/seed_csum b/fuzz/corpus/fuzz_flist/seed_csum new file mode 100644 index 000000000..acd487859 Binary files /dev/null and b/fuzz/corpus/fuzz_flist/seed_csum differ diff --git a/fuzz/corpus/fuzz_flist/seed_longname b/fuzz/corpus/fuzz_flist/seed_longname new file mode 100644 index 000000000..1a03f87d9 Binary files /dev/null and b/fuzz/corpus/fuzz_flist/seed_longname differ diff --git 
a/fuzz/corpus/fuzz_io/ff_stress b/fuzz/corpus/fuzz_io/ff_stress new file mode 100644 index 000000000..f6a7916bf --- /dev/null +++ b/fuzz/corpus/fuzz_io/ff_stress @@ -0,0 +1 @@ +ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ \ No newline at end of file diff --git a/fuzz/corpus/fuzz_io/mixed_varints b/fuzz/corpus/fuzz_io/mixed_varints new file mode 100644 index 000000000..e8c6562bd Binary files /dev/null and b/fuzz/corpus/fuzz_io/mixed_varints differ diff --git a/fuzz/corpus/fuzz_io/sumhead_blength_max b/fuzz/corpus/fuzz_io/sumhead_blength_max new file mode 100644 index 000000000..faa06c096 Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_blength_max differ diff --git a/fuzz/corpus/fuzz_io/sumhead_blength_over b/fuzz/corpus/fuzz_io/sumhead_blength_over new file mode 100644 index 000000000..59336fdf3 Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_blength_over differ diff --git a/fuzz/corpus/fuzz_io/sumhead_count_huge b/fuzz/corpus/fuzz_io/sumhead_count_huge new file mode 100644 index 000000000..ee81e03af Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_count_huge differ diff --git a/fuzz/corpus/fuzz_io/sumhead_count_neg b/fuzz/corpus/fuzz_io/sumhead_count_neg new file mode 100644 index 000000000..e44f24858 Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_count_neg differ diff --git a/fuzz/corpus/fuzz_io/sumhead_remainder_over b/fuzz/corpus/fuzz_io/sumhead_remainder_over new file mode 100644 index 000000000..97cc13a1f Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_remainder_over differ diff --git a/fuzz/corpus/fuzz_io/sumhead_s2_over b/fuzz/corpus/fuzz_io/sumhead_s2_over new file mode 100644 index 000000000..037eb68af Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_s2_over differ diff --git a/fuzz/corpus/fuzz_io/sumhead_valid_p20 b/fuzz/corpus/fuzz_io/sumhead_valid_p20 new file mode 100644 index 000000000..2caa84b3d Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_valid_p20 differ diff --git 
a/fuzz/corpus/fuzz_io/sumhead_valid_p30 b/fuzz/corpus/fuzz_io/sumhead_valid_p30 new file mode 100644 index 000000000..7080cfc79 Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_valid_p30 differ diff --git a/fuzz/corpus/fuzz_io/sumhead_zero b/fuzz/corpus/fuzz_io/sumhead_zero new file mode 100644 index 000000000..45a8cd09f Binary files /dev/null and b/fuzz/corpus/fuzz_io/sumhead_zero differ diff --git a/fuzz/corpus/fuzz_recv_discard/match_nobasis.bin b/fuzz/corpus/fuzz_recv_discard/match_nobasis.bin new file mode 100644 index 000000000..4f3c6c040 Binary files /dev/null and b/fuzz/corpus/fuzz_recv_discard/match_nobasis.bin differ diff --git a/fuzz/corpus/fuzz_token/lit_chunk_max b/fuzz/corpus/fuzz_token/lit_chunk_max new file mode 100644 index 000000000..d295c429d Binary files /dev/null and b/fuzz/corpus/fuzz_token/lit_chunk_max differ diff --git a/fuzz/corpus/fuzz_token/lit_end b/fuzz/corpus/fuzz_token/lit_end new file mode 100644 index 000000000..593f4708d Binary files /dev/null and b/fuzz/corpus/fuzz_token/lit_end differ diff --git a/fuzz/corpus/fuzz_token/lit_huge b/fuzz/corpus/fuzz_token/lit_huge new file mode 100644 index 000000000..59a2022ec --- /dev/null +++ b/fuzz/corpus/fuzz_token/lit_huge @@ -0,0 +1 @@ +ÿÿÿ \ No newline at end of file diff --git a/fuzz/corpus/fuzz_token/lit_over b/fuzz/corpus/fuzz_token/lit_over new file mode 100644 index 000000000..009d73a31 Binary files /dev/null and b/fuzz/corpus/fuzz_token/lit_over differ diff --git a/fuzz/corpus/fuzz_token/lit_small b/fuzz/corpus/fuzz_token/lit_small new file mode 100644 index 000000000..a32a0eb8c Binary files /dev/null and b/fuzz/corpus/fuzz_token/lit_small differ diff --git a/fuzz/corpus/fuzz_token/lit_trunc b/fuzz/corpus/fuzz_token/lit_trunc new file mode 100644 index 000000000..bbd49f757 Binary files /dev/null and b/fuzz/corpus/fuzz_token/lit_trunc differ diff --git a/fuzz/corpus/fuzz_token/match_token b/fuzz/corpus/fuzz_token/match_token new file mode 100644 index 000000000..7a557ea6e --- 
/dev/null +++ b/fuzz/corpus/fuzz_token/match_token @@ -0,0 +1 @@ +ùÿÿÿ \ No newline at end of file diff --git a/fuzz/corpus/fuzz_xattrs/seed_abbrev b/fuzz/corpus/fuzz_xattrs/seed_abbrev new file mode 100644 index 000000000..8764d1535 Binary files /dev/null and b/fuzz/corpus/fuzz_xattrs/seed_abbrev differ diff --git a/fuzz/corpus/fuzz_xattrs/seed_one b/fuzz/corpus/fuzz_xattrs/seed_one new file mode 100644 index 000000000..b2e832456 Binary files /dev/null and b/fuzz/corpus/fuzz_xattrs/seed_one differ diff --git a/fuzz/corpus/fuzz_xattrs/seed_zerocount b/fuzz/corpus/fuzz_xattrs/seed_zerocount new file mode 100644 index 000000000..f16a710f6 Binary files /dev/null and b/fuzz/corpus/fuzz_xattrs/seed_zerocount differ diff --git a/fuzz/fuzz_deflated_token.c b/fuzz/fuzz_deflated_token.c new file mode 100644 index 000000000..f5b1efbd2 --- /dev/null +++ b/fuzz/fuzz_deflated_token.c @@ -0,0 +1,104 @@ +/* + * fuzz_deflated_token.c - libFuzzer harness for rsync's zlib compressed-token + * decoder: recv_deflated_token() in token.c (reference.md Part 3.3, lines + * 552-664), reached via recv_token() when do_compression == CPRES_ZLIB. + * + * This closes the coverage gap left by fuzz_token.c, which only drives the + * uncompressed simple_recv_token() path. The compressed path is the same + * untrusted-peer wire-parsing surface as CVE-2026-43618 (a malicious *sender* + * drives the receiver's decode loop): it reads attacker-controlled flag bytes, + * a 14-bit DEFLATED_DATA length, absolute/relative token numbers and run + * counts, and feeds the byte stream through zlib inflate() into fixed + * cbuf[MAX_DATA_COUNT] / dbuf[AVAIL_OUT_SIZE(CHUNK_SIZE)] buffers. 
+ * + * LIVE objects (NOT stubbed): the harness links the REAL instrumented token.o + * (compiled as token_fuzz.o with -DRSYNC_FUZZ_TOKEN, which only adds the thin + * ifdef hook below the parser - no parser/bound/inflate logic is altered), the + * REAL io.o readers (read_byte/read_buf/read_int -> safe_read), and rsync's + * REAL bundled zlib objects (inflate.o, inffast.o, inftrees.o, ...). The + * decode/inflate/bounds/run-accounting code all runs for real and is + * sanitizer-instrumented; masking any of it would hide the very bugs we hunt. + * Only true process-boundary externals (exit_cleanup, logging, allocator) are + * shimmed in fuzz/stubs.c, and exit_cleanup longjmps back here so a *correctly + * rejected* hostile input is not counted as a crash (a real OOB trips ASan + * BEFORE any guard fires - oracle fidelity preserved). + * + * Decompressor init fidelity: we do NOT hand-roll inflateInit2. The real + * receiver initializes rx_strm lazily inside recv_deflated_token's r_init arm + * (inflateInit2(&rx_strm, -15), then inflateReset on subsequent streams). The + * harness simply forces recv_state=r_init between inputs via the reset hook, so + * the decompressor is set up EXACTLY as the real receiver sets it up - any + * crash is a real parser/inflate bug, not a harness mis-init. + * + * Plumbing: identical fd trick to fuzz_io/fuzz_token - fuzz bytes arrive via a + * pipe and io.c's readers take the non-iobuf safe_read() fast path. 
+ */
+
+#include "rsync.h"
+#include <stdint.h>
+#include <stddef.h>
+#include <setjmp.h>
+
+extern int32 fuzz_recv_deflated_token(int f, char **data);
+extern void fuzz_recv_deflated_token_reset(void);
+
+extern int do_compression;
+extern jmp_buf fuzz_unwind_env;
+extern int fuzz_unwind_armed;
+extern int protocol_version;
+
+static int fd_from_bytes(const uint8_t *data, size_t size)
+{
+	int fds[2];
+	if (pipe(fds) != 0)
+		return -1;
+	fcntl(fds[1], F_SETFL, O_NONBLOCK);
+	size_t off = 0;
+	while (off < size) {
+		ssize_t n = write(fds[1], data + off, size - off);
+		if (n <= 0)
+			break;
+		off += (size_t)n;
+	}
+	close(fds[1]);
+	return fds[0];
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+	if (size < 1)
+		return 0;
+
+	protocol_version = 30;
+	do_compression = CPRES_ZLIB; /* recv_deflated_token path */
+
+	/* Restore the decompressor to a fresh-receiver state for THIS input. The
+	 * next decode call re-enters the r_init arm (inflateInit2 on first use,
+	 * inflateReset afterwards), mirroring the real receiver's per-transfer init. */
+	fuzz_recv_deflated_token_reset();
+
+	int f = fd_from_bytes(data, size);
+	if (f < 0)
+		return 0;
+
+	fuzz_unwind_armed = 1;
+	if (setjmp(fuzz_unwind_env) == 0) {
+		char *out;
+		int32 n;
+		/* Drive the decoder over the whole stream: each call returns a
+		 * positive literal byte count, a negative token index, or 0 at
+		 * END_FLAG (clean end). Bounded loop guards a pathological spin.
+		 * A malformed stream (bad inflate, over-range token/run) makes a
+		 * guard call exit_cleanup, which longjmps out - not a crash. */
+		for (int i = 0; i < (1 << 20); i++) {
+			n = fuzz_recv_deflated_token(f, &out);
+			if (n == 0)
+				break; /* END_FLAG: stream finished cleanly */
+			(void)out;
+		}
+	}
+	fuzz_unwind_armed = 0;
+
+	close(f);
+	return 0;
+}
diff --git a/fuzz/fuzz_flist.c b/fuzz/fuzz_flist.c
new file mode 100644
index 000000000..898d1e038
--- /dev/null
+++ b/fuzz/fuzz_flist.c
@@ -0,0 +1,193 @@
+/*
+ * fuzz_flist.c - LIVE libFuzzer harness for rsync's recv_file_entry() (flist.c).
+ * + * TARGET: recv_file_entry (flist.c ~682-1229) - the sender->receiver file-list + * entry wire decode, with the stateful static lastname[]/lastdir reconstruction + * (reference.md Part 3.2). recv_file_entry is file-static, so flist.c is compiled + * with -DRSYNC_FUZZ_FLIST which appends three thin wrappers (fuzz_flist_new / + * fuzz_recv_file_entry / fuzz_flist_free) that replicate EXACTLY the per-entry + * work recv_file_list does around the parser (flist_expand + append). The heavy, + * non-target tail (sort/clean/recv_id_list) is intentionally skipped. + * + * LIVE LINK: this harness links the REAL instrumented flist.o + io.o + util1.o + + * util2.o + uidlist.o + exclude.o + hashtable.o + lib/*.o (pool_alloc, wildmatch, + * mdigest, ...) + checksum.o. Only true process-boundary externals (logging, + * exit_cleanup->longjmp, terminal/socket I/O) are stubbed. NOTHING in the + * parse/alloc/copy path is stubbed. + * + * PLUMBING: identical fast-path trick as fuzz_io - iobuf stays default + * ({.in_fd=-1}), the fd we hand the parser is a pipe, so every read_buf/read_sbuf + * takes the safe_read() path straight off attacker bytes. EOF => safe_read short + * => whine_about_eof -> exit_cleanup -> longjmp back here (clean unwind). + * + * STATEFULNESS: recv_file_entry's lastname[]/lastdir/mode/uid/... are function + * statics persisting across entries; XMIT_SAME_NAME reuses l1 bytes of lastname. + * We drive a SEQUENCE of entries from one input so the cross-entry name/dir + * reconstruction (a prime bug site) is exercised. State leaks across fuzzer + * inputs too (cannot reset function statics from outside) - documented, same as + * fuzz_token. + * + * ORACLE: a correctly-rejected hostile value calls overflow_exit/exit_cleanup + * AFTER its guard => clean longjmp, not a finding. A genuine OOB/UB during a copy + * or read trips ASan/UBSan BEFORE any guard => real finding. 
+ */
+
+#include "rsync.h"
+#include <stdint.h>
+#include <stddef.h>
+#include <setjmp.h>
+
+extern struct file_list *fuzz_flist_new(void);
+extern struct file_struct *fuzz_recv_file_entry(int f, struct file_list *flist, int xflags);
+extern void fuzz_flist_free(struct file_list *flist);
+
+extern uchar read_byte(int f);
+
+/* From fuzz/stubs.c */
+extern jmp_buf fuzz_unwind_env;
+extern int fuzz_unwind_armed;
+
+/* Globals recv_file_entry consults. */
+extern int protocol_version;
+extern int preserve_links, preserve_devices, preserve_specials, preserve_hard_links;
+extern int preserve_uid, preserve_gid, preserve_acls, preserve_xattrs;
+extern int relative_paths, sanitize_paths, munge_symlinks;
+extern int atimes_ndx, uid_ndx, gid_ndx, acls_ndx, xattrs_ndx;
+extern int crtimes_ndx, pathname_ndx, depth_ndx, unsort_ndx;
+extern int preserve_atimes, preserve_crtimes;
+extern int file_extra_cnt;
+extern int numeric_ids, inc_recurse, am_root, always_checksum;
+extern int xfer_dirs, recurse, one_file_system, copy_devices;
+extern int trust_sender_filter;
+
+void init_flist(void); /* sets flist_csum_len from file_sum_nni */
+void parse_checksum_choice(int); /* sets file_sum_nni/xfer_sum_nni (negotiation) */
+
+static int fd_from_bytes(const uint8_t *data, size_t size)
+{
+	int fds[2];
+	if (pipe(fds) != 0)
+		return -1;
+	fcntl(fds[1], F_SETFL, O_NONBLOCK);
+	size_t off = 0;
+	while (off < size) {
+		ssize_t n = write(fds[1], data + off, size - off);
+		if (n <= 0)
+			break;
+		off += (size_t)n;
+	}
+	close(fds[1]);
+	return fds[0];
+}
+
+static int inited;
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+	if (size < 2)
+		return 0;
+
+	/* Byte 0: protocol + feature selection (cover proto<28 / <30 / >=30 and the optional-field branches that change which reads happen).
+	 * NOTE(review): the protocol index reuses bits 1-7, which also carry the feature flags, so each feature mix only ever sees two protocols. */
+	uint8_t sel = data[0];
+	static const int protos[] = { 26, 28, 29, 30, 31 };
+	protocol_version = protos[(sel >> 1) % (sizeof protos / sizeof protos[0])];
+
+	preserve_links = (sel & 0x01) ? 1 : 0;
+	preserve_devices = (sel & 0x02) ? 1 : 0;
+	preserve_specials = (sel & 0x04) ? 1 : 0;
+	preserve_uid = (sel & 0x08) ? 1 : 0;
+	preserve_gid = (sel & 0x10) ? 1 : 0;
+	preserve_hard_links = (sel & 0x20) ? 1 : 0;
+	always_checksum = (sel & 0x40) ? 1 : 0;
+	/* Keep acls/xattrs OFF here: those tails have dedicated harnesses
+	 * (fuzz_xattrs) and would otherwise dominate this corpus. */
+	preserve_acls = 0;
+	preserve_xattrs = 0;
+
+	/* Replicate setup_protocol()'s extra-slot assignment EXACTLY (compat.c
+	 * 575-595). The *_ndx values and file_extra_cnt are NOT free parameters:
+	 * 64-bit fields (atime) must be assigned first so their slot is 8-byte
+	 * aligned. Arbitrary ndx/cnt combinations produce misaligned F_ATIME
+	 * accesses that the real receiver never generates (a harness artifact,
+	 * not a recv_file_entry bug). We are the receiver: am_sender=0, am_server=0. */
+	preserve_atimes = (data[1] & 0x01) ? 1 : 0;
+	preserve_crtimes = 0; /* SUPPORT_CRTIMES not configured */
+	atimes_ndx = crtimes_ndx = pathname_ndx = depth_ndx = 0;
+	uid_ndx = gid_ndx = acls_ndx = xattrs_ndx = unsort_ndx = 0;
+	file_extra_cnt = 0;
+	if (preserve_atimes)
+		atimes_ndx = (file_extra_cnt += EXTRA64_CNT);
+	/* am_sender==0 => depth_ndx branch */
+	depth_ndx = ++file_extra_cnt;
+	if (preserve_uid)
+		uid_ndx = ++file_extra_cnt;
+	if (preserve_gid)
+		gid_ndx = ++file_extra_cnt;
+	if (preserve_acls) /* (!am_sender already true) */
+		acls_ndx = ++file_extra_cnt;
+	if (preserve_xattrs)
+		xattrs_ndx = ++file_extra_cnt;
+
+	relative_paths = (data[1] & 0x10) ? 1 : 0;
+	sanitize_paths = (data[1] & 0x20) ? 1 : 0;
+	munge_symlinks = 0; /* keep SYMLINK_PREFIX math out unless wanted */
+	numeric_ids = 1;
+	inc_recurse = 0;
+	am_root = 0;
+	one_file_system = 0;
+	copy_devices = 0;
+	trust_sender_filter = 1; /* skip check_server_filter (exclude path) */
+	xfer_dirs = 1;
+	recurse = 0;
+
+	if (!inited) {
+		/* Mirror the receiver's checksum negotiation: parse_checksum_choice
+		 * populates file_sum_nni/xfer_sum_nni (NULL choice => protocol default)
+		 * which init_flist then consults for flist_csum_len. */
+		parse_checksum_choice(0);
+		init_flist(); /* sets flist_csum_len from file_sum_nni */
+		/* Allocate the hard-link dev/inode table ONCE. recv_file_entry's
+		 * proto<30 hard-link path (flist.c:1191) calls idev_find(), which needs
+		 * dev_tbl. init_hard_links only creates it when protocol_version<30, so
+		 * we force that here. We never idev_destroy() between inputs: idev_find
+		 * keeps a static dev_node pointer into dev_tbl that we cannot reset from
+		 * outside, so the table is process-lived (benign cross-input coupling,
+		 * documented like the lastname[] statics). */
+		int saved = protocol_version;
+		protocol_version = 26;
+		init_hard_links();
+		protocol_version = saved;
+		inited = 1;
+	}
+
+	const uint8_t *body = data + 2;
+	size_t bodysz = size - 2;
+
+	int f = fd_from_bytes(body, bodysz);
+	if (f < 0)
+		return 0;
+
+	struct file_list *flist = fuzz_flist_new();
+
+	fuzz_unwind_armed = 1;
+	if (setjmp(fuzz_unwind_env) == 0) {
+		/* Drive a sequence of entries. Each leading byte from the stream
+		 * (consumed as the xflags) chains entries; we cap the count so a
+		 * tiny input can't spin forever, and stop when the parser unwinds on
+		 * EOF. */
+		for (int i = 0; i < 64; i++) {
+			int flags = read_byte(f);
+			if (flags == 0)
+				break;
+			if (protocol_version >= 28 && (flags & XMIT_EXTENDED_FLAGS))
+				flags |= read_byte(f) << 8;
+			fuzz_recv_file_entry(f, flist, flags);
+		}
+	}
+	fuzz_unwind_armed = 0;
+
+	fuzz_flist_free(flist);
+	close(f);
+	return 0;
+}
diff --git a/fuzz/fuzz_io.c b/fuzz/fuzz_io.c
new file mode 100644
index 000000000..7c6c5e663
--- /dev/null
+++ b/fuzz/fuzz_io.c
@@ -0,0 +1,126 @@
+/*
+ * fuzz_io.c - libFuzzer harness for rsync's io.c wire-reading primitives.
+ *
+ * Targets (all UNTRUSTED-PEER parsers, reference.md Part 3.1):
+ *   read_sum_head, read_varint, read_varlong, read_longint, read_vstring,
+ *   read_int_bounded, read_varint_bounded, read_varint_size, read_int, read_byte.
+ *
+ * PLUMBING (the load-bearing trick):
+ *   io.c's read_buf(f, buf, len) has a fast path:
+ *       if (f != iobuf.in_fd) { safe_read(f, buf, len); ... return; }
+ *   i.e. when the fd is NOT the registered multiplexed input fd, every reader
+ *   pulls bytes straight from that fd via safe_read()/read(2) - no iobuf, no
+ *   multiplexing, no msg framing. We exploit that: iobuf stays at its default
+ *   ({.in_fd = -1}), and we hand the readers a fd backed by the fuzz buffer.
+ *   So real parser logic + real guards run, with bytes 100% attacker-chosen.
+ *
+ *   The fuzz buffer is delivered through a pipe: write all bytes, close the
+ *   write end => reads drain the buffer then hit EOF (read()==0). On EOF
+ *   safe_read returns short, read_buf calls whine_about_eof()->exit_cleanup(),
+ *   which our stub turns into a longjmp back here. So "ran out of bytes" is a
+ *   clean unwind, never a crash.
+ *
+ * ORACLE FIDELITY:
+ *   A correctly-rejected hostile value (over-range count/length/etc.) calls
+ *   exit_cleanup() AFTER the guard => clean longjmp, not a finding. A genuine
+ *   OOB read/write or UB happens during the read itself and trips ASan/UBSan
+ *   BEFORE any guard => real finding. The stubs never mask memory errors.
+ *
+ * GLOBAL INIT CONTRACT (reference.md Part 3.5):
+ *   protocol_version, xfer_sum_len, and the log-level arrays are provided by
+ *   fuzz/stubs.c. The first byte of each input selects protocol_version and
+ *   xfer_sum_len so a single corpus exercises the proto<27 / <30 / >=30
+ *   branches of read_sum_head and the varint30 width choices.
+ */
+
+#include "rsync.h"
+#include <stdint.h>
+#include <stddef.h>
+#include <setjmp.h>
+
+/* From io.c (unmodified object under test). */
+extern int32 read_int(int f);
+extern int32 read_varint(int f);
+extern int64 read_varlong(int f, uchar min_bytes);
+extern int64 read_longint(int f);
+extern int read_vstring(int f, char *buf, int bufsize);
+extern int32 read_int_bounded(int f, int32 lo, int32 hi, const char *what);
+extern int32 read_varint_bounded(int f, int32 lo, int32 hi, const char *what);
+extern size_t read_varint_size(int f, size_t max, const char *what);
+extern uchar read_byte(int f);
+extern void read_sum_head(int f, struct sum_struct *sum);
+
+/* From fuzz/stubs.c */
+extern jmp_buf fuzz_unwind_env;
+extern int fuzz_unwind_armed;
+extern int protocol_version;
+extern int xfer_sum_len;
+
+/* Open a read fd backed by the given bytes: write to a pipe, close write end. */
+static int fd_from_bytes(const uint8_t *data, size_t size)
+{
+	int fds[2];
+	if (pipe(fds) != 0)
+		return -1;
+	/* Inputs larger than the pipe buffer would block a plain write(). There
+	 * is no explicit size cap: the non-blocking write loop below simply stops
+	 * once the pipe is full, so an over-large input is silently truncated. */
+	size_t off = 0;
+	/* Make the write end non-blocking so an over-large input can't deadlock;
+	 * we simply stop feeding once the pipe is full - the reader hits EOF
+	 * after consuming what fit, which is fine for fuzzing. */
+	fcntl(fds[1], F_SETFL, O_NONBLOCK);
+	while (off < size) {
+		ssize_t n = write(fds[1], data + off, size - off);
+		if (n <= 0)
+			break;
+		off += (size_t)n;
+	}
+	close(fds[1]);
+	return fds[0];
+}
+
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
+{
+	if (size < 1)
+		return 0;
+
+	/* Byte 0 chooses protocol + digest length to cover all branches. */
+	uint8_t sel = data[0];
+	static const int protos[] = { 20, 26, 29, 30, 31 };
+	protocol_version = protos[(sel >> 1) % (sizeof protos / sizeof protos[0])];
+	/* xfer_sum_len in a realistic range (4..32); read_sum_head bounds s2length
+	 * against it, and the multiply-overflow guard uses it. */
+	xfer_sum_len = 4 + (sel & 0x1f);
+	if (xfer_sum_len > 32)
+		xfer_sum_len = 32;
+
+	data++; size--;
+
+	int f = fd_from_bytes(data, size);
+	if (f < 0)
+		return 0;
+
+	fuzz_unwind_armed = 1;
+	if (setjmp(fuzz_unwind_env) == 0) {
+		/* Drive a sequence of readers off the same byte stream. Order is
+		 * arbitrary; whichever consumes the bytes first wins, the rest hit
+		 * EOF and longjmp out. read_sum_head is the headline target. */
+		struct sum_struct sum;
+		read_sum_head(f, &sum);
+
+		(void)read_varint(f);
+		(void)read_varlong(f, 3);
+		(void)read_longint(f);
+		(void)read_int_bounded(f, -1000, 1000, "fuzz int");
+		(void)read_varint_bounded(f, 0, 0x7fffffff, "fuzz varint");
+		(void)read_varint_size(f, MAXPATHLEN, "fuzz size");
+
+		char vbuf[MAXPATHLEN];
+		(void)read_vstring(f, vbuf, sizeof vbuf);
+	}
+	fuzz_unwind_armed = 0;
+
+	close(f);
+	return 0;
+}
diff --git a/fuzz/fuzz_recv_discard.c b/fuzz/fuzz_recv_discard.c
new file mode 100644
index 000000000..baa272c55
--- /dev/null
+++ b/fuzz/fuzz_recv_discard.c
@@ -0,0 +1,171 @@
+/*
+ * fuzz_recv_discard.c - regression harness for the receiver discard-path
+ * NULL-pointer dereference (static-findings.md REAL #1, receiver.c:413).
+ *
+ * THE DEFECT
+ * ----------
+ * receiver.c discard_receive_data() calls
+ *     receive_data(f_in, NULL, -1, 0, NULL, -1, file, 0);
+ * i.e.
fname_r == NULL, fd_r == -1, size_r == 0, fname == NULL. + * Inside receive_data(), because fd_r < 0 && size_r == 0, the basis is never + * mapped: mapbuf = NULL (receiver.c:326-327). When the peer then sends a + * block-MATCH token (recv_token() returns a negative value) with a sum-header + * count > 0 so the index check at receiver.c:393 passes, control reaches the + * fork-added block at receiver.c:411-415: + * + * if (!mapbuf) { + * rprintf(FERROR, "got a block match with no basis file for %s [%s]\n", + * full_fname(fname), who_am_i()); // <-- fname == NULL + * exit_cleanup(RERR_PROTOCOL); + * } + * + * full_fname(NULL) dereferences its argument unconditionally at util1.c:1282 + * if (*fn == '/') + * => NULL read => SIGSEGV / ASan SEGV. A remote (sender) peer controls both + * the sum-header count and the token stream, so this is reachable from + * untrusted protocol input while the receiver is merely *discarding* a file. + * + * WHAT THIS HARNESS DOES + * ---------------------- + * receive_data() is `static` in receiver.c, so we reproduce the exact + * discard-path decision sequence here against the REAL, unmodified rsync + * objects on that path: io.o (read_sum_head, recv_token via token.o, read_int, + * read_buf) and util1.o (full_fname). This is a faithful reproduction: every + * function that participates in the bug - read_sum_head, recv_token, the + * mapbuf==NULL branch, and full_fname - is the production function, and the + * NULL deref occurs inside the production full_fname(). + * + * The fname passed to full_fname is hard-NULL, exactly as discard_receive_data + * supplies it. mapbuf is hard-NULL, exactly as receive_data computes it on the + * discard configuration (fd_r=-1, size_r=0). + * + * EXPECTED RESULT + * --------------- + * On the ORIGINAL (vulnerable) code this aborted under ASan with a NULL read in + * full_fname (util1.c:1282). 
The receiver fix makes the discard path (fd == -1, + * fname == NULL) absorb a block-match token benignly -- it is normal protocol, + * since the sender does not know the receiver is discarding -- and restricts the + * "no basis file" protocol error to real-output transfers (fd != -1, fname + * non-NULL, full_fname safe). This harness mirrors that fixed logic and now + * unwinds cleanly (no full_fname(NULL)); it is the standing regression proof and + * a clean-gate target in run-regression.sh. + */ + +#include "rsync.h" +#include +#include +#include + +extern void read_sum_head(int f, struct sum_struct *sum); +extern int32 recv_token(int f, char **data); +extern char *full_fname(const char *fn); + +extern jmp_buf fuzz_unwind_env; +extern int fuzz_unwind_armed; +extern int protocol_version; +extern int do_compression; +extern int xfer_sum_len; + +static int fd_from_bytes(const uint8_t *data, size_t size) +{ + int fds[2]; + if (pipe(fds) != 0) + return -1; + fcntl(fds[1], F_SETFL, O_NONBLOCK); + size_t off = 0; + while (off < size) { + ssize_t n = write(fds[1], data + off, size - off); + if (n <= 0) + break; + off += (size_t)n; + } + close(fds[1]); + return fds[0]; +} + +/* + * Mirror of the receiver discard path inside receive_data(): + * read_sum_head -> mapbuf=NULL -> recv_token loop -> match-token branch -> + * index check -> !mapbuf -> full_fname(fname==NULL). + * Everything reachable from received bytes here is the real rsync code. 
+ */ +static void drive_discard_path(int f_in) +{ + struct sum_struct sum; + struct map_struct *mapbuf; + char *fname = NULL; /* discard_receive_data passes NULL */ + int fd_r = -1; /* discard configuration */ + int fd = -1; /* discard configuration: no output fd */ + OFF_T size_r = 0; /* discard configuration */ + OFF_T offset = 0; + int32 len = 0; + char *data; + int32 i; + + read_sum_head(f_in, &sum); /* peer-controlled sum.count */ + + if (fd_r >= 0 && size_r > 0) + mapbuf = (struct map_struct *)-1; /* unreachable on discard */ + else + mapbuf = NULL; /* exactly receiver.c:327 */ + + while (1) { + data = NULL; + i = recv_token(f_in, &data); /* real token decode */ + if (i == 0) + break; + + if (i > 0) { + /* literal token: discard path ignores the payload */ + if (!data) + exit_cleanup(RERR_PROTOCOL); + continue; + } + + /* block-match token */ + i = -(i + 1); + if (i < 0 || i >= sum.count) /* receiver.c:393 guard */ + exit_cleanup(RERR_PROTOCOL); + + len = sum.blength; + if (i == (int)sum.count - 1 && sum.remainder != 0) + len = sum.remainder; + + if (!mapbuf) { /* receiver.c: !mapbuf branch */ + /* FIXED behavior: on the discard path (fd == -1) a match + * token is normal protocol; absorb it benignly instead of + * calling full_fname(fname==NULL). Only a real-output + * transfer (fd != -1) hard-errors -- and there fname is + * non-NULL, so full_fname is safe. 
*/ + if (fd != -1) { + rprintf(FERROR, "got a block match with no basis file for %s [%s]\n", + full_fname(fname), who_am_i()); + exit_cleanup(RERR_PROTOCOL); + } + offset += len; + continue; + } + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < 1) + return 0; + + protocol_version = 30; + do_compression = CPRES_NONE; /* simple_recv_token path */ + xfer_sum_len = 16; + + int f = fd_from_bytes(data, size); + if (f < 0) + return 0; + + fuzz_unwind_armed = 1; + if (setjmp(fuzz_unwind_env) == 0) + drive_discard_path(f); + fuzz_unwind_armed = 0; + + close(f); + return 0; +} diff --git a/fuzz/fuzz_token.c b/fuzz/fuzz_token.c new file mode 100644 index 000000000..8bbb2f432 --- /dev/null +++ b/fuzz/fuzz_token.c @@ -0,0 +1,85 @@ +/* + * fuzz_token.c - libFuzzer harness for rsync's token/delta decode (token.c). + * + * Target: recv_token() with do_compression == CPRES_NONE, i.e. + * simple_recv_token() (reference.md Part 3.3, lines 282-311). Headline guard: + * the "i > CHUNK_SIZE" length check (token.c ~line 298) that stops a hostile + * peer from driving read_buf() past the static CHUNK_SIZE literal buffer. + * + * Plumbing: identical fd trick to fuzz_io - bytes arrive via a pipe and the + * io.c readers take the non-iobuf safe_read() fast path. recv_token pulls a + * 4-byte length via read_int then a literal run via read_buf. + * + * STATE CAVEAT (documented, not a defect): simple_recv_token keeps a file-local + * `residue`/`buf`. We drive recv_token in a loop until it returns <= 0 (a + * clean chunk boundary where residue==0), so most iterations leave residue==0. + * If an iteration unwinds mid-run (EOF/guard longjmp), residue can carry into + * the next input; that cannot cause a FALSE crash (the i>CHUNK_SIZE guard and + * read_buf's own length still bound every access) - it only adds harmless + * cross-input coupling. 
The CPRES_ZLIB (recv_deflated_token) path is NOT wired + * here precisely because its zlib stream state is not externally resettable + * between iterations; see fuzz/README.md. + * + * Globals: do_compression set per-input; LZ4/ZSTD entry points are stubbed in + * fuzz/stubs.c (never reached under CPRES_NONE) so the object links. + */ + +#include "rsync.h" +#include +#include +#include + +extern int32 recv_token(int f, char **data); +extern int do_compression; + +extern jmp_buf fuzz_unwind_env; +extern int fuzz_unwind_armed; +extern int protocol_version; + +static int fd_from_bytes(const uint8_t *data, size_t size) +{ + int fds[2]; + if (pipe(fds) != 0) + return -1; + fcntl(fds[1], F_SETFL, O_NONBLOCK); + size_t off = 0; + while (off < size) { + ssize_t n = write(fds[1], data + off, size - off); + if (n <= 0) + break; + off += (size_t)n; + } + close(fds[1]); + return fds[0]; +} + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < 1) + return 0; + + protocol_version = 30; + do_compression = CPRES_NONE; /* simple_recv_token path */ + + int f = fd_from_bytes(data, size); + if (f < 0) + return 0; + + fuzz_unwind_armed = 1; + if (setjmp(fuzz_unwind_env) == 0) { + char *out; + int32 n; + /* Consume the whole stream as a sequence of literal tokens until a + * clean end (<=0) or EOF longjmp. Bounded loop guards against a + * pathological 0-length spin. */ + for (int i = 0; i < 1 << 20; i++) { + n = recv_token(f, &out); + if (n <= 0) + break; + } + } + fuzz_unwind_armed = 0; + + close(f); + return 0; +} diff --git a/fuzz/fuzz_xattrs.c b/fuzz/fuzz_xattrs.c new file mode 100644 index 000000000..7fc7f2ff3 --- /dev/null +++ b/fuzz/fuzz_xattrs.c @@ -0,0 +1,123 @@ +/* + * fuzz_xattrs.c - LIVE libFuzzer harness for rsync's receive_xattr() (xattrs.c + * ~774-880): the per-file xattr wire decode (reference.md Part 3.4). 
The + * fuzz-worthy region is the bounded-read loop (xattrs.c 802-872): + * ndx = read_varint(f) (783) + * count = read_varint_bounded(f, 0, MAX_WIRE_XATTR_COUNT) (796) + * name_len = read_varint_size(f, MAX_WIRE_XATTR_NAMELEN) (805) + * datum_len = read_varint_size(f, MAX_WIRE_XATTR_DATALEN) (806) + * overflow guard (SIZE_MAX - dget_len < extra_len || ...) (809) + * read_buf(name) + trailing-'\0' check (813-817) + * read_buf(datum / abbreviated checksum) (818-823) + * then rsync_xal_store() (real: hashtable + checksum subsystem). + * + * LIVE LINK: xattrs.o (compiled -DRSYNC_FUZZ_XATTRS) + the SAME real core as + * fuzz_flist (io/util1/util2/uidlist/exclude/hashtable/checksum/flist/acls/ + * fileio/syscall + lib/*). Only true process-boundary externals are stubbed. + * NOTHING in the xattr parse/alloc/copy/hash path is stubbed. + * + * PLUMBING: same pipe-fd fast path as fuzz_flist (read_buf -> safe_read off the + * attacker bytes; EOF -> exit_cleanup -> longjmp). + * + * INIT (reference.md Part 3.5): protocol_version, xfer_sum_len + xattr_sum_len + * (the abbreviated-datum branch at 822 copies xattr_sum_len bytes), + * preserve_xattrs (1 or 2 - 2 enables rsync.%FOO names), saw_xattr_filter=0 so + * name_is_excluded()/exclude filter state is never reached, am_root (gates the + * HAS_PREFIX namespace-rewrite branches), and xattrs_ndx for the F_XATTR slot. + * + * ORACLE: a correctly-rejected hostile value (out-of-range ndx/count, bad + * trailing NUL, overflow) calls exit_cleanup/overflow_exit AFTER its guard => + * clean longjmp, not a finding. A genuine OOB/UB in a copy or pointer-arith + * step trips ASan/UBSan BEFORE any guard => real finding. + */ + +#include "rsync.h" +#include +#include +#include + +extern void fuzz_receive_xattr(int f, struct file_struct *file); +extern struct file_struct *fuzz_xattr_file_new(alloc_pool_t pool); + +/* Reuse fuzz_flist's pool helpers for a throwaway flist + pool. 
*/ +extern struct file_list *fuzz_flist_new(void); +extern void fuzz_flist_free(struct file_list *flist); + +extern jmp_buf fuzz_unwind_env; +extern int fuzz_unwind_armed; + +extern int protocol_version, preserve_xattrs, saw_xattr_filter, am_root; +extern int xattr_sum_len, xfer_sum_len, file_extra_cnt, xattrs_ndx; +extern int numeric_ids, inc_recurse, am_sender, am_server; + +void parse_checksum_choice(int); +void init_flist(void); + +static int fd_from_bytes(const uint8_t *data, size_t size) +{ + int fds[2]; + if (pipe(fds) != 0) + return -1; + fcntl(fds[1], F_SETFL, O_NONBLOCK); + size_t off = 0; + while (off < size) { + ssize_t n = write(fds[1], data + off, size - off); + if (n <= 0) + break; + off += (size_t)n; + } + close(fds[1]); + return fds[0]; +} + +static int inited; + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size < 1) + return 0; + + uint8_t sel = data[0]; + static const int protos[] = { 28, 29, 30, 31 }; + protocol_version = protos[(sel >> 1) & 3]; + preserve_xattrs = (sel & 0x01) ? 2 : 1; /* >=1 required; 2 enables %FOO names */ + am_root = (sel & 0x04) ? 1 : 0; /* gates namespace-prefix rewrite branches */ + saw_xattr_filter = 0; /* keep name_is_excluded()/filter state out */ + numeric_ids = 1; + inc_recurse = 0; + am_sender = 0; + am_server = 0; + + if (!inited) { + parse_checksum_choice(0); /* sets xattr_sum_len via file/xfer sums */ + /* setup_protocol assigns xattrs_ndx; receive_xattr writes F_XATTR via + * it. As receiver with only -X: a single trailing extra slot. 
*/ + file_extra_cnt = 1; + xattrs_ndx = 1; + inited = 1; + } + + const uint8_t *body = data + 1; + size_t bodysz = size - 1; + + int f = fd_from_bytes(body, bodysz); + if (f < 0) + return 0; + + struct file_list *flist = fuzz_flist_new(); + struct file_struct *file = fuzz_xattr_file_new(flist->file_pool); + + fuzz_unwind_armed = 1; + if (setjmp(fuzz_unwind_env) == 0) { + /* Drive a sequence of receive_xattr calls from one input so the + * rsync_xal_l accumulation / find_matching_xattr dedup path (a prime + * bug site) is exercised across entries. */ + for (int i = 0; i < 32; i++) + fuzz_receive_xattr(f, file); + } + fuzz_unwind_armed = 0; + + fuzz_flist_free(flist); + close(f); + return 0; +} diff --git a/fuzz/globals.c b/fuzz/globals.c new file mode 100644 index 000000000..a3995bdfd --- /dev/null +++ b/fuzz/globals.c @@ -0,0 +1,159 @@ +/* + * fuzz/globals.c - rsync option/state globals and a handful of leaf functions + * that the REAL flist.o / xattrs.o / acls.o / uidlist.o / exclude.o objects + * reference but that normally live in options.c / main.c / loadparm.c / hlink.c + * / log.c (TUs we deliberately do NOT link, since recv_file_entry / + * receive_xattr never reach their bodies). + * + * These are GENUINE process-state globals, not parse/alloc/copy logic, so + * defining them here does NOT mask any bug: the harness sets the load-bearing + * ones (preserve_*, *_ndx, protocol_version, ...) per-input to match the real + * receiver's configured state. The rest default to 0/NULL exactly as a freshly + * started receiver before option parsing. + * + * The few FUNCTIONS here are leaves outside recv_file_entry's reachable graph + * (hardlink table setup, name-conversion subprocess, loadparm module lookup, + * name_num registries). Each aborts loudly if ever actually called, so the + * oracle still fires instead of silently returning a bogus value. + */ + +#include "rsync.h" + +/* Defined in stubs.c; needed by the idev_find / init_hard_links copies below. 
*/ +extern int am_sender, protocol_version, inc_recurse; + +/* ---- option / mode globals (harness overrides the load-bearing ones) ---- */ +/* preserve_hard_links lives in stubs.c; sanitize_paths in util1.o. */ +int preserve_links, preserve_devices, preserve_specials; +int preserve_uid, preserve_gid, preserve_acls, preserve_xattrs; +int preserve_perms, preserve_executability; +int relative_paths, munge_symlinks; +int uid_ndx, gid_ndx, acls_ndx, xattrs_ndx, atimes_ndx, depth_ndx, pathname_ndx, unsort_ndx; +int crtimes_ndx, preserve_atimes, preserve_crtimes; +int numeric_ids, always_checksum, recurse, xfer_dirs, one_file_system, copy_devices; +int copy_links, copy_dirlinks, copy_unsafe_links; +int omit_link_times; +int delete_during, delete_excluded, delete_mode; +int implied_dirs, prune_empty_dirs, non_perishable_cnt, ignore_perishable; +int need_unsorted_flist, use_safe_inc_flist, xmit_id0_names, proper_seed_order; +int sender_keeps_checksum, sender_symlink_iconv, use_qsort; +int am_chrooted, am_daemon, dry_run, quiet, ignore_errors, missing_args; +int modify_window, whole_file, sparse_files, inplace, preallocate_files; +/* no_acl_syscall_error lives in lib/sysacls.o */ +int cvs_exclude, output_needs_newline; +int human_readable = 0; +int do_fsync = 0; +int open_noatime = 0; +int orig_umask = 022; +int our_uid, our_gid; +int filesfrom_fd = -1; +int read_only = 0; + +char *usermap = NULL, *groupmap = NULL; +char *module_dir = NULL; +char *partial_dir = NULL; +char *filesfrom_host = NULL; +unsigned int module_dirlen = 0; + +/* xattr_sum_nni / xattr_sum_len normally come from compat.c; the abbreviated + * xattr-datum branch (xattrs.c:822) reads xattr_sum_len bytes. Default to the + * MD5 length; fuzz_xattrs overrides. */ +struct name_num_item *xattr_sum_nni = NULL; +int xattr_sum_len = 16; + +/* checksum_choice normally set by validate_choice_vs_env; checksum.c reads it. 
*/ +char *checksum_choice = NULL; + +/* chmod_modes is a struct chmod_mode_struct* the tweak path consults. */ +struct chmod_mode_struct *chmod_modes = NULL; + +/* ---- leaf functions outside recv_file_entry / receive_xattr graph ---- */ + +NORETURN static void fuzz_unreachable(const char *who) +{ + /* If the parser path ever truly reaches one of these, that is itself a + * finding (our reachability assumption was wrong) - abort so ASan/the + * fuzzer records it rather than silently continuing on a bogus return. */ + rprintf(FERROR, "fuzz: unreachable leaf %s reached\n", who); + abort(); +} + +/* Faithful copies of hlink.c's device/inode hashtable helpers. recv_file_entry + * reaches idev_find() on the proto<30 hard-link path (flist.c:1191), so these + * must be REAL (they use the real hashtable.o); a stub would mask any OOB in + * that path. Linking all of hlink.o would drag in the receiver/generator graph, + * so we lift just these three + their statics verbatim. */ +static void *data_when_new = ""; +static struct hashtable *dev_tbl; + +void init_hard_links(void) +{ + if (am_sender || protocol_version < 30) + dev_tbl = hashtable_create(16, HT_KEY64); + /* inc_recurse/prior_hlinks branch is unreached: harness keeps inc_recurse=0 */ +} + +struct ht_int64_node *idev_find(int64 dev, int64 ino) +{ + static struct ht_int64_node *dev_node = NULL; + if (!dev_node || dev_node->key != dev+1) { + dev_node = hashtable_find(dev_tbl, dev+1, data_when_new); + if (dev_node->data == data_when_new) + dev_node->data = hashtable_create(512, HT_KEY64); + } + return hashtable_find(dev_node->data, ino, (void*)-1L); +} + +void idev_destroy(void) +{ + int i; + if (!dev_tbl) + return; + for (i = 0; i < dev_tbl->size; i++) { + struct ht_int32_node *node = HT_NODE(dev_tbl, dev_tbl->nodes, i); + if (node->data) + hashtable_destroy(node->data); + } + hashtable_destroy(dev_tbl); + dev_tbl = NULL; +} + +BOOL namecvt_call(const char *cmd, const char **name_p, id_t *id_p) +{ (void)cmd; 
(void)name_p; (void)id_p; fuzz_unreachable("namecvt_call"); } +int namecvt_pid = 0; + +/* Faithful copies of compat.c's registry walks (reached by init_checksum_choices + * during the legitimate checksum-negotiation init the receiver performs). Pure + * list lookups, no I/O - replicating them masks nothing. */ +struct name_num_item *get_nni_by_name(struct name_num_obj *nno, const char *name, int len) +{ + struct name_num_item *nni; + if (len < 0) + len = strlen(name); + for (nni = nno->list; nni->name; nni++) { + if (nni->num == CSUM_gone) + continue; + if (strncasecmp(name, nni->name, len) == 0 && nni->name[len] == '\0') + return nni; + } + return NULL; +} +struct name_num_item *get_nni_by_num(struct name_num_obj *nno, int num) +{ + struct name_num_item *nni; + for (nni = nno->list; nni->name; nni++) { + if (num == nni->num) + return nni; + } + return NULL; +} +void validate_choice_vs_env(int ntype, int num1, int num2) +{ (void)ntype; (void)num1; (void)num2; } + +const char *default_cvsignore(void) { return ""; } + +char *lp_name(int m) { (void)m; return "fuzz"; } +BOOL lp_use_chroot(int m) { (void)m; return 0; } +BOOL lp_ignore_nonreadable(int m) { (void)m; return 0; } + +void rflush(enum logcode code) { (void)code; } diff --git a/fuzz/lsan-suppressions.txt b/fuzz/lsan-suppressions.txt new file mode 100644 index 000000000..f48d9dfe0 --- /dev/null +++ b/fuzz/lsan-suppressions.txt @@ -0,0 +1,24 @@ +# LeakSanitizer suppressions for the rsync wire-parser fuzz harnesses. +# +# These name EXACTLY the two intentional process-lifetime static caches the +# code review identified. Both are by-design shared buffers whose lifetime is +# the process and which the receive-only harnesses cannot free from outside +# (they are file-static). They are NOT per-entry leaks and NOT memory-safety +# defects; freeing them would introduce a real use-after-free. 
+# +# Suppressions are by FUNCTION NAME (the allocation site frame), not a blanket +# detect_leaks=0, so any DIFFERENT leak -- a genuine new regression anywhere +# else in the parser call graph -- is still reported and still fails the run. +# +# 1) flist.c `lastdir`: recv_file_entry assigns a fresh new_array() to the +# file-static `lastdir` whenever a new directory prefix appears, without +# freeing the previous buffer, because consecutive same-directory +# file_structs SHARE it via file->dirname. Freeing on replace would dangle +# every earlier file->dirname. Reclaimed by the OS at process exit. +leak:recv_file_entry + +# 2) xattrs.c `rsync_xal_l` dedup cache: receive_xattr's storage tail moves the +# parsed items into the global process-lifetime dedup cache rsync_xal_l via +# rsync_xal_store. Only partially trimmed by uncache_tmp_xattrs, a +# generator/sender path this receive-only harness never exercises. +leak:receive_xattr diff --git a/fuzz/recv_discard_stubs.c b/fuzz/recv_discard_stubs.c new file mode 100644 index 000000000..40dc76096 --- /dev/null +++ b/fuzz/recv_discard_stubs.c @@ -0,0 +1,65 @@ +/* + * recv_discard_stubs.c - extra link-time symbols for fuzz_recv_discard. + * + * fuzz_recv_discard links the REAL util1.o (for full_fname) on top of the + * shared stubs.c. util1.o is a whole translation unit, so it references many + * symbols from other rsync TUs that are NEVER on the discard-path / full_fname + * code path we exercise. We supply them here so the object links. + * + * IMPORTANT: none of these are reached by the harness. full_fname(NULL) reads + * *fn (util1.c:1282) BEFORE touching module_id/lp_name/curr_dir, so the NULL + * deref fires first. The function stubs abort() if ever called, which would + * surface as an obvious failure rather than a silent wrong answer (oracle + * fidelity, same spirit as stubs.c). 
+ * + * The globals are given the same neutral defaults rsync uses at start-up: + * module_id = -1 (already in stubs.c) -> full_fname skips lp_name() + * module_dirlen = 0 -> full_fname's curr_dir math is inert + * so full_fname is deterministic up to the (crashing) *fn read. + */ + +#include "rsync.h" +#include + +/* ---- globals util1.o references (neutral start-up values) ---- */ +unsigned int module_dirlen = 0; +char *module_dir = NULL; +char *partial_dir = NULL; +filter_rule_list daemon_filter_list = { .debug_type = " [daemon]" }; + +int am_daemon = 0; +int am_chrooted = 0; +int dry_run = 0; +int relative_paths = 0; +int modify_window = 0; +int preserve_xattrs = 0; +int preallocate_files = 0; +int omit_link_times = 0; + +/* ---- functions util1.o references but the full_fname path never calls ---- */ +#define NEVER(name) do { (void)(name); abort(); } while (0) + +char *lp_name(int module_id_) { (void)module_id_; NEVER("lp_name"); return NULL; } +int check_filter(filter_rule_list *lp, enum logcode code, const char *name, int name_is_dir) +{ (void)lp; (void)code; (void)name; (void)name_is_dir; NEVER("check_filter"); return 0; } +int wildmatch(const char *p, const char *t) { (void)p; (void)t; NEVER("wildmatch"); return 0; } +int copy_xattrs(const char *s, const char *d) { (void)s; (void)d; NEVER("copy_xattrs"); return 0; } +int secure_relative_open(const char *b, const char *r, int fl, mode_t m) +{ (void)b; (void)r; (void)fl; (void)m; NEVER("secure_relative_open"); return -1; } + +OFF_T do_fallocate(int fd, OFF_T off, OFF_T len) { (void)fd; (void)off; (void)len; NEVER("do_fallocate"); return -1; } +int do_fstat(int fd, STRUCT_STAT *st) { (void)fd; (void)st; NEVER("do_fstat"); return -1; } +int do_fsync(int fd) { (void)fd; NEVER("do_fsync"); return -1; } +int do_ftruncate(int fd, OFF_T sz) { (void)fd; (void)sz; NEVER("do_ftruncate"); return -1; } +int do_stat(const char *p, STRUCT_STAT *st) { (void)p; (void)st; NEVER("do_stat"); return -1; } +int do_lstat_at(const 
char *p, STRUCT_STAT *st) { (void)p; (void)st; NEVER("do_lstat_at"); return -1; } +int do_mkdir(char *p, mode_t m) { (void)p; (void)m; NEVER("do_mkdir"); return -1; } +int do_mkdir_at(char *p, mode_t m) { (void)p; (void)m; NEVER("do_mkdir_at"); return -1; } +int do_rmdir_at(const char *p) { (void)p; NEVER("do_rmdir_at"); return -1; } +int do_unlink_at(const char *p) { (void)p; NEVER("do_unlink_at"); return -1; } +int do_rename_at(const char *o, const char *n) { (void)o; (void)n; NEVER("do_rename_at"); return -1; } +int do_open_at(const char *p, int fl, mode_t m) { (void)p; (void)fl; (void)m; NEVER("do_open_at"); return -1; } +int do_open_nofollow(const char *p, int fl) { (void)p; (void)fl; NEVER("do_open_nofollow"); return -1; } +int do_utimensat_at(const char *p, STRUCT_STAT *st) { (void)p; (void)st; NEVER("do_utimensat_at"); return -1; } +int do_lutimes(const char *p, STRUCT_STAT *st) { (void)p; (void)st; NEVER("do_lutimes"); return -1; } +int do_utimes(const char *p, STRUCT_STAT *st) { (void)p; (void)st; NEVER("do_utimes"); return -1; } diff --git a/fuzz/run-regression.sh b/fuzz/run-regression.sh new file mode 100755 index 000000000..8c2f3b1c9 --- /dev/null +++ b/fuzz/run-regression.sh @@ -0,0 +1,70 @@ +#!/bin/sh +# fuzz/run-regression.sh - CI regression mode (Workstream 3 calls this). +# +# Builds every harness and replays its committed seed corpus under +# ASan/UBSan for a BOUNDED time. Exits NON-ZERO on the first crash so CI +# fails on any regression. It does NOT fuzz open-endedly; it is a fast, +# deterministic gate (libFuzzer in corpus-replay mode + a short top-up run). 
+# +# Run inside the nix shell that provides clang/libFuzzer: +# nix develop path:$HOME/git/rsync --command ./fuzz/run-regression.sh +# +# Env knobs: +# FUZZ_MAX_TIME seconds of top-up fuzzing per target after replay (default 30) +# FUZZ_TARGETS space-separated subset of targets (default: all built) + +set -eu + +cd "$(dirname "$0")" + +MAX_TIME="${FUZZ_MAX_TIME:-30}" +TARGETS="${FUZZ_TARGETS:-fuzz_io fuzz_token fuzz_recv_discard fuzz_deflated_token fuzz_flist fuzz_xattrs}" + +# Ensure the rsync wire-parser objects exist & are sanitizer-instrumented. +# (CI is expected to have configured with the campaign CFLAGS already.) +# io.o feeds fuzz_io; token.o feeds fuzz_token; util1.o (real full_fname) feeds +# fuzz_recv_discard's discard-path regression; the flist/xattrs harnesses pull +# in a broad real call graph, so build those objects too. +make -C .. io.o token.o util1.o util2.o uidlist.o exclude.o hashtable.o checksum.o \ + syscall.o acls.o xattrs.o fileio.o chmod.o \ + lib/wildmatch.o lib/compat.o lib/snprintf.o lib/mdfour.o lib/md5.o \ + lib/permstring.o lib/pool_alloc.o lib/sysacls.o lib/sysxattrs.o >/dev/null + +make all + +# Narrow LSan suppressions for fuzz_flist / fuzz_xattrs: those harnesses drive +# recv_file_entry / receive_xattr, which populate two intentional +# process-lifetime static caches (flist.c `lastdir`, xattrs.c `rsync_xal_l`). +# Those are by-design and cannot be freed from a receive-only harness, so LSan +# would otherwise report them on every run and drown a real new leak. The +# suppressions name ONLY those two allocation-site functions (not a blanket +# detect_leaks=0), so any OTHER leak still fails the run. See lsan-suppressions.txt. +LSAN_SUPP="$(pwd)/lsan-suppressions.txt" + +rc=0 +for t in $TARGETS; do + echo "=== regression: $t ===" + corpus="corpus/$t" + mkdir -p "$corpus" + # Only the flist/xattrs harnesses touch the documented static caches. 
+ case "$t" in + fuzz_flist|fuzz_xattrs) tlsan="suppressions=$LSAN_SUPP" ;; + *) tlsan="" ;; + esac + # 1) Deterministic replay of every committed seed (runs=0 => just the corpus). + if ! LSAN_OPTIONS="$tlsan" ./"$t" -runs=0 "$corpus"; then + echo "FAIL: $t crashed replaying seed corpus" >&2 + rc=1 + continue + fi + # 2) Bounded top-up fuzz seeded from the corpus; any crash file => fail. + if ! LSAN_OPTIONS="$tlsan" ./"$t" -max_total_time="$MAX_TIME" -print_final_stats=0 "$corpus"; then + echo "FAIL: $t crashed during bounded fuzz run" >&2 + rc=1 + fi +done + +if [ "$rc" -eq 0 ]; then + echo "All fuzz regression targets clean." +fi +exit "$rc" diff --git a/fuzz/stubs.c b/fuzz/stubs.c new file mode 100644 index 000000000..625d7d8dd --- /dev/null +++ b/fuzz/stubs.c @@ -0,0 +1,193 @@ +/* + * fuzz/stubs.c - minimal external symbols required to link rsync wire-parser + * object files (io.o, token.o, ...) into a standalone libFuzzer harness. + * + * Strategy (see fuzz/README.md "Linking / stubbing strategy"): + * - The object under test (io.o etc.) is compiled UNMODIFIED from rsync's + * own sources with the campaign sanitizer CFLAGS. + * - Everything io.o references that we do NOT want to drag in (logging, + * cleanup, the rest of rsync's translation units) is supplied here. + * - The single most important stub is _exit_cleanup(): rsync's wire-range + * guards call exit_cleanup(RERR_*) on a malformed/over-range value. In the + * real program that terminates the process; in the fuzzer we longjmp back + * to the harness so a *correctly rejected* hostile input is NOT counted as + * a crash. A genuine memory bug still trips ASan/UBSan BEFORE any guard + * fires, so this preserves oracle fidelity: guard-hit => clean unwind, + * real OOB => sanitizer abort. + * + * No rsync source file is modified by this workstream; all shims live here. + */ + +#include "rsync.h" +#include + +/* Harness sets this up; exit_cleanup / out_of_memory / overflow_exit unwind to it. 
*/ +jmp_buf fuzz_unwind_env; +int fuzz_unwind_armed; + +/* ------- functions io.o (and friends) call that we shim ------- */ + +NORETURN void _exit_cleanup(int code, const char *file, int line) +{ + (void)code; (void)file; (void)line; + if (fuzz_unwind_armed) + longjmp(fuzz_unwind_env, 1); + /* Not armed: abort loudly rather than silently mis-behaving. */ + _exit(99); +} + +__attribute__((weak)) NORETURN void _out_of_memory(const char *msg, const char *file, int line) +{ + (void)msg; (void)file; (void)line; + if (fuzz_unwind_armed) + longjmp(fuzz_unwind_env, 2); + _exit(99); +} + +__attribute__((weak)) NORETURN void _overflow_exit(const char *msg, const char *file, int line) +{ + (void)msg; (void)file; (void)line; + if (fuzz_unwind_armed) + longjmp(fuzz_unwind_env, 3); + _exit(99); +} + +void rprintf(enum logcode code, const char *format, ...) { (void)code; (void)format; } +void rsyserr(enum logcode code, int errcode, const char *format, ...) +{ (void)code; (void)errcode; (void)format; } +void rwrite(enum logcode code, const char *buf, int len, int is_utf8) +{ (void)code; (void)buf; (void)len; (void)is_utf8; } + +const char *who_am_i(void) { return "fuzz"; } + +__attribute__((weak)) char *do_big_num(int64 num, int human_flag, const char *fract) +{ + static char buf[32]; + (void)human_flag; (void)fract; + snprintf(buf, sizeof buf, "%lld", (long long)num); + return buf; +} + +__attribute__((weak)) int msleep(int t) { (void)t; return 0; } + +/* my_alloc: a self-contained allocator so ASan tracks every wire-driven + * allocation. Mirrors rsync's semantics closely enough for the parsers: + * honours max_alloc, returns NULL when file==NULL on over-limit (callers like + * EXPAND_ITEM_LIST rely on that), zero-fills on the calloc sentinel. */ +/* WEAK: real util2.o defines do_calloc + my_alloc; when fuzz_flist/fuzz_xattrs + * link util2.o those strong defs win. fuzz_io/fuzz_token (no util2.o) fall back + * to these. 
Same weakening applies to the few globals flist.o itself defines. */ +__attribute__((weak)) char *do_calloc = "42"; +extern size_t max_alloc; + +__attribute__((weak)) void *my_alloc(void *ptr, size_t num, size_t size, const char *file, int line) +{ + (void)line; + if (size && num >= max_alloc / size) { + if (!file) + return NULL; + _exit_cleanup(RERR_MALLOC, file, line); + } + if (!ptr || ptr == do_calloc) + return calloc(num ? num : 1, size ? size : 1); + return realloc(ptr, num * size); +} + +/* ------- global state io.o references; defaults are fine for the parsers ------- */ + +struct stats stats; +size_t max_alloc = 1u << 30; /* 1 GiB cap so over-range counts still get rejected by guards */ + +int protocol_version = PROTOCOL_VERSION; +__attribute__((weak)) int xfer_sum_len = 16; /* MD5-ish default; flist/checksum may override */ +int file_extra_cnt = 0; + +int am_server = 0, am_sender = 0, am_generator = 0, am_receiver = 0, am_root = 0; +int local_server = 0, daemon_connection = 0; +int inc_recurse = 0; +__attribute__((weak)) int io_error = 0; /* flist.o defines this strong */ +int io_timeout = 0; +int batch_fd = -1; +int eol_nulls = 0; +int read_batch = 0; +int list_only = 0; +int protect_args = 0; +int checksum_seed = 0; +__attribute__((weak)) int flist_eof = 0; /* flist.o strong */ +int compat_flags = 0; +__attribute__((weak)) int file_total = 0; /* flist.o strong */ +__attribute__((weak)) int file_old_total = 0; /* flist.o strong */ +int preserve_hard_links = 0; +int remove_source_files = 0; +int extra_flist_sending_enabled = 0; +int msgs2stderr = 0; +int flush_ok_after_signal = 0; +int bwlimit = 0; +size_t bwlimit_writemax = 0; +int stop_at_utime = 0; + +/* INFO_GTE / DEBUG_GTE index these directly, so they must be real zero arrays + * (all log verbosity off => parser hot path, no rprintf side effects). 
*/ +short info_levels[COUNT_INFO]; +short debug_levels[COUNT_DEBUG]; + +__attribute__((weak)) struct file_list *cur_flist = NULL; /* flist.o strong */ + +/* ------- functions io.o references but the parser paths never reach ------- */ + +void check_for_finished_files(int itemizing, enum logcode code, int check_redo) +{ (void)itemizing; (void)code; (void)check_redo; } + +/* flist_for_ndx lives in rsync.c, which NO harness links, so this stub is the + * only definition. The reachable receive-side parser paths exercised here never + * call it (recv_file_entry's proto<30 hardlink path uses idev_find, not + * flist_for_ndx). A NULL return would silently diverge from real receiver + * behavior and could mask a bug, so instead of returning fake data we abort + * loudly: if a future parser path ever reaches it, the harness fails the run + * rather than carrying on with wrong state. (Not made weak: there is no real + * flist_for_ndx object to override it; weak would just leave NULL behind.) */ +struct file_list *flist_for_ndx(int ndx, const char *fatal_error_msg) +{ + fprintf(stderr, "fuzz/stubs.c: flist_for_ndx(%d, %s) reached -- the " + "harness does not link the real implementation; aborting rather " + "than returning fake flist data.\n", + ndx, fatal_error_msg ? 
fatal_error_msg : "(null)"); + abort(); +} + +__attribute__((weak)) struct file_list *recv_file_list(int f, int dir_ndx) { (void)f; (void)dir_ndx; return NULL; } +__attribute__((weak)) void send_extra_file_list(int f, int at_least) { (void)f; (void)at_least; } + +__attribute__((weak)) int flist_ndx_pop(flist_ndx_list *lp) { (void)lp; return -1; } +__attribute__((weak)) void flist_ndx_push(flist_ndx_list *lp, int ndx) { (void)lp; (void)ndx; } + +void log_delete(const char *fname, int mode) { (void)fname; (void)mode; } +void match_hard_links(struct file_list *flist) { (void)flist; } +void successful_send(int ndx) { (void)ndx; } +__attribute__((weak)) int glob_expand(const char *arg, char ***argv_p, int *argc_p, int *maxargs_p) +{ (void)arg; (void)argv_p; (void)argc_p; (void)maxargs_p; return 0; } +__attribute__((weak)) void glob_expand_module(char *base1, char *arg, char ***argv_p, int *argc_p, int *maxargs_p) +{ (void)base1; (void)arg; (void)argv_p; (void)argc_p; (void)maxargs_p; } + +__attribute__((weak)) void add_implied_include(const char *arg, int skip_daemon_module) { (void)arg; (void)skip_daemon_module; } +__attribute__((weak)) void free_implied_include_partial_string(void) {} +__attribute__((weak)) void implied_include_partial_string(const char *s_start, const char *s_end) { (void)s_start; (void)s_end; } + +int iconvbufs(iconv_t ic, xbuf *in, xbuf *out, int flags) +{ (void)ic; (void)in; (void)out; (void)flags; return 0; } +iconv_t ic_send = (iconv_t)-1; +iconv_t ic_recv = (iconv_t)-1; + +char *filesfrom_convert = NULL; + +/* ------- token.c (compression) globals/shims ------- */ +/* do_compression is set per-input by fuzz_token; CPRES_NONE => simple path. 
*/ +int do_compression = 0; +int do_compression_level = 0; +int do_compression_threads = 0; +int module_id = -1; +char *skip_compress = NULL; + +char *lp_dont_compress(int module_id_) { (void)module_id_; return NULL; } +__attribute__((weak)) char *map_ptr(struct map_struct *map, OFF_T offset, int32 len) +{ (void)map; (void)offset; (void)len; return NULL; } diff --git a/hlink.c b/hlink.c index eb36730fd..330880b5e 100644 --- a/hlink.c +++ b/hlink.c @@ -133,9 +133,11 @@ static void match_gnums(int32 *ndx_list, int ndx_count) struct file_list *flist; prev = IVAL(node->data, 1); flist = flist_for_ndx(prev, NULL); - if (flist) + if (flist) { + if (prev < flist->ndx_start) + exit_cleanup(RERR_PROTOCOL); flist->files[prev - flist->ndx_start]->flags &= ~FLAG_HLINK_LAST; - else { + } else { /* We skipped all prior files in this * group, so mark this as a "first". */ file->flags |= FLAG_HLINK_FIRST; @@ -255,6 +257,8 @@ static char *check_prior(struct file_struct *file, int gnum, if (prev_ndx < 0 || (flist = flist_for_ndx(prev_ndx, NULL)) == NULL) break; + if (prev_ndx < flist->ndx_start) + exit_cleanup(RERR_PROTOCOL); fp = flist->files[prev_ndx - flist->ndx_start]; if (!(fp->flags & FLAG_SKIP_HLINK)) { *prev_ndx_p = prev_ndx; @@ -507,6 +511,8 @@ void finish_hard_link(struct file_struct *file, const char *fname, int fin_ndx, while ((ndx = prev_ndx) >= 0) { int val; flist = flist_for_ndx(ndx, "finish_hard_link"); + if (ndx < flist->ndx_start) + exit_cleanup(RERR_PROTOCOL); file = flist->files[ndx - flist->ndx_start]; file->flags = (file->flags & ~FLAG_HLINK_FIRST) | FLAG_HLINK_DONE; prev_ndx = F_HL_PREV(file); diff --git a/options.c b/options.c index 3c2d23526..2cc306a53 100644 --- a/options.c +++ b/options.c @@ -972,6 +972,7 @@ static void set_refuse_options(void) || strcmp("checksum-seed", longName) == 0 || strcmp("copy-devices", longName) == 0 /* disable wild-match (it gets refused below) */ || strcmp("write-devices", longName) == 0 /* disable wild-match (it gets refused 
below) */ + || strcmp("copy-as", longName) == 0 /* disable wild-match (it gets refused below) */ || strcmp("log-format", longName) == 0 /* aka out-format (NOT log-file-format) */ || strcmp("sender", longName) == 0 || strcmp("server", longName) == 0) @@ -984,6 +985,7 @@ static void set_refuse_options(void) if (am_daemon) { /* Refused by default, but can be accepted via a negated exact match. */ parse_one_refuse_match(0, "copy-devices", list_end); parse_one_refuse_match(0, "write-devices", list_end); + parse_one_refuse_match(0, "copy-as", list_end); } while (1) { @@ -3131,6 +3133,13 @@ char *check_for_hostspec(char *s, char **host_ptr, int *port_ptr) { char *path; + /* Establish the default port value up front. parse_hostspec() only + * writes *port_ptr when it parses an explicit port, and the non-URL + * call below passes a NULL port_ptr, so without this the reads at the + * "!*port_ptr" tests below can observe an uninitialized caller value. */ + if (port_ptr) + *port_ptr = 0; + if (port_ptr && strncasecmp(URL_PREFIX, s, strlen(URL_PREFIX)) == 0) { *host_ptr = parse_hostspec(s + strlen(URL_PREFIX), &path, port_ptr); if (*host_ptr) { diff --git a/receiver.c b/receiver.c index 7d429fe84..44d6e56fd 100644 --- a/receiver.c +++ b/receiver.c @@ -402,16 +402,31 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, stats.matched_data += len; - /* A block match can only be honored if we actually mapped the - * basis. If we didn't (basis open failed), the sender should - * never have been told a basis existed -- treat it as a protocol - * inconsistency rather than silently omitting these bytes from - * the verification checksum (which yields a spurious failure) or - * leaving a hole in the output. 
*/ + /* A block match with no mapped basis is a protocol inconsistency + * ONLY when we are actually producing output (fd != -1): the + * generator told the sender a basis existed but the receiver could + * not open it, so honoring the match would silently omit these + * bytes from the verification checksum (a spurious failure) or + * leave a hole in the output. Fail cleanly in that case. + * + * On the DISCARD path (fd == -1, fname == NULL) there is no output + * and no verification: discard_receive_data() deliberately drains a + * delta the receiver never intends to write (basis fstat failed, + * basis is a directory, output open failed, batch skip, ...). The + * sender does not know the data is being discarded and streams an + * ordinary delta, so a match token here is NORMAL protocol, not + * malformed. Absorb it benignly (advance the offset and continue), + * exactly as the pre-31fbb17d "if (mapbuf)" guards did -- erroring + * would wrongly break legitimate transfers, and full_fname(fname) + * with fname==NULL would dereference NULL (remote DoS). 
*/ if (!mapbuf) { - rprintf(FERROR, "got a block match with no basis file for %s [%s]\n", - full_fname(fname), who_am_i()); - exit_cleanup(RERR_PROTOCOL); + if (fd != -1) { + rprintf(FERROR, "got a block match with no basis file for %s [%s]\n", + full_fname(fname), who_am_i()); + exit_cleanup(RERR_PROTOCOL); + } + offset += len; + continue; } if (DEBUG_GTE(DELTASUM, 3)) { diff --git a/syscall.c b/syscall.c index 0748d9988..27233df54 100644 --- a/syscall.c +++ b/syscall.c @@ -1960,7 +1960,7 @@ int secure_mkstemp(char *template, mode_t perms) errno = EINVAL; return -1; } - if (strncmp(template, "../", 3) == 0 || strstr(template, "/../")) { + if (path_has_dotdot_component(template)) { errno = EINVAL; return -1; } diff --git a/testsuite/content-fidelity_test.py b/testsuite/content-fidelity_test.py new file mode 100644 index 000000000..17eca656d --- /dev/null +++ b/testsuite/content-fidelity_test.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +"""Workstream-1 invariant group A -- content fidelity. + +These tests assert properties the spec + source promise INDEPENDENTLY of any +known bug: + + A1: after a transfer, the destination bytes are exactly the source bytes. + receiver.c receive_data() (~478-483) computes a whole-file checksum and + returns 0 (failure) on mismatch, so a completed transfer that rsync + reports as successful must be byte-correct. We assert byte-correctness + of the result ONLY -- never a specific digest algorithm, whose strength + is protocol-dependent. + + A2: quick-check semantics. generator.c quick_check_ok() (~623-646): + - default (size+mtime): a dest with matching size+mtime is SKIPPED + even if its content differs; + - --size-only: matching size alone SKIPS; + - -c/--checksum (always_checksum): content is compared, so a same + size+mtime-but-different-content dest is RE-SENT; + - -I/--ignore-times: the mtime fast-path is forced off, so the file + is RE-SENT regardless of size+mtime. 
+ We assert each documented behavior by checking whether the dest content + was (or was not) overwritten with the source content. +""" + +import os + +from rsyncfns import ( + FROMDIR, SCRATCHDIR, TODIR, + assert_same, make_data_file, makepath, rmtree, run_rsync, test_fail, +) + + +# -------------------------------------------------------------------------- +# A1 -- post-transfer whole-file content is byte-correct. +# -------------------------------------------------------------------------- +# A mix of sizes (including 0 and a multi-block file) and both whole-file and +# delta paths. For the delta path we seed the destination with a slightly +# different prior version so rsync must reconstruct via block matching, then +# verify the reconstructed bytes equal the source exactly. + +rmtree(FROMDIR) +rmtree(TODIR) +makepath(FROMDIR) + +sizes = { + 'empty': 0, + 'tiny': 3, + 'oneblock': 700, + 'multiblock': 300000, +} +for name, sz in sizes.items(): + make_data_file(FROMDIR / name, sz) + +# Whole-file transfer (-W forces no delta): every dest byte must equal source. +run_rsync('-aW', f'{FROMDIR}/', f'{TODIR}/') +for name in sizes: + assert_same(FROMDIR / name, TODIR / name, label=f'A1 whole-file {name}') + +# Delta transfer: pre-seed the dest with a perturbed copy of the multiblock +# file, then change the source and re-sync WITHOUT -W so the delta algorithm +# runs. The reconstructed file must still be byte-identical to the new source. +rmtree(TODIR) +makepath(TODIR) +# Seed dest from an OLDER source content. +make_data_file(TODIR / 'multiblock', 300000) +# Now make the real source a fresh, different multiblock file. 
+make_data_file(FROMDIR / 'multiblock', 305000) +run_rsync('-a', '--no-whole-file', f'{FROMDIR}/multiblock', f'{TODIR}/multiblock') +assert_same(FROMDIR / 'multiblock', TODIR / 'multiblock', + label='A1 delta reconstruct') + + +# -------------------------------------------------------------------------- +# A2 -- quick-check semantics across default / --size-only / -c / -I. +# -------------------------------------------------------------------------- +# Build a destination file that has IDENTICAL size and mtime to the source but +# DIFFERENT content. quick_check_ok() must skip it by default and under +# --size-only, but re-send it under -c and under -I. + +A2 = SCRATCHDIR / 'a2' + + +def _setup_same_size_mtime_diff_content(): + """Create src/f and dst/f: same size, same mtime, different bytes.""" + rmtree(A2) + src = A2 / 'src' + dst = A2 / 'dst' + makepath(src, dst) + # Same length, different content. Fixed bytes (not urandom) so the two + # differ deterministically while sharing an exact size. + (src / 'f').write_bytes(b'A' * 4096) + (dst / 'f').write_bytes(b'B' * 4096) + # Force identical mtime (and atime) on both, to the nanosecond. + st = os.stat(src / 'f') + os.utime(dst / 'f', ns=(st.st_atime_ns, st.st_mtime_ns)) + # Sanity: sizes equal, contents differ, mtimes equal. + assert os.stat(src / 'f').st_size == os.stat(dst / 'f').st_size + if (src / 'f').read_bytes() == (dst / 'f').read_bytes(): + test_fail('A2 setup: src and dst content unexpectedly equal') + return src, dst + + +def _dst_matches_src(src, dst) -> bool: + return (src / 'f').read_bytes() == (dst / 'f').read_bytes() + + +# Default quick-check: size+mtime match -> SKIP -> dst keeps its OWN content. 
+src, dst = _setup_same_size_mtime_diff_content() +run_rsync('-a', f'{src}/', f'{dst}/') +if _dst_matches_src(src, dst): + test_fail('A2 default: dest with same size+mtime+different content was ' + 're-sent, but quick-check should have SKIPPED it') + +# --size-only: size match alone -> SKIP -> dst keeps its own content. +src, dst = _setup_same_size_mtime_diff_content() +run_rsync('-a', '--size-only', f'{src}/', f'{dst}/') +if _dst_matches_src(src, dst): + test_fail('A2 --size-only: dest with same size was re-sent, but ' + '--size-only should have SKIPPED it') + +# -c / --checksum: content is compared -> mismatch -> RE-SEND -> dst == src. +src, dst = _setup_same_size_mtime_diff_content() +run_rsync('-ac', f'{src}/', f'{dst}/') +if not _dst_matches_src(src, dst): + test_fail('A2 -c: dest with same size+mtime but different content was ' + 'NOT re-sent, but --checksum must compare content and re-send') +assert_same(src / 'f', dst / 'f', label='A2 -c result') + +# -I / --ignore-times: mtime fast-path forced off -> RE-SEND -> dst == src. +src, dst = _setup_same_size_mtime_diff_content() +run_rsync('-aI', f'{src}/', f'{dst}/') +if not _dst_matches_src(src, dst): + test_fail('A2 -I: dest with same size+mtime but different content was ' + 'NOT re-sent, but --ignore-times must force a re-send') +assert_same(src / 'f', dst / 'f', label='A2 -I result') + +print('content-fidelity: A1 byte-correctness + A2 quick-check semantics ' + 'verified') diff --git a/testsuite/delete-backup-invariants_test.py b/testsuite/delete-backup-invariants_test.py new file mode 100644 index 000000000..2cb5244e0 --- /dev/null +++ b/testsuite/delete-backup-invariants_test.py @@ -0,0 +1,399 @@ +#!/usr/bin/env python3 +"""Workstream-1 invariant group D -- deletion and backup. + +Derived from rsync.1.md + generator.c delete_in_dir, delete.c, backup.c. + + D1: --delete only removes within recursed directories. A sibling dir that + is NOT recursed into is left untouched. 
We assert the deletion bound: + an extra file inside the recursed tree is removed; an extra inside a + dir the transfer never descends into survives. + + D2: (prime transport-equivalence case) a sender-side IO error SUPPRESSES + all deletion unless --ignore-errors. generator.c delete_in_dir ~297: + `if (io_error & IOERR_GENERAL && !ignore_errors) ... skip deletion`. + io_error is WIRE-PROPAGATED, so this must behave identically local vs + daemon. Mechanism: an UNREADABLE SOURCE DIRECTORY (mode 000) makes the + sender's opendir() fail during the flist scan -- which runs BEFORE + deletion -- setting io_error. (An unreadable regular file fails only + later in send_files, AFTER --delete-during has already deleted, so it + does NOT reliably suppress; the unreadable-directory mechanism is the + one that sets io_error pre-deletion.) We use --delete-before so the + suppression is DETERMINISTIC: with --delete-during/-delay the top- + level dir is deleted before the scan even reaches the unreadable + subdir that sets io_error, so a shallow extra races the error; + --delete-before (and -after) finish the io_error-setting scan before + any deletion, which is the regime that actually exercises the + delete_in_dir guard (verified on HEAD: -before/-after suppress, + -during/-delay race). We assert: + * --delete-before alone -> NO deletions (extra survives) + * --delete-before --ignore-errors -> deletions proceed (extra gone) + and that both behaviors are IDENTICAL across local and daemon legs. + + max-delete: --max-delete=N deletes at most N files (delete.c ~156). We + assert the cap holds: with N extras > limit, exactly limit are removed. + + D4: delete timing default is protocol/version dependent + (--delete-during/-before/-after). We assert the FINAL deletion SET is + identical across every explicit timing variant, NEVER mid-transfer + ordering. + + backup: --backup renames the existing dst before overwrite (backup.c + make_backup). 
We assert the backup file (with ~ suffix, and with + --backup-dir) holds the OLD content while the dst holds the NEW + content. +""" + +import os +import subprocess + +import rsyncfns +from rsyncfns import ( + SCRATCHDIR, makepath, rmtree, run_rsync, test_fail, +) +from equiv_fns import ( + SkipLeg, _bin_argv, _join_slash, _start_daemon_for_bin, write_daemon_conf, +) + +ROOT = SCRATCHDIR / 'dgroup' +rmtree(ROOT) +makepath(ROOT) + + +def read_file(p): + return p.read_text() + + +# -------------------------------------------------------------------------- +# D1 -- --delete only removes within recursed directories. +# -------------------------------------------------------------------------- +# Transfer src/keep/ (a single subdir) into dst/keep/. The dst ALSO has a +# sibling dst/untouched/ that the transfer never names. --delete may remove +# the extra inside dst/keep/ but MUST NOT touch dst/untouched/. +d1 = ROOT / 'd1' +src = d1 / 'src' +dst = d1 / 'dst' +makepath(src / 'keep', dst / 'keep', dst / 'untouched') +(src / 'keep' / 'f.txt').write_text('kept content\n') +(dst / 'keep' / 'extra_in_recursed.txt').write_text('should be deleted\n') +(dst / 'untouched' / 'extra_in_sibling.txt').write_text('must survive\n') + +# Transfer ONLY the keep subtree (src/keep/ -> dst/keep/). dst/untouched is +# outside the transfer's namespace and is never recursed into. +run_rsync('-a', '--delete', f'{src}/keep/', f'{dst}/keep/') +if (dst / 'keep' / 'extra_in_recursed.txt').exists(): + test_fail('D1 VIOLATION: extra file inside the recursed dir was NOT ' + 'deleted; --delete should remove it.') +if not (dst / 'untouched' / 'extra_in_sibling.txt').exists(): + test_fail('D1 VIOLATION: a file in a sibling dir the transfer never ' + 'recursed into was deleted. --delete must be bounded to the ' + 'recursed tree.') + + +# -------------------------------------------------------------------------- +# D2 -- sender IO error suppresses deletion (transport-equivalent). 
+# -------------------------------------------------------------------------- +# Mechanism (see module docstring): an unreadable SOURCE DIRECTORY makes the +# sender's opendir fail during the flist scan -> io_error set BEFORE deletion. +def d2_setup(work, *, ignore_errors): + """Populate a work dir for a D2 leg. + + Layout (work is the daemon module root): + work/src/good/a.txt (transfers fine) + work/src/blocked/ (mode 000 -> opendir fails on sender) + work/dst/good/a.txt + work/dst/extra_to_delete.txt (the deletion probe) + Returns the dst path. + """ + rmtree(work) + src = work / 'src' + dst = work / 'dst' + makepath(src / 'good', src / 'blocked', dst / 'good') + (src / 'good' / 'a.txt').write_text('transferable body\n') + (src / 'blocked' / 'inner.txt').write_text('cannot be read\n') + (dst / 'extra_to_delete.txt').write_text('deletion probe\n') + os.chmod(src / 'blocked', 0o000) + return dst + + +def run_d2_leg(transport, *, ignore_errors, port): + """Run the D2 scenario over one transport. Returns whether the extra + deletion-probe file SURVIVED (True == deletion suppressed). + + Each daemon leg gets a DISTINCT port: the test framework leaves prior + test daemons alive until process exit (atexit kill), so reusing a port + would let a stale daemon -- still serving the previous leg's module path + -- answer the connection and silently produce the wrong result. 
+ """ + rsync_bin = rsyncfns.RSYNC + work = ROOT / f'd2-{transport}-{"ign" if ignore_errors else "plain"}' + dst = d2_setup(work, ignore_errors=ignore_errors) + src = _join_slash(work, 'src/') + opts = ['-a', '--delete-before'] + if ignore_errors: + opts.append('--ignore-errors') + base = _bin_argv(rsync_bin) + opts + + try: + if transport == 'local': + argv = base + [src, f'{dst}/'] + elif transport in ('daemon_pipe', 'daemon_tcp'): + use_tcp = (transport == 'daemon_tcp') + if use_tcp and not rsyncfns.USE_TCP: + raise SkipLeg('daemon_tcp needs --use-tcp') + tag = f'd2-{transport}-{"ign" if ignore_errors else "plain"}' + conf = write_daemon_conf( + [('equiv', {'path': str(work), 'read only': 'no'})], + globals={'pid file': str(work / 'rsyncd.pid')}, + name=f'{tag}.conf', + ) + if use_tcp: + # A prior daemon_pipe leg sets RSYNC_CONNECT_PROG, which would + # hijack this TCP client into the stale pipe daemon. Clear it. + os.environ.pop('RSYNC_CONNECT_PROG', None) + prefix = _start_daemon_for_bin(rsync_bin, conf, port, use_tcp=use_tcp) + argv = base + [src, f'{prefix}equiv/dst/'] + else: + raise ValueError(transport) + # Restore perms after the run regardless of outcome so cleanup works. + proc = subprocess.run(argv, capture_output=True, text=True) + # A sender IO error yields exit 23 (partial); that is expected here. + if proc.returncode not in (0, 23): + test_fail(f'[D2/{transport}] unexpected exit {proc.returncode}: ' + f'{" ".join(argv)}\n{proc.stderr}') + survived = (dst / 'extra_to_delete.txt').exists() + return survived + finally: + # Restore perms on every dir under the work tree so the scratch dir + # can be cleaned (the transfer may have created a mode-000 'blocked' + # dir in the dst). Walk bottom-up, chmod 0o755 any unreadable dir. 
+ for dirpath, dirnames, _ in os.walk(work): + for d in dirnames: + p = os.path.join(dirpath, d) + try: + os.chmod(p, 0o755) + except OSError: + pass + + +# Run both modes across local + daemon_pipe (+ daemon_tcp under --use-tcp). +d2_transports = ['local', 'daemon_pipe'] +if rsyncfns.USE_TCP: + d2_transports.append('daemon_tcp') + +# D2 relies on chmoding a source dir to 0o000 to force an opendir IO-error. +# Under euid==0 (real root or fakeroot), DAC is bypassed: the mode-000 dir is +# still readable, the IO-error never fires, and the "suppressed" leg falsely +# looks like a VIOLATION. Skip D2 cleanly (print a notice and leave +# suppress_results/ignore_results empty so the cross-transport checks below +# are also skipped) rather than false-failing under fakeroot/root. The rest +# of the test (max-delete, D4, backup, D7) continues unaffected. +_d2_root_skip = (os.geteuid() == 0) +if _d2_root_skip: + print('D2 SKIPPED: euid==0 -- DAC bypass means mode-000 dir trick cannot ' + 'force an opendir IO-error under root; D2 is not verifiable here.') + +suppress_results = {} +ignore_results = {} +_next_port = [12893] +def _port(): + p = _next_port[0] + _next_port[0] += 1 + return p +if not _d2_root_skip: + for t in d2_transports: + try: + suppress_results[t] = run_d2_leg(t, ignore_errors=False, port=_port()) + ignore_results[t] = run_d2_leg(t, ignore_errors=True, port=_port()) + except SkipLeg as e: + print(f'[D2/{t}] skipped: {e}') + + if not suppress_results: + test_fail('D2: no transport legs ran') + +# Per-leg invariant: --delete alone suppresses (survived), --ignore-errors +# proceeds (deleted). +for t in suppress_results: + if not suppress_results[t]: + test_fail(f'[D2/{t}] VIOLATION: a sender IO error did NOT suppress ' + f'deletion. 
The deletion probe was removed under --delete ' + f'alone; delete_in_dir must skip deletion when io_error is ' + f'set (this protects against destructive deletion on a ' + f'broken/partial transfer).') + if ignore_results[t]: + test_fail(f'[D2/{t}] VIOLATION: --ignore-errors did NOT re-enable ' + f'deletion; the deletion probe survived. With ' + f'--ignore-errors, io_error must not suppress deletion.') + +# Transport-equivalence: every leg must agree on BOTH behaviors. io_error is +# wire-propagated, so a daemon leg that disagreed with local would be a +# silent transport divergence. (Skipped under root; suppress_results is empty.) +if not _d2_root_skip: + if len(set(suppress_results.values())) != 1: + test_fail(f'D2 TRANSPORT DIVERGENCE: IO-error deletion suppression ' + f'differs across transports: {suppress_results}') + if len(set(ignore_results.values())) != 1: + test_fail(f'D2 TRANSPORT DIVERGENCE: --ignore-errors deletion behavior ' + f'differs across transports: {ignore_results}') + + +# -------------------------------------------------------------------------- +# max-delete -- at most N files removed. +# -------------------------------------------------------------------------- +md = ROOT / 'maxdelete' +src = md / 'src' +dst = md / 'dst' +makepath(src, dst) +(src / 'keep.txt').write_text('keep\n') +N_EXTRA = 5 +LIMIT = 2 +for i in range(N_EXTRA): + (dst / f'extra{i}.txt').write_text('x\n') +# rsync exits 25 when the max-delete limit stops further deletions. +run_rsync('-a', '--delete', f'--max-delete={LIMIT}', f'{src}/', f'{dst}/', + check=False) +remaining = sorted(dst.glob('extra*')) +if len(remaining) != N_EXTRA - LIMIT: + test_fail(f'max-delete VIOLATION: with --max-delete={LIMIT} and ' + f'{N_EXTRA} extras, expected {N_EXTRA - LIMIT} survivors, ' + f'found {len(remaining)}: {remaining}. 
The cap must bound ' + f'deletions at N.') + + +# -------------------------------------------------------------------------- +# D4 -- delete-timing variants yield the SAME final deletion set. +# -------------------------------------------------------------------------- +# Assert FINAL STATE only, never mid-transfer ordering. +d4src = ROOT / 'd4src' +makepath(d4src / 'sub') +(d4src / 'a.txt').write_text('a\n') +(d4src / 'sub' / 'b.txt').write_text('b\n') + +EXTRAS = ['extra_top.txt', 'sub/extra_deep.txt'] +final_sets = {} +for variant in ('--delete-before', '--delete-during', '--delete-delay', + '--delete-after'): + d4dst = ROOT / f'd4dst-{variant.strip("-")}' + rmtree(d4dst) + makepath(d4dst / 'sub') + for e in EXTRAS: + (d4dst / e).write_text('garbage\n') + run_rsync('-a', '--delete', variant, f'{d4src}/', f'{d4dst}/') + # Record the surviving extras (the complement of the deletion set). + survivors = {e for e in EXTRAS if (d4dst / e).exists()} + final_sets[variant] = survivors + +distinct = {frozenset(s) for s in final_sets.values()} +if len(distinct) != 1: + test_fail(f'D4 VIOLATION: delete-timing variants produced DIFFERENT ' + f'final deletion sets: {final_sets}. The timing must not ' + f'change the final state, only when deletion happens.') +if distinct != {frozenset()}: + test_fail(f'D4 VIOLATION: some extras were NOT deleted: {final_sets}') + + +# -------------------------------------------------------------------------- +# backup -- old content preserved in backup, new content in dst. +# -------------------------------------------------------------------------- +# Plain --backup (~ suffix). +bk = ROOT / 'backup' +src = bk / 'src' +dst = bk / 'dst' +makepath(src, dst) +(src / 'f.txt').write_text('NEW content -- distinct length\n') +(dst / 'f.txt').write_text('OLD content\n') +# Give the source a clearly newer mtime so the quick-check (size+mtime) sees +# a change and re-sends; the existing dst is then backed up before overwrite. 
+os.utime(src / 'f.txt', (10_000_000_000, 10_000_000_000)) +run_rsync('-a', '--backup', f'{src}/', f'{dst}/') +if read_file(dst / 'f.txt') != 'NEW content -- distinct length\n': + test_fail('backup VIOLATION: dst does not hold the NEW content after ' + '--backup overwrite.') +bak = dst / 'f.txt~' +if not bak.exists(): + test_fail('backup VIOLATION: expected backup file f.txt~ was not created.') +if read_file(bak) != 'OLD content\n': + test_fail(f'backup VIOLATION: backup file does not hold the OLD content; ' + f'got {read_file(bak)!r}. make_backup must rename the existing ' + f'dst before overwrite.') + +# --backup-dir: backup lands under the backup dir, holding the OLD content. +bkd = ROOT / 'backupdir' +src = bkd / 'src' +dst = bkd / 'dst' +bdir = bkd / 'bak' +makepath(src, dst, bdir) +(src / 'f.txt').write_text('NEW2 content -- distinct length\n') +(dst / 'f.txt').write_text('OLD2 content\n') +os.utime(src / 'f.txt', (10_000_000_000, 10_000_000_000)) +run_rsync('-a', '--backup', f'--backup-dir={bdir}', f'{src}/', f'{dst}/') +if read_file(dst / 'f.txt') != 'NEW2 content -- distinct length\n': + test_fail('backup-dir VIOLATION: dst does not hold the NEW content.') +bdfile = bdir / 'f.txt' +if not bdfile.exists(): + test_fail('backup-dir VIOLATION: backup not placed in --backup-dir.') +if read_file(bdfile) != 'OLD2 content\n': + test_fail(f'backup-dir VIOLATION: backup-dir copy does not hold the OLD ' + f'content; got {read_file(bdfile)!r}.') + +# -------------------------------------------------------------------------- +# D7 -- daemon backup-dir confinement (TCP-daemon only). +# -------------------------------------------------------------------------- +# A daemon sanitizes --backup-dir: a leading slash is replaced by the module +# path, so an absolute --backup-dir=/escape is rooted INSIDE the module +# (rsyncd.conf.5.md ~241). The client cannot make the daemon write backups +# outside the module via an absolute path. 
We push to a module with --backup +# --backup-dir=/escape and assert the backup landed at module_root/escape, NOT +# at the real filesystem /escape. Needs a bound socket; cleanly skips when +# --use-tcp is not set. +_d2_status = ('D2 skipped (root/fakeroot)' if _d2_root_skip + else f'D2 legs: suppress={suppress_results}, ignore={ignore_results}') +if not rsyncfns.USE_TCP: + print('delete-backup-invariants: D7 (daemon backup-dir confinement) ' + f'skipped (needs --use-tcp). D1/max-delete/D4/backup verified. ' + f'{_d2_status}') +else: + d7 = ROOT / 'd7' + module_root = d7 / 'module' + src = d7 / 'src' + rmtree(d7) + makepath(module_root, src) + # Seed the module with an existing file that will be overwritten (and so + # backed up) by the push. + (module_root / 'f.txt').write_text('OLD daemon content\n') + (src / 'f.txt').write_text('NEW daemon content -- distinct length\n') + os.utime(src / 'f.txt', (10_000_000_000, 10_000_000_000)) + + conf = write_daemon_conf( + [('bk', {'path': str(module_root), 'read only': 'no'})], + globals={'pid file': str(d7 / 'rsyncd.pid')}, + name='d7-backupconf.conf', + ) + os.environ.pop('RSYNC_CONNECT_PROG', None) + prefix = _start_daemon_for_bin(rsyncfns.RSYNC, conf, 12899, use_tcp=True) + + # Sentinel: ensure /escape (real fs root) is not writable garbage we'd + # mistake for success; we never expect anything to be created there. + argv = _bin_argv(rsyncfns.RSYNC) + [ + '-a', '--backup', '--backup-dir=/escape', + f'{src}/', f'{prefix}bk/', + ] + proc = subprocess.run(argv, capture_output=True, text=True) + if proc.returncode not in (0, 23): + test_fail(f'[D7] push exited {proc.returncode}: {" ".join(argv)}\n' + f'{proc.stderr}') + + # The new content must be in the module. + if read_file(module_root / 'f.txt') != 'NEW daemon content -- distinct length\n': + test_fail('D7: module dst does not hold the NEW content.') + # The backup of the OLD content must be CONFINED to the module: the + # leading-slash path /escape is rooted at module_root/escape. 
+ confined = module_root / 'escape' / 'f.txt' + if not confined.exists(): + test_fail('D7 VIOLATION: --backup-dir=/escape backup was not found at ' + f'/escape ({confined}). The daemon must sanitize a ' + 'leading-slash backup-dir to be module-rooted.') + if read_file(confined) != 'OLD daemon content\n': + test_fail(f'D7 VIOLATION: confined backup does not hold the OLD ' + f'content; got {read_file(confined)!r}.') + print('delete-backup-invariants: D1/max-delete/D4/backup + D7 (daemon ' + f'backup-dir confinement, TCP) verified. {_d2_status}') diff --git a/testsuite/equiv_fns.py b/testsuite/equiv_fns.py new file mode 100644 index 000000000..21c126411 --- /dev/null +++ b/testsuite/equiv_fns.py @@ -0,0 +1,537 @@ +"""Transport-equivalence harness for rsync tests. + +This module runs a single transfer *scenario* across rsync's four transports +and structurally diffs the resulting destination trees, partitioning every +difference into "must be byte-equal" vs "may differ only by a documented +mapping" (uid/gid/ACL-id/xattr-namespace, tolerated when unprivileged). + +The four transports: + + * ``local`` -- a plain local rsync (src and dst are both paths). + * ``ssh`` -- a remote-shell transfer via support/lsh.sh + (``-e lsh localhost:DEST`` with ``--rsync-path``). + * ``daemon_pipe`` -- an rsync:// daemon reached over a private stdio pipe + (RSYNC_CONNECT_PROG; opens no listening socket). + * ``daemon_tcp`` -- an rsync:// daemon bound to a real 127.0.0.1 socket. + Only runs under ``--use-tcp`` (see ``require_tcp``); + degrades to a clean skip otherwise. + +Black-box driving +----------------- +Every transport is driven through a single ``rsync_bin`` parameter that +defaults to the RSYNC env command but accepts an arbitrary rsync binary +path. The daemon side is launched from the *same* ``rsync_bin``, so the +whole matrix can be pointed at, e.g., a v3.4.2 vs a v3.4.3 binary without +touching the comparison logic. 
This is what the later proof-of-oracle uses +to demonstrate that #915 regressed link-dest-over-daemon between releases. + +The comparison logic (``capture_tree`` / ``diff_trees`` / ``partition_diffs``) +never sees the binary; it works purely off the on-disk result, so it is +independent of how the binary under test was built. +""" + +from __future__ import annotations + +import os +import shlex +import stat +import subprocess +import time +from dataclasses import dataclass, field +from pathlib import Path + +import rsyncfns +from rsyncfns import ( + RSYNC, SCRATCHDIR, SRCDIR, + claim_ports, require_tcp, rmtree, test_fail, write_daemon_conf, +) + + +LSH = str(SRCDIR / 'support' / 'lsh.sh') + +# The four transports we assert equivalence across. ``daemon_tcp`` is the +# only one that needs a real listening socket; the rest work under plain +# ``make check``. +TRANSPORTS = ('local', 'ssh', 'daemon_pipe', 'daemon_tcp') + + +# -------------------------------------------------------------------------- +# Privilege model +# -------------------------------------------------------------------------- + +def am_root() -> bool: + return os.geteuid() == 0 + + +def numeric_ids_only() -> bool: + """True when we can expect uid/gid to be reproduced verbatim. + + Owner is only preserved when running as root (``-o`` is a no-op for an + unprivileged client, and a non-root daemon cannot set uid/gid at all -- + write_daemon_conf() comments out the uid/gid lines off-root). When this + is False, an owner/group divergence is the *documented mapping*, not a + defect, and partition_diffs() tolerates it. + """ + return am_root() + + +# -------------------------------------------------------------------------- +# Black-box rsync invocation (independent of the RSYNC env binary) +# -------------------------------------------------------------------------- + +def _bin_argv(rsync_bin: str) -> list: + """argv prefix for an arbitrary rsync binary command. 
+ + ``rsync_bin`` may itself be multi-word (e.g. 'valgrind ... rsync' or a + binary plus '--protocol=N'); shlex-split it so subprocess gets a real + argv. Defaults are handled by the callers passing RSYNC. + """ + return shlex.split(rsync_bin) + + +@dataclass +class RunResult: + transport: str + returncode: int + stdout: str + stderr: str + argv: list + + +# -------------------------------------------------------------------------- +# Per-transport daemon plumbing, parametrized by rsync_bin +# -------------------------------------------------------------------------- + +def _start_daemon_for_bin(rsync_bin: str, conf_path: Path, port: int, + *, use_tcp: bool) -> str: + """Bring up a daemon running ``rsync_bin`` and return the URL prefix. + + Mirrors rsyncfns.start_test_daemon but launches the *given* binary so an + external rsync can be driven black-box. In pipe mode this sets + RSYNC_CONNECT_PROG (no socket); in TCP mode it spawns a real loopback + rsyncd via the rsyncfns kill-on-exit machinery. + """ + if use_tcp: + # A prior daemon_pipe leg in the same process sets RSYNC_CONNECT_PROG, + # which would hijack this TCP client into the stale pipe daemon (the + # client prefers the connect prog over a real socket). Clear it so the + # TCP leg actually uses the bound socket. 
+ os.environ.pop('RSYNC_CONNECT_PROG', None) + claim_ports(port) + argv = _bin_argv(rsync_bin) + [ + '--daemon', '--no-detach', + '--address=127.0.0.1', + f'--port={port}', + f'--config={conf_path}', + ] + proc = subprocess.Popen( + argv, + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + preexec_fn=rsyncfns._set_pdeathsig, + ) + import atexit + atexit.register(rsyncfns._stop_rsyncd, proc) + + deadline = time.monotonic() + 10 + last_err = None + import socket as _socket + while time.monotonic() < deadline: + if proc.poll() is not None: + test_fail(f"daemon ({rsync_bin}) exited before listening on " + f"port {port} (status={proc.returncode})") + try: + with _socket.create_connection(('127.0.0.1', port), timeout=0.5): + return f'rsync://localhost:{port}/' + except OSError as e: + last_err = e + time.sleep(0.05) + rsyncfns._stop_rsyncd(proc) + test_fail(f"daemon ({rsync_bin}) never listened on 127.0.0.1:{port}: " + f"{last_err}") + # Pipe mode: the client forks the daemon over a private stdio pipe. + os.environ['RSYNC_CONNECT_PROG'] = f'{rsync_bin} --config={conf_path} --daemon' + return 'rsync://localhost/' + + +# -------------------------------------------------------------------------- +# Scenario runner +# -------------------------------------------------------------------------- + +@dataclass +class Scenario: + """A transfer to replay across transports. + + ``opts`` : rsync options (NOT including src/dst), e.g. ['-aH', '--delete']. + ``rel_src`` : source path relative to the per-transport work dir; trailing + slash semantics are preserved. + ``rel_dst`` : destination path relative to the per-transport work dir. + ``setup`` : callable(workdir: Path) -> None, populates the work dir + (source tree, any link-dest/compare-dest basis dirs, an + existing destination to be --deleted into, etc.). Run fresh + per transport so each leg starts from identical inputs. 
+ ``module_subpath`` : for daemon transports, the path *inside* the module + that maps to ``rel_dst`` (the module is rooted at the work + dir). Defaults to ``rel_dst``. + ``expect_returncode`` : the rsync exit status this scenario is expected to + produce on EVERY transport leg. Defaults to 0. Scenarios that + legitimately partial-transfer (e.g. ``--delete`` racing a + vanished source, or some ``--link-dest`` setups) set this to + 23 explicitly so a *new* partial-transfer regression in a + scenario that should return 0 is no longer silently tolerated. + """ + opts: list + rel_src: str + rel_dst: str + setup: object + extra_opts_for: dict = field(default_factory=dict) + expect_returncode: int = 0 + + +def _work_dir(transport: str) -> Path: + d = SCRATCHDIR / f'equiv-{transport}' + rmtree(d) + d.mkdir(parents=True) + return d + + +def _join_slash(base: Path, rel: str) -> str: + """Join base/rel preserving a trailing slash on rel (rsync-significant).""" + s = str(base / rel) + if rel.endswith('/') and not s.endswith('/'): + s += '/' + return s + + +def run_scenario(scenario: Scenario, transport: str, *, + rsync_bin: str = None, port: int = 12890) -> tuple: + """Run ``scenario`` over ``transport`` against ``rsync_bin``. + + Returns ``(dest_dir: Path, RunResult)``. The destination tree is left on + disk for capture_tree(). ``rsync_bin`` defaults to the RSYNC env command. + + For ``daemon_tcp`` the caller is responsible for having gated on + require_tcp(); this function will still skip the leg cleanly if invoked + without --use-tcp by raising SkipLeg. 
+ """ + if rsync_bin is None: + rsync_bin = RSYNC + + work = _work_dir(transport) + scenario.setup(work) + + src = _join_slash(work, scenario.rel_src) + dst_dir = work / scenario.rel_dst.rstrip('/') + extra = scenario.extra_opts_for.get(transport, []) + base_argv = _bin_argv(rsync_bin) + list(scenario.opts) + list(extra) + + if transport == 'local': + dst = _join_slash(work, scenario.rel_dst) + argv = base_argv + [src, dst] + + elif transport == 'ssh': + dst = _join_slash(work, scenario.rel_dst) + argv = base_argv + ['-e', LSH, f'--rsync-path={rsync_bin}', + src, f'localhost:{dst}'] + + elif transport in ('daemon_pipe', 'daemon_tcp'): + use_tcp = (transport == 'daemon_tcp') + if use_tcp and not rsyncfns.USE_TCP: + raise SkipLeg('daemon_tcp needs --use-tcp') + # Module rooted at the work dir; the daemon writes into it via a + # module-relative path. read-only=no so a push can land. + conf = write_daemon_conf( + [('equiv', {'path': str(work), 'read only': 'no'})], + name=f'equiv-{transport}.conf', + ) + prefix = _start_daemon_for_bin(rsync_bin, conf, port, use_tcp=use_tcp) + module_dst = scenario.rel_dst + url = f'{prefix}equiv/{module_dst}' + argv = base_argv + [src, url] + else: + raise ValueError(f'unknown transport {transport!r}') + + proc = subprocess.run(argv, capture_output=True, text=True) + return dst_dir, RunResult(transport, proc.returncode, + proc.stdout, proc.stderr, argv) + + +class SkipLeg(Exception): + """Raised to skip a single transport leg (e.g. 
TCP without --use-tcp).""" + + +# -------------------------------------------------------------------------- +# Structural tree capture +# -------------------------------------------------------------------------- + +@dataclass +class FileEntry: + path: str # relative path within the captured root + ftype: str # 'file' | 'dir' | 'symlink' | 'other' + size: int + mode: int # st_mode & 0o7777 + mtime: int # whole seconds + mtime_nsec: int # nanosecond remainder + linktarget: str # symlink target, or '' for non-symlinks + uid: int + gid: int + content_sha: str # sha256 of regular-file content, or '' otherwise + ino_key: int # (dev, ino) collapsed into a group id; see capture_tree + + +@dataclass +class Tree: + root: Path + entries: dict # path -> FileEntry + ino_groups: dict # ino_key -> sorted list of paths + deletion_set: set = field(default_factory=set) + + +def _sha256(path: Path) -> str: + import hashlib + h = hashlib.sha256() + with open(path, 'rb') as f: + for chunk in iter(lambda: f.read(1 << 16), b''): + h.update(chunk) + return h.hexdigest() + + +def capture_tree(root) -> Tree: + """Walk ``root`` with os.lstat/os.scandir and record per-file structure. + + Inode grouping: files that share a (st_dev, st_ino) get the same + ``ino_key`` and are listed together in ``ino_groups`` -- this is what the + hardlink / link-dest assertions read. We key on (dev, ino) so a link-dest + that lands on a different filesystem can never be mistaken for a shared + inode. 
+ """ + root = Path(root) + entries: dict = {} + devino_to_group: dict = {} + ino_groups: dict = {} + next_group = [0] + + def group_for(st) -> int: + key = (st.st_dev, st.st_ino) + if key not in devino_to_group: + devino_to_group[key] = next_group[0] + next_group[0] += 1 + return devino_to_group[key] + + def walk(d: Path, relbase: str): + with os.scandir(d) as it: + for de in sorted(it, key=lambda e: e.name): + rel = de.name if not relbase else f'{relbase}/{de.name}' + st = os.lstat(de.path) + m = st.st_mode + if stat.S_ISDIR(m): + ftype, target, content = 'dir', '', '' + entries[rel] = _entry(rel, ftype, st, target, content, + group_for(st)) + walk(Path(de.path), rel) + elif stat.S_ISLNK(m): + target = os.readlink(de.path) + entries[rel] = _entry(rel, 'symlink', st, target, '', + group_for(st)) + elif stat.S_ISREG(m): + content = _sha256(Path(de.path)) + entries[rel] = _entry(rel, 'file', st, '', content, + group_for(st)) + else: + entries[rel] = _entry(rel, 'other', st, '', '', + group_for(st)) + + if root.exists(): + walk(root, '') + + for rel, e in entries.items(): + ino_groups.setdefault(e.ino_key, []).append(rel) + for k in ino_groups: + ino_groups[k].sort() + + return Tree(root=root, entries=entries, ino_groups=ino_groups) + + +def _entry(rel, ftype, st, target, content, group) -> FileEntry: + nsec = getattr(st, 'st_mtime_ns', int(st.st_mtime * 1e9)) % 1_000_000_000 + return FileEntry( + path=rel, + ftype=ftype, + size=st.st_size if ftype != 'dir' else 0, + mode=stat.S_IMODE(st.st_mode), + mtime=int(st.st_mtime), + mtime_nsec=nsec, + linktarget=target, + uid=st.st_uid, + gid=st.st_gid, + content_sha=content, + ino_key=group, + ) + + +# -------------------------------------------------------------------------- +# Structural diff + partition +# -------------------------------------------------------------------------- + +# Fields that must be byte-equal across every transport. 
+STRICT_FIELDS = ('ftype', 'size', 'mode', 'mtime', 'mtime_nsec', + 'linktarget', 'content_sha') +# Fields that may legitimately differ by a documented mapping when +# unprivileged (asserted equal only when numeric_ids_only()). +MAPPED_FIELDS = ('uid', 'gid') + + +@dataclass +class Diff: + path: str + field: str + a: object + b: object + + +def diff_trees(a: Tree, b: Tree) -> dict: + """Compare two captured trees field by field. + + Returns a dict with: + 'only_in_a' / 'only_in_b' : paths present in exactly one tree + 'strict' : list[Diff] over STRICT_FIELDS + 'mapped' : list[Diff] over MAPPED_FIELDS + 'ino_group_mismatch' : list of human-readable strings where the + inode-grouping partition differs + """ + out = { + 'only_in_a': sorted(set(a.entries) - set(b.entries)), + 'only_in_b': sorted(set(b.entries) - set(a.entries)), + 'strict': [], + 'mapped': [], + 'ino_group_mismatch': [], + } + for path in sorted(set(a.entries) & set(b.entries)): + ea, eb = a.entries[path], b.entries[path] + for f in STRICT_FIELDS: + va, vb = getattr(ea, f), getattr(eb, f) + if va == vb: + continue + # Directory mtime nanoseconds: rsync's default --modify-window=0 + # compares directory times at whole-second granularity (rsync.1.md), + # so the nanosecond remainder of a directory's mtime is not + # preserved unless -@ -1 / --modify-window=-1 is given to enable + # strict sub-second comparison. Additionally, protocol-30 drops + # nsec in the wire encoding for directories. The whole-second dir + # mtime is still strict; only the nsec remainder goes to 'mapped'. + if f == 'mtime_nsec' and ea.ftype == 'dir': + out['mapped'].append(Diff(path, 'dir_mtime_nsec', va, vb)) + continue + out['strict'].append(Diff(path, f, va, vb)) + for f in MAPPED_FIELDS: + va, vb = getattr(ea, f), getattr(eb, f) + if va != vb: + out['mapped'].append(Diff(path, f, va, vb)) + + # Inode grouping equivalence: build, for each tree, the partition of + # shared-inode sets (groups of size > 1 are the meaningful ones). 
+ def shared_sets(t: Tree) -> set: + return frozenset( + frozenset(paths) for paths in t.ino_groups.values() + if len(paths) > 1 + ) + sa, sb = shared_sets(a), shared_sets(b) + if sa != sb: + out['ino_group_mismatch'].append( + f'shared-inode partitions differ: {sorted(map(sorted, sa))} ' + f'!= {sorted(map(sorted, sb))}' + ) + return out + + +def partition_diffs(diff: dict) -> tuple: + """Split a diff_trees() result into (fatal, tolerated) lists of strings. + + Fatal: + * any membership difference (only_in_a / only_in_b) -- the deletion set + and file set must match across transports; + * any STRICT_FIELDS difference; + * any inode-grouping mismatch. + Tolerated (only when NOT numeric_ids_only()): + * uid/gid differences, the documented owner-mapping divergence on an + unprivileged daemon. When running as root these are promoted to fatal. + """ + fatal, tolerated = [], [] + for p in diff['only_in_a']: + fatal.append(f'present only in A: {p}') + for p in diff['only_in_b']: + fatal.append(f'present only in B: {p}') + for d in diff['strict']: + fatal.append(f'{d.path}: {d.field} {d.a!r} != {d.b!r}') + for d in diff['ino_group_mismatch']: + fatal.append(d) + for d in diff['mapped']: + msg = f'{d.path}: {d.field} {d.a!r} != {d.b!r}' + if d.field in MAPPED_FIELDS: + # uid/gid: the owner mapping. Tolerated only when unprivileged; + # promoted to fatal under root, where owner MUST be preserved. + if numeric_ids_only(): + fatal.append(msg + ' (root: owner must be preserved)') + else: + tolerated.append(msg + ' (unprivileged owner-mapping, tolerated)') + else: + # Non-owner documented mapping (e.g. directory mtime nsec): always + # tolerated, never owner-dependent. 
+ tolerated.append(msg + ' (documented transport non-equivalence)') + return fatal, tolerated + + +# -------------------------------------------------------------------------- +# High-level equivalence assertion +# -------------------------------------------------------------------------- + +def run_matrix(scenario: Scenario, *, rsync_bin: str = None, + port: int = 12890, transports=TRANSPORTS) -> dict: + """Run ``scenario`` across all transports, returning {transport: Tree}. + + Legs that cannot run (daemon_tcp without --use-tcp) are recorded as None + and skipped, not failed. + """ + trees: dict = {} + for t in transports: + try: + dst_dir, res = run_scenario(scenario, t, rsync_bin=rsync_bin, + port=port) + except SkipLeg: + trees[t] = None + continue + if res.returncode != scenario.expect_returncode: + test_fail(f'[{t}] rsync exited {res.returncode}, expected ' + f'{scenario.expect_returncode}: ' + f'{" ".join(res.argv)}\n{res.stderr}') + trees[t] = capture_tree(dst_dir) + return trees + + +def assert_equivalent(trees: dict, *, reference: str = 'local') -> list: + """Assert all present trees are equivalent to the reference transport. + + Fatal diffs call test_fail(). Returns the list of tolerated-diff strings + (for the caller to log). Skipped legs (None) are ignored. + """ + if trees.get(reference) is None: + # Reference itself skipped (shouldn't happen for 'local'); pick the + # first present tree as reference. 
+ present = [t for t, v in trees.items() if v is not None] + if not present: + test_fail('no transport legs ran') + reference = present[0] + + ref_tree = trees[reference] + all_tolerated: list = [] + for t, tree in trees.items(): + if tree is None or t == reference: + continue + diff = diff_trees(ref_tree, tree) + fatal, tolerated = partition_diffs(diff) + all_tolerated += [f'[{reference} vs {t}] {m}' for m in tolerated] + if fatal: + detail = '\n '.join(fatal) + test_fail(f'transport divergence {reference} vs {t}:\n {detail}') + return all_tolerated diff --git a/testsuite/idempotence_test.py b/testsuite/idempotence_test.py new file mode 100644 index 000000000..3448bbc12 --- /dev/null +++ b/testsuite/idempotence_test.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python3 +"""Workstream-1 invariant group E -- idempotence / round-trip. + + E1: a second identical `rsync -a` run transfers nothing. With -i the + itemized-change output for an already-synced tree must be empty (modulo + the documented dir-time restamping precision -- a directory line whose + only change is a sub-second time is not a real transfer). We assert no + file/content/metadata item is emitted on the second leg. + + E2: sync A->B, then reverse-sync B->A, is a no-op on the second leg -- the + reverse transfer itemizes nothing and leaves both trees structurally + equivalent. Diffs are PARTITIONED via equiv_fns.partition_diffs so an + unprivileged uid/gid mapping is tolerated rather than false-failing. + +Both legs reuse equiv_fns.capture_tree/diff_trees/partition_diffs for the +structural comparison rather than reinventing tree-walking. +""" + +import os + +from rsyncfns import ( + FROMDIR, SCRATCHDIR, TODIR, + make_tree, rmtree, run_rsync, test_fail, +) +from equiv_fns import capture_tree, diff_trees, partition_diffs + + +# An itemized line is a "real transfer" unless it is purely a directory whose +# only changed attribute is a sub-second time. 
rsync emits dir lines like +# "cd+++++++++ d/" on creation and ".d..t...... d/" for a pure time restamp. +# We treat a line as a real change unless it is a '.d' line with no change +# code other than 't' (time) / '.' in the attribute field -- i.e. a dir whose +# content/perms/owner did NOT change. Everything else is a genuine transfer. +def _real_changes(itemized: str): + real = [] + for line in itemized.splitlines(): + line = line.rstrip() + if not line: + continue + # Itemized change strings are 11 chars (YXcstpoguax) then a space then + # the name. Anything that isn't an itemized line (warnings, etc.) is + # conservatively treated as a real change so we never hide a problem. + if len(line) < 12 or line[11] != ' ': + real.append(line) + continue + code = line[:11] + update, ftype = code[0], code[1] + attrs = code[2:] + # A directory whose only "change" is a timestamp (or nothing) is the + # documented dir-time restamp, not a transfer. update char is '.' (no + # data/checksum change) and ftype is 'd'. + if update == '.' and ftype == 'd': + non_time = attrs.replace('t', '.').replace('T', '.') + if set(non_time) <= {'.', '+'}: + continue # pure dir-time restamp: tolerated + real.append(line) + return real + + +def _structural_fatal(tree_a, tree_b): + """diff_trees(tree_a, tree_b), partition it, and return the FATAL strings. + + Beyond equiv_fns.partition_diffs (which tolerates dir-time nsec and the + unprivileged uid/gid mapping), we also tolerate a SYMLINK's mtime_nsec: + Linux preserves symlink sub-second mtime via utimensat(AT_SYMLINK_NOFOLLOW), + but platforms such as macOS use lutimes(3) which has only whole-second + resolution, so the nanosecond remainder of a symlink's mtime may not survive + a round-trip on those systems. The whole-second symlink mtime is still + enforced as strict via the 'mtime' field. 
+ + Membership ('only_in_*') diffs are KEPT fatal: every caller here compares + two trees that an idempotent/round-trip sync is supposed to have made + set-identical (FROMDIR vs TODIR after a sync; A before vs after a reverse + leg; A vs B after a round-trip). A file present in one tree but absent in + the other is therefore a real divergence -- a dropped or spuriously-created + entry -- not an artifact of walking two unrelated roots, so it MUST surface. + """ + diff = diff_trees(tree_a, tree_b) + sym_nsec = { + d.path for d in diff['strict'] + if d.field == 'mtime_nsec' + and tree_a.entries.get(d.path) + and tree_a.entries[d.path].ftype == 'symlink' + } + fatal, _tolerated = partition_diffs(diff) + out = [] + for m in fatal: + if any(m.startswith(f'{p}: mtime_nsec ') for p in sym_nsec): + continue + out.append(m) + return out + + +def _itemized_second_leg(src, dst, *opts): + """Run rsync twice; return the real-change lines of the SECOND run.""" + run_rsync('-a', *opts, f'{src}/', f'{dst}/') + out = run_rsync('-ai', *opts, f'{src}/', f'{dst}/', + check=True, capture_output=True).stdout + return _real_changes(out), out + + +# -------------------------------------------------------------------------- +# E1 -- a second identical `rsync -a` run transfers nothing. +# -------------------------------------------------------------------------- +# A representative tree: nested dirs, regular files at depth, a hard-link pair, +# and a symlink -- so the idempotence claim covers every entry kind. 
# Start from empty source and destination roots.
rmtree(FROMDIR)
rmtree(TODIR)
make_tree(FROMDIR, depth=3, data=True)
# Add the hard-link pair and the symlink by hand.
# NOTE(review): assumes make_tree() created a top-level 'f0' -- confirm.
os.link(FROMDIR / 'f0', FROMDIR / 'f0_hl')
os.symlink('f0', FROMDIR / 'sl')

# Sync twice with -H added; `real` holds the second run's non-tolerated
# itemized lines, `raw` its full stdout (only used in the failure message).
real, raw = _itemized_second_leg(FROMDIR, TODIR, '-H')
if real:
    test_fail('E1: a second identical -aH run transferred items (expected '
              'none beyond tolerated dir-time restamps):\n '
              + '\n '.join(real)
              + f'\n--- full itemized output ---\n{raw}')

# Structural confirmation: src and dst trees are equivalent after the no-op
# (partitioned so an unprivileged owner mapping and symlink-time sub-second
# precision are tolerated, not fatal).
fatal = _structural_fatal(capture_tree(FROMDIR), capture_tree(TODIR))
if fatal:
    test_fail('E1: src and dst trees diverge after idempotent sync:\n '
              + '\n '.join(fatal))


# --------------------------------------------------------------------------
# E2 -- A->B then reverse B->A is a no-op on the reverse leg.
# --------------------------------------------------------------------------
# Build A, sync to B, then reverse-sync B back to A. The reverse leg must
# itemize nothing (beyond tolerated dir-time restamps) and leave A unchanged.
A = SCRATCHDIR / 'rt_a'
B = SCRATCHDIR / 'rt_b'
rmtree(A)
rmtree(B)
# Same fixture shape as E1: generated tree plus a hard-link pair and a symlink.
make_tree(A, depth=3, data=True)
os.link(A / 'f0', A / 'f0_hl')
os.symlink('f0', A / 'sl')

# Forward leg: A -> B.
run_rsync('-aH', f'{A}/', f'{B}/')

# Snapshot A before the reverse leg so we can prove the reverse changed nothing.
a_before = capture_tree(A)

# Reverse leg with -i: B -> A must transfer nothing.
out = run_rsync('-aHi', f'{B}/', f'{A}/', check=True,
                capture_output=True).stdout
real = _real_changes(out)
if real:
    test_fail('E2: reverse-sync B->A transferred items (expected a no-op '
              'beyond tolerated dir-time restamps):\n ' + '\n '.join(real)
              + f'\n--- full itemized output ---\n{out}')

# A must be byte/metadata-identical before vs after the reverse leg.
+fatal = _structural_fatal(a_before, capture_tree(A)) +if fatal: + test_fail('E2: the reverse leg mutated A:\n ' + '\n '.join(fatal)) + +# And A and B must be equivalent (partitioned for owner mapping + precision). +fatal = _structural_fatal(capture_tree(A), capture_tree(B)) +if fatal: + test_fail('E2: A and B diverge after round-trip:\n ' + '\n '.join(fatal)) + +print('idempotence: E1 second -a run is a no-op; E2 A->B->A reverse leg is a ' + 'no-op (dir-time precision tolerated, owner mapping partitioned)') diff --git a/testsuite/link-dest-equiv_test.py b/testsuite/link-dest-equiv_test.py new file mode 100644 index 000000000..d7e5f5f04 --- /dev/null +++ b/testsuite/link-dest-equiv_test.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +"""--link-dest transport-equivalence test (regression guard for #915). + +Asserts that ``rsync -a --link-dest=BASIS`` hard-links unchanged files into +the destination *identically* across all four transports: local, ssh (via +support/lsh.sh), a pipe-mode rsync:// daemon, and -- under --use-tcp -- a +real loopback-bound rsync:// daemon. + +This is the exact regression that shipped as #915: ``--link-dest`` silently +stopped hard-linking across a daemon, so a "incremental" backup over rsyncd +re-copied every byte and shared no inodes with the basis. The bug is a +*silent* divergence: the transfer still succeeds, the tree still verifies +byte-for-byte, only the inode-sharing (and thus disk usage) regresses. A +content/tree diff alone cannot see it; the inode-grouping partition can. + +Privilege branch (plan C2): with ``-o`` (preserve owner, implied by ``-a``) +an unprivileged daemon receiver cannot reproduce a foreign owner, which +makes unchanged_attrs() return false and legitimately suppresses the hard +link. 
Here every file is owned by the test user on both ends, so ``-o`` is +satisfiable and linking MUST happen on every leg; we still partition any +uid/gid difference as a documented mapping rather than a failure when +unprivileged, so the test never false-positives on owner mapping. + +Path note: a daemon sanitizes ``--link-dest`` against the module root, so +the basis must be expressed as a module-relative path on the daemon legs +(``/basis``) and as the real on-disk path on the local/ssh legs. Both spell +the same directory; ``extra_opts_for`` carries the per-transport spelling. +""" + +import os +import subprocess + +import rsyncfns +from rsyncfns import RSYNC, SCRATCHDIR, USE_TCP, rmtree, test_fail +from equiv_fns import ( + LSH, TRANSPORTS, SkipLeg, + _bin_argv, _join_slash, _start_daemon_for_bin, + capture_tree, diff_trees, numeric_ids_only, partition_diffs, + write_daemon_conf, +) + +DAEMON_PORT = 12890 + +# Skip the TCP leg cleanly when no real socket is available, but still run +# local + ssh + daemon_pipe under plain `make check`. +transports = list(TRANSPORTS) +if not USE_TCP: + transports = [t for t in transports if t != 'daemon_tcp'] + + +def build_fixture(): + """Create ONE shared src/ + basis/ used by every transport leg. + + Sharing a single source is what makes cross-transport mtime/nsec + comparison meaningful: each leg copies from the *same* bytes and times, + so any absolute mtime divergence in a dst is a real transport defect, not + an artifact of two separate fixture creations. The basis is a + byte-identical, identical-mtime (incl. nsec) copy so every file is a + valid --link-dest candidate (quick_check_ok size+mtime match). 
+ + Layout (root == the daemon module root): + /src/{a.txt,b.txt,sub/c.txt} + /basis/{a.txt,b.txt,sub/c.txt} (link-dest source) + // (each leg's dst, created later) + """ + root = SCRATCHDIR / 'equiv-fixture' + rmtree(root) + src = root / 'src' + basis = root / 'basis' + (src / 'sub').mkdir(parents=True) + (basis / 'sub').mkdir(parents=True) + + def seed(p, text): + sp = src / p + sp.write_text(text) + bp = basis / p + bp.write_text(text) + st = os.stat(sp) + os.utime(bp, ns=(st.st_atime_ns, st.st_mtime_ns)) + + seed('a.txt', 'hello world content\n' * 4) + seed('b.txt', 'second file body here\n' * 4) + seed('sub/c.txt', 'nested candidate file\n' * 4) + return root + + +def run_link_dest_leg(root, transport, *, rsync_bin=None, port=DAEMON_PORT): + """Run the link-dest scenario over one transport against the shared + fixture ``root``, returning (dst_tree, basis_tree). + + The destination is ``//`` so daemon legs (module rooted + at ``root``) can reach both the dst and the basis. ``--link-dest`` is + spelled as the real path for local/ssh and module-relative (``/basis``) + for the daemon legs, since a daemon sanitizes the option against the + module root. 
+ """ + if rsync_bin is None: + rsync_bin = RSYNC + if transport == 'daemon_tcp' and not rsyncfns.USE_TCP: + raise SkipLeg('daemon_tcp needs --use-tcp') + + basis = root / 'basis' + dst = root / transport + rmtree(dst) + dst.mkdir(parents=True) + src = _join_slash(root, 'src/') + base_argv = _bin_argv(rsync_bin) + ['-a'] + + if transport == 'local': + argv = base_argv + [f'--link-dest={basis}', src, f'{dst}/'] + elif transport == 'ssh': + argv = base_argv + ['-e', LSH, f'--rsync-path={rsync_bin}', + f'--link-dest={basis}', + src, f'localhost:{dst}/'] + elif transport in ('daemon_pipe', 'daemon_tcp'): + use_tcp = (transport == 'daemon_tcp') + conf = write_daemon_conf( + [('equiv', {'path': str(root), 'read only': 'no'})], + name=f'equiv-{transport}.conf', + ) + prefix = _start_daemon_for_bin(rsync_bin, conf, port, use_tcp=use_tcp) + argv = base_argv + ['--link-dest=/basis', src, f'{prefix}equiv/{transport}/'] + else: + raise ValueError(transport) + + proc = subprocess.run(argv, capture_output=True, text=True) + if proc.returncode not in (0, 23): + test_fail(f'[{transport}] rsync exited {proc.returncode}: ' + f'{" ".join(argv)}\n{proc.stderr}') + return capture_tree(dst), capture_tree(basis) + + +root = build_fixture() +results = {} +for t in transports: + try: + dst_tree, basis_tree = run_link_dest_leg(root, t) + except SkipLeg as e: + print(f'[{t}] skipped: {e}') + continue + results[t] = (dst_tree, basis_tree) + +if not results: + test_fail('no transport legs ran') + +# 1) Tree-structural equivalence across legs (content/mode/mtime/...), +# partitioning owner mapping. Reference = local if present. 
+trees = {t: dst for t, (dst, _b) in results.items()} +ref = 'local' if 'local' in trees else next(iter(trees)) +ref_tree = trees[ref] +tolerated_all = [] +for t, tree in trees.items(): + if t == ref: + continue + diff = diff_trees(ref_tree, tree) + fatal, tolerated = partition_diffs(diff) + tolerated_all += [f'[{ref} vs {t}] {m}' for m in tolerated] + if fatal: + test_fail(f'tree divergence {ref} vs {t}:\n ' + '\n '.join(fatal)) + +# 2) The load-bearing assertion: every leg's destination must SHARE +# inodes with its basis (SRC ⊆ DST grouping -- each linked file's dst +# inode equals the basis inode). This is what #915 broke over a daemon. +rel_files = ['a.txt', 'b.txt', 'sub/c.txt'] +for t, (dst_tree, basis_tree) in results.items(): + for rel in rel_files: + d = dst_tree.entries.get(rel) + b = basis_tree.entries.get(rel) + if d is None or b is None: + test_fail(f'[{t}] missing {rel} in dst or basis') + dst_ino = os.stat(dst_tree.root / rel).st_ino + dst_dev = os.stat(dst_tree.root / rel).st_dev + bas_ino = os.stat(basis_tree.root / rel).st_ino + bas_dev = os.stat(basis_tree.root / rel).st_dev + shared = (dst_ino, dst_dev) == (bas_ino, bas_dev) + # C2: a non-root daemon that cannot satisfy -o would legitimately + # break linking. Here owner is identical on both ends, so the + # documented mapping holds and linking MUST occur. If it does not, + # that is exactly the #915 silent divergence -- fail loudly. + if not shared: + test_fail( + f'[{t}] --link-dest did NOT share inode for {rel} ' + f'(dst ino={dst_ino} dev={dst_dev}, basis ino={bas_ino} ' + f'dev={bas_dev}). This is the #915 link-dest-over-' + f'transport regression: the file was re-copied instead ' + f'of hard-linked. 
privileged={numeric_ids_only()}' + ) + +for m in tolerated_all: + print(f'tolerated (documented mapping): {m}') +legs = ', '.join(sorted(results)) +print(f'link-dest-equiv: shared inodes verified across [{legs}] ' + f'({len(rel_files)} files/leg)') diff --git a/testsuite/link-dest-variants_test.py b/testsuite/link-dest-variants_test.py new file mode 100644 index 000000000..e7df94ebe --- /dev/null +++ b/testsuite/link-dest-variants_test.py @@ -0,0 +1,184 @@ +#!/usr/bin/env python3 +"""Workstream-1 invariant group C -- alternate-basis (link/copy/compare-dest) +and hardlink-group semantics. + +C2 (link-dest shared inodes across transports) lives in its own file +(link-dest-equiv_test.py) and is NOT duplicated here. This file covers the +OTHER C cases, derived from rsync.1.md + generator.c try_dests_reg/_non and +hlink.c: + + C3: --link-dest + -I/--ignore-times never hard-links. This is EMERGENT + behavior with no explicit guard: quick_check_ok() returns 0 under + ignore_times (generator.c ~601), so try_dests_reg never reaches the + full-attr match level that would set the link, and the file is + re-sent. There is no code line that says "if ignore_times don't + link" -- it falls out of the match-level machinery -- so it is + fragile to refactor and kept as an explicit regression test. We + assert the dst file does NOT share an inode with the link-dest basis. + + copy-dest: --copy-dest copies the unchanged candidate from the basis; + the dst is a real, distinct-inode copy with identical content (NOT a + hard link). Assert content identical AND inode NOT shared. + + compare-dest: --compare-dest skips the transfer entirely when the basis + matches; the dst file may not be created at all. Assert the unchanged + file is absent from the dst (skipped), while a CHANGED file IS + transferred. + + C7: hardlink grouping is SRC ⊆ DST, not equality. When transferring a + subset of a hardlink group with -H, the dst group may include + pre-existing extra members. 
Assert the transferred members share one + inode (subset coherence) and NEVER assert strict set equality between + the src group and the dst group. +""" + +import os + +from rsyncfns import ( + SCRATCHDIR, assert_same, makepath, rmtree, run_rsync, test_fail, +) + + +def shared_inode(a, b): + sa, sb = os.stat(a), os.stat(b) + return (sa.st_dev, sa.st_ino) == (sb.st_dev, sb.st_ino) + + +# Each subtest uses a fresh subtree under the scratch dir. +ROOT = SCRATCHDIR / 'cgroup' +rmtree(ROOT) +makepath(ROOT) + + +# -------------------------------------------------------------------------- +# C3 -- --link-dest with -I/--ignore-times must NOT hard-link. +# -------------------------------------------------------------------------- +# Build a basis that is a byte-identical, identical-mtime copy of the source +# (so without -I it WOULD link). Then transfer with --link-dest AND -I and +# assert the dst file is a fresh copy (distinct inode from the basis). +c3 = ROOT / 'c3' +src = c3 / 'src' +basis = c3 / 'basis' +dst = c3 / 'dst' +makepath(src, basis, dst) + +(src / 'f.txt').write_text('link-dest ignore-times candidate\n' * 8) +(basis / 'f.txt').write_text('link-dest ignore-times candidate\n' * 8) +st = os.stat(src / 'f.txt') +os.utime(basis / 'f.txt', ns=(st.st_atime_ns, st.st_mtime_ns)) + +# Sanity: without -I, --link-dest DOES link (proves the basis is a valid +# candidate, so the C3 non-link below is attributable to -I and not to a +# bad fixture). +dst_ctl = c3 / 'dst_ctl' +makepath(dst_ctl) +run_rsync('-a', f'--link-dest={basis}', f'{src}/', f'{dst_ctl}/') +if not shared_inode(dst_ctl / 'f.txt', basis / 'f.txt'): + test_fail('C3 fixture invalid: --link-dest without -I did not link; ' + 'cannot attribute the -I non-link result to ignore-times') + +# The invariant: with -I the file is re-sent, so NO inode sharing. 
+run_rsync('-a', '-I', f'--link-dest={basis}', f'{src}/', f'{dst}/') +assert_same(src / 'f.txt', dst / 'f.txt', label='C3 content still correct') +if shared_inode(dst / 'f.txt', basis / 'f.txt'): + test_fail('C3 VIOLATION: --link-dest with -I/--ignore-times hard-linked ' + 'the dst to the basis. The ignore-times fast-path-off should ' + 'force a re-send with a fresh inode (emergent behavior, no ' + 'explicit guard -- a refactor likely broke it).') + + +# -------------------------------------------------------------------------- +# copy-dest -- copies from the basis: distinct inode, identical content. +# -------------------------------------------------------------------------- +cc = ROOT / 'copydest' +src = cc / 'src' +basis = cc / 'basis' +dst = cc / 'dst' +makepath(src, basis, dst) +(src / 'f.txt').write_text('copy-dest candidate body\n' * 8) +(basis / 'f.txt').write_text('copy-dest candidate body\n' * 8) +st = os.stat(src / 'f.txt') +os.utime(basis / 'f.txt', ns=(st.st_atime_ns, st.st_mtime_ns)) + +run_rsync('-a', f'--copy-dest={basis}', f'{src}/', f'{dst}/') +# The dst file must exist with the source content... +assert_same(src / 'f.txt', dst / 'f.txt', label='copy-dest content') +# ...but be a real copy, NOT a hard link to the basis. +if shared_inode(dst / 'f.txt', basis / 'f.txt'): + test_fail('copy-dest VIOLATION: dst shares an inode with the basis. ' + '--copy-dest must COPY (distinct inode), only --link-dest ' + 'hard-links.') + + +# -------------------------------------------------------------------------- +# compare-dest -- skips transfer when the basis matches; dst not created. +# -------------------------------------------------------------------------- +cmp = ROOT / 'comparedest' +src = cmp / 'src' +basis = cmp / 'basis' +dst = cmp / 'dst' +makepath(src, basis, dst) + +# 'same.txt' matches the basis (will be SKIPPED -> absent in dst). 
+(src / 'same.txt').write_text('unchanged compare-dest body\n' * 8) +(basis / 'same.txt').write_text('unchanged compare-dest body\n' * 8) +st = os.stat(src / 'same.txt') +os.utime(basis / 'same.txt', ns=(st.st_atime_ns, st.st_mtime_ns)) +# 'diff.txt' has no basis match (will be TRANSFERRED -> present in dst). +(src / 'diff.txt').write_text('this file has no basis match at all\n' * 8) + +run_rsync('-a', f'--compare-dest={basis}', f'{src}/', f'{dst}/') +if (dst / 'same.txt').exists(): + test_fail('compare-dest VIOLATION: the basis-matching file was created ' + 'in the dst. --compare-dest must SKIP the transfer entirely ' + '(file not created) when the basis matches.') +if not (dst / 'diff.txt').exists(): + test_fail('compare-dest VIOLATION: a file with no basis match was NOT ' + 'transferred. --compare-dest must still transfer files the ' + 'basis does not cover.') +assert_same(src / 'diff.txt', dst / 'diff.txt', label='compare-dest changed file') + + +# -------------------------------------------------------------------------- +# C7 -- hardlink grouping is SRC ⊆ DST, not equality. +# -------------------------------------------------------------------------- +# Source has a 3-member hardlink group {h1,h2,h3}. This fixture does NOT +# pre-seed an extra member (such as an h0) on the dst: rsync can only +# extend a group with what IT transfers, so the real subset property we can +# assert portably is: transferring a SUBSET of the source group still +# produces a coherent single-inode group on the dst, and we NEVER require the +# dst group to equal the src group. +c7 = ROOT / 'c7' +src = c7 / 'src' +dst = c7 / 'dst' +makepath(src, dst) + +(src / 'h1').write_text('hardlink group payload\n' * 8) +os.link(src / 'h1', src / 'h2') +os.link(src / 'h1', src / 'h3') + +# Transfer only a SUBSET of the group (h1, h2) with -H. h3 is excluded, so the +# dst group is a strict subset of the src group. 
+run_rsync('-aH', '--exclude=h3', f'{src}/', f'{dst}/') + +d1 = os.stat(dst / 'h1') +d2 = os.stat(dst / 'h2') +# Subset coherence: the members we DID transfer share one inode. +if (d1.st_dev, d1.st_ino) != (d2.st_dev, d2.st_ino): + test_fail('C7 VIOLATION: transferred hardlink-group members h1,h2 do NOT ' + 'share an inode on the dst. -H must preserve linkage among the ' + 'transferred subset.') +# Subset, not equality: h3 was excluded and must be absent. We assert the +# subset bound, NEVER that the dst group equals the src group. +if (dst / 'h3').exists(): + test_fail('C7 fixture error: excluded h3 unexpectedly present in dst') + +# The dst inode need NOT equal any src inode, and the dst group need NOT equal +# the src group {h1,h2,h3}; asserting either would be over-assertion. We only +# assert the transferred subset is internally consistent (done above) and that +# content is correct. +assert_same(src / 'h1', dst / 'h1', label='C7 content') + +print('link-dest-variants: C3 (no link under -I), copy-dest (copy not link), ' + 'compare-dest (skip when matched), C7 (SRC subset-of DST grouping) ' + 'verified') diff --git a/testsuite/metadata-fidelity_test.py b/testsuite/metadata-fidelity_test.py new file mode 100644 index 000000000..43b9836ca --- /dev/null +++ b/testsuite/metadata-fidelity_test.py @@ -0,0 +1,297 @@ +#!/usr/bin/env python3 +"""Workstream-1 invariant group B -- metadata fidelity. + +Driven by generator.c's *_differ predicates (reference.md Part 2): under the +matching -a sub-options, rsync makes the destination's metadata match the +source's, and unchanged_attrs() treats a single attribute difference as a +reason to re-stamp. We assert each attribute is reproduced -- PARTITIONED so +that legitimately-privilege/feature-dependent attributes (owner, group, ACL +ids, xattr namespace, devices) are only asserted when the environment can +actually preserve them, and SKIP cleanly otherwise. + +Covered: perms, exec-bit (-E), times incl. 
nanoseconds (mtime_differs ~395), +owner/group (ownership_differs ~428, privilege-partitioned), hardlinks (-H), +devices (root-only), ACLs (-A, feature+ability gated), xattrs (-X, feature +gated), omit-dir-times (-O) and omit-link-times (--omit-link-times). +""" + +import os +import platform +import shutil +import stat +import subprocess + +from rsyncfns import ( + FROMDIR, SCRATCHDIR, TODIR, + assert_hardlinked, assert_mode, assert_mtime_close, + makepath, rmtree, run_rsync, test_fail, + xattr_dump, xattr_set, xattrs_supported, +) +from equiv_fns import am_root + + +VV = run_rsync('-VV', check=True, capture_output=True).stdout + + +def _fresh(*dirs): + for d in (FROMDIR, TODIR, *dirs): + rmtree(d) + makepath(FROMDIR) + + +# -------------------------------------------------------------------------- +# B-perms -- -p reproduces the full permission bits. +# -------------------------------------------------------------------------- +_fresh() +for nm, mode in (('a', 0o644), ('b', 0o600), ('c', 0o751), ('d', 0o444)): + (FROMDIR / nm).write_text(nm) + os.chmod(FROMDIR / nm, mode) +run_rsync('-a', f'{FROMDIR}/', f'{TODIR}/') +for nm, mode in (('a', 0o644), ('b', 0o600), ('c', 0o751), ('d', 0o444)): + assert_mode(TODIR / nm, mode, label=f'B-perms {nm}') + + +# -------------------------------------------------------------------------- +# B-exec -- -E (--executability) propagates ONLY the executable bit. +# -------------------------------------------------------------------------- +# Without -p, a non-exec source file's exec bit must be cleared on a dest that +# has it; an exec source file's exec bit must be set. Non-exec permission bits +# are NOT forced to match (that's -p's job), so we assert exactly the exec bit. +_fresh() +(FROMDIR / 'prog').write_text('#!/bin/sh\n') +(FROMDIR / 'data').write_text('plain\n') +os.chmod(FROMDIR / 'prog', 0o700) # has exec +os.chmod(FROMDIR / 'data', 0o600) # no exec +# Pre-seed dest with the OPPOSITE exec state so -E has to flip both. 
+makepath(TODIR) +(TODIR / 'prog').write_text('old\n') +(TODIR / 'data').write_text('old\n') +os.chmod(TODIR / 'prog', 0o600) # missing exec, must gain it +os.chmod(TODIR / 'data', 0o700) # has exec, must lose it +run_rsync('-rtE', f'{FROMDIR}/', f'{TODIR}/') +if not (os.stat(TODIR / 'prog').st_mode & 0o111): + test_fail('B-exec: -E did not set the exec bit on an executable source') +if os.stat(TODIR / 'data').st_mode & 0o111: + test_fail('B-exec: -E did not clear the exec bit on a non-executable source') + + +# -------------------------------------------------------------------------- +# B-times -- -t reproduces mtime; nanoseconds preserved where representable. +# -------------------------------------------------------------------------- +_fresh() +(FROMDIR / 'f').write_text('timed\n') +# A whole-second base plus a non-zero nanosecond remainder. +WHOLE = 1_500_000_000 +NSEC = 123_456_789 +os.utime(FROMDIR / 'f', ns=(WHOLE * 1_000_000_000 + NSEC, + WHOLE * 1_000_000_000 + NSEC)) +src_ns = os.stat(FROMDIR / 'f').st_mtime_ns + +run_rsync('-a', f'{FROMDIR}/', f'{TODIR}/') +# Whole-second mtime must always match. +assert_mtime_close(TODIR / 'f', WHOLE, tol=1.0, label='B-times whole-second') + +# Nanosecond sub-assertion: only meaningful if BOTH the source filesystem AND +# rsync actually represent sub-second mtimes. Probe the source: if the fs +# truncated our nsec to 0, we cannot assert nsec fidelity -- degrade cleanly. +dst_ns = os.stat(TODIR / 'f').st_mtime_ns +src_sub = src_ns % 1_000_000_000 +if src_sub == 0 or '"symtimes": true' not in VV: + print('B-times: nanosecond sub-assertion skipped (fs/rsync lacks ' + 'sub-second mtime representation)') +else: + if dst_ns != src_ns: + test_fail(f'B-times nsec: dst mtime_ns {dst_ns} != src {src_ns} ' + '(sub-second mtime not preserved)') + + +# -------------------------------------------------------------------------- +# B-owner-group -- -og. PARTITIONED by privilege. 
+# -------------------------------------------------------------------------- +# ownership_differs(): UID is only reproduced when am_root && uid_ndx; GID can +# be set by an unprivileged caller ONLY for groups it belongs to. We therefore: +# * always assert the dest GID matches the source for a file we chgrp to one +# of OUR OWN supplementary groups (no privilege needed); +# * assert UID equality ONLY when running as root. +_fresh() +(FROMDIR / 'f').write_text('owned\n') + +# Group test: pick a secondary group we already belong to (so chgrp succeeds +# unprivileged). Skip the group sub-check if we have no usable second group. +my_gid = os.getgid() +groups = [g for g in os.getgroups() if g != my_gid] +if groups: + target_gid = groups[0] + os.chown(FROMDIR / 'f', -1, target_gid) + run_rsync('-a', f'{FROMDIR}/', f'{TODIR}/') + dst_gid = os.stat(TODIR / 'f').st_gid + if dst_gid != target_gid: + test_fail(f'B-group: -g did not reproduce gid {target_gid} ' + f'(dst gid {dst_gid})') +else: + print('B-group: skipped (no usable secondary group to chgrp into)') + +# Owner test: only assert under root; chown(2) to an arbitrary uid needs root. +if am_root(): + rmtree(TODIR) + os.chown(FROMDIR / 'f', 5000, -1) + run_rsync('-a', f'{FROMDIR}/', f'{TODIR}/') + dst_uid = os.stat(TODIR / 'f').st_uid + if dst_uid != 5000: + test_fail(f'B-owner: -o did not reproduce uid 5000 (dst {dst_uid})') +else: + print('B-owner: uid-equality sub-check skipped (needs root to set/verify ' + 'arbitrary ownership)') + + +# -------------------------------------------------------------------------- +# B-hardlinks -- -H preserves a hard-link group (shared inode on the dest). +# -------------------------------------------------------------------------- +_fresh() +(FROMDIR / 'h1').write_text('linked\n') +os.link(FROMDIR / 'h1', FROMDIR / 'h2') +os.link(FROMDIR / 'h1', FROMDIR / 'h3') +# Sanity: source really is one inode. 
+assert_hardlinked(FROMDIR / 'h1', FROMDIR / 'h2', label='B-hardlinks src') +run_rsync('-aH', f'{FROMDIR}/', f'{TODIR}/') +assert_hardlinked(TODIR / 'h1', TODIR / 'h2', label='B-hardlinks h1==h2') +assert_hardlinked(TODIR / 'h1', TODIR / 'h3', label='B-hardlinks h1==h3') +# Without -H the destination files must NOT be linked (proves -H is load- +# bearing, not an accident of the filesystem). +rmtree(TODIR) +run_rsync('-a', f'{FROMDIR}/', f'{TODIR}/') +s2, s3 = os.stat(TODIR / 'h1'), os.stat(TODIR / 'h2') +if (s2.st_dev, s2.st_ino) == (s3.st_dev, s3.st_ino): + test_fail('B-hardlinks: dest files share an inode WITHOUT -H ' + '(hard-link preservation is not actually being controlled by -H)') + + +# -------------------------------------------------------------------------- +# B-devices -- -D reproduces a device node's type+rdev. ROOT-ONLY. +# -------------------------------------------------------------------------- +# mknod(2) of a char/block device needs root; mirror devices-fake's pattern of +# skipping cleanly when not privileged. +if not am_root(): + print('B-devices: skipped (mknod needs root)') +else: + _fresh() + dev = FROMDIR / 'nulldev' + try: + os.mknod(dev, 0o600 | 0o020000, os.makedev(1, 3)) # S_IFCHR + except (PermissionError, OSError) as e: + print(f'B-devices: skipped (mknod failed: {e})') + else: + run_rsync('-aD', f'{FROMDIR}/', f'{TODIR}/') + dst = TODIR / 'nulldev' + st = os.stat(dst, follow_symlinks=False) + if not (platform.system() and os.path.exists(dst)): + test_fail('B-devices: device node not created on dest') + if not stat.S_ISCHR(st.st_mode): + test_fail('B-devices: dest is not a character device') + if st.st_rdev != os.makedev(1, 3): + test_fail(f'B-devices: dest rdev {st.st_rdev} != source ' + f'{os.makedev(1, 3)}') + + +# -------------------------------------------------------------------------- +# B-acls -- -A preserves a POSIX ACL. FEATURE + ABILITY gated. 
+# -------------------------------------------------------------------------- +if '"ACLs": true' not in VV: + print('B-acls: skipped (rsync built without ACL support)') +elif platform.system() != 'Linux' or not (shutil.which('setfacl') + and shutil.which('getfacl')): + print('B-acls: skipped (no setfacl/getfacl on this platform)') +else: + _fresh() + (FROMDIR / 'f').write_text('acl\n') + # Grant an extra ACL entry to a group we belong to (no privilege needed), + # falling back to a skip if the filesystem rejects ACLs. + gid = os.getgid() + r = subprocess.run(['setfacl', '-m', f'g:{gid}:rwx', str(FROMDIR / 'f')]) + if r.returncode != 0: + print('B-acls: skipped (filesystem rejected setfacl)') + else: + run_rsync('-aA', f'{FROMDIR}/', f'{TODIR}/') + + def acl_of(p): + out = subprocess.run(['getfacl', '-cE', str(p)], + capture_output=True, text=True).stdout + return '\n'.join(sorted(l for l in out.splitlines() if l.strip())) + if acl_of(FROMDIR / 'f') != acl_of(TODIR / 'f'): + test_fail('B-acls: -A did not reproduce the source ACL\n' + f'src:\n{acl_of(FROMDIR / "f")}\n' + f'dst:\n{acl_of(TODIR / "f")}') + + +# -------------------------------------------------------------------------- +# B-xattrs -- -X preserves a user-namespace xattr. FEATURE gated. +# -------------------------------------------------------------------------- +if not xattrs_supported(): + print('B-xattrs: skipped (no usable xattr surface)') +else: + _fresh() + (FROMDIR / 'f').write_text('xattr\n') + try: + xattr_set('test.attr', 'hello-world', FROMDIR / 'f') + except (PermissionError, OSError) as e: + print(f'B-xattrs: skipped (filesystem rejected xattr set: {e})') + else: + run_rsync('-aX', f'{FROMDIR}/', f'{TODIR}/') + + # xattr_dump prefixes each file with a "# file: " header; strip + # the per-file headers (and any blank lines) so we compare only the + # name="value" payload, which is what -X must reproduce. 
+ def _xpayload(p): + return '\n'.join( + ln for ln in xattr_dump(p).splitlines() + if ln.strip() and not ln.startswith('# file:')) + src_x = _xpayload(FROMDIR / 'f') + dst_x = _xpayload(TODIR / 'f') + if not src_x: + test_fail('B-xattrs: source xattr not set as expected') + if src_x != dst_x: + test_fail(f'B-xattrs: -X did not reproduce xattrs\n' + f'src: {src_x!r}\ndst: {dst_x!r}') + + +# -------------------------------------------------------------------------- +# B-omit-dir-times -- -O preserves FILE mtimes but leaves DIR mtimes alone. +# -------------------------------------------------------------------------- +OLD = 1_400_000_000 +_fresh() +makepath(FROMDIR / 'sub') +(FROMDIR / 'sub' / 'f').write_text('x\n') +for p in (FROMDIR / 'sub' / 'f', FROMDIR / 'sub', FROMDIR): + os.utime(p, (OLD, OLD)) +run_rsync('-rlt', '-O', f'{FROMDIR}/', f'{TODIR}/') +assert_mtime_close(TODIR / 'sub' / 'f', OLD, tol=1.0, label='B-O file mtime') +if abs(os.stat(TODIR / 'sub').st_mtime - OLD) <= 1: + test_fail('B-omit-dir-times: -O preserved a directory mtime instead of ' + 'omitting it') + + +# -------------------------------------------------------------------------- +# B-omit-link-times -- --omit-link-times preserves a symlink but omits its +# mtime (where the platform records symlink mtimes). 
+# -------------------------------------------------------------------------- +_fresh() +(FROMDIR / 'target').write_text('t\n') +os.symlink('target', FROMDIR / 'sl') +try: + os.utime(FROMDIR / 'sl', (OLD, OLD), follow_symlinks=False) +except (NotImplementedError, OSError): + print('B-omit-link-times: skipped (no symlink-mtime support here)') +else: + if '"symtimes": true' not in VV: + print('B-omit-link-times: skipped (rsync built without symtimes)') + else: + run_rsync('-rlt', '--omit-link-times', f'{FROMDIR}/', f'{TODIR}/') + dst = TODIR / 'sl' + if not os.path.islink(dst): + test_fail('B-omit-link-times: symlink not copied') + if abs(os.lstat(dst).st_mtime - OLD) <= 1: + test_fail('B-omit-link-times: --omit-link-times did not omit the ' + 'symlink mtime') + +print('metadata-fidelity: perms/exec/times+nsec/owner-group/hardlinks/' + 'devices/acls/xattrs/omit-times verified (partitioned by privilege+feature)') diff --git a/testsuite/transport-equiv-meta_test.py b/testsuite/transport-equiv-meta_test.py new file mode 100644 index 000000000..97abc4101 --- /dev/null +++ b/testsuite/transport-equiv-meta_test.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python3 +"""Workstream-1 invariant group F -- transport-equivalence meta-invariants. + +The general guard: the SAME scenario produces the SAME final destination tree +across every transport (local / ssh / daemon_pipe / daemon_tcp). This is +transport-blindness: rsync's result must not depend on how the peers are +connected. We drive several representative scenarios through equiv_fns' +run_matrix + assert_equivalent, which structurally diffs the trees and +partitions every difference into fatal (content/mode/mtime/linktarget/inode- +grouping/membership) vs tolerated-when-unprivileged (uid/gid owner mapping, +directory mtime nanoseconds) -- so an unprivileged run never false-fails on +owner mapping. 
+ +Cases (all derived from rsync.1.md + compat.c, NOT from a known bug): + + F-general: a mixed tree (regular files of several sizes, symlinks, nested + dirs, a hardlink group, varied perms) must land byte/structure + identical on every transport, including the shared-inode + partition of the hardlink group. + + F-clamp: the negotiated protocol is the MINIMUM of the two peers + (compat.c setup_protocol ~606 clamps to min, not average). + Forcing --protocol=N (a lower version) must be honored and the + transfer must still succeed with an equivalent final tree on + every transport. + + F-negotiate: checksum/compress negotiation is intersection-or-error. We do + NOT assert a specific digest/compressor (those are version- and + build-dependent). We assert (a) a valid negotiation succeeds and + yields a correct tree, and (b) an EMPTY intersection (an + unsupported forced --compress-choice) ERRORS CLEANLY rather than + silently mis-transferring. +""" + +import os +import subprocess +import tempfile +from pathlib import Path + +from rsyncfns import ( + RSYNC, USE_TCP, rsync_argv, test_fail, +) +from equiv_fns import ( + TRANSPORTS, Scenario, assert_equivalent, diff_trees, partition_diffs, + run_matrix, +) + + +# Skip the TCP leg cleanly without --use-tcp; local + ssh + daemon_pipe still +# run under plain `make check`. +transports = list(TRANSPORTS) +if not USE_TCP: + transports = [t for t in transports if t != 'daemon_tcp'] + + +# -------------------------------------------------------------------------- +# Shared mixed-tree builder. Run fresh per transport leg (each leg starts +# from identical inputs) so any cross-leg divergence is a real transport +# defect, not a fixture artifact. 
+# -------------------------------------------------------------------------- +# Fixed epoch for every source mtime so each per-leg fixture is byte- AND +# time-identical: that makes any cross-leg mtime/content divergence in the +# RESULT a real transport defect, not an artifact of two wall-clock fixture +# creations. (The link-dest-equiv test shares one fixture for the same +# reason; here setup() must run per leg, so we pin determinism instead.) +FIXED_MTIME = 1_600_000_000 # 2020-09-13, well in the past, sub-second = 0 + + +def _det_bytes(n, seed): + """Deterministic pseudo-random-looking bytes (LCG); identical every run + so every transport leg copies the SAME content.""" + out = bytearray(n) + x = seed & 0xFFFFFFFF + for i in range(n): + x = (1103515245 * x + 12345) & 0xFFFFFFFF + out[i] = (x >> 16) & 0xFF + return bytes(out) + + +def build_mixed_tree(work, *, with_symlinks=False): + """Populate ``work``/src with a representative mixed tree. + + Regular files of several sizes (incl. empty + multi-block), nested dirs, a + 3-member hardlink group, and varied perms. Content is deterministic and + every mtime is pinned to FIXED_MTIME so the fixture is identical across + legs. + + Symlinks are gated behind ``with_symlinks`` because a daemon MUNGES + symlink targets by default (``munge symlinks`` -- a documented daemon + security mapping, prefixing ``/rsyncd-munged/``). That munge makes symlink + targets legitimately NON-equivalent daemon-vs-local, so symlinks are + excluded from the all-transport byte-equality matrix and verified + separately across the non-munging transports (local, ssh). + """ + src = work / 'src' + (src / 'nested' / 'deep').mkdir(parents=True) + + (src / 'empty').write_text('') + (src / 'small.txt').write_text('a small file body\n' * 4) + (src / 'nested' / 'mid.txt').write_text('mid file in a subdir\n' * 200) + (src / 'nested' / 'deep' / 'leaf.bin').write_bytes(_det_bytes(40000, 12345)) + + # Hardlink group (3 members across two dirs). 
+ h = src / 'h1' + h.write_text('hardlink group payload across transports\n' * 8) + os.link(h, src / 'h2') + os.link(h, src / 'nested' / 'h3') + + if with_symlinks: + # A relative (in-tree) and a dangling symlink. + os.symlink('small.txt', src / 'rel_link') + os.symlink('does-not-exist', src / 'dangling_link') + + # Varied perms. + os.chmod(src / 'small.txt', 0o600) + os.chmod(src / 'nested' / 'mid.txt', 0o644) + os.chmod(src / 'nested', 0o755) + # An executable bit somewhere to exercise the perm-preservation path. + (src / 'run.sh').write_text('#!/bin/sh\necho hi\n') + os.chmod(src / 'run.sh', 0o755) + + # Pin every mtime (files, dirs, symlinks) to FIXED_MTIME, deepest first so + # writing a parent's children doesn't re-stamp the parent afterwards. + paths = [] + for dirpath, dirnames, filenames in os.walk(src): + for nm in filenames + dirnames: + paths.append(os.path.join(dirpath, nm)) + paths.append(str(src)) + # Sort by depth descending so children are stamped before parents. + for p in sorted(paths, key=lambda q: q.count(os.sep), reverse=True): + os.utime(p, (FIXED_MTIME, FIXED_MTIME), follow_symlinks=False) + + +# -------------------------------------------------------------------------- +# F-general -- mixed tree is transport-blind. +# -------------------------------------------------------------------------- +# -aH preserves perms/times/owner/group and hardlinks (no symlinks here; they +# are checked separately below); the tree must be identical on every transport. +general = Scenario( + opts=['-aH'], + rel_src='src/', + rel_dst='dst/', + setup=build_mixed_tree, +) +trees = run_matrix(general, transports=transports) +tolerated = assert_equivalent(trees) +for m in tolerated: + print(f'F-general tolerated (documented mapping): {m}') + +# Symlinks: verified across the NON-munging transports only (local, ssh). 
+# A daemon munges symlink targets by default, a documented mapping that makes +# them non-equivalent over a daemon; including them in the all-transport +# matrix would false-fail on that documented behavior. +sym_transports = [t for t in transports if t in ('local', 'ssh')] +if len(sym_transports) >= 2: + sym = Scenario( + opts=['-aHl'], + rel_src='src/', + rel_dst='dst/', + setup=lambda w: build_mixed_tree(w, with_symlinks=True), + ) + sym_trees = run_matrix(sym, transports=sym_transports) + for m in assert_equivalent(sym_trees): + print(f'F-symlink tolerated (documented mapping): {m}') + + +# -------------------------------------------------------------------------- +# F-clamp -- forcing a lower --protocol is honored and stays transport-blind. +# -------------------------------------------------------------------------- +# The negotiated version is min(local, remote); forcing a lower --protocol=N +# on the client must be honored (and the daemon/ssh peer clamps to it) with +# the SAME final tree across transports. We pick a version below the build's +# max but at/above the minimum. 30 is the modern floor with full feature set +# (varint flist, etc.) and is universally supported by current rsync. +FORCE_PROTOCOL = 30 + +clamp = Scenario( + opts=['-aH', f'--protocol={FORCE_PROTOCOL}'], + rel_src='src/', + rel_dst='dst/', + setup=build_mixed_tree, +) +clamp_trees = run_matrix(clamp, transports=transports) +clamp_tolerated = assert_equivalent(clamp_trees) +for m in clamp_tolerated: + print(f'F-clamp tolerated (documented mapping): {m}') + +# The clamped result must also match the UN-clamped result's structure (the +# protocol floor must not change the final bytes/structure of this tree). +# Compare each clamp leg against the general local reference. 
+ref = 'local' if trees.get('local') is not None else next( + t for t, v in trees.items() if v is not None) +for t, ct in clamp_trees.items(): + if ct is None: + continue + d = diff_trees(trees[ref], ct) + fatal, _tol = partition_diffs(d) + if fatal: + test_fail(f'F-clamp: --protocol={FORCE_PROTOCOL} changed the final ' + f'tree vs the unclamped transfer ({ref} vs clamp/{t}):\n ' + + '\n '.join(fatal)) + + +# -------------------------------------------------------------------------- +# F-negotiate -- intersection-or-error, no specific algorithm asserted. +# -------------------------------------------------------------------------- +# (a) A valid negotiation succeeds and produces a correct tree. We let rsync +# negotiate freely (default) -- already covered by F-general -- and ALSO +# pin a checksum the build is known to support to confirm an explicit +# valid choice negotiates. We do NOT assert WHICH algorithm wins. +# (b) An empty intersection (an unsupported forced --compress-choice) must +# ERROR CLEANLY (non-zero exit, no destructive/silent mis-transfer), +# never silently fall back. + +# (b) -- the load-bearing negative case. A bogus compressor name has no +# mutual option, so negotiation must fail with a clean non-zero exit. +negroot = Path(tempfile.mkdtemp(prefix='fneg-', dir=os.environ['scratchdir'])) +(negroot / 'src').mkdir() +(negroot / 'src' / 'f.txt').write_text('negotiation payload\n' * 4) +(negroot / 'dst').mkdir() + +bogus = subprocess.run( + rsync_argv('-a', '--compress', '--compress-choice=no-such-algo-xyz', + f'{negroot}/src/', f'{negroot}/dst/'), + capture_output=True, text=True, +) +if bogus.returncode == 0: + test_fail('F-negotiate: an unsupported --compress-choice was accepted ' + '(exit 0). An empty negotiation intersection must error cleanly, ' + 'not silently transfer.') +# Clean error: nothing should have been silently transferred under the bogus +# choice (the run aborted before/at negotiation). The dst must be empty. 
+if (negroot / 'dst' / 'f.txt').exists(): + test_fail('F-negotiate: a file was transferred despite the failed ' + 'compress negotiation -- the error was not clean.') + +# (a) -- a valid explicit checksum choice negotiates and transfers correctly. +# Discover an algorithm the build actually supports rather than hard-coding +# one (build-dependent: md5/md4/sha1/xxh*/...). +vv = subprocess.run(rsync_argv('--version'), capture_output=True, text=True) +# rsync --version lists "Checksum list:\n "; parse the algo line. +algos = [] +lines = vv.stdout.splitlines() +for i, ln in enumerate(lines): + if 'Checksum list' in ln: + # algorithms are on the following indented line(s) + for j in range(i + 1, min(i + 3, len(lines))): + for tok in lines[j].split(): + if tok and not tok.endswith(':'): + algos.append(tok) + break +# Filter to real algorithm tokens (drop stray words). +algos = [a for a in algos if a.replace('-', '').isalnum()] +if not algos: + # Fall back to the universally-present md5; if even that fails the test + # will catch it below. 
+ algos = ['md5'] + +valid_choice = algos[0] +(negroot / 'dst2').mkdir() +good = subprocess.run( + rsync_argv('-a', f'--checksum-choice={valid_choice}', + f'{negroot}/src/', f'{negroot}/dst2/'), + capture_output=True, text=True, +) +if good.returncode != 0: + test_fail(f'F-negotiate: a valid --checksum-choice={valid_choice} failed ' + f'to negotiate: {good.stderr}') +if (negroot / 'dst2' / 'f.txt').read_text() != 'negotiation payload\n' * 4: + test_fail('F-negotiate: valid checksum-choice transfer produced wrong ' + 'content.') + + +legs = ', '.join(sorted(t for t, v in trees.items() if v is not None)) +print(f'transport-equiv-meta: F-general + F-clamp (--protocol={FORCE_PROTOCOL}) ' + f'transport-blind across [{legs}]; F-negotiate intersection-or-error ' + f'(bogus compress rejected cleanly, valid checksum={valid_choice} ' + f'negotiated) verified') diff --git a/token.c b/token.c index 62ffae151..91dff6e19 100644 --- a/token.c +++ b/token.c @@ -1126,3 +1126,35 @@ void see_token(char *data, int32 toklen) NOISY_DEATH("Unknown do_compression value"); } } + +#ifdef RSYNC_FUZZ_TOKEN +/* Fuzzing hook (compiled ONLY when RSYNC_FUZZ_TOKEN is defined; the normal + * rsync build never sees this). It exposes the file-internal static + * recv_deflated_token() (the zlib/CPRES_ZLIB compressed-token decoder) to + * fuzz/fuzz_deflated_token.c, plus a reset that restores the per-stream decode + * state to what a fresh receiver has at the start of a transfer. + * + * recv_deflated_token keeps file-static decode state (recv_state, the rx_strm + * inflate stream, rx_token/rx_run run accounting). A real receiver processes + * exactly one well-formed token stream per process lifetime and resets to + * r_init on END_FLAG. To isolate fuzz iterations - including iterations that + * unwound mid-stream via an exit_cleanup longjmp, leaving rx_strm mid-inflate- + * block - fuzz_recv_deflated_token_reset() forces recv_state back to r_init.
+ * The very next call then takes the r_init arm, which runs inflateReset(&rx_strm) + * (after first init) - the identical zlib re-init the real receiver performs - + * so every input starts from a pristine, faithfully-initialized decompressor, + * and rx_token is zeroed there too. (The function-local static saved_flag can + * carry a value across a mid-DEFLATED_DATA unwind; it is masked & 0xff and fed + * back as a flag byte, so at worst it injects one spurious bounded token/inflate + * step on the next input - documented cross-input coupling, never a memory bug.) + * No parse, bound, inflate, or accounting logic is altered. */ +void fuzz_recv_deflated_token_reset(void) +{ + recv_state = r_init; /* only the state tag is rewound here; per the note above, the r_init arm re-initialises the rest on the next call */ +} + +int32 fuzz_recv_deflated_token(int f, char **data) +{ + return recv_deflated_token(f, data); /* pure pass-through: f and data are forwarded, the result is returned unchanged */ +} +#endif /* RSYNC_FUZZ_TOKEN */ diff --git a/xattrs.c b/xattrs.c index 99795f244..65079ff26 100644 --- a/xattrs.c +++ b/xattrs.c @@ -295,8 +295,10 @@ static int rsync_xal_get(const char *fname, item_list *xalp) rxa = xalp->items; if (count > 1) qsort(rxa, count, sizeof (rsync_xa), rsync_xal_compare_names); - for (rxa += count-1; count; count--, rxa--) - rxa->num = count; + if (count) { + for (rxa += count-1; count; count--, rxa--) + rxa->num = count; + } return 0; } @@ -460,7 +462,8 @@ static int rsync_xal_store(item_list *xalp) * entire initial-count, not just enough space for one new item.
*/ *new_list = empty_xa_list; (void)EXPAND_ITEM_LIST(&new_list->xa_items, rsync_xa, xalp->count); - memcpy(new_list->xa_items.items, xalp->items, xalp->count * sizeof (rsync_xa)); + if (xalp->count) + memcpy(new_list->xa_items.items, xalp->items, xalp->count * sizeof (rsync_xa)); new_list->xa_items.count = xalp->count; xalp->count = 0; @@ -1284,4 +1287,40 @@ int x_fstat(int fd, STRUCT_STAT *fst, STRUCT_STAT *xst) return ret; } +#ifdef RSYNC_FUZZ_XATTRS +#include "rounding.h" /* EXTRA_ROUNDING - rsync.h does NOT pull this in; flist.c does */ +/* Fuzzing hook (compiled ONLY when RSYNC_FUZZ_XATTRS is defined). Exposes the + * per-file xattr wire decode receive_xattr() to fuzz/fuzz_xattrs.c together with + * a minimal file_struct that owns the F_XATTR extra slot the function writes. + * receive_xattr's storage tail (rsync_xal_store -> xattr_lookup_hash -> + * hashtable + checksum) runs for real against the linked instrumented objects; + * nothing in the parse/alloc/copy path is stubbed. The rsync_xal_l / temp_xattr + * statics persist across inputs (cannot reset file-statics from outside); + * documented cross-input coupling, same as recv_file_entry's lastname[]. */ +struct file_struct *fuzz_xattr_file_new(alloc_pool_t pool) +{ + /* Replicate recv_file_entry's extra-slot alignment dance (flist.c + * 1024-1034) EXACTLY so the file_struct lands 8-byte aligned - otherwise + * F_XATTR's union access is misaligned (a harness artifact, not a bug).
*/ + int extra_len = file_extra_cnt * EXTRA_LEN; + char *bp; + struct file_struct *file; +#if EXTRA_ROUNDING > 0 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; +#endif + bp = pool_alloc(pool, FILE_STRUCT_LEN + extra_len + 1, "fuzz_xattr_file_new"); + memset(bp, 0, FILE_STRUCT_LEN + extra_len + 1); /* zero the full allocation, including the trailing +1 byte (presumably the empty basename's NUL - confirm against flist.c) */ + bp += extra_len; /* the struct begins extra_len bytes into the allocation */ + file = (struct file_struct *)bp; + file->mode = S_IFREG | 0644; /* a plain file; not a symlink */ + return file; +} + +void fuzz_receive_xattr(int f, struct file_struct *file) +{ + receive_xattr(f, file); /* forwarded unchanged; nothing is returned to the harness */ +} +#endif /* RSYNC_FUZZ_XATTRS */ + #endif /* SUPPORT_XATTRS */