From bedda979f66f1c53031d7a785d8064f4474083b0 Mon Sep 17 00:00:00 2001
From: Joseph <162703152+josephnef@users.noreply.github.com>
Date: Tue, 26 May 2026 09:07:46 +0300
Subject: [PATCH] tests: fix radiotap parser + per-cell pcap collision; surface kernel-TX flag stripping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two bugs in the sniffer integration from #40/#41 that together hid the real finding about what kernel TX actually puts on-air. Both are fixed here, and the resulting AR9271 capture conclusively answers the open question about LDPC asymmetry that has been hanging since #40.

## Bug 1: `_parse_radiotap` returned None on every real-world frame

The parser only iterated `it_present[0]` and bailed on bits not in `_RT_FIELDS`. Every captured frame from `ath9k_htc` has presence 0xa000402f with bits [0,1,2,3,5,14,29,31]: bit 29 (RADIOTAP_NS marker) and bit 31 (EXT — continuation word) are control bits, not data fields. The parser hit bit 29 and bailed → 408 parse errors, one for every captured frame in the first AR9271 sniffer matrix run.

Fix: iterate bits across ALL presence words; recognise the three control bits (29 RADIOTAP_NS, 30 VENDOR_NS, 31 EXT) and skip them correctly (with 6-byte data consumption for VENDOR_NS); when hitting an unknown radiotap field, return what was parsed so far rather than discarding the whole frame.

Round-trip against `inject_beacon.build_beacon` for every (HT, VHT) × (BCC, LDPC, STBC) combo: byte-identical decoded fields ✓. Real AR9271-captured beacon: now parses cleanly as `kind=legacy` (real APs emit DSSS/OFDM, no MCS info).

## Bug 2: cell pcap filename collision in --encoding-matrix

`cell_id = f"tx-{tx_side}_rx-{rx_side}"` doesn't include the encoding label, so six encoding cells per driver-mode wrote to the same `/tmp/devourer-regress-*/tx-{tx}_rx-{rx}.sniffer.pcap`.
The last cell overwrote the first five — so `--keep-logs` retained only the LAST encoding combo per mode (typically VHT-LDPC, where the AR9271 captures 0 frames since it's n-only). That made post-hoc debugging impossible.

Fix: optional `cell_tag` parameter on `run_cell`, set by `run_encoding_matrix` to the sanitised encoding label (`ht-bcc / ht-ldpc / vht-ldpcstbc / ...`). Other matrix modes leave it empty (they only run one cell per driver-mode pair).

## What this surfaces

Encoding matrix re-run on the rig (8814 TX → 8821 RX, ch6, AR9271 as sniffer):

| Mode | Encoding requested | Sniffer decoded                   |
|------|--------------------|-----------------------------------|
| k/k  | HT-BCC             | HT MCS1 BCC 20MHz STBC=0 (412)    |
| k/k  | HT-LDPC            | HT MCS1 **BCC** 20MHz STBC=0 (386)|
| k/k  | HT-STBC=1          | HT MCS1 BCC 20MHz **STBC=0** (418)|
| k/k  | HT-LDPC+STBC=1     | HT MCS1 **BCC** **STBC=0** (422)  |
| k/k  | VHT-BCC            | 0 frames (AR9271 is n-only)       |
| k/k  | VHT-LDPC           | 0 frames                          |

Same pattern in the k/d row.

Definitive finding: aircrack-ng/88XXau (or mac80211 in its TX path) STRIPS the radiotap LDPC bit and STBC stream count. MCS index (1) and the HT-vs-VHT distinction DO survive. So every "LDPC" kernel-TX cell in #40 and #41 was actually emitting BCC on-air — the flat k/d row never disproved Roman's 8821AU LDPC-RX-no claim; it just meant we never tested the chip with an actual LDPC frame.

VHT cells show 0 to AR9271 (n-only) but >0 to the 8821 RX (AC chip), confirming the HT/VHT distinction is honoured at the TX path. We still can't see whether mac80211 strips the VHT-LDPC bit specifically — that would need an AC-capable sniffer, or a capture from the 8821 RX itself in monitor mode.

## Implications

- The encoding-matrix mode is most useful for chip-side encoding asymmetries reachable through MCS-index and HT/VHT only; it can't validate LDPC- or STBC-specific RX behaviour through the kernel TX path as currently wired.
- A proper LDPC-RX validation needs a userspace TX path that writes the radiotap directly to the chip (devourer's txdemo does this — ground-truth verified by `DEVOURER_TX_LDPC=1` env var). 8814 TX being broken on master is the blocker for using this from the d/k cells. - The AR9271 sniffer integration is functional; the data it produces is now reliable. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/regress.py | 12 ++++- tests/sniff_air.py | 108 ++++++++++++++++++++++++++++++--------------- 2 files changed, 84 insertions(+), 36 deletions(-) diff --git a/tests/regress.py b/tests/regress.py index e253982..be0938a 100755 --- a/tests/regress.py +++ b/tests/regress.py @@ -806,6 +806,7 @@ def run_cell( kh: KernelHost, encoding: Optional[dict] = None, sniffer_iface: Optional[str] = None, + cell_tag: str = "", ) -> CellResult: """Run one matrix cell. State contract: always restore DUTs to a clean baseline (host kernel-bound) on exit via try/finally. @@ -819,7 +820,14 @@ def run_cell( via sniff_air's radiotap decoder and a one-line summary appended to the CellResult.notes. Lets the matrix prove what encoding actually flew, vs what inject_beacon.py / txdemo *requested*.""" + # Per-cell filename stem. `cell_tag` is appended by callers that run + # multiple cells with the same (tx_side, rx_side) but different + # encodings (--encoding-matrix) — without it the pcap from cell N + # gets overwritten by cell N+1, so --keep-logs only preserves the + # last encoding combo per driver-mode. cell_id = f"tx-{tx_side}_rx-{rx_side}" + if cell_tag: + cell_id = f"{cell_id}_{cell_tag}" tx_log = tmpdir / f"{cell_id}.tx.log" rx_log = tmpdir / f"{cell_id}.rx.log" sniffer_pcap = tmpdir / f"{cell_id}.sniffer.pcap" if sniffer_iface else None @@ -1152,11 +1160,13 @@ def run_encoding_matrix( f"RX={rx_dut.chipset} ({rx_side}) enc=[{enc_label}]" ) print(cell_hdr + " ...", flush=True) + # Sanitize encoding label for use in a filename. 
+ tag = enc_label.lower().replace("+", "-").replace("=", "") try: r = run_cell( devourer_root, tx_dut, rx_dut, tx_side, rx_side, channel, duration, tmpdir, kh, encoding=enc, - sniffer_iface=sniffer_iface, + sniffer_iface=sniffer_iface, cell_tag=tag, ) except Exception as e: print(f" ✗ cell crashed: {e}", flush=True) diff --git a/tests/sniff_air.py b/tests/sniff_air.py index 27020d2..3a6d9e1 100644 --- a/tests/sniff_air.py +++ b/tests/sniff_air.py @@ -84,7 +84,12 @@ def _aligned(offset: int, align: int) -> int: def _parse_radiotap(frame: bytes): """Decode radiotap MCS (bit 19) / VHT (bit 21) info from one captured frame. Returns dict with keys: kind ('HT' | 'VHT' | 'legacy'), mcs, - ldpc, stbc, bw, nss (where applicable).""" + ldpc, stbc, bw, nss (where applicable), or None on malformed header. + + Handles multi-word it_present chains and the radiotap control bits + (29 RADIOTAP_NS, 30 VENDOR_NS, 31 EXT). Real mac80211 captures from + ath9k_htc routinely use multiple presence words + bit 29, so a parser + that only walks word0 fails on every frame.""" if len(frame) < 8: return None version, _pad, it_len = struct.unpack_from(" it_len: - return None - if bit == 19: # MCS info - mcs_known, mcs_flags, mcs_idx = struct.unpack_from( - " it_len: + return None + cur += 6 + in_vendor_ns = True + # Continue iterating bits, but any further field bits in + # this word are in vendor namespace — we can't size them. + continue + if in_vendor_ns: + # Vendor field, unknown size — bail out of further parsing + # but keep what we already extracted. + return parsed_to_result(parsed, it_len) + if bit not in _RT_FIELDS: + # Unknown radiotap field — can't safely advance cur. Return + # what we parsed so far rather than discarding. + return parsed_to_result(parsed, it_len) + size, align = _RT_FIELDS[bit] + cur = _aligned(cur, align) + if cur + size > it_len: + return parsed_to_result(parsed, it_len) + if bit == 19: # MCS info + mcs_known, mcs_flags, mcs_idx = struct.unpack_from( + "