diff --git a/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.qhelp b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.qhelp new file mode 100644 index 000000000000..1b0d08be8ea5 --- /dev/null +++ b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.qhelp @@ -0,0 +1,153 @@ + + + + +

+The Go module golang.org/x/net/idna implements UTS-46 IDNA +processing. On the Lookup and Display profiles +(and any profile constructed via idna.New(idna.MapForLookup(), ...)), +both (*Profile).ToASCII and (*Profile).ToUnicode +apply an NFKC-based character map that folds 100 distinct +non-ASCII Unicode digit codepoints to their ASCII equivalents. +The 100 codepoints partition into the following Unicode-block ranges (Latin-1 superscripts, mathematical superscripts and subscripts, circled digits, fullwidth digits, the Mathematical Alphanumeric Symbols digit styles, and segmented digits): +

+ +

+Devanagari digits (U+0966..U+096F) are not in scope: +empirical testing against golang.org/x/net/idna v0.53.0 +confirms they do not fold to ASCII via UTS-46. The +Registration profile is structurally covered by the rule +but disallows every fold codepoint at the rune-validation stage, so a +caller that respects the returned error never sees a +smuggled literal from that profile in practice. +

+

+The library contains no IP-literal detection. A caller that applies UTS-46 +mapping to an attacker-controlled host string and consumes the result in a +network sink without rechecking against IP-literal parsers receives a +valid ASCII IPv4 literal back as the "domain name" output. Any downstream +allowlist check, SSRF guard, NoProxy match, or TLS-SNI router that does +not re-check the post-IDNA result is bypassed. The anti-pattern also +applies to callers that do a pre-IDNA net.ParseIP check and +think it is sufficient: the smuggled host is not ASCII, so the pre-IDNA +check rejects it as non-IP, and the post-IDNA value (now a numeric +literal) reaches the sink unguarded. +

+

+IPv6 is out of scope: the colon character (:) is disallowed by UTS-46, so +bare-IPv6 inputs are rejected by IDNA rune-validation before any +digit-fold mapping runs. +

+

+Sinks where the smuggled literal becomes exploitable include +net.JoinHostPort, net.Dial, +(*http.Request).URL.Host, (*tls.Config).ServerName, +(*http.Cookie).Domain, and any HTTP client request URL +constructed from the mapped value. +

+
+ + +

+Either: +

+
    +
  1. +If your IDNA library exposes one, use a strict profile option that +returns an error when the mapped output parses as an IP literal. +
  2. +
  3. +Apply the explicit safe pattern: after the IDNA mapping call, strip +trailing dots from the result and parse it. Reject if +net.ParseIP returns a non-nil address, or if +netip.ParseAddr returns no error (note the inverted +convention: netip.ParseAddr reports a successfully parsed +address via err == nil, not via a non-zero return). The +trailing-dot strip is required because "0.¹.0.0." maps to +"0.1.0.0.", which a bare net.ParseIP rejects +on its own yet is still an IP literal for routing purposes; the strip +exposes the literal so the parser sees it. +
  4. +
+
+ + +

+Vulnerable pattern. The host string is mapped through the IDNA profile +and reaches a network sink with no post-IDNA IP-literal recheck: +

+ + + +

+Safe pattern. Post-IDNA trailing-dot strip followed by +net.ParseIP recheck: +

+ + + +

+The query recognizes three trailing-dot strip forms as part of the safe +pattern. They are not equivalent in coverage: +

+
    +
  • strings.TrimRight(ace, "."): strict form. Strips + all trailing dots, so the multi-dot residue produced when UTS-46 + maps the fullwidth dot U+FF0E or the ideographic dot U+3002 next + to ASCII dots is fully removed.
  • +
  • strings.TrimSuffix(ace, "."): lenient form. Strips + only one trailing dot. Sufficient for the canonical + "0.1.0.0." shape but leaves residue if multiple + trailing dots were produced by mapping.
  • +
  • if strings.HasSuffix(ace, ".") { ace = ace[:len(ace)-1] }: + manual single-dot slice. Behaves identically to + TrimSuffix in coverage and inherits the same + multi-dot-residue limitation.
  • +
+

+Callers whose threat model includes the multi-trailing-dot variant +should prefer strings.TrimRight. After the strip, parse +with netip.ParseAddr (preferred) or net.ParseIP +and reject if the value parses as an IP literal (err == nil +for the former, non-nil return for the latter). +

+
+ + + +
  • +Unicode Technical Standard #46 (IDNA Compatibility Processing): +https://www.unicode.org/reports/tr46/ +
  • +
  • +golang.org/x/net/idna package documentation: +https://pkg.go.dev/golang.org/x/net/idna +
  • +
  • +WHATWG URL Standard, ends_in_a_number host parser check +(prior art for IP-literal detection in URL parsers): +https://url.spec.whatwg.org/#ends-in-a-number-checker +
  • +
  • +CWE-918: Server-Side Request Forgery (SSRF): +https://cwe.mitre.org/data/definitions/918.html +
  • +
  • +CWE-020: Improper Input Validation: +https://cwe.mitre.org/data/definitions/20.html +
  • + +
    +
    diff --git a/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.ql b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.ql new file mode 100644 index 000000000000..3b0cace5a481 --- /dev/null +++ b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.ql @@ -0,0 +1,36 @@ +/** + * @name IDNA digit-fold IP-literal smuggling via UTS-46 NFKC mapping + * @description An untrusted hostname flows through + * `(*golang.org/x/net/idna.Profile).ToASCII` or `.ToUnicode` + * on a digit-folding profile (which folds 100 non-ASCII + * Unicode digit codepoints to ASCII via UTS-46 NFKC) and + * reaches a security-relevant hostname sink without a + * post-IDNA IP-literal recheck. A caller that omits the + * recheck (or only runs `net.ParseIP` BEFORE the mapping + * call) will accept a smuggled IPv4 literal such as + * `"0.¹.0.0"` (which maps to `"0.1.0.0"`). Scope is IPv4 + * only because IPv6 colons are rejected by IDNA + * rune-validation before UTS-46 mapping runs. + * @id go/idna-ip-literal-smuggle + * @kind path-problem + * @problem.severity warning + * @security-severity 8.1 + * @precision high + * @tags security + * experimental + * external/cwe/cwe-918 + * external/cwe/cwe-020 + * @requires codeql/go-all >= 0.6.0 + */ + +import go +import IdnaIpLiteralSmuggle +import Flow::PathGraph + +from + Flow::PathNode source, + Flow::PathNode sink +where Flow::flowPath(source, sink) +select sink.getNode(), source, sink, + "Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value.", + source.getNode(), "this user-controlled value" diff --git a/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.qll b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.qll new file mode 100644 index 000000000000..3d502b6e3436 --- /dev/null +++ b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggle.qll @@ -0,0 
+1,384 @@ +/** + * Stateful taint-tracking configuration for UTS-46 IDNA digit-fold + * IP-literal smuggling in Go. + * + * Background + * ---------- + * `golang.org/x/net/idna` applies UTS-46 NFKC mapping inside + * `(*Profile).ToASCII` and `(*Profile).ToUnicode`, which fold 100 + * non-ASCII Unicode digit codepoints to their ASCII equivalents. The + * 100 codepoints span Latin-1 superscripts, mathematical superscripts + * and subscripts, circled digits, fullwidth digits, the Mathematical + * Alphanumeric Symbols block (bold, double-struck, sans-serif, + * sans-serif-bold, and monospace digit styles), and segmented digits. + * Devanagari digits are not in scope; they pass through Punycode rather + * than fold to ASCII. A caller that omits a post-IDNA IP-literal + * recheck (or that only checks BEFORE the IDNA call) will accept a + * smuggled IPv4 literal back as the "domain name" output and pass it + * to a downstream allowlist, SSRF guard, or routing decision unguarded. + * Scope is IPv4 only. IPv6 colons are rejected by IDNA rune-validation + * before UTS-46 mapping runs, so no IPv6 smuggle path exists. + * + * Modeling + * -------- + * Single-state tracking is structurally insufficient because a pre-IDNA + * `net.ParseIP` barrier must NOT block flow that transitions through the + * IDNA call. The configuration uses `TaintTracking::GlobalWithState` with + * two flow states: + * + * - `TPreIdna` : untrusted hostname before IDNA mapping + * - `TPostIdna` : mapped output flowing toward a security-relevant sink + * + * `(*idna.Profile).ToASCII` and `(*idna.Profile).ToUnicode` on the + * digit-folding profiles (`Lookup`, `Display`, `Registration`, and any + * profile constructed via `idna.New(idna.MapForLookup(), ...)`) are + * modeled as state-transition additional flow steps that flip + * `TPreIdna -> TPostIdna`. 
The package-level `idna.ToASCII` helper is + * intentionally NOT modeled because it dispatches to + * `Punycode.process(...)`, which has a nil UTS-46 mapping and so + * cannot produce the digit-fold smuggle. The `Punycode` profile is + * excluded for the same reason. + * + * The barrier is `net.ParseIP`, `net.ParseCIDR`, `netip.ParseAddr`, or + * `netip.ParsePrefix` consumed in `TPostIdna`. The safe pattern requires + * trimming trailing dots before re-checking. Without the trim the literal + * `"0.¹.0.0."` maps to `"0.1.0.0."`, which `net.ParseIP` rejects, so the + * smuggle survives. The configuration also requires that the post-IDNA + * value reaching the parser was produced by one of: + * (a) a `strings.TrimRight(_, ".")` call. This is the strict form. It + * handles multi-trailing-dot variants where UTS-46 mapping produces + * multiple trailing ASCII dots from fullwidth U+FF0E or ideographic + * U+3002 dot characters composing with ASCII dots. + * (b) a `strings.TrimSuffix(_, ".")` call. This accepts the common + * single-trailing-dot pattern but is incomplete for the multi-dot + * variant. It is included because it matches widely-used + * real-world callers. + * (c) an explicit `if strings.HasSuffix(out, ".") { out = out[:len(out)-1] }` + * slicing pattern. + * + * The barrier-strictness choice is documented in the README under + * "Barrier strictness". Callers that rely on the TrimSuffix form should + * verify that the multi-dot bypass class is not in their threat model. + * + * Sources + * ------- + * Untrusted hostname inputs surfaced via the active threat model (HTTP request + * URL host, HTTP request headers, function parameters typed as hostname-like, + * env-var reads, file-content reads, command-line flag reads). 
+ * + * Sinks + * ----- + * - `net.JoinHostPort` host argument + * - field-write to `(*net/http.Request).URL.Host` + * - field-write to `(*crypto/tls.Config).ServerName` + * - field-write to `(*net/http.Cookie).Domain` + * - HTTP client-request URL sinks (already modeled by `Http::ClientRequest`) + * - address argument of `net.Dial`, `net.DialTimeout`, `(*net.Dialer).Dial` + * (index 1) and `(*net.Dialer).DialContext` (index 2) + * - `net.LookupHost(host)` host argument + * - `net.LookupIP(host)` host argument + * - `(*net.Resolver).LookupHost(ctx, host)` host argument (index 1) + * - `(*net.Resolver).LookupIPAddr(ctx, host)` host argument (index 1) + * + * DNS-resolver sinks are exploitable because a smuggled IP literal + * passed to `net.LookupHost` triggers a DNS query for the address-literal + * form. Some resolvers answer with the IP directly, which is the DNS + * resolver allowlist bypass risk class. + * + * `net.LookupCNAME` is intentionally excluded. Its first argument is a + * hostname used only as a CNAME chain start, not passed to a connection + * primitive, so IP-literal smuggling through it has no direct + * network-access consequence and would produce noise without additional + * sink chaining. `(*net.Resolver).LookupIP(ctx, network, host)` is not + * modeled as a sink here. NOTE(review): that method exists on + * `*net.Resolver` since Go 1.15 (host at index 2); consider adding it. + * + * @id-companion go/idna-ip-literal-smuggle + */ + +import go + + /** + * The two flow states in the IDNA-smuggle taint configuration. + * + * `TPreIdna()` is the initial state of every untrusted hostname source. + * `TPostIdna()` is reached only after a value has flowed through one of the + * IDNA mapping calls. Sinks are only flagged in `TPostIdna()`. + */ + newtype TFlowState = + TPreIdna() or + TPostIdna() + + /** + * Holds if `call` is a call to one of the `idna` mapping entry points whose + * UTS-46 NFKC behavior performs the digit fold. 
The argument-0 input is + * regarded as the source of the additional flow step; result-0 is the + * mapped output. + * + * The set covered: + * - method `(*golang.org/x/net/idna.Profile).ToASCII` on a profile that + * applies UTS-46 mapping (`Lookup`, `Display`, `Registration`, or any + * `idna.New(idna.MapForLookup(), ...)`-constructed profile) + * - method `(*golang.org/x/net/idna.Profile).ToUnicode` on the same + * profiles (the digit-fold pipeline runs in `validateAndMap` before + * the encode-as-Punycode-or-not branch, so `ToUnicode` produces the + * same digit-folded output as `ToASCII`) + * + * The package-level `golang.org/x/net/idna.ToASCII` helper is + * intentionally NOT covered: it dispatches to `Punycode.process(...)`, + * which has a nil mapping function and does not run the UTS-46 fold. + * It has no smuggle surface and is treated as an unrelated identifier. + */ + predicate idnaMappingCall(DataFlow::CallNode call) { + call.(DataFlow::MethodCallNode) + .getTarget() + .hasQualifiedName("golang.org/x/net/idna", "Profile", ["ToASCII", "ToUnicode"]) and + // Exclude the Punycode profile: it has nil UTS-46 mapping and so cannot + // produce the digit-fold smuggle. The check is syntactic: the receiver must + // be the selector `idna.Punycode` (a renamed import or a copied profile value is not excluded). + not exists(DataFlow::Node recv | + recv = call.(DataFlow::MethodCallNode).getReceiver() and + recv.asExpr().(SelectorExpr).getBase().(Ident).getName() = "idna" and + recv.asExpr().(SelectorExpr).getSelector().getName() = "Punycode" + ) + } + + /** + * Holds if `node` is the input argument to an IDNA mapping call and `out` + * is the call's primary string return (result index 0). + */ + predicate idnaMapInToOut(DataFlow::Node node, DataFlow::Node out) { + exists(DataFlow::CallNode call | + idnaMappingCall(call) and + node = call.getArgument(0) and + out = call.getResult(0) + ) + } + + /** + * Holds if `node` is the value being checked by a post-IDNA IP-literal + * recheck call. 
We accept the four canonical Go primitives: + * - `net.ParseIP` + * - `net.ParseCIDR` + * - `net/netip.ParseAddr` + * - `net/netip.ParsePrefix` + */ + predicate ipLiteralRecheckInput(DataFlow::Node node) { + exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("net", ["ParseIP", "ParseCIDR"]) + or + c.getTarget().hasQualifiedName("net/netip", ["ParseAddr", "ParsePrefix"]) + | + node = c.getArgument(0) + ) + } + + /** + * Holds if `out` is the return of a `strings.TrimSuffix(x, ".")` or + * `strings.TrimRight(x, ".")` call applied to `node`. This is the + * trailing-dot strip that the safe pattern requires before the post-IDNA + * IP recheck. + */ + predicate trailingDotTrim(DataFlow::Node node, DataFlow::Node out) { + exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("strings", ["TrimSuffix", "TrimRight"]) and + c.getArgument(1).getStringValue() = "." and + node = c.getArgument(0) and + out = c.getResult(0) + ) + } + + /** + * Holds if `out` is the slice expression of a slice-assignment of the form + * `out = out[:len(out)-1]` applied to `node`. This is the manual slicing + * variant of the trailing-dot strip (shape (c) in the module docstring). + * + * The high bound must be exactly `len(x) - 1` where `x` is the same + * global-value-number as `node`. The lower bound is unconstrained because + * the idiomatic form `out[:len(out)-1]` omits it (implicit zero). + */ + predicate trailingDotSlice(DataFlow::Node node, DataFlow::Node out) { + exists(SliceExpr se, DataFlow::CallNode lenCall, SubExpr sub | + se.getHigh() = sub and + sub.getRightOperand().getIntValue() = 1 and + sub.getLeftOperand() = lenCall.asExpr() and + // builtin `len` has no enclosing package (Go pseudo-package "builtin" + // has no DataFlow representation); guard via callee name and the + // absence of a package qualifier on the target Function. 
+ lenCall.getCalleeName() = "len" and + not exists(lenCall.getTarget().getPackage()) and + // The slice base, the len argument, and `node` all refer to the + // same SSA-level value. Match by global-value-number so that any + // of the three Reads can serve as the bound `node`. + se.getBase().(Ident).getGlobalValueNumber() = + node.asExpr().getGlobalValueNumber() and + lenCall.getArgument(0).asExpr().getGlobalValueNumber() = + node.asExpr().getGlobalValueNumber() and + // The trim output is the slice expression itself; data flow + // propagates from there through any subsequent assignment to the + // parse input via DataFlow::localFlow. + DataFlow::exprNode(se) = out + ) + } + + /** + * Holds if `node` is the argument of an IP-literal recheck call AND the + * value reaching it was produced by a trailing-dot trim whose input is + * locally reachable from `postIdnaSource`. + * + * Three shapes are accepted: + * (a) `strings.TrimRight(out, ".")` (multi-dot form) + * (b) `strings.TrimSuffix(out, ".")` (single-dot form) + * (c) `if strings.HasSuffix(out, ".") { out = out[:len(out)-1] }` + * (manual slice form; the `HasSuffix` guard itself is not checked) + * + * Callers that omit the trim entirely are NOT sanitized by this + * predicate and remain alertable. + */ + predicate safePostIdnaRecheck(DataFlow::Node postIdnaSource, DataFlow::Node node) { + exists(DataFlow::Node trimSource, DataFlow::Node trimmed | + DataFlow::localFlow(postIdnaSource, trimSource) and + (trailingDotTrim(trimSource, trimmed) or + trailingDotSlice(trimSource, trimmed)) and + DataFlow::localFlow(trimmed, node) and + ipLiteralRecheckInput(node) + ) + } + + /** + * Holds if `sink` is a hostname-consuming security-relevant sink for which + * smuggled IP literals are exploitable. 
+ */ + predicate hostnameSink(DataFlow::Node sink) { + // net.JoinHostPort host argument + exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("net", "JoinHostPort") and + sink = c.getArgument(0) + ) + or + // net.Dial, net.DialTimeout, (*net.Dialer).Dial, and .DialContext + // "address" argument. Address is "host:port"; the whole arg is + // modeled because upstream taint frequently arrives pre-joined. + exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("net", ["Dial", "DialTimeout"]) and + sink = c.getArgument(1) + ) + or + // (*Dialer).Dial(network, address string). Address is at argument + // index 1. Reference signature: net/dial.go Dial(network, address + // string). + exists(DataFlow::MethodCallNode c | + c.getTarget().hasQualifiedName("net", "Dialer", "Dial") and + sink = c.getArgument(1) + ) + or + // (*Dialer).DialContext(ctx context.Context, network, address string). + // Address is at index 2. Reference signature: net/dial.go + // DialContext(ctx, network, address string). + // The typed-address siblings (DialTCP, DialUDP, DialIP, DialUnix) take + // *TCPAddr, *UDPAddr, *IPAddr, *UnixAddr (not plain strings) and are + // intentionally excluded; a smuggled hostname string cannot satisfy + // those parameter types. + exists(DataFlow::MethodCallNode c | + c.getTarget().hasQualifiedName("net", "Dialer", "DialContext") and + sink = c.getArgument(2) + ) + or + // Field-writes to net/http.Request.URL.Host, tls.Config.ServerName, + // http.Cookie.Domain. + exists(Write w, Field f | + ( + f.hasQualifiedName("net/url", "URL", "Host") or + f.hasQualifiedName("crypto/tls", "Config", "ServerName") or + f.hasQualifiedName("net/http", "Cookie", "Domain") + ) and + w.writesField(_, f, sink) + ) + or + // HTTP client-request URL sinks already modeled by the standard library. + sink = any(Http::ClientRequest r).getUrl() + or + // net.LookupHost(host). Package-level DNS resolver; argument 0 is + // the host. 
+ exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("net", "LookupHost") and + sink = c.getArgument(0) + ) + or + // net.LookupIP(host). Package-level DNS resolver; argument 0 is the + // host. + exists(DataFlow::CallNode c | + c.getTarget().hasQualifiedName("net", "LookupIP") and + sink = c.getArgument(0) + ) + or + // (*net.Resolver).LookupHost(ctx, host). Argument 1 is the host + // (argument 0 is context.Context). + exists(DataFlow::MethodCallNode c | + c.getTarget().hasQualifiedName("net", "Resolver", "LookupHost") and + sink = c.getArgument(1) + ) + or + // (*net.Resolver).LookupIPAddr(ctx, host). Argument 1 is the host + // (argument 0 is context.Context). + exists(DataFlow::MethodCallNode c | + c.getTarget().hasQualifiedName("net", "Resolver", "LookupIPAddr") and + sink = c.getArgument(1) + ) + } + + /** Configuration implementing the stateful taint-tracking signature. */ + module Config implements DataFlow::StateConfigSig { + /** A flow state carried by tainted values in this configuration. */ + class FlowState extends TFlowState { + /** Gets a human-readable description of this state. */ + string toString() { + this = TPreIdna() and result = "PreIdna" + or + this = TPostIdna() and result = "PostIdna" + } + } + + predicate isSource(DataFlow::Node source, FlowState state) { + source instanceof ActiveThreatModelSource and state = TPreIdna() + } + + predicate isSink(DataFlow::Node sink, FlowState state) { + hostnameSink(sink) and state = TPostIdna() + } + + /** + * The IDNA mapping is modeled as a state-transition step: + * `TPreIdna(arg) -> TPostIdna(result)` + */ + predicate isAdditionalFlowStep( + DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2 + ) { + idnaMapInToOut(node1, node2) and + state1 = TPreIdna() and + state2 = TPostIdna() + } + + /** + * A correct post-IDNA IP-literal recheck (trailing-dot trim FOLLOWED + * BY `net.ParseIP` or equivalent) is a barrier in `TPostIdna`. 
The + * trim source is bound to the post-IDNA-tainted predecessor so that + * an unrelated TrimRight + ParseIP construct elsewhere in the same + * scope does not silently sanitize the IDNA-tainted path. A bare + * `net.ParseIP` without the prior trim is NOT a barrier; the alert + * remains. NOTE(review): `node` is any node locally reachable from the IDNA result; it is not required to be guarded by the recheck itself. + */ + predicate isBarrier(DataFlow::Node node, FlowState state) { + state = TPostIdna() and + exists(DataFlow::Node postIdnaResult, DataFlow::Node parseInput | + idnaMapInToOut(_, postIdnaResult) and + DataFlow::localFlow(postIdnaResult, node) and + safePostIdnaRecheck(postIdnaResult, parseInput) + ) + } + + predicate observeDiffInformedIncrementalMode() { any() } + } + +/** Tracks taint flow for IDNA digit-fold IP-literal smuggling, instantiated with `Config`. */ +module Flow = TaintTracking::GlobalWithState<Config>; diff --git a/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggleBad.go b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggleBad.go new file mode 100644 index 000000000000..c912f5660bfd --- /dev/null +++ b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggleBad.go @@ -0,0 +1,41 @@ +package main + +import ( + "net" + "net/http" + "net/url" + + "golang.org/x/net/idna" +) + +// VulnerableLookup mirrors the shape of the anti-pattern as it appears in +// real Go code: an attacker-controlled host string is canonicalised through +// idna.Lookup.ToASCII, the result is consumed by a network sink, and there +// is no post-IDNA recheck against IP-literal parsers. UTS-46 NFKC mapping +// folds 100 non-ASCII digit codepoints (e.g. fullwidth, mathematical +// superscripts, circled, segmented) to their ASCII equivalents, so an input +// like "0.¹.0.0" emerges from ToASCII as "0.1.0.0" and reaches the sink +// as a routable IPv4 literal. 
+func VulnerableLookup(host string) (*http.Response, error) { + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return nil, err + } + return http.Get("https://" + ace + "/") +} + +// VulnerableProxyRoute mirrors the canonicalAddr shape used in callers that +// canonicalise a URL host before applying network policy. The host is read +// from an attacker-controlled URL, mapped through an idna.Lookup.ToASCII +// wrapper, and passed to net.JoinHostPort without an IP-literal recheck. +func VulnerableProxyRoute(rawURL string) (string, error) { + u, err := url.Parse(rawURL) + if err != nil { + return "", err + } + addr := u.Hostname() + if v, err := idna.Lookup.ToASCII(addr); err == nil { + addr = v + } + return net.JoinHostPort(addr, "443"), nil +} diff --git a/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggleGood.go b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggleGood.go new file mode 100644 index 000000000000..c1d69c0f35f9 --- /dev/null +++ b/go/ql/src/experimental/CWE-918/IdnaIpLiteralSmuggleGood.go @@ -0,0 +1,36 @@ +package main + +import ( + "net" + "net/http" + "strings" + + "golang.org/x/net/idna" +) + +// SafeLookup applies the safe pattern: a post-IDNA trailing-dot trim +// followed by net.ParseIP. The trim is required because "0.¹.0.0." maps +// to "0.1.0.0." which net.ParseIP rejects on its own yet is still +// routable as 0.1.0.0 in the rest of the stack. +func SafeLookup(host string) (*http.Response, error) { + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return nil, err + } + + // Post-IDNA trailing-dot trim, then re-check. TrimRight (not + // TrimSuffix) handles multiple trailing dots that UTS-46 mapping can + // produce when fullwidth/ideographic dots compose with ASCII dots. 
+ candidate := strings.TrimRight(ace, ".") + if ip := net.ParseIP(candidate); ip != nil { + return nil, errBadHost + } + + return http.Get("https://" + ace + "/") +} + +var errBadHost = errIPLiteral{} + +type errIPLiteral struct{} + +func (errIPLiteral) Error() string { return "ip literals not allowed" } diff --git a/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/IdnaIpLiteralSmuggle.expected b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/IdnaIpLiteralSmuggle.expected new file mode 100644 index 000000000000..71ccea573021 --- /dev/null +++ b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/IdnaIpLiteralSmuggle.expected @@ -0,0 +1,253 @@ +#select +| negatives.go:243:26:243:28 | ace | negatives.go:228:10:228:19 | selection of Header | negatives.go:243:26:243:28 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | negatives.go:228:10:228:19 | selection of Header | this user-controlled value | +| positives.go:27:11:27:32 | ...+... | positives.go:25:10:25:19 | selection of Header | positives.go:27:11:27:32 | ...+... | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:25:10:25:19 | selection of Header | this user-controlled value | +| positives.go:36:25:36:28 | addr | positives.go:33:10:33:19 | selection of Header | positives.go:36:25:36:28 | addr | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. 
| positives.go:33:10:33:19 | selection of Header | this user-controlled value | +| positives.go:44:19:44:21 | ace | positives.go:42:10:42:19 | selection of Header | positives.go:44:19:44:21 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:42:10:42:19 | selection of Header | this user-controlled value | +| positives.go:54:11:54:13 | ace | positives.go:51:10:51:19 | selection of Header | positives.go:54:11:54:13 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:51:10:51:19 | selection of Header | this user-controlled value | +| positives.go:64:19:64:21 | ace | positives.go:61:10:61:19 | selection of Header | positives.go:64:19:64:21 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:61:10:61:19 | selection of Header | this user-controlled value | +| positives.go:73:17:73:19 | ace | positives.go:71:10:71:19 | selection of Header | positives.go:73:17:73:19 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. 
| positives.go:71:10:71:19 | selection of Header | this user-controlled value | +| positives.go:81:18:81:44 | call to JoinHostPort | positives.go:79:10:79:19 | selection of Header | positives.go:81:18:81:44 | call to JoinHostPort | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:79:10:79:19 | selection of Header | this user-controlled value | +| positives.go:81:35:81:37 | ace | positives.go:79:10:79:19 | selection of Header | positives.go:81:35:81:37 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:79:10:79:19 | selection of Header | this user-controlled value | +| positives.go:92:45:92:48 | addr | positives.go:88:10:88:19 | selection of Header | positives.go:92:45:92:48 | addr | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:88:10:88:19 | selection of Header | this user-controlled value | +| positives.go:101:13:101:15 | ace | positives.go:98:10:98:19 | selection of Header | positives.go:101:13:101:15 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. 
| positives.go:98:10:98:19 | selection of Header | this user-controlled value | +| positives.go:112:15:112:17 | ace | positives.go:109:10:109:19 | selection of Header | positives.go:112:15:112:17 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:109:10:109:19 | selection of Header | this user-controlled value | +| positives.go:121:11:121:13 | ace | positives.go:118:10:118:19 | selection of Header | positives.go:121:11:121:13 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:118:10:118:19 | selection of Header | this user-controlled value | +| positives.go:132:37:132:39 | ace | positives.go:129:10:129:19 | selection of Header | positives.go:132:37:132:39 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:129:10:129:19 | selection of Header | this user-controlled value | +| positives.go:140:11:140:32 | ...+... | positives.go:138:10:138:19 | selection of Header | positives.go:140:11:140:32 | ...+... | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. 
| positives.go:138:10:138:19 | selection of Header | this user-controlled value | +| positives.go:151:16:151:19 | addr | positives.go:147:10:147:19 | selection of Header | positives.go:151:16:151:19 | addr | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:147:10:147:19 | selection of Header | this user-controlled value | +| positives.go:160:11:160:32 | ...+... | positives.go:158:10:158:19 | selection of Header | positives.go:160:11:160:32 | ...+... | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:158:10:158:19 | selection of Header | this user-controlled value | +| positives.go:172:11:172:32 | ...+... | positives.go:170:10:170:19 | selection of Header | positives.go:172:11:172:32 | ...+... | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:170:10:170:19 | selection of Header | this user-controlled value | +| positives.go:180:17:180:19 | out | positives.go:178:10:178:19 | selection of Header | positives.go:180:17:180:19 | out | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:178:10:178:19 | selection of Header | this user-controlled value | +| positives.go:193:11:193:32 | ...+... 
| positives.go:188:10:188:19 | selection of Header | positives.go:193:11:193:32 | ...+... | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:188:10:188:19 | selection of Header | this user-controlled value | +| positives.go:204:17:204:19 | ace | positives.go:202:10:202:19 | selection of Header | positives.go:204:17:204:19 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:202:10:202:19 | selection of Header | this user-controlled value | +| positives.go:211:15:211:17 | ace | positives.go:209:10:209:19 | selection of Header | positives.go:211:15:211:17 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:209:10:209:19 | selection of Header | this user-controlled value | +| positives.go:219:37:219:39 | ace | positives.go:216:10:216:19 | selection of Header | positives.go:219:37:219:39 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. 
| positives.go:216:10:216:19 | selection of Header | this user-controlled value | +| positives.go:227:39:227:41 | ace | positives.go:224:10:224:19 | selection of Header | positives.go:227:39:227:41 | ace | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:224:10:224:19 | selection of Header | this user-controlled value | +| positives.go:241:19:241:22 | addr | positives.go:233:10:233:19 | selection of Header | positives.go:241:19:241:22 | addr | Untrusted hostname from $@ flows through a `golang.org/x/net/idna` mapping call (which performs UTS-46 NFKC digit folding) and reaches this hostname sink without a post-IDNA `net.ParseIP` (or `netip.ParseAddr`) recheck on the trailing-dot-stripped value. | positives.go:233:10:233:19 | selection of Header | this user-controlled value | +edges +| negatives.go:228:10:228:19 | selection of Header | negatives.go:228:10:228:45 | call to Get | provenance | Src:MaD:1 MaD:2 | +| negatives.go:228:10:228:45 | call to Get | negatives.go:239:34:239:37 | host | provenance | | +| negatives.go:239:2:239:38 | ... := ...[0] | negatives.go:243:26:243:28 | ace | provenance | | +| negatives.go:239:34:239:37 | host | negatives.go:239:2:239:38 | ... := ...[0] | provenance | Config | +| positives.go:25:10:25:19 | selection of Header | positives.go:25:10:25:40 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:25:10:25:40 | call to Get | positives.go:26:32:26:35 | host | provenance | | +| positives.go:26:2:26:36 | ... := ...[0] | positives.go:27:11:27:32 | ...+... | provenance | | +| positives.go:26:32:26:35 | host | positives.go:26:2:26:36 | ... 
:= ...[0] | provenance | Config | +| positives.go:33:10:33:19 | selection of Header | positives.go:33:10:33:44 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:33:10:33:44 | call to Get | positives.go:34:32:34:35 | host | provenance | | +| positives.go:34:2:34:36 | ... := ...[0] | positives.go:36:25:36:28 | addr | provenance | | +| positives.go:34:32:34:35 | host | positives.go:34:2:34:36 | ... := ...[0] | provenance | Config | +| positives.go:42:10:42:19 | selection of Header | positives.go:42:10:42:41 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:42:10:42:41 | call to Get | positives.go:43:32:43:35 | host | provenance | | +| positives.go:43:2:43:36 | ... := ...[0] | positives.go:44:19:44:21 | ace | provenance | | +| positives.go:43:32:43:35 | host | positives.go:43:2:43:36 | ... := ...[0] | provenance | Config | +| positives.go:51:10:51:19 | selection of Header | positives.go:51:10:51:41 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:51:10:51:41 | call to Get | positives.go:52:33:52:36 | host | provenance | | +| positives.go:52:2:52:37 | ... := ...[0] | positives.go:54:11:54:13 | ace | provenance | | +| positives.go:52:33:52:36 | host | positives.go:52:2:52:37 | ... := ...[0] | provenance | Config | +| positives.go:61:10:61:19 | selection of Header | positives.go:61:10:61:43 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:61:10:61:43 | call to Get | positives.go:62:32:62:35 | host | provenance | | +| positives.go:62:2:62:36 | ... := ...[0] | positives.go:64:19:64:21 | ace | provenance | | +| positives.go:62:32:62:35 | host | positives.go:62:2:62:36 | ... := ...[0] | provenance | Config | +| positives.go:71:10:71:19 | selection of Header | positives.go:71:10:71:47 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:71:10:71:47 | call to Get | positives.go:72:32:72:35 | host | provenance | | +| positives.go:72:2:72:36 | ... 
:= ...[0] | positives.go:73:17:73:19 | ace | provenance | | +| positives.go:72:32:72:35 | host | positives.go:72:2:72:36 | ... := ...[0] | provenance | Config | +| positives.go:79:10:79:19 | selection of Header | positives.go:79:10:79:41 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:79:10:79:41 | call to Get | positives.go:80:32:80:35 | host | provenance | | +| positives.go:80:2:80:36 | ... := ...[0] | positives.go:81:35:81:37 | ace | provenance | | +| positives.go:80:2:80:36 | ... := ...[0] | positives.go:81:35:81:37 | ace | provenance | | +| positives.go:80:32:80:35 | host | positives.go:80:2:80:36 | ... := ...[0] | provenance | Config | +| positives.go:81:35:81:37 | ace | positives.go:81:18:81:44 | call to JoinHostPort | provenance | MaD:3 | +| positives.go:88:10:88:19 | selection of Header | positives.go:88:10:88:45 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:88:10:88:45 | call to Get | positives.go:89:38:89:41 | host | provenance | | +| positives.go:89:2:89:42 | ... := ...[0] | positives.go:92:45:92:48 | addr | provenance | | +| positives.go:89:38:89:41 | host | positives.go:89:2:89:42 | ... := ...[0] | provenance | Config | +| positives.go:98:10:98:19 | selection of Header | positives.go:98:10:98:43 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:98:10:98:43 | call to Get | positives.go:99:32:99:35 | host | provenance | | +| positives.go:99:2:99:36 | ... := ...[0] | positives.go:101:13:101:15 | ace | provenance | | +| positives.go:99:32:99:35 | host | positives.go:99:2:99:36 | ... := ...[0] | provenance | Config | +| positives.go:109:10:109:19 | selection of Header | positives.go:109:10:109:47 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:109:10:109:47 | call to Get | positives.go:111:28:111:31 | host | provenance | | +| positives.go:111:2:111:32 | ... := ...[0] | positives.go:112:15:112:17 | ace | provenance | | +| positives.go:111:28:111:31 | host | positives.go:111:2:111:32 | ... 
:= ...[0] | provenance | Config | +| positives.go:118:10:118:19 | selection of Header | positives.go:118:10:118:42 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:118:10:118:42 | call to Get | positives.go:119:32:119:35 | host | provenance | | +| positives.go:119:2:119:36 | ... := ...[0] | positives.go:121:11:121:13 | ace | provenance | | +| positives.go:119:32:119:35 | host | positives.go:119:2:119:36 | ... := ...[0] | provenance | Config | +| positives.go:129:10:129:19 | selection of Header | positives.go:129:10:129:42 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:129:10:129:42 | call to Get | positives.go:130:32:130:35 | host | provenance | | +| positives.go:130:2:130:36 | ... := ...[0] | positives.go:132:37:132:39 | ace | provenance | | +| positives.go:130:32:130:35 | host | positives.go:130:2:130:36 | ... := ...[0] | provenance | Config | +| positives.go:138:10:138:19 | selection of Header | positives.go:138:10:138:43 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:138:10:138:43 | call to Get | positives.go:139:32:139:35 | host | provenance | | +| positives.go:139:2:139:36 | ... := ...[0] | positives.go:140:11:140:32 | ...+... | provenance | | +| positives.go:139:32:139:35 | host | positives.go:139:2:139:36 | ... := ...[0] | provenance | Config | +| positives.go:147:10:147:19 | selection of Header | positives.go:147:10:147:47 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:147:10:147:47 | call to Get | positives.go:148:32:148:35 | host | provenance | | +| positives.go:148:2:148:36 | ... := ...[0] | positives.go:151:16:151:19 | addr | provenance | | +| positives.go:148:32:148:35 | host | positives.go:148:2:148:36 | ... 
:= ...[0] | provenance | Config | +| positives.go:158:10:158:19 | selection of Header | positives.go:158:10:158:46 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:158:10:158:46 | call to Get | positives.go:159:32:159:35 | host | provenance | | +| positives.go:159:2:159:36 | ... := ...[0] | positives.go:160:11:160:32 | ...+... | provenance | | +| positives.go:159:32:159:35 | host | positives.go:159:2:159:36 | ... := ...[0] | provenance | Config | +| positives.go:170:10:170:19 | selection of Header | positives.go:170:10:170:44 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:170:10:170:44 | call to Get | positives.go:171:34:171:37 | host | provenance | | +| positives.go:171:2:171:38 | ... := ...[0] | positives.go:172:11:172:32 | ...+... | provenance | | +| positives.go:171:34:171:37 | host | positives.go:171:2:171:38 | ... := ...[0] | provenance | Config | +| positives.go:178:10:178:19 | selection of Header | positives.go:178:10:178:52 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:178:10:178:52 | call to Get | positives.go:179:35:179:38 | host | provenance | | +| positives.go:179:2:179:39 | ... := ...[0] | positives.go:180:17:180:19 | out | provenance | | +| positives.go:179:35:179:38 | host | positives.go:179:2:179:39 | ... := ...[0] | provenance | Config | +| positives.go:188:10:188:19 | selection of Header | positives.go:188:10:188:46 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:188:10:188:46 | call to Get | positives.go:189:32:189:35 | host | provenance | | +| positives.go:189:2:189:36 | ... := ...[0] | positives.go:193:11:193:32 | ...+... | provenance | | +| positives.go:189:32:189:35 | host | positives.go:189:2:189:36 | ... 
:= ...[0] | provenance | Config | +| positives.go:202:10:202:19 | selection of Header | positives.go:202:10:202:40 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:202:10:202:40 | call to Get | positives.go:203:32:203:35 | host | provenance | | +| positives.go:203:2:203:36 | ... := ...[0] | positives.go:204:17:204:19 | ace | provenance | | +| positives.go:203:32:203:35 | host | positives.go:203:2:203:36 | ... := ...[0] | provenance | Config | +| positives.go:209:10:209:19 | selection of Header | positives.go:209:10:209:43 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:209:10:209:43 | call to Get | positives.go:210:32:210:35 | host | provenance | | +| positives.go:210:2:210:36 | ... := ...[0] | positives.go:211:15:211:17 | ace | provenance | | +| positives.go:210:32:210:35 | host | positives.go:210:2:210:36 | ... := ...[0] | provenance | Config | +| positives.go:216:10:216:19 | selection of Header | positives.go:216:10:216:49 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:216:10:216:49 | call to Get | positives.go:217:32:217:35 | host | provenance | | +| positives.go:217:2:217:36 | ... := ...[0] | positives.go:219:37:219:39 | ace | provenance | | +| positives.go:217:32:217:35 | host | positives.go:217:2:217:36 | ... := ...[0] | provenance | Config | +| positives.go:224:10:224:19 | selection of Header | positives.go:224:10:224:49 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:224:10:224:49 | call to Get | positives.go:225:32:225:35 | host | provenance | | +| positives.go:225:2:225:36 | ... := ...[0] | positives.go:227:39:227:41 | ace | provenance | | +| positives.go:225:32:225:35 | host | positives.go:225:2:225:36 | ... 
:= ...[0] | provenance | Config | +| positives.go:233:10:233:19 | selection of Header | positives.go:233:10:233:43 | call to Get | provenance | Src:MaD:1 MaD:2 | +| positives.go:233:10:233:43 | call to Get | positives.go:238:35:238:38 | addr | provenance | | +| positives.go:238:5:238:39 | ... := ...[0] | positives.go:241:19:241:22 | addr | provenance | | +| positives.go:238:35:238:38 | addr | positives.go:238:5:238:39 | ... := ...[0] | provenance | Config | +models +| 1 | Source: net/http; Request; true; Header; ; ; ; remote; manual | +| 2 | Summary: net/http; Header; true; Get; ; ; Argument[receiver]; ReturnValue; taint; manual | +| 3 | Summary: net; ; false; JoinHostPort; ; ; Argument[0..1]; ReturnValue; taint; manual | +nodes +| negatives.go:228:10:228:19 | selection of Header | semmle.label | selection of Header | +| negatives.go:228:10:228:45 | call to Get | semmle.label | call to Get | +| negatives.go:239:2:239:38 | ... := ...[0] | semmle.label | ... := ...[0] | +| negatives.go:239:34:239:37 | host | semmle.label | host | +| negatives.go:243:26:243:28 | ace | semmle.label | ace | +| positives.go:25:10:25:19 | selection of Header | semmle.label | selection of Header | +| positives.go:25:10:25:40 | call to Get | semmle.label | call to Get | +| positives.go:26:2:26:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:26:32:26:35 | host | semmle.label | host | +| positives.go:27:11:27:32 | ...+... | semmle.label | ...+... | +| positives.go:33:10:33:19 | selection of Header | semmle.label | selection of Header | +| positives.go:33:10:33:44 | call to Get | semmle.label | call to Get | +| positives.go:34:2:34:36 | ... := ...[0] | semmle.label | ... 
:= ...[0] | +| positives.go:34:32:34:35 | host | semmle.label | host | +| positives.go:36:25:36:28 | addr | semmle.label | addr | +| positives.go:42:10:42:19 | selection of Header | semmle.label | selection of Header | +| positives.go:42:10:42:41 | call to Get | semmle.label | call to Get | +| positives.go:43:2:43:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:43:32:43:35 | host | semmle.label | host | +| positives.go:44:19:44:21 | ace | semmle.label | ace | +| positives.go:51:10:51:19 | selection of Header | semmle.label | selection of Header | +| positives.go:51:10:51:41 | call to Get | semmle.label | call to Get | +| positives.go:52:2:52:37 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:52:33:52:36 | host | semmle.label | host | +| positives.go:54:11:54:13 | ace | semmle.label | ace | +| positives.go:61:10:61:19 | selection of Header | semmle.label | selection of Header | +| positives.go:61:10:61:43 | call to Get | semmle.label | call to Get | +| positives.go:62:2:62:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:62:32:62:35 | host | semmle.label | host | +| positives.go:64:19:64:21 | ace | semmle.label | ace | +| positives.go:71:10:71:19 | selection of Header | semmle.label | selection of Header | +| positives.go:71:10:71:47 | call to Get | semmle.label | call to Get | +| positives.go:72:2:72:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:72:32:72:35 | host | semmle.label | host | +| positives.go:73:17:73:19 | ace | semmle.label | ace | +| positives.go:79:10:79:19 | selection of Header | semmle.label | selection of Header | +| positives.go:79:10:79:41 | call to Get | semmle.label | call to Get | +| positives.go:80:2:80:36 | ... := ...[0] | semmle.label | ... 
:= ...[0] | +| positives.go:80:32:80:35 | host | semmle.label | host | +| positives.go:81:18:81:44 | call to JoinHostPort | semmle.label | call to JoinHostPort | +| positives.go:81:35:81:37 | ace | semmle.label | ace | +| positives.go:81:35:81:37 | ace | semmle.label | ace | +| positives.go:88:10:88:19 | selection of Header | semmle.label | selection of Header | +| positives.go:88:10:88:45 | call to Get | semmle.label | call to Get | +| positives.go:89:2:89:42 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:89:38:89:41 | host | semmle.label | host | +| positives.go:92:45:92:48 | addr | semmle.label | addr | +| positives.go:98:10:98:19 | selection of Header | semmle.label | selection of Header | +| positives.go:98:10:98:43 | call to Get | semmle.label | call to Get | +| positives.go:99:2:99:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:99:32:99:35 | host | semmle.label | host | +| positives.go:101:13:101:15 | ace | semmle.label | ace | +| positives.go:109:10:109:19 | selection of Header | semmle.label | selection of Header | +| positives.go:109:10:109:47 | call to Get | semmle.label | call to Get | +| positives.go:111:2:111:32 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:111:28:111:31 | host | semmle.label | host | +| positives.go:112:15:112:17 | ace | semmle.label | ace | +| positives.go:118:10:118:19 | selection of Header | semmle.label | selection of Header | +| positives.go:118:10:118:42 | call to Get | semmle.label | call to Get | +| positives.go:119:2:119:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:119:32:119:35 | host | semmle.label | host | +| positives.go:121:11:121:13 | ace | semmle.label | ace | +| positives.go:129:10:129:19 | selection of Header | semmle.label | selection of Header | +| positives.go:129:10:129:42 | call to Get | semmle.label | call to Get | +| positives.go:130:2:130:36 | ... := ...[0] | semmle.label | ... 
:= ...[0] | +| positives.go:130:32:130:35 | host | semmle.label | host | +| positives.go:132:37:132:39 | ace | semmle.label | ace | +| positives.go:138:10:138:19 | selection of Header | semmle.label | selection of Header | +| positives.go:138:10:138:43 | call to Get | semmle.label | call to Get | +| positives.go:139:2:139:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:139:32:139:35 | host | semmle.label | host | +| positives.go:140:11:140:32 | ...+... | semmle.label | ...+... | +| positives.go:147:10:147:19 | selection of Header | semmle.label | selection of Header | +| positives.go:147:10:147:47 | call to Get | semmle.label | call to Get | +| positives.go:148:2:148:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:148:32:148:35 | host | semmle.label | host | +| positives.go:151:16:151:19 | addr | semmle.label | addr | +| positives.go:158:10:158:19 | selection of Header | semmle.label | selection of Header | +| positives.go:158:10:158:46 | call to Get | semmle.label | call to Get | +| positives.go:159:2:159:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:159:32:159:35 | host | semmle.label | host | +| positives.go:160:11:160:32 | ...+... | semmle.label | ...+... | +| positives.go:170:10:170:19 | selection of Header | semmle.label | selection of Header | +| positives.go:170:10:170:44 | call to Get | semmle.label | call to Get | +| positives.go:171:2:171:38 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:171:34:171:37 | host | semmle.label | host | +| positives.go:172:11:172:32 | ...+... | semmle.label | ...+... | +| positives.go:178:10:178:19 | selection of Header | semmle.label | selection of Header | +| positives.go:178:10:178:52 | call to Get | semmle.label | call to Get | +| positives.go:179:2:179:39 | ... := ...[0] | semmle.label | ... 
:= ...[0] | +| positives.go:179:35:179:38 | host | semmle.label | host | +| positives.go:180:17:180:19 | out | semmle.label | out | +| positives.go:188:10:188:19 | selection of Header | semmle.label | selection of Header | +| positives.go:188:10:188:46 | call to Get | semmle.label | call to Get | +| positives.go:189:2:189:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:189:32:189:35 | host | semmle.label | host | +| positives.go:193:11:193:32 | ...+... | semmle.label | ...+... | +| positives.go:202:10:202:19 | selection of Header | semmle.label | selection of Header | +| positives.go:202:10:202:40 | call to Get | semmle.label | call to Get | +| positives.go:203:2:203:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:203:32:203:35 | host | semmle.label | host | +| positives.go:204:17:204:19 | ace | semmle.label | ace | +| positives.go:209:10:209:19 | selection of Header | semmle.label | selection of Header | +| positives.go:209:10:209:43 | call to Get | semmle.label | call to Get | +| positives.go:210:2:210:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:210:32:210:35 | host | semmle.label | host | +| positives.go:211:15:211:17 | ace | semmle.label | ace | +| positives.go:216:10:216:19 | selection of Header | semmle.label | selection of Header | +| positives.go:216:10:216:49 | call to Get | semmle.label | call to Get | +| positives.go:217:2:217:36 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:217:32:217:35 | host | semmle.label | host | +| positives.go:219:37:219:39 | ace | semmle.label | ace | +| positives.go:224:10:224:19 | selection of Header | semmle.label | selection of Header | +| positives.go:224:10:224:49 | call to Get | semmle.label | call to Get | +| positives.go:225:2:225:36 | ... := ...[0] | semmle.label | ... 
:= ...[0] | +| positives.go:225:32:225:35 | host | semmle.label | host | +| positives.go:227:39:227:41 | ace | semmle.label | ace | +| positives.go:233:10:233:19 | selection of Header | semmle.label | selection of Header | +| positives.go:233:10:233:43 | call to Get | semmle.label | call to Get | +| positives.go:238:5:238:39 | ... := ...[0] | semmle.label | ... := ...[0] | +| positives.go:238:35:238:38 | addr | semmle.label | addr | +| positives.go:241:19:241:22 | addr | semmle.label | addr | +subpaths diff --git a/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/IdnaIpLiteralSmuggle.qlref b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/IdnaIpLiteralSmuggle.qlref new file mode 100644 index 000000000000..f1a05365a63b --- /dev/null +++ b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/IdnaIpLiteralSmuggle.qlref @@ -0,0 +1,4 @@ +query: experimental/CWE-918/IdnaIpLiteralSmuggle.ql +postprocess: + - utils/test/PrettyPrintModels.ql + - utils/test/InlineExpectationsTestQuery.ql diff --git a/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/go.mod b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/go.mod new file mode 100644 index 000000000000..c577f5f042ef --- /dev/null +++ b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/go.mod @@ -0,0 +1,7 @@ +module github.com/github/codeql/go/test/experimental/CWE-918/IdnaIpLiteralSmuggle + +go 1.25.0 + +require golang.org/x/net v0.53.0 + +require golang.org/x/text v0.36.0 // indirect diff --git a/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/negatives.go b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/negatives.go new file mode 100644 index 000000000000..c0d066562fda --- /dev/null +++ b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/negatives.go @@ -0,0 +1,244 @@ +// Negative test cases: compliant callers that must NOT trigger the alert. +// Only AdversarialWitnessBinding (a deliberate positive) carries `// $ Alert`; no other sink line is annotated.
+ +package main + +import ( + "context" + "net" + "net/http" + "net/netip" + "net/url" + "strings" + + "golang.org/x/net/idna" +) + +// Compliant: post-IDNA TrimSuffix(".") followed by net.ParseIP recheck. +// This is the safe pattern. +func compliantTrimAndRecheck(req *http.Request) { + host := req.Header.Get("X-HOST-OK-1") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimSuffix(ace, ".") + if ip := net.ParseIP(candidate); ip != nil { + return + } + http.Get("https://" + ace + "/") // OK: post-IDNA recheck barrier +} + +// Compliant variant: uses strings.TrimRight(ace, ".") for the post-IDNA trim. +func compliantTrimRight(req *http.Request) { + host := req.Header.Get("X-HOST-OK-2") + ace, _ := idna.Lookup.ToASCII(host) + candidate := strings.TrimRight(ace, ".") + if ip := net.ParseIP(candidate); ip != nil { + return + } + net.JoinHostPort(ace, "443") // OK +} + +// True-negative: caller uses idna.Punycode, which does NOT apply the +// UTS-46 NFKC mapping. Even without a recheck, no digit-fold occurs. +func purePunycode(req *http.Request) { + host := req.Header.Get("X-HOST-PUNYCODE") + ace, _ := idna.Punycode.ToASCII(host) + http.Get("https://" + ace + "/") // OK: no digit-fold profile +} + +// True-negative: package-level idna.ToASCII is intentionally excluded +// from the model (it dispatches to Punycode.process and so cannot +// produce the digit-fold smuggle). Pinning the documented exclusion +// against future broadening of the call matcher. +func packageLevelToASCII(req *http.Request) { + host := req.Header.Get("X-HOST-PKG-TOASCII") + ace, _ := idna.ToASCII(host) + http.Get("https://" + ace + "/") // OK: package-level helper excluded +} + +// True-negative: caller uses idna.Display for human rendering only; the +// output never reaches a network sink in this function.
+func displayOnly(req *http.Request) { + host := req.Header.Get("X-HOST-DISPLAY") + disp, _ := idna.Display.ToUnicode(host) + _ = disp // OK: never reaches a sink +} + +// True-negative: pure URL-parser pipeline. net/url.Parse is not the +// IDNA mapper; URL.Host is consumed without idna.ToASCII having run. +func urlParseOnly(req *http.Request) { + raw := req.Header.Get("X-URL-RAW") + u, err := url.Parse(raw) + if err != nil { + return + } + http.Get(u.String()) // OK: no IDNA mapping in the path +} + +// True-negative: idna.ToASCII output is immediately discarded; nothing +// reaches a sink. +func idnaDiscard(req *http.Request) { + host := req.Header.Get("X-HOST-DISCARD") + _, _ = idna.Lookup.ToASCII(host) // OK: result discarded +} + +// Compliant: post-IDNA TrimSuffix + net.ParseIP recheck before net.LookupHost. +func compliantLookupHost(req *http.Request) { + host := req.Header.Get("X-HOST-LOOKUP-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimSuffix(ace, ".") + if ip := net.ParseIP(candidate); ip != nil { + return + } + net.LookupHost(ace) // OK: post-IDNA recheck barrier +} + +// Compliant: post-IDNA TrimRight + net.ParseIP recheck before (*Resolver).LookupHost. +func compliantResolverLookupHost(req *http.Request) { + host := req.Header.Get("X-HOST-RESOLVER-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimRight(ace, ".") + if ip := net.ParseIP(candidate); ip != nil { + return + } + r := &net.Resolver{} + r.LookupHost(context.Background(), ace) // OK: post-IDNA recheck barrier +} + +// Compliant: post-IDNA TrimSuffix + netip.ParseAddr recheck before (*Resolver).LookupIPAddr. 
+func compliantResolverLookupIPAddr(req *http.Request) { + host := req.Header.Get("X-HOST-IPADDR-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimSuffix(ace, ".") + if _, parseErr := netip.ParseAddr(candidate); parseErr == nil { + return + } + r := &net.Resolver{} + r.LookupIPAddr(context.Background(), ace) // OK: post-IDNA recheck barrier +} + +// Compliant: post-IDNA TrimRight + netip.ParseAddr recheck. The canonical +// strict pattern combining the multi-trailing-dot trim with the modern +// netip parser. This must NOT alert. +func compliantTrimRightNetipParseAddr(req *http.Request) { + host := req.Header.Get("X-HOST-TRIMRIGHT-NETIP-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimRight(ace, ".") + if _, parseErr := netip.ParseAddr(candidate); parseErr == nil { + return + } + http.Get("https://" + ace + "/") // OK: post-IDNA recheck barrier +} + +// Compliant: post-IDNA TrimSuffix + netip.ParseAddr. The lenient +// single-trailing-dot pattern, accepted by the rule per shape (b) in the +// module docstring. This must NOT alert. +func compliantTrimSuffixNetipParseAddr(req *http.Request) { + host := req.Header.Get("X-HOST-TRIMSUFFIX-NETIP-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimSuffix(ace, ".") + if _, parseErr := netip.ParseAddr(candidate); parseErr == nil { + return + } + net.JoinHostPort(ace, "443") // OK: post-IDNA recheck barrier +} + +// Compliant: manual slice form of the trim per shape (c) in the module +// docstring: `if strings.HasSuffix(out, ".") { out = out[:len(out)-1] }` +// followed by net.ParseIP. This must NOT alert. 
+func compliantManualSliceParseIP(req *http.Request) { + host := req.Header.Get("X-HOST-MANUAL-SLICE-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + out := ace + if strings.HasSuffix(out, ".") { + out = out[:len(out)-1] + } + if ip := net.ParseIP(out); ip != nil { + return + } + net.JoinHostPort(ace, "443") // OK: post-IDNA recheck barrier +} + +// Compliant: post-IDNA TrimRight + net.ParseCIDR recheck. Pins the +// ParseCIDR branch of ipLiteralRecheckInput against regressions that +// would only break for callers using the CIDR parser. +func compliantTrimRightParseCIDR(req *http.Request) { + host := req.Header.Get("X-HOST-PARSECIDR-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimRight(ace, ".") + if _, _, parseErr := net.ParseCIDR(candidate); parseErr == nil { + return + } + http.Get("https://" + ace + "/") // OK: post-IDNA recheck barrier +} + +// Compliant: post-IDNA TrimSuffix + netip.ParsePrefix recheck. Pins the +// ParsePrefix branch of ipLiteralRecheckInput. +func compliantTrimSuffixNetipParsePrefix(req *http.Request) { + host := req.Header.Get("X-HOST-PARSEPREFIX-OK") + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return + } + candidate := strings.TrimSuffix(ace, ".") + if _, parseErr := netip.ParsePrefix(candidate); parseErr == nil { + return + } + net.JoinHostPort(ace, "443") // OK: post-IDNA recheck barrier +} + +// AdversarialWitnessBinding mixes an unrelated TrimRight + ParseIP +// construct in the same scope as an IDNA-tainted path. The pre-fix +// predicate would silently sanitize the IDNA path because some-trim +// flowed-to-some-ParseIP existed in scope. The post-fix predicate ties +// the trim source to the post-IDNA tainted predecessor and correctly +// fires the alert. +// +// Expected: this function SHOULD trigger an alert on the JoinHostPort +// line. 
The negatives-fixture name is misleading; this is technically a +// positive test for the witness-binding fix. Placed here for proximity +// to the bug shape it regresses against. +// +// This is the canonical regression test for the v0.1.0 witness-binding +// fix: pre-fix predicate would NOT alert; post-fix predicate WILL alert. +func AdversarialWitnessBinding(req *http.Request, otherInput string) string { + host := req.Header.Get("X-HOST-ADVERSARIAL") // $ Source + + // Unrelated trim + ParseIP elsewhere in the same scope. The pre-fix + // predicate matched any-trim-to-any-ParseIP and treated this as a + // sanitizer on the IDNA path below. It is not. + unrelated := strings.TrimRight(otherInput, ".") + if ip := net.ParseIP(unrelated); ip != nil { + return "rejected unrelated" + } + + // IDNA-tainted path with no post-IDNA recheck. Must alert. + ace, err := idna.Lookup.ToASCII(host) + if err != nil { + return "" + } + return net.JoinHostPort(ace, "443") // $ Alert +} diff --git a/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/positives.go b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/positives.go new file mode 100644 index 000000000000..cd93f294599f --- /dev/null +++ b/go/ql/test/experimental/CWE-918/IdnaIpLiteralSmuggle/positives.go @@ -0,0 +1,242 @@ +// Positive test cases for the IDNA digit-fold IP-literal smuggle query. +// Each `// $ Source` and `// $ Alert` annotation is consumed by the +// CodeQL InlineExpectationsTestQuery harness. +// +// Sources are *http.Request fields (Header, URL.Hostname, FormValue, +// URL.Query().Get) so the default RemoteFlowSource threat model in +// codeql test run picks them up without extra configuration. 
+ +package main + +import ( + "context" + "crypto/tls" + "net" + "net/http" + "net/url" + "time" + + "golang.org/x/net/idna" +) + +// --- Class 1: Latin-1 superscripts (U+00B9 SUPERSCRIPT ONE) --- +// "0.¹.0.0" -> "0.1.0.0" +func smuggleLatin1Superscript(req *http.Request) { + host := req.Header.Get("X-HOST-LATIN1") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + http.Get("https://" + ace + "/") // $ Alert +} + +// --- Class 1 second positive: Latin-1 superscript U+00B2 -> "2", net.DialTimeout sink --- +// "0.0.².0" -> "0.0.2.0" +func smuggleLatin1SuperscriptDialTimeout(req *http.Request) { + host := req.Header.Get("X-HOST-LATIN1-TWO") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + addr := ace + ":443" + net.DialTimeout("tcp", addr, 5*time.Second) // $ Alert +} + +// --- Class 2: Mathematical superscripts (U+2074 SUPERSCRIPT FOUR) --- +// "10.⁴.0.1" -> "10.4.0.1" +func smuggleMathSuperscript(req *http.Request) { + host := req.Header.Get("X-HOST-MATHSUP") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + net.JoinHostPort(ace, "443") // $ Alert +} + +// --- Class 2 second positive: Math superscript U+2079 -> "9", url.URL.Host sink --- +// Uses idna.Display.ToASCII to exercise an alternate UTS-46 mapping profile. 
+// "10.0.⁹.1" -> "10.0.9.1" +func smuggleMathSuperscriptURLHost(req *http.Request) { + host := req.Header.Get("X-Forward-Host") // $ Source + ace, _ := idna.Display.ToASCII(host) + u := &url.URL{Scheme: "https"} + u.Host = ace // $ Alert + _ = u +} + +// --- Class 3: Mathematical subscripts (U+2081 SUBSCRIPT ONE) --- +// "127.0.0.₁" -> "127.0.0.1" +func smuggleMathSubscript(req *http.Request) { + host := req.Header.Get("X-HOST-SUBSCRIPT") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + cfg := &tls.Config{} + cfg.ServerName = ace // $ Alert + _ = cfg +} + +// --- Class 3 second positive: Math subscript U+2087 -> "7", net.LookupHost sink --- +// "10.₇.0.1" -> "10.7.0.1" +func smuggleMathSubscriptLookupHost(req *http.Request) { + host := req.Header.Get("X-HOST-SUBSCRIPT-TWO") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + net.LookupHost(ace) // $ Alert +} + +// --- Class 4: Circled digits (U+2460 CIRCLED DIGIT ONE) --- +// "192.168.①.1" -> "192.168.1.1" +func smuggleCircledDigit(req *http.Request) { + host := req.Header.Get("X-HOST-CIRCLED") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + net.Dial("tcp", net.JoinHostPort(ace, "80")) // $ Alert +} + +// --- Class 4 second positive: Circled digit U+2463 -> "4", (*net.Dialer).DialContext sink --- +// Uses idna.Registration.ToASCII to exercise the registration profile. 
+// "10.0.0.④" -> "10.0.0.4" on mapping profiles; Registration rejects ④ with an error (discarded here), so this pins structural coverage only. +func smuggleCircledDigitDialerContext(req *http.Request) { + host := req.Header.Get("X-HOST-CIRCLED-TWO") // $ Source + ace, _ := idna.Registration.ToASCII(host) + addr := ace + ":443" + d := &net.Dialer{} + d.DialContext(context.Background(), "tcp", addr) // $ Alert +} + +// --- Class 5: Fullwidth digits (U+FF11 FULLWIDTH DIGIT ONE) --- +// "192.168.1.\uFF11" -> "192.168.1.1" +func smuggleFullwidth(req *http.Request) { + host := req.Header.Get("X-HOST-FULLWIDTH") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + c := &http.Cookie{} + c.Domain = ace // $ Alert + _ = c +} + +// --- Class 5 second positive: Fullwidth U+FF10 -> "0", net.LookupIP sink --- +// Uses an idna.New(idna.MapForLookup(), ...) constructed profile. +// "\uFF10.0.0.1" -> "0.0.0.1" +func smuggleFullwidthLookupIP(req *http.Request) { + host := req.Header.Get("X-HOST-FULLWIDTH-TWO") // $ Source + profile := idna.New(idna.MapForLookup()) + ace, _ := profile.ToASCII(host) + net.LookupIP(ace) // $ Alert +} + +// --- Class 6: Mathematical bold/sans/double-struck/mono (U+1D7CE MATH BOLD ZERO) --- +// "\U0001D7CE.\U0001D7CF.\U0001D7CE.\U0001D7CF" -> "0.1.0.1" +func smuggleMathBold(req *http.Request) { + host := req.Header.Get("X-HOST-MATHBOLD") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + u := &url.URL{Scheme: "https"} + u.Host = ace // $ Alert + _ = u +} + +// --- Class 6 second positive: Math sans-serif digit U+1D7E2 -> "0", +// (*net.Resolver).LookupHost sink --- +// "\U0001D7E2.\U0001D7E3.\U0001D7E2.\U0001D7E3" -> "0.1.0.1" +func smuggleMathSansResolverLookupHost(req *http.Request) { + host := req.Header.Get("X-HOST-MATHSANS") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + r := &net.Resolver{} + r.LookupHost(context.Background(), ace) // $ Alert +} + +// --- Class 7: Segmented digits (U+1FBF1 SEGMENTED DIGIT ONE) --- +// "\U0001FBF1.0.0.0" -> "1.0.0.0" +func smuggleSegmented(req *http.Request) { + host := req.Header.Get("X-HOST-SEGMENTED") // $ Source + ace, _ := 
idna.Lookup.ToASCII(host) + http.Get("https://" + ace + "/") // $ Alert +} + +// --- Class 7 second positive: Segmented digit U+1FBF7 -> "7", +// (*net.Dialer).Dial sink --- +// "\U0001FBF7.0.0.1" -> "7.0.0.1" +func smuggleSegmentedDialerDial(req *http.Request) { + host := req.Header.Get("X-HOST-SEGMENTED-TWO") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + addr := ace + ":80" + d := &net.Dialer{} + d.Dial("tcp", addr) // $ Alert +} + +// --- Class 4 third positive: U+24EA CIRCLED DIGIT ZERO (zero-only +// codepoint in the circled family; U+2460 starts at one). --- +// "⓪.0.0.1" -> "0.0.0.1" +func smuggleCircledZero(req *http.Request) { + host := req.Header.Get("X-HOST-CIRCLED-ZERO") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + http.Get("https://" + ace + "/") // $ Alert +} + +// --- ToUnicode positive: idna.Lookup.ToUnicode runs the same UTS-46 +// digit-fold pipeline as ToASCII (validateAndMap executes before the +// encode-vs-decode branch), so a Latin-1 superscript input emerges as +// ASCII via ToUnicode too. Empirically verified against +// golang.org/x/net/idna v0.53.0: Lookup.ToUnicode("0.¹.0.0") +// returns "0.1.0.0". +func smuggleViaToUnicode(req *http.Request) { + host := req.Header.Get("X-HOST-TO-UNICODE") // $ Source + out, _ := idna.Lookup.ToUnicode(host) + http.Get("https://" + out + "/") // $ Alert +} + +// --- ToUnicode positive on Display profile, net.LookupHost sink. --- +// "10.⁹.0.1" -> "10.9.0.1" +func smuggleDisplayToUnicodeLookupHost(req *http.Request) { + host := req.Header.Get("X-HOST-DISPLAY-TO-UNICODE") // $ Source + out, _ := idna.Display.ToUnicode(host) + net.LookupHost(out) // $ Alert +} + +// --- Trailing-dot variant: "0.¹.0.0." -> "0.1.0.0." --- +// A bare `net.ParseIP("0.1.0.0.")` returns nil, so a post-IDNA recheck +// WITHOUT a trailing-dot trim does NOT sanitize. The query must still +// alert here. 
+func smuggleTrailingDot(req *http.Request) { + host := req.Header.Get("X-HOST-TRAILING-DOT") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + if ip := net.ParseIP(ace); ip != nil { // wrong: no TrimSuffix + return + } + http.Get("https://" + ace + "/") // $ Alert +} + +// --- DNS resolver sinks --- + +// net.LookupHost: a smuggled IP literal is accepted as the "host" argument. +// "0.¹.0.0" -> "0.1.0.0"; LookupHost("0.1.0.0") skips DNS for IP literals and +// returns the literal itself as the resolved address. +func smuggleLookupHost(req *http.Request) { + host := req.Header.Get("X-HOST-LOOKUP") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + net.LookupHost(ace) // $ Alert +} + +// net.LookupIP: same digit-fold as above; argument 0 is the host. +func smuggleLookupIP(req *http.Request) { + host := req.Header.Get("X-HOST-LOOKUP-IP") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + net.LookupIP(ace) // $ Alert +} + +// (*net.Resolver).LookupHost: custom resolver; host is argument 1, ctx is argument 0. +func smuggleResolverLookupHost(req *http.Request) { + host := req.Header.Get("X-HOST-RESOLVER-LOOKUP") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + r := &net.Resolver{} + r.LookupHost(context.Background(), ace) // $ Alert +} + +// (*net.Resolver).LookupIPAddr: custom resolver; host is argument 1. +func smuggleResolverLookupIPAddr(req *http.Request) { + host := req.Header.Get("X-HOST-RESOLVER-IPADDR") // $ Source + ace, _ := idna.Lookup.ToASCII(host) + r := &net.Resolver{} + r.LookupIPAddr(context.Background(), ace) // $ Alert +} + +// --- Caller-pattern reproduction: pre-IDNA ParseIP guard, no post-IDNA +// recheck. Mirrors `golang.org/x/net/http/httpproxy/proxy.go::canonicalAddr`. 
--- +func smuggleCanonicalAddrShape(req *http.Request) { + addr := req.Header.Get("X-HOST-CANONICAL") // $ Source + if ip := net.ParseIP(addr); ip != nil { + // pretend we reject IP-literal inputs early + return + } + if v, err := idna.Lookup.ToASCII(addr); err == nil { + addr = v + } + net.JoinHostPort(addr, "443") // $ Alert +}