diff --git a/CHANGELOG.md b/CHANGELOG.md index 1277fa9..af2bba5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,9 @@ The following emojis are used to highlight certain changes: ### Fixed +- someguy now expires `/p2p-circuit` (relay) addresses from its cache much sooner than direct ones. A relay reservation is short-lived: it lasts at most an hour and is dropped the moment the peer disconnects from the relay, so a relay address kept for the usual 48 hours is often long dead by the time a client dials it. Relay addresses now use a shorter TTL (twice the relay reservation TTL by default) and are renewed only while the peer stays reachable, so working relay paths survive and dead ones age out within hours instead of days. See [`docs/peer-address-caching.md`](https://github.com/ipfs/someguy/blob/main/docs/peer-address-caching.md). +- `/routing/v1` responses now list direct addresses before `/p2p-circuit` (relay) addresses. A client that dials addresses in order reaches a directly dialable one first and only falls back to a relay, which is slower and exists mainly to bootstrap a direct connection. + ### Security ## [v0.14.0] - 2026-06-08 diff --git a/cached_addr_book.go b/cached_addr_book.go index 8b37a22..4f10d24 100644 --- a/cached_addr_book.go +++ b/cached_addr_book.go @@ -59,6 +59,18 @@ const ( probeResultOffline = "offline" ) +// DefaultRelayAddrTTL bounds how long someguy serves a cached /p2p-circuit +// (relay) address. A relay reservation lasts at most the relay's reservation +// TTL (relay.DefaultResources().ReservationTTL) and is dropped the instant the +// reserving peer disconnects from the relay, so a relay address is far more +// perishable than a direct one. Caching it for the full +// DefaultRecentlyConnectedAddrTTL would keep handing clients relay paths that +// died hours ago. The probe loop re-extends this TTL for peers that are still +// reachable, so live relay-only peers survive while dead relays age out +// quickly. It is set to twice the reservation TTL so the probe loop has room to +// re-confirm a live peer before its entry expires. +var DefaultRelayAddrTTL = 2 * relay.DefaultResources().ReservationTTL + var ( probeDurationHistogram = promauto.NewHistogram(prometheus.HistogramOpts{ Name: "probe_duration_seconds", @@ -100,6 +112,7 @@ type cachedAddrBook struct { isProbing atomic.Bool allowPrivateIPs bool // for testing recentlyConnectedTTL time.Duration + relayAddrTTL time.Duration } type AddrBookOption func(*cachedAddrBook) error @@ -130,6 +143,15 @@ func WithRecentlyConnectedTTL(ttl time.Duration) AddrBookOption { } } +// WithRelayAddrTTL overrides the TTL used for /p2p-circuit (relay) addresses. +// See DefaultRelayAddrTTL for why these are kept shorter than direct addresses. +func WithRelayAddrTTL(ttl time.Duration) AddrBookOption { + return func(cab *cachedAddrBook) error { + cab.relayAddrTTL = ttl + return nil + } +} + func WithActiveProbing(enabled bool) AddrBookOption { return func(cab *cachedAddrBook) error { cab.probingEnabled = enabled @@ -147,6 +169,7 @@ func newCachedAddrBook(opts ...AddrBookOption) (*cachedAddrBook, error) { peerCache: peerCache, addrBook: pstoremem.NewAddrBook(), recentlyConnectedTTL: DefaultRecentlyConnectedAddrTTL, // Set default value + relayAddrTTL: DefaultRelayAddrTTL, // Set default value } for _, opt := range opts { @@ -156,6 +179,7 @@ func newCachedAddrBook(opts ...AddrBookOption) (*cachedAddrBook, error) { } } logger.Infof("Using TTL of %s for recently connected peers", cab.recentlyConnectedTTL) + logger.Infof("Using TTL of %s for relay (/p2p-circuit) addresses", cab.relayAddrTTL) logger.Infof("Probing enabled: %t", cab.probingEnabled) return cab, nil } @@ -218,9 +242,12 @@ func (cab *cachedAddrBook) background(ctx context.Context, host host.Host) { } cab.replacePeerAddrs(ev.Peer, ev.SignedPeerRecord, ev.ListenAddrs, connAddrs, ttl) case event.EvtPeerConnectednessChanged: - // If the peer is not connected or limited, we update the TTL + // On disconnect, move the peer's addresses off the connected TTL, + // then cap its relay addresses so a now-idle relay path is not + // served for the full recentlyConnectedTTL. if !hasValidConnectedness(ev.Connectedness) { cab.addrBook.UpdateAddrs(ev.Peer, ConnectedAddrTTL, cab.recentlyConnectedTTL) + cab.capRelayAddrTTL(ev.Peer) } } case <-probeTicker.C: @@ -278,6 +305,12 @@ func (cab *cachedAddrBook) replacePeerAddrs(p peer.ID, signed *record.Envelope, cab.addrBook.AddAddrs(p, listenAddrs, ttl) } + // Cap relay (/p2p-circuit) addresses at the shorter relayAddrTTL. The + // advertised set is freshly verified, but a relay reservation can lapse long + // before ttl, so a relay path should not inherit the full TTL. Run this + // before re-adding connAddrs so a live relay session keeps the connected TTL. + cab.capRelayAddrTTL(p) + // Preserve live-connection addresses at the connected TTL even when absent // from the advertised set, so an active session is never dropped. if len(connAddrs) > 0 { @@ -372,8 +405,9 @@ func (cab *cachedAddrBook) GetCachedAddrs(p peer.ID) []types.Multiaddr { // connection (e.g. embedded in a provider record returned by FindProviders) so // that later peer-routing lookups can serve them from the same peerbook. // Private addresses are dropped unless explicitly allowed. These addresses are -// unverified, so they are stored with the recently-connected TTL and will be -// confirmed or evicted by the probe loop. +// unverified, so direct addresses are stored with the recently-connected TTL +// and relay (/p2p-circuit) addresses with the shorter relayAddrTTL; the probe +// loop confirms or evicts them. func (cab *cachedAddrBook) CacheAddrs(p peer.ID, addrs []types.Multiaddr) { if len(addrs) == 0 { return @@ -391,7 +425,13 @@ func (cab *cachedAddrBook) CacheAddrs(p peer.ID, addrs []types.Multiaddr) { return } - cab.addrBook.AddAddrs(p, maddrs, cab.recentlyConnectedTTL) + // Relay (/p2p-circuit) addresses are far more perishable than direct ones, + // so cache them under the shorter relayAddrTTL. AddAddrs only ever extends a + // TTL, so adding the relay set here never shortens one already held by a live + // connection. + direct, relayAddrs := splitRelayAddrs(maddrs) + cab.addrBook.AddAddrs(p, direct, cab.recentlyConnectedTTL) + cab.addrBook.AddAddrs(p, relayAddrs, cab.relayAddrTTL) } // Update the peer cache with information about a failed connection @@ -449,3 +489,32 @@ func (cab *cachedAddrBook) getTTL(connectedness network.Connectedness) time.Dura } return cab.recentlyConnectedTTL } + +// isRelayAddr reports whether a is a circuit-relay (/p2p-circuit) address. +func isRelayAddr(a ma.Multiaddr) bool { + _, err := a.ValueForProtocol(ma.P_CIRCUIT) + return err == nil +} + +// splitRelayAddrs partitions addrs into direct addresses and circuit-relay +// (/p2p-circuit) addresses, preserving order within each group. +func splitRelayAddrs(addrs []ma.Multiaddr) (direct, relay []ma.Multiaddr) { + for _, a := range addrs { + if isRelayAddr(a) { + relay = append(relay, a) + } else { + direct = append(direct, a) + } + } + return direct, relay +} + +// capRelayAddrTTL lowers the TTL of p's stored relay (/p2p-circuit) addresses to +// relayAddrTTL. It uses SetAddrs, which sets an exact TTL and so, unlike +// AddAddrs, can shorten an entry; a caller that must keep a live relay session +// re-adds it at the connected TTL afterward. +func (cab *cachedAddrBook) capRelayAddrTTL(p peer.ID) { + if _, relayAddrs := splitRelayAddrs(cab.addrBook.Addrs(p)); len(relayAddrs) > 0 { + cab.addrBook.SetAddrs(p, relayAddrs, cab.relayAddrTTL) + } +} diff --git a/cached_addr_book_test.go b/cached_addr_book_test.go index 2278d30..74c9322 100644 --- a/cached_addr_book_test.go +++ b/cached_addr_book_test.go @@ -3,9 +3,12 @@ package main import ( "context" "fmt" + "io" "testing" + "testing/synctest" "time" + "github.com/ipfs/boxo/routing/http/types" "github.com/libp2p/go-libp2p/core/event" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" @@ -135,6 +138,74 @@ func TestReplacePeerAddrsEmptyInputKeepsExistingAddrs(t *testing.T) { require.Equal(t, []string{"/ip4/1.1.1.1/tcp/4001"}, got) } +func TestSplitRelayAddrs(t *testing.T) { + direct1 := ma.StringCast("/ip4/1.2.3.4/tcp/4001") + direct2 := ma.StringCast("/ip4/1.2.3.4/udp/4001/quic-v1") + relay1 := ma.StringCast("/ip4/5.6.7.8/tcp/4001/p2p/12D3KooWCZ67sU8oCvKd82Y6c9NgpqgoZYuZEUcg4upHCjK3n1aj/p2p-circuit") + relay2 := ma.StringCast("/dns4/relay.example/tcp/443/wss/p2p/12D3KooWCZ67sU8oCvKd82Y6c9NgpqgoZYuZEUcg4upHCjK3n1aj/p2p-circuit") + + direct, relay := splitRelayAddrs([]ma.Multiaddr{direct1, relay1, direct2, relay2}) + require.Equal(t, []ma.Multiaddr{direct1, direct2}, direct) + require.Equal(t, []ma.Multiaddr{relay1, relay2}, relay) + + require.False(t, isRelayAddr(direct1)) + require.True(t, isRelayAddr(relay1)) +} + +// relayTTLPeer and the addrs reused by the TTL tests below. +var ( + ttlTestPeer = peer.ID("test-peer") + ttlTestDirectAddr = ma.StringCast("/ip4/3.3.3.3/tcp/4001") + ttlTestRelayAddr = ma.StringCast("/ip4/5.6.7.8/udp/4001/quic-v1/p2p/12D3KooWCZ67sU8oCvKd82Y6c9NgpqgoZYuZEUcg4upHCjK3n1aj/p2p-circuit") +) + +func TestCacheAddrsUsesShorterRelayTTL(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + cab, err := newCachedAddrBook( + WithAllowPrivateIPs(), + WithRecentlyConnectedTTL(48*time.Hour), + WithRelayAddrTTL(2*time.Hour), + ) + require.NoError(t, err) + defer cab.addrBook.(io.Closer).Close() + + cab.CacheAddrs(ttlTestPeer, []types.Multiaddr{ + {Multiaddr: ttlTestDirectAddr}, + {Multiaddr: ttlTestRelayAddr}, + }) + require.Len(t, cab.addrBook.Addrs(ttlTestPeer), 2) + + // Past the relay TTL but well within the direct TTL: the relay address + // has aged out and only the direct address remains. + time.Sleep(3 * time.Hour) + got := cab.addrBook.Addrs(ttlTestPeer) + require.Len(t, got, 1) + require.Equal(t, ttlTestDirectAddr.String(), got[0].String()) + }) +} + +func TestReplacePeerAddrsCapsRelayTTL(t *testing.T) { + synctest.Test(t, func(t *testing.T) { + cab, err := newCachedAddrBook( + WithAllowPrivateIPs(), + WithRecentlyConnectedTTL(48*time.Hour), + WithRelayAddrTTL(2*time.Hour), + ) + require.NoError(t, err) + defer cab.addrBook.(io.Closer).Close() + + // A completed identify (no signed record) reports a direct and a relay + // address at the 48h ttl; the relay address must be capped to 2h. + cab.replacePeerAddrs(ttlTestPeer, nil, []ma.Multiaddr{ttlTestDirectAddr, ttlTestRelayAddr}, nil, 48*time.Hour) + require.Len(t, cab.addrBook.Addrs(ttlTestPeer), 2) + + time.Sleep(3 * time.Hour) + got := cab.addrBook.Addrs(ttlTestPeer) + require.Len(t, got, 1) + require.Equal(t, ttlTestDirectAddr.String(), got[0].String()) + }) +} + func TestBackground(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() diff --git a/docs/peer-address-caching.md b/docs/peer-address-caching.md index 33249fd..249d5e6 100644 --- a/docs/peer-address-caching.md +++ b/docs/peer-address-caching.md @@ -28,7 +28,7 @@ path (`cachedAddrBook.GetCachedAddrs`): | Store | Lifetime | Filled by | | --- | --- | --- | -| **Cached address book** (`cachedAddrBook`) | 48h (`DefaultProvideValidity`), or permanent while connected | identify events, the probe loop, and addresses observed in provider records | +| **Cached address book** (`cachedAddrBook`) | 48h (`DefaultProvideValidity`) for direct addresses, a shorter `DefaultRelayAddrTTL` for `/p2p-circuit` relay addresses, or permanent while connected | identify events, the probe loop, and addresses observed in provider records | | **Host peerstore** (`host.Peerstore()`) | 2 minutes (`TempAddrTTL`) | the DHT, which records provider and peer addresses during its own lookups | The cached address book is the durable, probed store. The host peerstore is a @@ -83,6 +83,26 @@ together with any live-connection address so an active session is never dropped. A reachable peer therefore collapses back to its current advertised set on each refresh instead of growing without bound. +## Relay addresses + +A `/p2p-circuit` (relay) address is far more perishable than a direct one. A +relay grants a reservation for at most its reservation TTL +(`relay.DefaultResources().ReservationTTL`, one hour by default) and drops it the +moment the peer disconnects from the relay, so a relay address can die within +minutes of being learned. A NAT'd node also advertises many at once: it reserves +on a couple of relays, and each relay is reachable over several transports, so +one peer often lists a dozen or more relay addresses. + +someguy handles these addresses on their own terms: + +- **Shorter TTL.** Relay addresses are cached under `DefaultRelayAddrTTL` (twice + the relay reservation TTL) rather than the 48h used for direct addresses. The + probe loop renews this for peers that are still reachable, so live relay-only + peers stay cached while dead relays age out within hours. +- **Listed last.** `/providers` and `/peers` return direct addresses before + relay addresses, so a client dials a directly reachable address first and + falls back to a relay only when it must. + ## How each endpoint reads the cache Both endpoints are cache-first and share the same read path. They differ only diff --git a/server_routers.go b/server_routers.go index 56790c3..0e31ba2 100644 --- a/server_routers.go +++ b/server_routers.go @@ -645,10 +645,19 @@ func filterPrivateMultiaddr(a []types.Multiaddr) []types.Multiaddr { b = append(b, addr) } - // Sort for a stable response across requests. Addresses can arrive in - // nondeterministic order (e.g. the peerstore stores them in a map), and - // this runs on every record from every router, so all sources are covered. + // Sort for a stable response across requests, and place relay + // (/p2p-circuit) addresses last so a client tries direct, dialable addresses + // first and only falls back to a relay. Addresses can arrive in + // nondeterministic order (e.g. the peerstore stores them in a map), and this + // runs on every record from every router, so all sources are covered. slices.SortFunc(b, func(x, y types.Multiaddr) int { + xRelay, yRelay := isRelayAddr(x.Multiaddr), isRelayAddr(y.Multiaddr) + if xRelay != yRelay { + if xRelay { + return 1 + } + return -1 + } return bytes.Compare(x.Multiaddr.Bytes(), y.Multiaddr.Bytes()) }) diff --git a/server_routers_test.go b/server_routers_test.go index b10998b..7f75ead 100644 --- a/server_routers_test.go +++ b/server_routers_test.go @@ -810,3 +810,28 @@ func TestFilterPrivateMultiaddrSortsAndFilters(t *testing.T) { } } } + +func TestFilterPrivateMultiaddrPlacesRelayAddrsLast(t *testing.T) { + mustAddr := func(s string) types.Multiaddr { + m, err := multiaddr.NewMultiaddr(s) + require.NoError(t, err) + return types.Multiaddr{Multiaddr: m} + } + + relay := "/p2p/12D3KooWCZ67sU8oCvKd82Y6c9NgpqgoZYuZEUcg4upHCjK3n1aj/p2p-circuit" + input := []types.Multiaddr{ + mustAddr("/ip4/9.9.9.9/udp/4001/quic-v1" + relay), + mustAddr("/ip4/1.1.1.1/tcp/4001"), + mustAddr("/ip4/5.5.5.5/tcp/4001" + relay), + mustAddr("/ip4/2.2.2.2/udp/4001/quic-v1"), + } + + out := filterPrivateMultiaddr(input) + require.Len(t, out, 4) + + // Direct addresses come first, relay (/p2p-circuit) addresses last. + require.False(t, isRelayAddr(out[0].Multiaddr)) + require.False(t, isRelayAddr(out[1].Multiaddr)) + require.True(t, isRelayAddr(out[2].Multiaddr)) + require.True(t, isRelayAddr(out[3].Multiaddr)) +}