Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion tests/SMPClient.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import Network.Socket
import qualified Network.TLS as TLS
import Simplex.Messaging.Agent.Store.Postgres.Options (DBOpts (..))
import Simplex.Messaging.Agent.Store.Shared (MigrationConfirmation (..))
import Simplex.Messaging.Client (ProtocolClientConfig (..), chooseTransportHost, defaultNetworkConfig)
import Simplex.Messaging.Client (NetworkConfig (..), NetworkTimeout (..), ProtocolClientConfig (..), chooseTransportHost, defaultNetworkConfig)
import Simplex.Messaging.Client.Agent (SMPClientAgentConfig (..), defaultSMPClientAgentConfig)
import qualified Simplex.Messaging.Crypto as C
import Simplex.Messaging.Encoding
Expand Down Expand Up @@ -339,6 +339,16 @@ proxyCfgJ2QS = \case
SQSMemory -> journalCfg (proxyCfgMS $ ASType SQSMemory SMSJournal) testStoreLogFile2 testStoreMsgsDir2
SQSPostgres -> journalCfgDB (proxyCfgMS $ ASType SQSPostgres SMSJournal) testStoreDBOpts2 testStoreMsgsDir2

-- Variant of proxyCfg whose SMP client agent uses a 4-second TCP connect timeout (for both the
-- background and interactive cases). The relay reconnection tests use it so that a failing
-- proxy->relay connection attempt blocks for a bounded, short time.
proxyCfgShortTimeout :: AServerConfig
proxyCfgShortTimeout =
  updateCfg proxyCfg $ \srvCfg ->
    let agentCfg = smpAgentCfg srvCfg
        clientCfg = smpCfg agentCfg
        -- same 4s limit for both kinds of connection attempt
        connectTimeout = NetworkTimeout {backgroundTimeout = 4_000000, interactiveTimeout = 4_000000}
        clientCfg' = clientCfg {networkConfig = (networkConfig clientCfg) {tcpConnectTimeout = connectTimeout}}
     in srvCfg {smpAgentCfg = agentCfg {smpCfg = clientCfg'}}

-- SMP version range built by mkVersionRange from minServerSMPRelayVersion and
-- sendingProxySMPVersion.
proxyVRangeV8 :: VersionRangeSMP
proxyVRangeV8 = minServerSMPRelayVersion `mkVersionRange` sendingProxySMPVersion

Expand Down Expand Up @@ -383,6 +393,15 @@ serverBracket process afterProcess f = do
Nothing -> error $ "server did not " <> s
_ -> pure ()

-- Runs the action while a "stalling" TCP server listens on the given port: it accepts
-- connections but never starts a TLS handshake, so a connecting client stays blocked in its
-- TLS handshake until its own connection timeout fires.
withStallingServerOn :: HasCallStack => ServiceName -> IO a -> IO a
withStallingServerOn port action = serverBracket runStalled (pure ()) (const action)
  where
    -- server process handed to serverBracket; `started` is passed on to runLocalTCPServer
    runStalled started = runLocalTCPServer started port stall
    -- per-connection handler: ignore its argument and sleep for the longest possible delay
    stall _ = threadDelay maxBound

-- Like withSmpServerThreadOn, but for actions that do not need the value it passes to its
-- callback: the argument is dropped and the action is run as is.
withSmpServerOn :: HasCallStack => (ASrvTransport, AStoreType) -> ServiceName -> IO a -> IO a
withSmpServerOn ps port' action = withSmpServerThreadOn ps port' (const action)

Expand Down
60 changes: 60 additions & 0 deletions tests/SMPProxyTests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ smpProxyTests = do
describe "server configuration" $ do
it "refuses proxy handshake unless enabled" testNoProxy
it "checks basic auth in proxy requests" testProxyAuth
describe "relay reconnection" $ do
it "recovers when unresponsive relay restarts (control, no disconnect)" $ \_ ->
testProxyRecoversWithoutDisconnect
it "reconnects to relay after sender disconnects mid-connection" $ \_ ->
testProxyReconnectAfterRelayRestart
describe "proxy requests" $ do
describe "bad relay URIs" $ do
xit "host not resolved" todo
Expand Down Expand Up @@ -447,6 +452,61 @@ testProxyAuth msType = do
where
proxyCfgAuth = updateCfg (proxyCfgMS msType) $ \cfg_ -> cfg_ {newQueueBasicAuth = Just "correct"}

-- Opens a sender client connection to the proxy (localhost, testPort) and sends PRXY asking
-- for a relay session to testSMPServer2. The result is the reply to that command: PKEY on
-- success, otherwise the proxy's error for the relay connection.
requestRelaySession :: IO (Either SMP.ErrorType SMP.BrokerMsg)
requestRelaySession =
  testSMPClient_ "localhost" testPort proxyVRangeV8 Nothing $ \(th :: THandleSMP TLS 'TClient) -> do
    -- only the third component of the response (the reply itself) is returned
    (_, _, reply) <- sendRecv th (Nothing, "1", NoEntity, SMP.PRXY testSMPServer2 Nothing)
    pure reply

-- Second phase shared by the relay reconnection tests. With a healthy relay running on
-- testPort2 it:
--   1. PINGs the relay directly (not through the proxy), so that a later proxy failure can only
--      mean the proxy did not reconnect;
--   2. waits for any stored connection error to expire;
--   3. requires the proxy to establish the relay session, i.e. to answer PRXY with PKEY.
requireProxyReconnect :: IO ()
requireProxyReconnect =
  withSmpServerConfigOn (transport @TLS) proxyCfgJ2 testPort2 $ \_ -> do
    pingRelayDirectly
    threadDelay 1500000 -- > persistErrorInterval (1s), so the stored connection error has expired
    reply <- requestRelaySession
    case reply of
      Right SMP.PKEY {} -> pure ()
      _ -> expectationFailure $ "proxy failed to reach the healthy relay; expected PKEY, got: " <> show reply
  where
    -- direct client connection to the relay, bypassing the proxy
    pingRelayDirectly =
      testSMPClient_ "127.0.0.1" testPort2 proxyVRangeV8 Nothing $ \(th :: THandleSMP TLS 'TClient) -> do
        (_, _, pong) <- sendRecv th (Nothing, "0", NoEntity, SMP.PING)
        pong `shouldBe` Right SMP.PONG

-- Control case for testProxyReconnectAfterRelayRestart: same stalling relay and same proxy
-- config, but the sender stays connected for the whole request. The connect then fails by
-- timing out, which stores a Left error that self-heals via persistErrorInterval, so the proxy
-- reconnects once a healthy relay is running. This shows that the stalling relay alone does not
-- cause the permanent failure - only the mid-connection disconnect does.
testProxyRecoversWithoutDisconnect :: IO ()
testProxyRecoversWithoutDisconnect =
  withSmpServerConfigOn (transport @TLS) proxyCfgShortTimeout testPort $ \_ -> do
    withStallingServerOn testPort2 expectBrokerError
    requireProxyReconnect
  where
    -- against the unresponsive relay the proxy must answer with a PROXY (BROKER _) error
    expectBrokerError = do
      reply <- requestRelaySession
      case reply of
        Right (SMP.ERR (SMP.PROXY (SMP.BROKER _))) -> pure ()
        _ -> expectationFailure $ "expected a proxy broker error from the unresponsive relay, got: " <> show reply

-- Regression test for a production bug: after a destination relay restarts, the SMP proxy
-- permanently fails to reconnect to it (the logs show repeated PCEResponseTimeout).
--
-- Mechanism: a PRXY request makes the proxy worker (forked via forkClient and registered in the
-- sender's endThreads) insert an empty SessionVar into smpClients and then block in
-- connectClient. If the sender disconnects while that connect is in flight, clientDisconnected
-- kills the worker; clientHandlers re-throws the async exception, so the SessionVar is never
-- filled. Nothing removes an empty SessionVar, so every later request waits out the connection
-- timeout on it and gets PROXY (BROKER TIMEOUT) - forever, even once the relay is healthy again.
--
-- The stalling relay (accepts TCP, never completes TLS) keeps the connect open long enough for
-- the disconnect to land in the middle of it. The second phase (requireProxyReconnect) is the
-- same as in testProxyRecoversWithoutDisconnect; the disconnect is the only difference.
testProxyReconnectAfterRelayRestart :: IO ()
testProxyReconnectAfterRelayRestart =
  withSmpServerConfigOn (transport @TLS) proxyCfgShortTimeout testPort $ \_ ->
    abandonRequestMidConnect >> requireProxyReconnect
  where
    -- disconnect the sender 1s into the 4s connect to the stalling relay, killing the in-flight
    -- worker: race_ finishes as soon as the delay elapses, abandoning the pending request
    abandonRequestMidConnect =
      withStallingServerOn testPort2 $
        race_ (threadDelay 1000000) requestRelaySession

-- Placeholder body for tests that are not written yet: ignores the store type and fails
-- with "TODO".
todo :: AStoreType -> IO ()
todo = const (fail "TODO")

Expand Down
Loading