diff --git a/CHANGELOG.md b/CHANGELOG.md index 65c9f8549..67a30651d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +Bugfixes: +* Fix flaky `SQLITE_IOERR_TRUNCATE` on Windows when multiple spago processes connect concurrently to the cache DB, by skipping `PRAGMA journal_mode = WAL` when it's already enabled (WAL mode is persistent in the DB file header) and tolerating the race on the initial set +* Retry transient network failures (connection errors and 5xx responses) when fetching package tarballs and calling the registry API, instead of failing immediately + ## [1.0.4] - 2026-03-30 Bugfixes: diff --git a/src/Spago/Command/Fetch.purs b/src/Spago/Command/Fetch.purs index c439bea25..976dd9a3e 100644 --- a/src/Spago/Command/Fetch.purs +++ b/src/Spago/Command/Fetch.purs @@ -319,9 +319,13 @@ fetchPackagesToLocalCache packages = do , url = packageUrl } ) - -- If we get a 503, we want the backoff to kick in, so we wait here and we'll eventually be retried + -- If the request failed (connection error) or got a 5xx, we want the backoff + -- to kick in. withBackoff' only retries on its own timeout, so we delay here + -- to lose the race against runTimeout and trigger a retry. 
case res of - Right { status } | status == StatusCode 503 -> Aff.delay (Aff.Milliseconds 30_000.0) + Left _ -> Aff.delay (Aff.Milliseconds 30_000.0) + Right { status } | status >= StatusCode 500 && status < StatusCode 600 -> + Aff.delay (Aff.Milliseconds 30_000.0) _ -> pure unit pure res case response of diff --git a/src/Spago/Db.js b/src/Spago/Db.js index a21242237..db9d0c88b 100644 --- a/src/Spago/Db.js +++ b/src/Spago/Db.js @@ -11,10 +11,28 @@ export const connectImpl = (databasePath, logger) => { const db = new DatabaseSync(databasePath, { enableForeignKeyConstraints: true, - timeout: 5000, // Wait up to 5s if database is locked (matches better-sqlite3 default) + timeout: 5000, // Wait up to 5s if database is locked }); - db.exec("PRAGMA journal_mode = WAL"); + // WAL journal mode is persistent in the DB file header (bytes 18-19), so + // once set it sticks across connections and reopens. We skip the PRAGMA when + // it's already set to avoid hitting winTruncate on the wal-index (.shm), + // which races between concurrent spago processes on Windows and surfaces as + // SQLITE_IOERR_TRUNCATE (errcode 1546). + // + // When two fresh processes race the initial set, the loser's exec throws, + // but the winner has already written WAL to the header — so we only re-throw + // if WAL didn't actually end up enabled (i.e. the error wasn't the benign + // race we expect). 
+ // + // See: + // https://sqlite.org/pragma.html (journal_mode persistence) + // https://sqlite.org/fileformat.html (header bytes 18-19 = WAL marker) + const inWal = () => db.prepare("PRAGMA journal_mode").get()?.journal_mode === "wal"; + if (!inWal()) { + try { db.exec("PRAGMA journal_mode = WAL"); } + catch (e) { if (!inWal()) throw e; } + } db.prepare(`CREATE TABLE IF NOT EXISTS package_sets ( version TEXT PRIMARY KEY NOT NULL diff --git a/src/Spago/Prelude.purs b/src/Spago/Prelude.purs index 6f051baef..8e5de4e81 100644 --- a/src/Spago/Prelude.purs +++ b/src/Spago/Prelude.purs @@ -162,7 +162,7 @@ withBackoff { delay: Aff.Milliseconds timeout, action, shouldCancel, shouldRetry case maybeRetry of Maybe.Nothing -> pure Maybe.Nothing Maybe.Just newAction -> do - let newTimeout = Int.floor timeout `Int.pow` (attempt + 1) + let newTimeout = Int.floor timeout * (2 `Int.pow` attempt) maybeResult <- runAction attempt newAction newTimeout loop (attempt + 1) maybeResult Maybe.Just result -> diff --git a/src/Spago/Registry.purs b/src/Spago/Registry.purs index 2ede68d87..3f85f4bb0 100644 --- a/src/Spago/Registry.purs +++ b/src/Spago/Registry.purs @@ -431,13 +431,20 @@ submitRegistryOperation payload = do callRegistry :: forall env a b. 
String -> CJ.Codec b -> Maybe { codec :: CJ.Codec a, data :: a } -> Spago (GitEnv env) b callRegistry url outputCodec maybeInput = handleError do logDebug $ "Calling registry at " <> url - response <- liftAff $ withBackoff' $ try case maybeInput of - Just { codec: inputCodec, data: input } -> Http.fetch url - { method: Http.POST - , headers: { "Content-Type": "application/json" } - , body: Json.stringifyJson inputCodec input - } - Nothing -> Http.fetch url { method: Http.GET } + response <- liftAff $ withBackoff' do + res <- try case maybeInput of + Just { codec: inputCodec, data: input } -> Http.fetch url + { method: Http.POST + , headers: { "Content-Type": "application/json" } + , body: Json.stringifyJson inputCodec input + } + Nothing -> Http.fetch url { method: Http.GET } + case res of + Left _ -> Aff.delay (Aff.Milliseconds 30_000.0) + Right { status } | status >= 500 && status < 600 -> + Aff.delay (Aff.Milliseconds 30_000.0) + _ -> pure unit + pure res case response of Nothing -> pure $ Left $ "Could not reach the registry at " <> url Just (Left err) -> pure $ Left $ "Error while calling the registry:\n " <> Exception.message err diff --git a/test-fixtures/build/1148-warnings-diff-errors/errors/expected-stderr.txt b/test-fixtures/build/1148-warnings-diff-errors/errors/expected-stderr.txt index a3ffb4e6f..39f17c106 100644 --- a/test-fixtures/build/1148-warnings-diff-errors/errors/expected-stderr.txt +++ b/test-fixtures/build/1148-warnings-diff-errors/errors/expected-stderr.txt @@ -4,8 +4,8 @@ Reading Spago workspace configuration... Downloading dependencies... Building... 
-[1 of 2] Compiling Foo -[2 of 2] Compiling Main +[x of 2] Compiling module-name +[x of 2] Compiling module-name [ERROR 1/2 TypesDoNotUnify] src/Foo.purs:4:5 4 x = "nope" diff --git a/test/Prelude.purs b/test/Prelude.purs index 2263ddc5d..868e0cd65 100644 --- a/test/Prelude.purs +++ b/test/Prelude.purs @@ -9,6 +9,8 @@ import Data.Array as Array import Data.Map as Map import Data.String (Pattern(..), Replacement(..)) import Data.String as String +import Data.String.Regex as Regex +import Data.String.Regex.Flags as RF import Effect.Aff as Aff import Effect.Aff.AVar (AVar) import Effect.Aff.AVar as AVar @@ -176,6 +178,17 @@ sanitizePlatformOutput = >>> String.replaceAll (Pattern "\\") (Replacement "/") >>> String.replaceAll (Pattern "\r\n") (Replacement "\n") +-- | Normalize `[N of TOTAL] Compiling MODULE` lines. purs schedules +-- | independent modules in whatever order system resources allow, so fixture +-- | comparison has to ignore the order. Pass the total module count expected. +normalizeCompileOrder :: Int -> String -> String +normalizeCompileOrder total = + Regex.replace regex ("[x of " <> show total <> "] Compiling module-name") + where + regex = unsafeFromRight $ Regex.regex + ("\\[\\d+ of " <> show total <> "\\] Compiling [^\n]+") + RF.global + checkFixture :: ∀ path. IsPath path => path -> FixturePath -> Aff Unit checkFixture filepath fixturePath = checkFixture' filepath fixturePath identity (shouldEqualStr `on` String.trim) diff --git a/test/Spago/Build.purs b/test/Spago/Build.purs index 240d5e32b..db0104b81 100644 --- a/test/Spago/Build.purs +++ b/test/Spago/Build.purs @@ -212,7 +212,7 @@ spec sem = Spec.parallel $ Spec.around (withBuildLock sem) do { stdoutFile: Nothing , stderrFile: Just $ fixture expectedFixture , result - , sanitize: sanitizePlatformOutput + , sanitize: sanitizePlatformOutput >>> normalizeCompileOrder 2 } FS.copyTree { src: fixture "build/1148-warnings-diff-errors", dst: testCwd "." 
} diff --git a/test/Spago/Publish.purs b/test/Spago/Publish.purs index 054a0411a..bed67ea7f 100644 --- a/test/Spago/Publish.purs +++ b/test/Spago/Publish.purs @@ -5,8 +5,6 @@ module Test.Spago.Publish import Test.Prelude -import Data.String.Regex as Regex -import Data.String.Regex.Flags as RF import Node.Platform as Platform import Node.Process as Process import Spago.FS as FS @@ -94,17 +92,9 @@ spec = Spec.around withTempDir do { stdoutFile: Nothing , stderrFile: Just file , result: isLeft - , sanitize: sanitizePlatformOutput >>> Regex.replace buildOrderRegex "[x of 3] Compiling module-name" + , sanitize: sanitizePlatformOutput >>> normalizeCompileOrder 3 } - -- We have to ignore lines like "[1 of 3] Compiling Effect.Console" when - -- comparing output, because the compiler will always compile in - -- different order, depending on how the system resources happened to - -- align at the moment of the test run. - buildOrderRegex = unsafeFromRight $ Regex.regex - "\\[\\d of 3\\] Compiling (Effect\\.Console|Effect\\.Class\\.Console|Lib)" - RF.global - FS.copyTree { src: fixture "publish/1110-solver-different-version", dst: testCwd } spago [ "build" ] >>= shouldBeSuccess doTheGitThing testCwd