From 1aace05a22c6fb2a4a5a24ebbdb9ac819ad9aabd Mon Sep 17 00:00:00 2001 From: Jian-Zhang08 <53095992+Jian-Zhang08@users.noreply.github.com> Date: Fri, 26 Jun 2026 11:28:25 -0500 Subject: [PATCH] fix(csv-parse): preserve multi-byte record delimiter in raw output With { raw: true }, only the first byte of the record delimiter was appended to the raw buffer (in the per-char loop), and the parser then advanced pos past the remaining delimiter bytes before emitting the record. For multi-byte delimiters such as Windows "\r\n" this dropped the trailing byte, so raw was 'a,b\r' instead of 'a,b\r\n'. Append the remaining record-delimiter bytes to the raw buffer when a delimiter is detected, so multi-byte delimiters are preserved in full. Single-byte delimiters are unaffected (the loop body does not run). Adds a regression test and rebuilds dist. Fixes #332 --- packages/csv-parse/dist/cjs/index.cjs | 10 ++++++++++ packages/csv-parse/dist/cjs/stream.cjs | 10 ++++++++++ packages/csv-parse/dist/cjs/sync.cjs | 10 ++++++++++ packages/csv-parse/dist/esm/index.js | 10 ++++++++++ packages/csv-parse/dist/esm/sync.js | 10 ++++++++++ packages/csv-parse/dist/iife/index.js | 10 ++++++++++ packages/csv-parse/dist/iife/sync.js | 10 ++++++++++ packages/csv-parse/dist/umd/index.js | 10 ++++++++++ packages/csv-parse/dist/umd/sync.js | 10 ++++++++++ packages/csv-parse/lib/api/index.js | 10 ++++++++++ packages/csv-parse/test/option.raw.ts | 13 +++++++++++++ 11 files changed, 113 insertions(+) diff --git a/packages/csv-parse/dist/cjs/index.cjs b/packages/csv-parse/dist/cjs/index.cjs index 4e029162..a74123f3 100644 --- a/packages/csv-parse/dist/cjs/index.cjs +++ b/packages/csv-parse/dist/cjs/index.cjs @@ -1360,6 +1360,16 @@ const transform = function (original_options = {}) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/cjs/stream.cjs b/packages/csv-parse/dist/cjs/stream.cjs index 635049d6..a30286fc 100644 --- a/packages/csv-parse/dist/cjs/stream.cjs +++ b/packages/csv-parse/dist/cjs/stream.cjs @@ -1360,6 +1360,16 @@ const transform = function (original_options = {}) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/cjs/sync.cjs b/packages/csv-parse/dist/cjs/sync.cjs index 835b51c5..b9d835e1 100644 --- a/packages/csv-parse/dist/cjs/sync.cjs +++ b/packages/csv-parse/dist/cjs/sync.cjs @@ -1358,6 +1358,16 @@ const transform = function (original_options = {}) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/esm/index.js b/packages/csv-parse/dist/esm/index.js index b583dbc7..ac3b06b7 100644 --- a/packages/csv-parse/dist/esm/index.js +++ b/packages/csv-parse/dist/esm/index.js @@ -6409,6 +6409,16 @@ const transform = function (original_options = {}) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/esm/sync.js b/packages/csv-parse/dist/esm/sync.js index 8865f324..c9021785 100644 --- a/packages/csv-parse/dist/esm/sync.js +++ b/packages/csv-parse/dist/esm/sync.js @@ -3328,6 +3328,16 @@ const transform = function (original_options = {}) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/iife/index.js b/packages/csv-parse/dist/iife/index.js index 362b6137..d62fdeda 100644 --- a/packages/csv-parse/dist/iife/index.js +++ b/packages/csv-parse/dist/iife/index.js @@ -6412,6 +6412,16 @@ var csv_parse = (function (exports) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/iife/sync.js b/packages/csv-parse/dist/iife/sync.js index d9f6b67c..2308b562 100644 --- a/packages/csv-parse/dist/iife/sync.js +++ b/packages/csv-parse/dist/iife/sync.js @@ -3331,6 +3331,16 @@ var csv_parse_sync = (function (exports) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/umd/index.js b/packages/csv-parse/dist/umd/index.js index 4c5728e2..a856d867 100644 --- a/packages/csv-parse/dist/umd/index.js +++ b/packages/csv-parse/dist/umd/index.js @@ -6415,6 +6415,16 @@ pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/dist/umd/sync.js b/packages/csv-parse/dist/umd/sync.js index e2218aae..babe6d0d 100644 --- a/packages/csv-parse/dist/umd/sync.js +++ b/packages/csv-parse/dist/umd/sync.js @@ -3334,6 +3334,16 @@ pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/lib/api/index.js b/packages/csv-parse/lib/api/index.js index 6cd8e3fe..1d889e93 100644 --- a/packages/csv-parse/lib/api/index.js +++ b/packages/csv-parse/lib/api/index.js @@ -336,6 +336,16 @@ const transform = function (original_options = {}) { pos, ); if (recordDelimiterLength !== 0) { + // Only the first byte of the record delimiter was appended to + // the raw buffer above; append the remaining bytes so that + // multi-byte delimiters such as "\r\n" are preserved in full in + // `raw` instead of being truncated when `pos` skips past them + // below (e.g. `raw: 'a,b\r'` instead of `raw: 'a,b\r\n'`). (#332) + if (raw === true) { + for (let i = 1; i < recordDelimiterLength; i++) { + rawBuffer.append(buf[pos + i]); + } + } // Do not emit comments which take a full line const skipCommentLine = this.state.commenting && diff --git a/packages/csv-parse/test/option.raw.ts b/packages/csv-parse/test/option.raw.ts index a6fdccfc..d923e909 100644 --- a/packages/csv-parse/test/option.raw.ts +++ b/packages/csv-parse/test/option.raw.ts @@ -56,6 +56,19 @@ describe("Option `raw`", function () { }); }); + it("preserves windows (CRLF) record delimiters", function (next) { + // The `\n` of a `\r\n` record delimiter used to be dropped from `raw`, + // yielding `a,b\r` instead of `a,b\r\n`. (#332) + parse("a,b\r\nc,d\r\n", { raw: true }, (err, records) => { + if (err) return next(err); + const raws = ( + records as unknown as { record: string[]; raw: string }[] + ).map((r) => r.raw); + raws.should.eql(["a,b\r\n", "c,d\r\n"]); + next(); + }); + }); + it("preserve columns", function (next) { parse( dedent`