From 2176cd8e5b50063f44f00ee3a1deda528d017443 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 3 Mar 2026 14:20:57 +0000 Subject: [PATCH 01/27] Another try at adding Git support, this time with Claude Opus 4.6 --- src/patch/parse.ts | 246 ++++++++++++++++++++++--- test/patch/parse.js | 439 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 664 insertions(+), 21 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 29356d61..65d9834b 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -15,6 +15,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { list.push(index); // Parse diff metadata + let seenFileHeader = false; while (i < diffstr.length) { const line = diffstr[i]; @@ -23,27 +24,103 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { break; } - // Try to parse the line as a diff header, like - // Index: README.md - // or - // diff -r 9117c6561b0b -r 273ce12ad8f1 .hgignore - // or - // Index: something with multiple words - // and extract the filename (or whatever else is used as an index name) - // from the end (i.e. 'README.md', '.hgignore', or - // 'something with multiple words' in the examples above). + // Check if this line is a recognized file-level header: + // - diff --git a/... b/... + // - Index: ... + // - diff -r [-r ] (Mercurial) // - // TODO: It seems awkward that we indiscriminately trim off trailing - // whitespace here. Theoretically, couldn't that be meaningful - - // e.g. if the patch represents a diff of a file whose name ends - // with a space? Seems wrong to nuke it. - // But this behaviour has been around since v2.2.1 in 2015, so if - // it's going to change, it should be done cautiously and in a new - // major release, for backwards-compat reasons. - // -- ExplodingCabbage - const headerMatch = (/^(?:Index:|diff(?: -r \w+)+)\s+/).exec(line); - if (headerMatch) { - index.index = line.substring(headerMatch[0].length).trim(); + // We specifically do NOT match arbitrary `diff` commands like + // `diff -u -p -r1.1 -r1.2` here, because in some formats (e.g. + // CVS diffs) such lines appear as metadata within a single file's + // header section, after an `Index:` line. See the diffx + // documentation (https://diffx.org) for examples. + // + // If we've already seen a recognized file header for *this* file + // and now we encounter another one, it must belong to the next + // file, so break. + if ((/^diff --git /).test(line)) { + if (seenFileHeader) { + break; + } + seenFileHeader = true; + + // Parse the old and new filenames from the diff --git header and + // tentatively set oldFileName and newFileName from them. These may + // be overridden by the --- and +++ lines below if they are + // present, but for git diffs that lack --- and +++ lines (e.g. + // renames, mode-only changes, binary files), these are the only + // filenames we get. + const paths = parseGitDiffHeader(line); + if (paths) { + index.oldFileName = paths.oldFileName; + index.newFileName = paths.newFileName; + } + + // Consume git extended headers (old mode, new mode, rename from, + // rename to, similarity index, index, Binary files ... differ, + // etc.) + i++; + while (i < diffstr.length) { + const extLine = diffstr[i]; + + // If we hit ---, +++, @@, or another recognized file header, + // stop consuming extended headers. + if ((/^(---|\+\+\+|@@)\s/).test(extLine) + || (/^diff --git /).test(extLine) + || (/^Index:\s/).test(extLine) + || (/^diff(?: -r \w+)+\s/).test(extLine)) { + break; + } + + // Parse rename from / rename to lines - these give us + // unambiguous filenames without a/ b/ prefixes + const renameFromMatch = (/^rename from (.*)/).exec(extLine); + if (renameFromMatch) { + index.oldFileName = renameFromMatch[1]; + } + const renameToMatch = (/^rename to (.*)/).exec(extLine); + if (renameToMatch) { + index.newFileName = renameToMatch[1]; + } + + // Parse copy from / copy to lines similarly + const copyFromMatch = (/^copy from (.*)/).exec(extLine); + if (copyFromMatch) { + index.oldFileName = copyFromMatch[1]; + } + const copyToMatch = (/^copy to (.*)/).exec(extLine); + if (copyToMatch) { + index.newFileName = copyToMatch[1]; + } + + i++; + } + continue; + } else if ((/^(Index:\s|diff(?: -r \w+)+\s)/).test(line)) { + if (seenFileHeader) { + break; + } + seenFileHeader = true; + + // For Mercurial-style headers like + // diff -r 9117c6561b0b -r 273ce12ad8f1 .hgignore + // or Index: headers like + // Index: something with multiple words + // we extract the trailing filename as the index. + // + // TODO: It seems awkward that we indiscriminately trim off + // trailing whitespace here. Theoretically, couldn't that + // be meaningful - e.g. if the patch represents a diff of a + // file whose name ends with a space? Seems wrong to nuke + // it. But this behaviour has been around since v2.2.1 in + // 2015, so if it's going to change, it should be done + // cautiously and in a new major release, for + // backwards-compat reasons. + // -- ExplodingCabbage + const headerMatch = (/^(?:Index:|diff(?: -r \w+)+)\s+/).exec(line); + if (headerMatch) { + index.index = line.substring(headerMatch[0].length).trim(); + } } i++; @@ -59,7 +136,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { while (i < diffstr.length) { const line = diffstr[i]; - if ((/^(Index:\s|diff\s|---\s|\+\+\+\s|===================================================================)/).test(line)) { + if ((/^(Index:\s|diff --git\s|diff(?: -r \w+)+\s|---\s|\+\+\+\s|===================================================================)/).test(line)) { break; } else if ((/^@@/).test(line)) { index.hunks.push(parseHunk()); @@ -71,6 +148,133 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { } } + /** + * Parses the old and new filenames from a `diff --git` header line. + * + * The format is: + * diff --git a/ b/ + * + * When filenames contain special characters, git quotes them with C-style + * escaping: + * diff --git "a/file with spaces.txt" "b/file with spaces.txt" + * + * When filenames don't contain special characters and the old and new names + * are the same, we can unambiguously split on ` b/` by finding the midpoint. + * When they differ AND contain spaces AND aren't quoted, parsing is + * inherently ambiguous, so we do our best. + */ + function parseGitDiffHeader(line: string): { oldFileName: string, newFileName: string } | null { + // Strip the "diff --git " prefix + const rest = line.substring('diff --git '.length); + + // Handle quoted paths: "a/path" "b/path" + // Git quotes paths when they contain special characters. + if (rest.startsWith('"')) { + const oldPath = parseQuotedFileName(rest); + if (oldPath === null) { return null; } + const afterOld = rest.substring(oldPath.rawLength + 1); // +1 for space + let newFileName: string; + if (afterOld.startsWith('"')) { + const newPath = parseQuotedFileName(afterOld); + if (newPath === null) { return null; } + newFileName = stripPrefix(newPath.fileName, 'b/'); + } else { + newFileName = stripPrefix(afterOld, 'b/'); + } + return { + oldFileName: stripPrefix(oldPath.fileName, 'a/'), + newFileName + }; + } + + // Check if the second path is quoted + // e.g. diff --git a/simple "b/complex name" + const quoteIdx = rest.indexOf('"'); + if (quoteIdx > 0) { + const oldFileName = stripPrefix(rest.substring(0, quoteIdx - 1), 'a/'); + const newPath = parseQuotedFileName(rest.substring(quoteIdx)); + if (newPath === null) { return null; } + return { + oldFileName, + newFileName: stripPrefix(newPath.fileName, 'b/') + }; + } + + // Unquoted paths. Try to find the split point. + // The format is: a/ b/ + // + // Strategy: if the path starts with a/ and contains " b/", we try + // to find where to split. When old and new names are the same, there's + // a unique split where the two halves (minus prefixes) match. When + // they differ, we try the split closest to the middle. + if (rest.startsWith('a/')) { + // Try to find a " b/" separator. If the filename itself contains " b/", + // there could be multiple candidates. We try each one and pick the + // split where both halves look like valid prefixed paths. + let searchFrom = 2; // skip past initial "a/" + let bestSplit = -1; + while (true) { + const idx = rest.indexOf(' b/', searchFrom); + if (idx === -1) { break; } + // Candidate: old = rest[0..idx), new = rest[idx+1..) + const candidateOld = rest.substring(2, idx); // strip "a/" + const candidateNew = rest.substring(idx + 3); // strip " b/" + if (candidateOld === candidateNew) { + // Perfect match - unambiguous + return { oldFileName: candidateOld, newFileName: candidateNew }; + } + bestSplit = idx; + searchFrom = idx + 3; + } + if (bestSplit !== -1) { + return { + oldFileName: rest.substring(2, bestSplit), + newFileName: rest.substring(bestSplit + 3) + }; + } + } + + // Fallback: can't parse, return null + return null; + } + + /** + * Parses a C-style quoted filename as used by git. + * Returns the unescaped filename and the raw length consumed (including quotes). + */ + function parseQuotedFileName(s: string): { fileName: string, rawLength: number } | null { + if (!s.startsWith('"')) { return null; } + let result = ''; + let j = 1; // skip opening quote + while (j < s.length) { + if (s[j] === '"') { + return { fileName: result, rawLength: j + 1 }; + } + if (s[j] === '\\' && j + 1 < s.length) { + j++; + switch (s[j]) { + case 'n': result += '\n'; break; + case 't': result += '\t'; break; + case '\\': result += '\\'; break; + case '"': result += '"'; break; + default: result += '\\' + s[j]; break; + } + } else { + result += s[j]; + } + j++; + } + // Unterminated quote + return null; + } + + function stripPrefix(s: string, prefix: string): string { + if (s.startsWith(prefix)) { + return s.substring(prefix.length); + } + return s; + } + // Parses the --- and +++ headers, if none are found, no lines // are consumed. function parseFileHeader(index: Partial) { diff --git a/test/patch/parse.js b/test/patch/parse.js index ed604ab1..0555bf76 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -710,5 +710,444 @@ line3 // eslint-disable-next-line dot-notation expect(() => {parsePatch(patchStr);}).to.throw('Hunk at line 5 contained invalid line line3'); }); + + it('should parse a single-file diff --git patch', function() { + expect(parsePatch( +`diff --git a/file.txt b/file.txt +index abc1234..def5678 100644 +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }]); + }); + + it('should parse a multi-file diff --git patch', function() { + expect(parsePatch( +`diff --git a/file1.txt b/file1.txt +index abc1234..def5678 100644 +--- a/file1.txt ++++ b/file1.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4 +diff --git a/file2.txt b/file2.txt +index 1234567..abcdef0 100644 +--- a/file2.txt ++++ b/file2.txt +@@ -1,3 +1,4 @@ + lineA + lineB ++lineC + lineD`)) + .to.eql([{ + oldFileName: 'a/file1.txt', + oldHeader: '', + newFileName: 'b/file1.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }, { + oldFileName: 'a/file2.txt', + oldHeader: '', + newFileName: 'b/file2.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' lineA', + ' lineB', + '+lineC', + ' lineD' + ] + } + ] + }]); + }); + + it('should parse a diff --git rename with no content change', function() { + expect(parsePatch( +`diff --git a/README.md b/README-2.md +similarity index 100% +rename from README.md +rename to README-2.md`)) + .to.eql([{ + oldFileName: 'README.md', + newFileName: 'README-2.md', + hunks: [] + }]); + }); + + it('should parse a diff --git rename with content change', function() { + expect(parsePatch( +`diff --git a/old-name.txt b/new-name.txt +similarity index 85% +rename from old-name.txt +rename to new-name.txt +index abc1234..def5678 100644 +--- a/old-name.txt ++++ b/new-name.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4`)) + .to.eql([{ + oldFileName: 'a/old-name.txt', + oldHeader: '', + newFileName: 'b/new-name.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }]); + }); + + it('should parse a diff --git mode-only change', function() { + expect(parsePatch( +`diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'script.sh', + newFileName: 'script.sh', + hunks: [] + }]); + }); + + it('should parse a diff --git binary file change', function() { + expect(parsePatch( +`diff --git a/image.png b/image.png +index abc1234..def5678 100644 +Binary files a/image.png and b/image.png differ`)) + .to.eql([{ + oldFileName: 'image.png', + newFileName: 'image.png', + hunks: [] + }]); + }); + + it('should not lose files when a diff --git binary change is followed by a text change', function() { + expect(parsePatch( +`diff --git a/file1.txt b/file1.txt +--- a/file1.txt ++++ b/file1.txt +@@ -1 +1 @@ +-old ++new +diff --git a/image.png b/image.png +Binary files a/image.png and b/image.png differ +diff --git a/file3.txt b/file3.txt +--- a/file3.txt ++++ b/file3.txt +@@ -1 +1 @@ +-foo ++bar`)) + .to.eql([{ + oldFileName: 'a/file1.txt', + oldHeader: '', + newFileName: 'b/file1.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }, { + oldFileName: 'image.png', + newFileName: 'image.png', + hunks: [] + }, { + oldFileName: 'a/file3.txt', + oldHeader: '', + newFileName: 'b/file3.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-foo', '+bar'] + } + ] + }]); + }); + + it('should not lose files when a diff --git mode-only change is in the middle', function() { + expect(parsePatch( +`diff --git a/file1.txt b/file1.txt +--- a/file1.txt ++++ b/file1.txt +@@ -1,3 +1,4 @@ + line1 + line2 ++line3 + line4 +diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +diff --git a/file3.txt b/file3.txt +--- a/file3.txt ++++ b/file3.txt +@@ -1,2 +1,3 @@ + aaa ++bbb + ccc`)) + .to.eql([{ + oldFileName: 'a/file1.txt', + oldHeader: '', + newFileName: 'b/file1.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 3, + newStart: 1, newLines: 4, + lines: [ + ' line1', + ' line2', + '+line3', + ' line4' + ] + } + ] + }, { + oldFileName: 'script.sh', + newFileName: 'script.sh', + hunks: [] + }, { + oldFileName: 'a/file3.txt', + oldHeader: '', + newFileName: 'b/file3.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 2, + newStart: 1, newLines: 3, + lines: [ + ' aaa', + '+bbb', + ' ccc' + ] + } + ] + }]); + }); + + it('should parse a diff --git copy', function() { + expect(parsePatch( +`diff --git a/original.txt b/copy.txt +similarity index 100% +copy from original.txt +copy to copy.txt`)) + .to.eql([{ + oldFileName: 'original.txt', + newFileName: 'copy.txt', + hunks: [] + }]); + }); + + it('should parse a diff --git new file', function() { + expect(parsePatch( +`diff --git a/newfile.txt b/newfile.txt +new file mode 100644 +index 0000000..abc1234 +--- /dev/null ++++ b/newfile.txt +@@ -0,0 +1,2 @@ ++hello ++world`)) + .to.eql([{ + oldFileName: '/dev/null', + oldHeader: '', + newFileName: 'b/newfile.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 0, + newStart: 1, newLines: 2, + lines: ['+hello', '+world'] + } + ] + }]); + }); + + it('should parse diff --git with quoted filenames containing spaces', function() { + expect(parsePatch( +`diff --git "a/file with spaces.txt" "b/file with spaces.txt" +index abc1234..def5678 100644 +--- "a/file with spaces.txt" ++++ "b/file with spaces.txt" +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file with spaces.txt', + oldHeader: '', + newFileName: 'b/file with spaces.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should parse diff --git rename with quoted filenames', function() { + expect(parsePatch( +`diff --git "a/old name.txt" "b/new name.txt" +similarity index 100% +rename from old name.txt +rename to new name.txt`)) + .to.eql([{ + oldFileName: 'old name.txt', + newFileName: 'new name.txt', + hunks: [] + }]); + }); + + it('should let --- and +++ lines override filenames from diff --git header', function() { + // When --- and +++ are present, they should take precedence over + // the filenames parsed from the diff --git header line. + expect(parsePatch( +`diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should not be confused by a diff --git rename followed by files with hunks', function() { + expect(parsePatch( +`diff --git a/old.txt b/new.txt +similarity index 100% +rename from old.txt +rename to new.txt +diff --git a/other.txt b/other.txt +--- a/other.txt ++++ b/other.txt +@@ -1 +1 @@ +-aaa ++bbb`)) + .to.eql([{ + oldFileName: 'old.txt', + newFileName: 'new.txt', + hunks: [] + }, { + oldFileName: 'a/other.txt', + oldHeader: '', + newFileName: 'b/other.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-aaa', '+bbb'] + } + ] + }]); + }); + + it('should parse diff --git with unquoted filenames containing spaces (same old and new)', function() { + expect(parsePatch( +`diff --git a/file with spaces.txt b/file with spaces.txt +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'file with spaces.txt', + newFileName: 'file with spaces.txt', + hunks: [] + }]); + }); + + it('should parse diff --git rename with unquoted filenames containing spaces', function() { + // The diff --git line alone is ambiguous when filenames contain spaces + // and old != new, but rename from / rename to resolve the ambiguity. + expect(parsePatch( +`diff --git a/file with spaces.txt b/another file with spaces.txt +similarity index 100% +rename from file with spaces.txt +rename to another file with spaces.txt`)) + .to.eql([{ + oldFileName: 'file with spaces.txt', + newFileName: 'another file with spaces.txt', + hunks: [] + }]); + }); + + it('should handle diff --git with a filename containing " b/"', function() { + // The filename literally contains " b/" which is also the separator + // between the old and new paths. Since old === new, the parser can + // find the unique split where both halves match. + expect(parsePatch( +`diff --git a/x b/y.txt b/x b/y.txt +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'x b/y.txt', + newFileName: 'x b/y.txt', + hunks: [] + }]); + }); }); }); From f35ab14b37368773435357fb1b80be6b7cf658c1 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 3 Mar 2026 14:50:05 +0000 Subject: [PATCH 02/27] Fix --- src/patch/parse.ts | 37 ++++++++++++++++--------------------- test/patch/parse.js | 24 ++++++++++++------------ 2 files changed, 28 insertions(+), 33 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 65d9834b..6d237a4f 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -159,7 +159,8 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { * diff --git "a/file with spaces.txt" "b/file with spaces.txt" * * When filenames don't contain special characters and the old and new names - * are the same, we can unambiguously split on ` b/` by finding the midpoint. + * are the same, we can unambiguously split on ` b/` by finding where the + * two halves (including their a/ and b/ prefixes) yield matching bare names. * When they differ AND contain spaces AND aren't quoted, parsing is * inherently ambiguous, so we do our best. */ @@ -177,12 +178,12 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { if (afterOld.startsWith('"')) { const newPath = parseQuotedFileName(afterOld); if (newPath === null) { return null; } - newFileName = stripPrefix(newPath.fileName, 'b/'); + newFileName = newPath.fileName; } else { - newFileName = stripPrefix(afterOld, 'b/'); + newFileName = afterOld; } return { - oldFileName: stripPrefix(oldPath.fileName, 'a/'), + oldFileName: oldPath.fileName, newFileName }; } @@ -191,12 +192,12 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // e.g. diff --git a/simple "b/complex name" const quoteIdx = rest.indexOf('"'); if (quoteIdx > 0) { - const oldFileName = stripPrefix(rest.substring(0, quoteIdx - 1), 'a/'); + const oldFileName = rest.substring(0, quoteIdx - 1); const newPath = parseQuotedFileName(rest.substring(quoteIdx)); if (newPath === null) { return null; } return { oldFileName, - newFileName: stripPrefix(newPath.fileName, 'b/') + newFileName: newPath.fileName }; } @@ -205,8 +206,9 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // // Strategy: if the path starts with a/ and contains " b/", we try // to find where to split. When old and new names are the same, there's - // a unique split where the two halves (minus prefixes) match. When - // they differ, we try the split closest to the middle. + // a unique split where both halves (after stripping their respective + // a/ and b/ prefixes) match. When they differ, we try the last split. + // The returned filenames include the a/ and b/ prefixes. if (rest.startsWith('a/')) { // Try to find a " b/" separator. If the filename itself contains " b/", // there could be multiple candidates. We try each one and pick the @@ -217,19 +219,19 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { const idx = rest.indexOf(' b/', searchFrom); if (idx === -1) { break; } // Candidate: old = rest[0..idx), new = rest[idx+1..) - const candidateOld = rest.substring(2, idx); // strip "a/" - const candidateNew = rest.substring(idx + 3); // strip " b/" - if (candidateOld === candidateNew) { + const candidateOldBare = rest.substring(2, idx); // strip "a/" for comparison + const candidateNewBare = rest.substring(idx + 3); // strip " b/" for comparison + if (candidateOldBare === candidateNewBare) { // Perfect match - unambiguous - return { oldFileName: candidateOld, newFileName: candidateNew }; + return { oldFileName: rest.substring(0, idx), newFileName: rest.substring(idx + 1) }; } bestSplit = idx; searchFrom = idx + 3; } if (bestSplit !== -1) { return { - oldFileName: rest.substring(2, bestSplit), - newFileName: rest.substring(bestSplit + 3) + oldFileName: rest.substring(0, bestSplit), + newFileName: rest.substring(bestSplit + 1) }; } } @@ -268,13 +270,6 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { return null; } - function stripPrefix(s: string, prefix: string): string { - if (s.startsWith(prefix)) { - return s.substring(prefix.length); - } - return s; - } - // Parses the --- and +++ headers, if none are found, no lines // are consumed. function parseFileHeader(index: Partial) { diff --git a/test/patch/parse.js b/test/patch/parse.js index 0555bf76..e07e005c 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -852,8 +852,8 @@ index abc1234..def5678 100644 old mode 100644 new mode 100755`)) .to.eql([{ - oldFileName: 'script.sh', - newFileName: 'script.sh', + oldFileName: 'a/script.sh', + newFileName: 'b/script.sh', hunks: [] }]); }); @@ -864,8 +864,8 @@ new mode 100755`)) index abc1234..def5678 100644 Binary files a/image.png and b/image.png differ`)) .to.eql([{ - oldFileName: 'image.png', - newFileName: 'image.png', + oldFileName: 'a/image.png', + newFileName: 'b/image.png', hunks: [] }]); }); @@ -899,8 +899,8 @@ diff --git a/file3.txt b/file3.txt } ] }, { - oldFileName: 'image.png', - newFileName: 'image.png', + oldFileName: 'a/image.png', + newFileName: 'b/image.png', hunks: [] }, { oldFileName: 'a/file3.txt', @@ -955,8 +955,8 @@ diff --git a/file3.txt b/file3.txt } ] }, { - oldFileName: 'script.sh', - newFileName: 'script.sh', + oldFileName: 'a/script.sh', + newFileName: 'b/script.sh', hunks: [] }, { oldFileName: 'a/file3.txt', @@ -1114,8 +1114,8 @@ diff --git a/other.txt b/other.txt old mode 100644 new mode 100755`)) .to.eql([{ - oldFileName: 'file with spaces.txt', - newFileName: 'file with spaces.txt', + oldFileName: 'a/file with spaces.txt', + newFileName: 'b/file with spaces.txt', hunks: [] }]); }); @@ -1144,8 +1144,8 @@ rename to another file with spaces.txt`)) old mode 100644 new mode 100755`)) .to.eql([{ - oldFileName: 'x b/y.txt', - newFileName: 'x b/y.txt', + oldFileName: 'a/x b/y.txt', + newFileName: 'b/x b/y.txt', hunks: [] }]); }); From 9b5d3236941fa1fc73ae769c9ec18ade174c7f59 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 3 Mar 2026 15:05:05 +0000 Subject: [PATCH 03/27] Fix --- src/patch/parse.ts | 12 +++++++----- test/patch/parse.js | 20 ++++++++++---------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 6d237a4f..f9491930 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -73,24 +73,26 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { } // Parse rename from / rename to lines - these give us - // unambiguous filenames without a/ b/ prefixes + // unambiguous filenames. These lines don't include the + // a/ and b/ prefixes that appear in the diff --git header + // and --- / +++ lines, so we add them for consistency. const renameFromMatch = (/^rename from (.*)/).exec(extLine); if (renameFromMatch) { - index.oldFileName = renameFromMatch[1]; + index.oldFileName = 'a/' + renameFromMatch[1]; } const renameToMatch = (/^rename to (.*)/).exec(extLine); if (renameToMatch) { - index.newFileName = renameToMatch[1]; + index.newFileName = 'b/' + renameToMatch[1]; } // Parse copy from / copy to lines similarly const copyFromMatch = (/^copy from (.*)/).exec(extLine); if (copyFromMatch) { - index.oldFileName = copyFromMatch[1]; + index.oldFileName = 'a/' + copyFromMatch[1]; } const copyToMatch = (/^copy to (.*)/).exec(extLine); if (copyToMatch) { - index.newFileName = copyToMatch[1]; + index.newFileName = 'b/' + copyToMatch[1]; } i++; diff --git a/test/patch/parse.js b/test/patch/parse.js index e07e005c..0e5bb653 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -806,8 +806,8 @@ similarity index 100% rename from README.md rename to README-2.md`)) .to.eql([{ - oldFileName: 'README.md', - newFileName: 'README-2.md', + oldFileName: 'a/README.md', + newFileName: 'b/README-2.md', hunks: [] }]); }); @@ -984,8 +984,8 @@ similarity index 100% copy from original.txt copy to copy.txt`)) .to.eql([{ - oldFileName: 'original.txt', - newFileName: 'copy.txt', + oldFileName: 'a/original.txt', + newFileName: 'b/copy.txt', hunks: [] }]); }); @@ -1046,8 +1046,8 @@ similarity index 100% rename from old name.txt rename to new name.txt`)) .to.eql([{ - oldFileName: 'old name.txt', - newFileName: 'new name.txt', + oldFileName: 'a/old name.txt', + newFileName: 'b/new name.txt', hunks: [] }]); }); @@ -1090,8 +1090,8 @@ diff --git a/other.txt b/other.txt -aaa +bbb`)) .to.eql([{ - oldFileName: 'old.txt', - newFileName: 'new.txt', + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', hunks: [] }, { oldFileName: 'a/other.txt', @@ -1129,8 +1129,8 @@ similarity index 100% rename from file with spaces.txt rename to another file with spaces.txt`)) .to.eql([{ - oldFileName: 'file with spaces.txt', - newFileName: 'another file with spaces.txt', + oldFileName: 'a/file with spaces.txt', + newFileName: 'b/another file with spaces.txt', hunks: [] }]); }); From 04df6c082f0d8b514df0f2ead913156cffb3c2d4 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 3 Mar 2026 15:12:50 +0000 Subject: [PATCH 04/27] More tests --- test/patch/parse.js | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/test/patch/parse.js b/test/patch/parse.js index 0e5bb653..0bb1638d 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -1149,5 +1149,46 @@ new mode 100755`)) hunks: [] }]); }); + + it('should handle diff --git rename where filenames contain " b/"', function() { + // rename from / rename to lines are unambiguous (one filename per + // line) so " b/" in the name is not a problem for them. The + // diff --git header IS ambiguous, but rename from/to override it. + expect(parsePatch( +`diff --git a/x b/old.txt b/x b/new.txt +similarity index 100% +rename from x b/old.txt +rename to x b/new.txt`)) + .to.eql([{ + oldFileName: 'a/x b/old.txt', + newFileName: 'b/x b/new.txt', + hunks: [] + }]); + }); + + it('should handle diff --git rename where filenames contain " b/", without rename from/to', function() { + // Without rename from/to, the diff --git header is ambiguous when + // filenames contain " b/". But --- and +++ lines resolve it. + expect(parsePatch( +`diff --git a/x b/old.txt b/x b/new.txt +--- a/x b/old.txt ++++ b/x b/new.txt +@@ -1 +1 @@ +-hello ++world`)) + .to.eql([{ + oldFileName: 'a/x b/old.txt', + oldHeader: '', + newFileName: 'b/x b/new.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-hello', '+world'] + } + ] + }]); + }); }); }); From 2a367ffc1d960f3f18914bf16d746a0824f104e6 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 3 Mar 2026 15:33:32 +0000 Subject: [PATCH 05/27] Fix a comment --- src/patch/parse.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index f9491930..7745a553 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -46,10 +46,11 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // Parse the old and new filenames from the diff --git header and // tentatively set oldFileName and newFileName from them. These may - // be overridden by the --- and +++ lines below if they are - // present, but for git diffs that lack --- and +++ lines (e.g. - // renames, mode-only changes, binary files), these are the only - // filenames we get. + // be overridden below by rename from / rename to or copy from / + // copy to extended headers, or by --- and +++ lines. But for git + // diffs that lack all of those (e.g. mode-only changes, binary + // file changes without rename), these are the only filenames we + // get. const paths = parseGitDiffHeader(line); if (paths) { index.oldFileName = paths.oldFileName; From d72de9ee2b551bdd97766982b136903da47a99b4 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 3 Mar 2026 23:04:48 +0000 Subject: [PATCH 06/27] refactor --- src/patch/parse.ts | 52 +++++++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 7745a553..235c8fc4 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -10,21 +10,39 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { list: Partial[] = []; let i = 0; + // These helper functions identify line types that can appear between files + // in a multi-file patch. Keeping them in one place avoids subtle + // inconsistencies from having the same regexes duplicated in multiple places. + + // Matches lines that denote the start of a new diff's section in a + // multi-file patch: `diff --git ...`, `Index: ...`, or `diff -r ...`. + function isDiffHeader(line: string): boolean { + return (/^diff --git /).test(line) + || (/^Index:\s/).test(line) + || (/^diff(?: -r \w+)+\s/).test(line); + } + + // Matches `--- ...` and `+++ ...` (file headers) and `@@ ...` (hunk + // headers). Any of these indicate the end of diff/file metadata. + function isFileHeader(line: string): boolean { + return (/^(---|\+\+\+|@@)\s/).test(line); + } + function parseIndex() { const index: Partial = {}; list.push(index); // Parse diff metadata - let seenFileHeader = false; + let seenDiffHeader = false; while (i < diffstr.length) { const line = diffstr[i]; - // File header found, end parsing diff metadata - if ((/^(---|\+\+\+|@@)\s/).test(line)) { + // File header (---, +++) or hunk header (@@) found, end parsing diff metadata + if (isFileHeader(line)) { break; } - // Check if this line is a recognized file-level header: + // Check if this line is a recognized diff header: // - diff --git a/... b/... // - Index: ... // - diff -r [-r ] (Mercurial) @@ -35,14 +53,14 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // header section, after an `Index:` line. See the diffx // documentation (https://diffx.org) for examples. // - // If we've already seen a recognized file header for *this* file + // If we've already seen a recognized diff header for *this* file // and now we encounter another one, it must belong to the next // file, so break. if ((/^diff --git /).test(line)) { - if (seenFileHeader) { + if (seenDiffHeader) { break; } - seenFileHeader = true; + seenDiffHeader = true; // Parse the old and new filenames from the diff --git header and // tentatively set oldFileName and newFileName from them. These may @@ -64,12 +82,10 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { while (i < diffstr.length) { const extLine = diffstr[i]; - // If we hit ---, +++, @@, or another recognized file header, - // stop consuming extended headers. - if ((/^(---|\+\+\+|@@)\s/).test(extLine) - || (/^diff --git /).test(extLine) - || (/^Index:\s/).test(extLine) - || (/^diff(?: -r \w+)+\s/).test(extLine)) { + // If we hit a file header (---, +++), hunk header (@@), or another + // diff header (diff --git, Index:, diff -r), stop consuming + // extended headers. + if (isFileHeader(extLine) || isDiffHeader(extLine)) { break; } @@ -99,11 +115,11 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { i++; } continue; - } else if ((/^(Index:\s|diff(?: -r \w+)+\s)/).test(line)) { - if (seenFileHeader) { + } else if ((/^Index:\s/).test(line) || (/^diff(?: -r \w+)+\s/).test(line)) { + if (seenDiffHeader) { break; } - seenFileHeader = true; + seenDiffHeader = true; // For Mercurial-style headers like // diff -r 9117c6561b0b -r 273ce12ad8f1 .hgignore @@ -139,7 +155,9 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { while (i < diffstr.length) { const line = diffstr[i]; - if ((/^(Index:\s|diff --git\s|diff(?: -r \w+)+\s|---\s|\+\+\+\s|===================================================================)/).test(line)) { + // Note: we intentionally don't break on `@@` here — those lines + // should be parsed as hunks (below), not trigger a break. + if (isDiffHeader(line) || (/^(---|\+\+\+)\s/).test(line) || (/^===================================================================/).test(line)) { break; } else if ((/^@@/).test(line)) { index.hunks.push(parseHunk()); From ae01ddd44d8239f117f3d7f676c69a2f04123f23 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 4 Mar 2026 12:36:35 +0000 Subject: [PATCH 07/27] Further refactor --- src/patch/parse.ts | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 235c8fc4..1183f7f7 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -14,18 +14,27 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // in a multi-file patch. Keeping them in one place avoids subtle // inconsistencies from having the same regexes duplicated in multiple places. + // Matches `diff --git ...` lines specifically. + function isGitDiffHeader(line: string): boolean { + return (/^diff --git /).test(line); + } + // Matches lines that denote the start of a new diff's section in a // multi-file patch: `diff --git ...`, `Index: ...`, or `diff -r ...`. function isDiffHeader(line: string): boolean { - return (/^diff --git /).test(line) + return isGitDiffHeader(line) || (/^Index:\s/).test(line) || (/^diff(?: -r \w+)+\s/).test(line); } - // Matches `--- ...` and `+++ ...` (file headers) and `@@ ...` (hunk - // headers). Any of these indicate the end of diff/file metadata. + // Matches `--- ...` and `+++ ...` file header lines. function isFileHeader(line: string): boolean { - return (/^(---|\+\+\+|@@)\s/).test(line); + return (/^(---|\+\+\+)\s/).test(line); + } + + // Matches `@@ ...` hunk header lines. + function isHunkHeader(line: string): boolean { + return (/^@@\s/).test(line); } function parseIndex() { @@ -37,8 +46,8 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { while (i < diffstr.length) { const line = diffstr[i]; - // File header (---, +++) or hunk header (@@) found, end parsing diff metadata - if (isFileHeader(line)) { + // File header (---, +++) or hunk header (@@) found; end parsing diff metadata + if (isFileHeader(line) || isHunkHeader(line)) { break; } @@ -56,7 +65,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // If we've already seen a recognized diff header for *this* file // and now we encounter another one, it must belong to the next // file, so break. - if ((/^diff --git /).test(line)) { + if (isGitDiffHeader(line)) { if (seenDiffHeader) { break; } @@ -85,7 +94,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // If we hit a file header (---, +++), hunk header (@@), or another // diff header (diff --git, Index:, diff -r), stop consuming // extended headers. - if (isFileHeader(extLine) || isDiffHeader(extLine)) { + if (isFileHeader(extLine) || isHunkHeader(extLine) || isDiffHeader(extLine)) { break; } @@ -115,7 +124,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { i++; } continue; - } else if ((/^Index:\s/).test(line) || (/^diff(?: -r \w+)+\s/).test(line)) { + } else if (isDiffHeader(line)) { if (seenDiffHeader) { break; } @@ -155,11 +164,9 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { while (i < diffstr.length) { const line = diffstr[i]; - // Note: we intentionally don't break on `@@` here — those lines - // should be parsed as hunks (below), not trigger a break. - if (isDiffHeader(line) || (/^(---|\+\+\+)\s/).test(line) || (/^===================================================================/).test(line)) { + if (isDiffHeader(line) || isFileHeader(line) || (/^===================================================================/).test(line)) { break; - } else if ((/^@@/).test(line)) { + } else if (isHunkHeader(line)) { index.hunks.push(parseHunk()); } else if (line) { throw new Error('Unknown line ' + (i + 1) + ' ' + JSON.stringify(line)); From c32112e100c067a930c7148115f932c108f43d29 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 4 Mar 2026 13:12:28 +0000 Subject: [PATCH 08/27] Comment tweaks --- src/patch/parse.ts | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 1183f7f7..74a0277a 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -51,20 +51,17 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { break; } - // Check if this line is a recognized diff header: - // - diff --git a/... b/... - // - Index: ... - // - diff -r [-r ] (Mercurial) + // The next two branches handle recognized diff headers. Note that + // isDiffHeader deliberately does NOT match arbitrary `diff` + // commands like `diff -u -p -r1.1 -r1.2`, because in some + // formats (e.g. CVS diffs) such lines appear as metadata within + // a single file's header section, after an `Index:` line. See the + // diffx documentation (https://diffx.org) for examples. // - // We specifically do NOT match arbitrary `diff` commands like - // `diff -u -p -r1.1 -r1.2` here, because in some formats (e.g. - // CVS diffs) such lines appear as metadata within a single file's - // header section, after an `Index:` line. See the diffx - // documentation (https://diffx.org) for examples. - // - // If we've already seen a recognized diff header for *this* file - // and now we encounter another one, it must belong to the next - // file, so break. + // In both branches: if we've already seen a diff header for *this* + // file and now we encounter another one, it must belong to the + // next file, so break. + if (isGitDiffHeader(line)) { if (seenDiffHeader) { break; @@ -91,9 +88,8 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { while (i < diffstr.length) { const extLine = diffstr[i]; - // If we hit a file header (---, +++), hunk header (@@), or another - // diff header (diff --git, Index:, diff -r), stop consuming - // extended headers. + // Stop consuming extended headers if we hit a file header, + // hunk header, or another diff header. if (isFileHeader(extLine) || isHunkHeader(extLine) || isDiffHeader(extLine)) { break; } From b9bd0b887762c9ff331b90ed64839f599d15d154 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 4 Mar 2026 13:32:06 +0000 Subject: [PATCH 09/27] Fix more comments --- src/patch/parse.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 74a0277a..5a3785a9 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -71,7 +71,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // Parse the old and new filenames from the diff --git header and // tentatively set oldFileName and newFileName from them. These may // be overridden below by rename from / rename to or copy from / - // copy to extended headers, or by --- and +++ lines. But for git + // copy to extended headers, or by --- and +++ lines. But for Git // diffs that lack all of those (e.g. mode-only changes, binary // file changes without rename), these are the only filenames we // get. @@ -81,7 +81,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { index.newFileName = paths.newFileName; } - // Consume git extended headers (old mode, new mode, rename from, + // Consume Git extended headers (old mode, new mode, rename from, // rename to, similarity index, index, Binary files ... differ, // etc.) i++; @@ -178,9 +178,9 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { * The format is: * diff --git a/ b/ * - * When filenames contain special characters, git quotes them with C-style - * escaping: - * diff --git "a/file with spaces.txt" "b/file with spaces.txt" + * When filenames contain characters like newlines, tabs, backslashes, or + * double quotes, Git quotes them with C-style escaping: + * diff --git "a/file\twith\ttabs.txt" "b/file\twith\ttabs.txt" * * When filenames don't contain special characters and the old and new names * are the same, we can unambiguously split on ` b/` by finding where the @@ -193,7 +193,8 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { const rest = line.substring('diff --git '.length); // Handle quoted paths: "a/path" "b/path" - // Git quotes paths when they contain special characters. + // Git quotes paths when they contain characters like newlines, tabs, + // backslashes, or double quotes (but notably not spaces). if (rest.startsWith('"')) { const oldPath = parseQuotedFileName(rest); if (oldPath === null) { return null; } @@ -213,7 +214,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { } // Check if the second path is quoted - // e.g. diff --git a/simple "b/complex name" + // e.g. diff --git a/simple "b/renamed\nnewline.txt" const quoteIdx = rest.indexOf('"'); if (quoteIdx > 0) { const oldFileName = rest.substring(0, quoteIdx - 1); From 44a9dbae013fe5ac8b2d7872a48e180a6fa78b8e Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 4 Mar 2026 14:08:12 +0000 Subject: [PATCH 10/27] Document an error case better --- src/patch/parse.ts | 12 +++++++++ test/patch/parse.js | 65 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 5a3785a9..a50e19b5 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -75,6 +75,10 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // diffs that lack all of those (e.g. mode-only changes, binary // file changes without rename), these are the only filenames we // get. + // parseGitDiffHeader returns null if the header can't be parsed + // (e.g. unterminated quoted filename, or unexpected format). In + // that case we skip setting filenames here; they may still be + // set from --- / +++ or rename from / rename to lines below. const paths = parseGitDiffHeader(line); if (paths) { index.oldFileName = paths.oldFileName; @@ -187,6 +191,14 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { * two halves (including their a/ and b/ prefixes) yield matching bare names. * When they differ AND contain spaces AND aren't quoted, parsing is * inherently ambiguous, so we do our best. + * + * Returns null if the header can't be parsed — e.g. if a quoted filename + * has an unterminated quote, or if the unquoted header doesn't match the + * expected `a/... b/...` format. In that case, the caller (parseIndex) + * skips setting oldFileName/newFileName from this header, but they may + * still be set later from `---`/`+++` lines or `rename from`/`rename to` + * extended headers; if none of those are present either, they'll remain + * undefined in the output. */ function parseGitDiffHeader(line: string): { oldFileName: string, newFileName: string } | null { // Strip the "diff --git " prefix diff --git a/test/patch/parse.js b/test/patch/parse.js index 0bb1638d..aee9b7b1 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -1189,6 +1189,69 @@ rename to x b/new.txt`)) } ] }]); - }); + }); + + // So far as we know, Git never actually produces diff --git headers that + // can't be parsed (e.g. with unterminated quotes or missing a/b prefixes). + // But we test these cases to confirm parsePatch doesn't crash and instead + // gracefully falls back to getting filenames from --- / +++ lines. + + it('should handle an unparseable diff --git header with unterminated quote', function() { + expect(parsePatch( +`diff --git "a/unterminated +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should handle an unparseable diff --git header with no a/b prefixes', function() { + expect(parsePatch( +`diff --git file.txt file.txt +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + + it('should handle an unparseable diff --git header with no --- or +++ fallback', function() { + // When both the diff --git header is unparseable AND there are no + // --- / +++ lines, filenames remain undefined. + expect(parsePatch( +`diff --git file.txt file.txt +old mode 100644 +new mode 100755`)) + .to.eql([{ + hunks: [] + }]); + }); }); }); From ba1d8e855947484238b7511ea9f49be09c9a81a7 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Wed, 4 Mar 2026 14:35:07 +0000 Subject: [PATCH 11/27] Docs correction --- README.md | 2 +- src/types.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8d7db61b..a2d5ade6 100644 --- a/README.md +++ b/README.md @@ -186,7 +186,7 @@ jsdiff's diff functions all take an old text and a new text and perform three st * `parsePatch(diffStr)` - Parses a patch into structured data - Return a JSON object representation of the a patch, suitable for use with the `applyPatch` method. This parses to the same structure returned by `structuredPatch`. + Return a JSON object representation of the a patch, suitable for use with the `applyPatch` method. This parses to the same structure returned by `structuredPatch`, except that `oldFileName` and `newFileName` may be `undefined` if the patch doesn't contain enough information to determine them (e.g. a hunk-only patch with no file headers, or a `diff --git` header that couldn't be parsed and has no `---`/`+++` lines to fall back on). * `reversePatch(patch)` - Returns a new structured patch which when applied will undo the original `patch`. diff --git a/src/types.ts b/src/types.ts index 1ae11370..38ab9476 100644 --- a/src/types.ts +++ b/src/types.ts @@ -225,8 +225,8 @@ export type AllDiffOptions = DiffJsonOptions; export interface StructuredPatch { - oldFileName: string, - newFileName: string, + oldFileName: string | undefined, + newFileName: string | undefined, oldHeader: string | undefined, newHeader: string | undefined, hunks: StructuredPatchHunk[], From b79069e34af8460203a0cd3cacd6832434748781 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 9 Mar 2026 13:45:11 +0000 Subject: [PATCH 12/27] Latest AI changes --- README.md | 70 +++++++++++- src/patch/create.ts | 4 +- test/patch/create.js | 36 ++++++ test/patch/readme-rename-example.js | 165 ++++++++++++++++++++++++++++ 4 files changed, 271 insertions(+), 4 deletions(-) create mode 100644 test/patch/readme-rename-example.js diff --git a/README.md b/README.md index a2d5ade6..a3fd82a9 100644 --- a/README.md +++ b/README.md @@ -184,9 +184,11 @@ jsdiff's diff functions all take an old text and a new text and perform three st Once all patches have been applied or an error occurs, the `options.complete(err)` callback is made. -* `parsePatch(diffStr)` - Parses a patch into structured data +* `parsePatch(diffStr)` - Parses a unified diff format patch into a structured patch object. - Return a JSON object representation of the a patch, suitable for use with the `applyPatch` method. This parses to the same structure returned by `structuredPatch`, except that `oldFileName` and `newFileName` may be `undefined` if the patch doesn't contain enough information to determine them (e.g. a hunk-only patch with no file headers, or a `diff --git` header that couldn't be parsed and has no `---`/`+++` lines to fall back on). + Return a JSON object representation of the a patch, suitable for use with the `applyPatch` method. This parses to the same structure returned by `structuredPatch`, except that `oldFileName` and `newFileName` may be `undefined` if the patch doesn't contain enough information to determine them (e.g. a hunk-only patch with no file headers). + + `parsePatch` has some understanding of [Git's particular dialect of unified diff format](https://git-scm.com/docs/git-diff#generate_patch_text_with_p). In particular, it can extract filenames from the patch headers or extended headers of Git patches that contain no hunks and no file headers, including ones representing a file being renamed without changes. (However, it ignores many extended headers that describe things irrelevant to jsdiff's patch representation, like mode changes.) * `reversePatch(patch)` - Returns a new structured patch which when applied will undo the original `patch`. @@ -360,6 +362,70 @@ applyPatches(patch, { }); ``` +##### Applying a multi-file Git patch that may include renames + +Git diffs can include file renames (with or without content changes). `applyPatches` doesn't perform renames automatically, but you can handle them by collecting rename operations during patching and applying them at the end. + +**Important:** Git patches describe the state of files *before* the commit, and [the Git docs note](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) that it is incorrect to apply each change sequentially. For example, a patch that swaps two files (`a → b` and `b → a`) would break if you renamed `a` to `b` before processing the second entry. The example below avoids this by writing patched content to temporary files, then moving them all into place at the end: + +``` +const {applyPatches} = require('diff'); +const patch = fs.readFileSync("git-diff.patch").toString(); +const pendingWrites = []; // collect writes, apply them all at the end +let nextTmpId = 0; +applyPatches(patch, { + loadFile: (patch, callback) => { + let fileContents; + try { + // Git diffs use a/ and b/ prefixes; strip them to get the real path + const filePath = patch.oldFileName.replace(/^a\//, ''); + fileContents = fs.readFileSync(filePath).toString(); + } catch (e) { + callback(`No such file: ${patch.oldFileName}`); + return; + } + callback(undefined, fileContents); + }, + patched: (patch, patchedContent, callback) => { + if (patchedContent === false) { + callback(`Failed to apply patch to ${patch.oldFileName}`); + return; + } + const oldPath = patch.oldFileName.replace(/^a\//, ''); + const newPath = patch.newFileName.replace(/^b\//, ''); + // Write to a temporary file so we don't clobber files that + // later patches may still need to read from their old paths + // (e.g. when two files are being swapped) + const tmpPath = `.tmp-patch-${nextTmpId++}`; + fs.writeFileSync(tmpPath, patchedContent); + if (oldPath !== newPath) { + pendingWrites.push({tmpPath, oldPath, newPath}); + } else { + pendingWrites.push({tmpPath, oldPath, newPath: oldPath}); + } + callback(); + }, + complete: (err) => { + if (err) { + console.log("Failed with error:", err); + return; + } + // First, delete all old files that are being renamed away. + // We must do all deletes before any renames, so that e.g. a + // swap (a→b, b→a) doesn't delete a file we just renamed. + for (const {oldPath, newPath} of pendingWrites) { + if (oldPath !== newPath) { + fs.unlinkSync(oldPath); + } + } + // Then move all temp files into their final destinations + for (const {tmpPath, newPath} of pendingWrites) { + fs.renameSync(tmpPath, newPath); + } + } +}); +``` + ## Compatibility jsdiff should support all ES5 environments. If you find one that it doesn't support, please [open an issue](https://github.com/kpdecker/jsdiff/issues). diff --git a/src/patch/create.ts b/src/patch/create.ts index 13c2a2b1..e64aaf50 100644 --- a/src/patch/create.ts +++ b/src/patch/create.ts @@ -292,13 +292,13 @@ export function formatPatch(patch: StructuredPatch | StructuredPatch[], headerOp } const ret = []; - if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName) { + if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName && patch.oldFileName !== undefined) { ret.push('Index: ' + patch.oldFileName); } if (headerOptions.includeUnderline) { ret.push('==================================================================='); } - if (headerOptions.includeFileHeaders) { + if (headerOptions.includeFileHeaders && patch.oldFileName !== undefined && patch.newFileName !== undefined) { ret.push('--- ' + patch.oldFileName + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader)); ret.push('+++ ' + patch.newFileName + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader)); } diff --git a/test/patch/create.js b/test/patch/create.js index 204818d8..4001ff34 100644 --- a/test/patch/create.js +++ b/test/patch/create.js @@ -1175,6 +1175,42 @@ describe('patch/create', function() { // eslint-disable-next-line dot-notation expect(() => formatPatch(patchArray, OMIT_HEADERS)).to.throw(); }); + + it('should silently skip headers when filenames are undefined', function() { + const patchWithNoFilenames = { + oldFileName: undefined, + newFileName: undefined, + oldHeader: undefined, + newHeader: undefined, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + }] + }; + // All header options should silently skip headers when filenames + // are undefined, rather than emitting "--- undefined" etc. + const expectedOutput = + '@@ -1,1 +1,1 @@\n' + + '-old\n' + + '+new\n'; + const expectedWithUnderline = + '===================================================================\n' + + '@@ -1,1 +1,1 @@\n' + + '-old\n' + + '+new\n'; + expect(formatPatch(patchWithNoFilenames, OMIT_HEADERS)).to.equal(expectedOutput); + expect(formatPatch(patchWithNoFilenames, FILE_HEADERS_ONLY)).to.equal(expectedOutput); + // INCLUDE_HEADERS still emits the underline, just skips Index and file headers + expect(formatPatch(patchWithNoFilenames, INCLUDE_HEADERS)).to.equal(expectedWithUnderline); + expect(formatPatch(patchWithNoFilenames)).to.equal(expectedWithUnderline); + // includeIndex: true with undefined filenames should also skip silently + expect(formatPatch(patchWithNoFilenames, { + includeIndex: true, + includeUnderline: false, + includeFileHeaders: false + })).to.equal(expectedOutput); + }); }); }); }); diff --git a/test/patch/readme-rename-example.js b/test/patch/readme-rename-example.js new file mode 100644 index 00000000..a91cb325 --- /dev/null +++ b/test/patch/readme-rename-example.js @@ -0,0 +1,165 @@ +import {applyPatches} from '../../libesm/patch/apply.js'; + +import {expect} from 'chai'; +import fs from 'fs'; +import os from 'os'; +import path from 'path'; + +describe('README Git rename example', function() { + let tmpDir; + let originalCwd; + + beforeEach(function() { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'jsdiff-readme-test-')); + originalCwd = process.cwd(); + process.chdir(tmpDir); + }); + + afterEach(function() { + process.chdir(originalCwd); + fs.rmSync(tmpDir, {recursive: true, force: true}); + }); + + /** + * Extract the Git rename example code from the README and return it as a + * function that takes (applyPatches, patch, fs, path) and runs the example. + */ + function getReadmeExampleFn() { + const readme = fs.readFileSync( + path.join(__dirname, '../../README.md'), + 'utf-8' + ); + + // Find the heading + const headingIndex = readme.indexOf('##### Applying a multi-file Git patch that may include renames'); + if (headingIndex === -1) { + throw new Error('Could not find the Git rename example heading in README.md'); + } + + // Find the code block after the heading + const afterHeading = readme.substring(headingIndex); + const codeBlockStart = afterHeading.indexOf('\n```\n'); + if (codeBlockStart === -1) { + throw new Error('Could not find the code block in the Git rename example'); + } + const codeStart = codeBlockStart + 4; // skip past the \n```\n + const codeBlockEnd = afterHeading.indexOf('\n```\n', codeStart); + if (codeBlockEnd === -1) { + throw new Error('Could not find the end of the code block in the Git rename example'); + } + + let code = afterHeading.substring(codeStart, codeBlockEnd); + + // Strip the require line — we'll provide applyPatches as an argument. + // Strip the fs.readFileSync for the patch — we'll provide patch as an argument. + code = code + .replace(/const \{applyPatches\}.*\n/, '') + .replace(/const patch = .*\n/, ''); + + // eslint-disable-next-line no-new-func + return new Function('applyPatches', 'patch', 'fs', 'path', code); + } + + it('should handle a simple rename with content change', function() { + fs.writeFileSync('old.txt', 'line1\nline2\nline3\n'); + + const patch = +`diff --git a/old.txt b/new.txt +similarity index 80% +rename from old.txt +rename to new.txt +--- a/old.txt ++++ b/new.txt +@@ -1,3 +1,3 @@ + line1 +-line2 ++line2modified + line3 +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.existsSync('old.txt')).to.equal(false); + expect(fs.readFileSync('new.txt', 'utf-8')) + .to.equal('line1\nline2modified\nline3\n'); + }); + + it('should handle a swap rename (a→b, b→a)', function() { + fs.writeFileSync('a.txt', 'content of a\n'); + fs.writeFileSync('b.txt', 'content of b\n'); + + const patch = +`diff --git a/a.txt b/b.txt +similarity index 100% +rename from a.txt +rename to b.txt +diff --git a/b.txt b/a.txt +similarity index 100% +rename from b.txt +rename to a.txt +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('a.txt', 'utf-8')).to.equal('content of b\n'); + expect(fs.readFileSync('b.txt', 'utf-8')).to.equal('content of a\n'); + }); + + it('should handle a swap rename with content changes', function() { + fs.writeFileSync('a.txt', 'aaa\n'); + fs.writeFileSync('b.txt', 'bbb\n'); + + const patch = +`diff --git a/a.txt b/b.txt +similarity index 50% +rename from a.txt +rename to b.txt +--- a/a.txt ++++ b/b.txt +@@ -1 +1 @@ +-aaa ++aaa-modified +diff --git a/b.txt b/a.txt +similarity index 50% +rename from b.txt +rename to a.txt +--- a/b.txt ++++ b/a.txt +@@ -1 +1 @@ +-bbb ++bbb-modified +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('a.txt', 'utf-8')).to.equal('bbb-modified\n'); + expect(fs.readFileSync('b.txt', 'utf-8')).to.equal('aaa-modified\n'); + }); + + it('should handle a three-way rotation (a→b, b→c, c→a)', function() { + fs.writeFileSync('a.txt', 'content of a\n'); + fs.writeFileSync('b.txt', 'content of b\n'); + fs.writeFileSync('c.txt', 'content of c\n'); + + const patch = +`diff --git a/a.txt b/b.txt +similarity index 100% +rename from a.txt +rename to b.txt +diff --git a/b.txt b/c.txt +similarity index 100% +rename from b.txt +rename to c.txt +diff --git a/c.txt b/a.txt +similarity index 100% +rename from c.txt +rename to a.txt +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('a.txt', 'utf-8')).to.equal('content of c\n'); + expect(fs.readFileSync('b.txt', 'utf-8')).to.equal('content of a\n'); + expect(fs.readFileSync('c.txt', 'utf-8')).to.equal('content of b\n'); + }); +}); From 636963a5e705b97163d61fe2a1907a5ace24fa9f Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 9 Mar 2026 14:19:49 +0000 Subject: [PATCH 13/27] Further fixes --- README.md | 49 ++++++++++++++--------------- test/patch/readme-rename-example.js | 34 ++++++++++++++++++++ 2 files changed, 57 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index a3fd82a9..5cd143c3 100644 --- a/README.md +++ b/README.md @@ -364,27 +364,29 @@ applyPatches(patch, { ##### Applying a multi-file Git patch that may include renames -Git diffs can include file renames (with or without content changes). `applyPatches` doesn't perform renames automatically, but you can handle them by collecting rename operations during patching and applying them at the end. +Git diffs can include file renames (with or without content changes). `applyPatches` doesn't perform renames automatically, but you can handle them in the callbacks. -**Important:** Git patches describe the state of files *before* the commit, and [the Git docs note](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) that it is incorrect to apply each change sequentially. For example, a patch that swaps two files (`a → b` and `b → a`) would break if you renamed `a` to `b` before processing the second entry. The example below avoids this by writing patched content to temporary files, then moving them all into place at the end: +**Important:** Git patches describe the state of files *before* the commit, and [the Git docs note](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) that it is incorrect to apply each change sequentially. For example, a patch that swaps two files (`a → b` and `b → a`) would break if you wrote `a`'s content to `b` before reading `b`'s original content. The example below avoids this by collecting all writes in a `Map` and applying them all at the end in `complete`: ``` const {applyPatches} = require('diff'); const patch = fs.readFileSync("git-diff.patch").toString(); -const pendingWrites = []; // collect writes, apply them all at the end -let nextTmpId = 0; +const DELETE = Symbol('delete'); +const pendingWrites = new Map(); // newPath → content (or DELETE sentinel) applyPatches(patch, { loadFile: (patch, callback) => { - let fileContents; + // Git uses /dev/null as the old name when a file is newly created + if (patch.oldFileName === '/dev/null') { + callback(undefined, ''); + return; + } try { // Git diffs use a/ and b/ prefixes; strip them to get the real path const filePath = patch.oldFileName.replace(/^a\//, ''); - fileContents = fs.readFileSync(filePath).toString(); + callback(undefined, fs.readFileSync(filePath).toString()); } catch (e) { callback(`No such file: ${patch.oldFileName}`); - return; } - callback(undefined, fileContents); }, patched: (patch, patchedContent, callback) => { if (patchedContent === false) { @@ -393,15 +395,15 @@ applyPatches(patch, { } const oldPath = patch.oldFileName.replace(/^a\//, ''); const newPath = patch.newFileName.replace(/^b\//, ''); - // Write to a temporary file so we don't clobber files that - // later patches may still need to read from their old paths - // (e.g. when two files are being swapped) - const tmpPath = `.tmp-patch-${nextTmpId++}`; - fs.writeFileSync(tmpPath, patchedContent); - if (oldPath !== newPath) { - pendingWrites.push({tmpPath, oldPath, newPath}); + // Git uses /dev/null as the new name when a file is deleted + if (patch.newFileName === '/dev/null') { + pendingWrites.set(oldPath, DELETE); } else { - pendingWrites.push({tmpPath, oldPath, newPath: oldPath}); + pendingWrites.set(newPath, patchedContent); + if (oldPath !== newPath && patch.oldFileName !== '/dev/null' + && !pendingWrites.has(oldPath)) { + pendingWrites.set(oldPath, DELETE); + } } callback(); }, @@ -410,18 +412,13 @@ applyPatches(patch, { console.log("Failed with error:", err); return; } - // First, delete all old files that are being renamed away. - // We must do all deletes before any renames, so that e.g. a - // swap (a→b, b→a) doesn't delete a file we just renamed. - for (const {oldPath, newPath} of pendingWrites) { - if (oldPath !== newPath) { - fs.unlinkSync(oldPath); + for (const [filePath, content] of pendingWrites) { + if (content === DELETE) { + fs.unlinkSync(filePath); + } else { + fs.writeFileSync(filePath, content); } } - // Then move all temp files into their final destinations - for (const {tmpPath, newPath} of pendingWrites) { - fs.renameSync(tmpPath, newPath); - } } }); ``` diff --git a/test/patch/readme-rename-example.js b/test/patch/readme-rename-example.js index a91cb325..4535f7ca 100644 --- a/test/patch/readme-rename-example.js +++ b/test/patch/readme-rename-example.js @@ -162,4 +162,38 @@ rename to a.txt expect(fs.readFileSync('b.txt', 'utf-8')).to.equal('content of a\n'); expect(fs.readFileSync('c.txt', 'utf-8')).to.equal('content of b\n'); }); + + it('should handle a file deletion', function() { + fs.writeFileSync('doomed.txt', 'goodbye\n'); + + const patch = +`diff --git a/doomed.txt b/doomed.txt +deleted file mode 100644 +index 2b31011..0000000 +--- a/doomed.txt ++++ /dev/null +@@ -1 +0,0 @@ +-goodbye +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.existsSync('doomed.txt')).to.equal(false); + }); + + it('should handle a file creation', function() { + const patch = +`diff --git a/brand-new.txt b/brand-new.txt +new file mode 100644 +index 0000000..fa49b07 +--- /dev/null ++++ b/brand-new.txt +@@ -0,0 +1 @@ ++hello world +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('brand-new.txt', 'utf-8')).to.equal('hello world\n'); + }); }); From bfaa2e2912001581e8b7a543536a9e6e334fa8a8 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 9 Mar 2026 14:51:07 +0000 Subject: [PATCH 14/27] Shorten & simplify README prose --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index 5cd143c3..81dda101 100644 --- a/README.md +++ b/README.md @@ -364,9 +364,7 @@ applyPatches(patch, { ##### Applying a multi-file Git patch that may include renames -Git diffs can include file renames (with or without content changes). `applyPatches` doesn't perform renames automatically, but you can handle them in the callbacks. - -**Important:** Git patches describe the state of files *before* the commit, and [the Git docs note](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) that it is incorrect to apply each change sequentially. For example, a patch that swaps two files (`a → b` and `b → a`) would break if you wrote `a`'s content to `b` before reading `b`'s original content. The example below avoids this by collecting all writes in a `Map` and applying them all at the end in `complete`: +[Git patches](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) can include file renames (with or without content changes), which need to be handled in the callbacks you provide to `applyPatches`. They can also potentially include file *swaps* (renaming `a → b` and `b → a`), in which case it is incorrect to simply apply each change atomically in sequence. The pattern with the `pendingWrites` Map below handles this nuance: ``` const {applyPatches} = require('diff'); From 90c1bea5754616694a9de09bc80eb87e463a1716 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Mon, 9 Mar 2026 15:39:52 +0000 Subject: [PATCH 15/27] Further bugfix --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 81dda101..01868130 100644 --- a/README.md +++ b/README.md @@ -395,7 +395,9 @@ applyPatches(patch, { const newPath = patch.newFileName.replace(/^b\//, ''); // Git uses /dev/null as the new name when a file is deleted if (patch.newFileName === '/dev/null') { - pendingWrites.set(oldPath, DELETE); + if (!pendingWrites.has(oldPath)) { + pendingWrites.set(oldPath, DELETE); + } } else { pendingWrites.set(newPath, patchedContent); if (oldPath !== newPath && patch.oldFileName !== '/dev/null' From 1130b9620fd12ba99060a36b045f77e04055cf90 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 13:09:40 +0000 Subject: [PATCH 16/27] stuff --- README.md | 9 +-- release-notes.md | 28 ++++++++ src/patch/create.ts | 41 +++++++++-- src/patch/parse.ts | 24 +++++++ src/patch/reverse.ts | 16 ++++- src/types.ts | 38 +++++++++++ test/patch/create.js | 154 ++++++++++++++++++++++++++++++++++++++++++ test/patch/parse.js | 86 +++++++++++++++++++++-- test/patch/reverse.js | 89 +++++++++++++++++++++++- 9 files changed, 466 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 01868130..3a721967 100644 --- a/README.md +++ b/README.md @@ -364,13 +364,13 @@ applyPatches(patch, { ##### Applying a multi-file Git patch that may include renames -[Git patches](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) can include file renames (with or without content changes), which need to be handled in the callbacks you provide to `applyPatches`. They can also potentially include file *swaps* (renaming `a → b` and `b → a`), in which case it is incorrect to simply apply each change atomically in sequence. The pattern with the `pendingWrites` Map below handles this nuance: +[Git patches](https://git-scm.com/docs/git-diff#generate_patch_text_with_p) can include file renames and copies (with or without content changes), which need to be handled in the callbacks you provide to `applyPatches`. `parsePatch` sets `isRename` or `isCopy` on the structured patch object so you can distinguish these cases. Patches can also potentially include file *swaps* (renaming `a → b` and `b → a`), in which case it is incorrect to simply apply each change atomically in sequence. The pattern with the `pendingWrites` Map below handles all of these nuances: ``` const {applyPatches} = require('diff'); const patch = fs.readFileSync("git-diff.patch").toString(); const DELETE = Symbol('delete'); -const pendingWrites = new Map(); // newPath → content (or DELETE sentinel) +const pendingWrites = new Map(); // filePath → content (or DELETE sentinel) applyPatches(patch, { loadFile: (patch, callback) => { // Git uses /dev/null as the old name when a file is newly created @@ -400,8 +400,9 @@ applyPatches(patch, { } } else { pendingWrites.set(newPath, patchedContent); - if (oldPath !== newPath && patch.oldFileName !== '/dev/null' - && !pendingWrites.has(oldPath)) { + // For renames, delete the old file (but not for copies, + // where the old file should be kept) + if (patch.isRename && !pendingWrites.has(oldPath)) { pendingWrites.set(oldPath, DELETE); } } diff --git a/release-notes.md b/release-notes.md index 68e5db6d..d7cb4d14 100644 --- a/release-notes.md +++ b/release-notes.md @@ -1,5 +1,33 @@ # Release Notes +## 9.0.0 (prerelease) + +- **`parsePatch` now robustly handles Git-style diffs.** Previously, `parsePatch` had inconsistent regex usage that caused several bugs when parsing `diff --git` output: + * Multi-file Git diffs containing hunk-less entries (e.g. mode-only changes, binary files, rename-only entries without content changes) could cause file entries to be merged together or lost entirely. + * Git extended headers (`rename from`/`rename to`, `copy from`/`copy to`, `old mode`/`new mode`, `index`, etc.) were not parsed and could cause parse errors. + * `diff --git` was not consistently recognized as a diff header, leading to missing `index` entries and other subtle issues. + + The parser now: + * Correctly recognizes `diff --git` headers and parses filenames from them, including C-style quoted filenames (used by Git when paths contain tabs, newlines, backslashes, or double quotes). + * Consumes Git extended headers (`rename from`/`rename to`, `copy from`/`copy to`, mode changes, similarity index, etc.) without choking. + * Handles hunk-less entries (rename-only, mode-only, binary) as distinct file entries rather than merging them into adjacent entries. + * Sets metadata flags on the resulting `StructuredPatch`: `isGit` (always, for Git diffs), `isRename`, `isCopy`, `isCreate`, `isDelete` (when the corresponding extended headers are present), and `oldMode`/`newMode` (parsed from `old mode`, `new mode`, `deleted file mode`, or `new file mode` headers). This lets consumers distinguish renames (where the old file should be deleted) from copies (where it should be kept), detect file creations and deletions, and preserve file mode information. + * Uses consistent, centralized helper functions for header detection instead of duplicated regexes. + +- **`reversePatch` now correctly reverses copy patches.** Reversing a copy produces a deletion (the reversed patch has `newFileName` set to `'/dev/null'` and `isCopy`/`isRename` unset), since undoing a copy means deleting the file that was created. Reversing a rename still produces a rename in the opposite direction, as before. + +- **`formatPatch` now supports Git-style patches.** When a `StructuredPatch` has `isGit: true`, `formatPatch` emits a `diff --git` header (instead of `Index:` / underline) and the appropriate Git extended headers (`rename from`/`rename to`, `copy from`/`copy to`, `deleted file mode`, `new file mode`, `old mode`/`new mode`) based on the patch's metadata flags. File headers (`---`/`+++`) are omitted on hunk-less Git patches (e.g. pure renames, mode-only changes), matching Git's own output. This means `parsePatch` output can be round-tripped through `formatPatch`. + +- **`formatPatch` now gracefully handles patches with undefined filenames** instead of emitting nonsensical headers like `--- undefined`. If `oldFileName` or `newFileName` is `undefined`, the `---`/`+++` file headers and the `Index:` line are silently omitted. This is consistent with how such patches can arise from parsing Git diffs that lack `---`/`+++` lines. + +- **README: added documentation and example for applying Git patches that include renames, copies, deletions, and file creations** using `applyPatches`. + +### Breaking changes + +- **The `oldFileName` and `newFileName` fields of `StructuredPatch` are now typed as `string | undefined` instead of `string`.** This reflects the reality that `parsePatch` can produce patches without filenames (e.g. when parsing a Git diff with an unparseable `diff --git` header and no `---`/`+++` fallback). TypeScript users who access these fields without null checks will see type errors and should update their code to handle the `undefined` case. + +- **`StructuredPatch` has new optional fields for Git metadata:** `isGit`, `isRename`, `isCopy`, `isCreate`, `isDelete`, `oldMode`, and `newMode`. These are set by `parsePatch` when parsing Git diffs. Code that does exact deep-equality checks (e.g. `assert.deepEqual`) against `StructuredPatch` objects from `parsePatch` may need updating to account for the new fields. + ## 8.0.4 (prerelease) - [#667](https://github.com/kpdecker/jsdiff/pull/667) - **fix another bug in `diffWords` when used with an `Intl.Segmenter`**. If the text to be diffed included a combining mark after a whitespace character (i.e. roughly speaking, an accented space), `diffWords` would previously crash. Now this case is handled correctly. diff --git a/src/patch/create.ts b/src/patch/create.ts index e64aaf50..f50b06ba 100644 --- a/src/patch/create.ts +++ b/src/patch/create.ts @@ -292,13 +292,42 @@ export function formatPatch(patch: StructuredPatch | StructuredPatch[], headerOp } const ret = []; - if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName && patch.oldFileName !== undefined) { - ret.push('Index: ' + patch.oldFileName); - } - if (headerOptions.includeUnderline) { - ret.push('==================================================================='); + + if (patch.isGit) { + // Emit Git-style diff --git header and extended headers + ret.push('diff --git ' + (patch.oldFileName ?? '') + ' ' + (patch.newFileName ?? '')); + if (patch.isDelete) { + ret.push('deleted file mode ' + (patch.oldMode ?? '100644')); + } + if (patch.isCreate) { + ret.push('new file mode ' + (patch.newMode ?? '100644')); + } + if (patch.oldMode && patch.newMode && !patch.isDelete && !patch.isCreate) { + ret.push('old mode ' + patch.oldMode); + ret.push('new mode ' + patch.newMode); + } + if (patch.isRename) { + ret.push('rename from ' + (patch.oldFileName ?? '').replace(/^a\//, '')); + ret.push('rename to ' + (patch.newFileName ?? '').replace(/^b\//, '')); + } + if (patch.isCopy) { + ret.push('copy from ' + (patch.oldFileName ?? '').replace(/^a\//, '')); + ret.push('copy to ' + (patch.newFileName ?? '').replace(/^b\//, '')); + } + } else { + if (headerOptions.includeIndex && patch.oldFileName == patch.newFileName && patch.oldFileName !== undefined) { + ret.push('Index: ' + patch.oldFileName); + } + if (headerOptions.includeUnderline) { + ret.push('==================================================================='); + } } - if (headerOptions.includeFileHeaders && patch.oldFileName !== undefined && patch.newFileName !== undefined) { + + // Emit --- / +++ file headers. For Git patches with no hunks (e.g. + // pure renames, mode-only changes), Git omits these, so we do too. + const hasHunks = patch.hunks.length > 0; + if (headerOptions.includeFileHeaders && patch.oldFileName !== undefined && patch.newFileName !== undefined + && (!patch.isGit || hasHunks)) { ret.push('--- ' + patch.oldFileName + (typeof patch.oldHeader === 'undefined' ? '' : '\t' + patch.oldHeader)); ret.push('+++ ' + patch.newFileName + (typeof patch.newHeader === 'undefined' ? '' : '\t' + patch.newHeader)); } diff --git a/src/patch/parse.ts b/src/patch/parse.ts index a50e19b5..d0878134 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -67,6 +67,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { break; } seenDiffHeader = true; + index.isGit = true; // Parse the old and new filenames from the diff --git header and // tentatively set oldFileName and newFileName from them. These may @@ -105,20 +106,43 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { const renameFromMatch = (/^rename from (.*)/).exec(extLine); if (renameFromMatch) { index.oldFileName = 'a/' + renameFromMatch[1]; + index.isRename = true; } const renameToMatch = (/^rename to (.*)/).exec(extLine); if (renameToMatch) { index.newFileName = 'b/' + renameToMatch[1]; + index.isRename = true; } // Parse copy from / copy to lines similarly const copyFromMatch = (/^copy from (.*)/).exec(extLine); if (copyFromMatch) { index.oldFileName = 'a/' + copyFromMatch[1]; + index.isCopy = true; } const copyToMatch = (/^copy to (.*)/).exec(extLine); if (copyToMatch) { index.newFileName = 'b/' + copyToMatch[1]; + index.isCopy = true; + } + + const newFileModeMatch = (/^new file mode (\d+)/).exec(extLine); + if (newFileModeMatch) { + index.isCreate = true; + index.newMode = newFileModeMatch[1]; + } + const deletedFileModeMatch = (/^deleted file mode (\d+)/).exec(extLine); + if (deletedFileModeMatch) { + index.isDelete = true; + index.oldMode = deletedFileModeMatch[1]; + } + const oldModeMatch = (/^old mode (\d+)/).exec(extLine); + if (oldModeMatch) { + index.oldMode = oldModeMatch[1]; + } + const newModeMatch = (/^new mode (\d+)/).exec(extLine); + if (newModeMatch) { + index.newMode = newModeMatch[1]; } i++; diff --git a/src/patch/reverse.ts b/src/patch/reverse.ts index 65a5abc3..4cd62e44 100644 --- a/src/patch/reverse.ts +++ b/src/patch/reverse.ts @@ -13,7 +13,7 @@ export function reversePatch(structuredPatch: StructuredPatch | StructuredPatch[ return structuredPatch.map(patch => reversePatch(patch)).reverse(); } - return { + const reversed: StructuredPatch = { ...structuredPatch, oldFileName: structuredPatch.newFileName, oldHeader: structuredPatch.newHeader, @@ -33,4 +33,18 @@ export function reversePatch(structuredPatch: StructuredPatch | StructuredPatch[ }; }) }; + + if (structuredPatch.isCopy) { + // Reversing a copy means deleting the file that was created by the copy. + // The "old" file in the reversed patch is the copy destination (which + // exists and should be removed), and the "new" file is /dev/null. + reversed.newFileName = '/dev/null'; + reversed.newHeader = undefined; + delete reversed.isCopy; + delete reversed.isRename; + } + // Reversing a rename is just a rename in the opposite direction; + // isRename stays set and the filenames are already swapped above. + + return reversed; } diff --git a/src/types.ts b/src/types.ts index 38ab9476..ebaf3821 100644 --- a/src/types.ts +++ b/src/types.ts @@ -231,6 +231,44 @@ export interface StructuredPatch { newHeader: string | undefined, hunks: StructuredPatchHunk[], index?: string, + /** + * Set to true when the patch was parsed from a Git-style diff (one with a + * `diff --git` header). Controls whether `formatPatch` emits a `diff --git` + * header (instead of `Index:` / underline headers) when formatting the patch. + */ + isGit?: boolean, + /** + * Set to true when parsing a Git diff that includes `rename from`/`rename to` + * extended headers, indicating the file was renamed (and the old file no + * longer exists). Consumers applying the patch should delete the old file. + */ + isRename?: boolean, + /** + * Set to true when parsing a Git diff that includes `copy from`/`copy to` + * extended headers, indicating the file was copied (and the old file still + * exists). Consumers applying the patch should NOT delete the old file. + */ + isCopy?: boolean, + /** + * Set to true when parsing a Git diff that includes a `new file mode` extended + * header, indicating the file was newly created. + */ + isCreate?: boolean, + /** + * Set to true when parsing a Git diff that includes a `deleted file mode` + * extended header, indicating the file was deleted. + */ + isDelete?: boolean, + /** + * The file mode (e.g. `'100644'`, `'100755'`) of the old file, parsed from + * Git extended headers (`old mode` or `deleted file mode`). + */ + oldMode?: string, + /** + * The file mode (e.g. `'100644'`, `'100755'`) of the new file, parsed from + * Git extended headers (`new mode` or `new file mode`). + */ + newMode?: string, } export interface StructuredPatchHunk { diff --git a/test/patch/create.js b/test/patch/create.js index 4001ff34..b47efe8a 100644 --- a/test/patch/create.js +++ b/test/patch/create.js @@ -1211,6 +1211,160 @@ describe('patch/create', function() { includeFileHeaders: false })).to.equal(expectedOutput); }); + + it('should emit diff --git header for patches with isGit flag', function() { + const patch = { + oldFileName: 'a/file.txt', + newFileName: 'b/file.txt', + oldHeader: '', + newHeader: '', + isGit: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/file.txt b/file.txt\n' + + '--- a/file.txt\t\n' + + '+++ b/file.txt\t\n' + + '@@ -1,1 +1,1 @@\n' + + '-old\n' + + '+new\n' + ); + }); + + it('should emit rename headers for patches with isGit and isRename', function() { + const patch = { + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', + oldHeader: undefined, + newHeader: undefined, + isGit: true, + isRename: true, + hunks: [] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/old.txt b/new.txt\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + ); + }); + + it('should emit copy headers for patches with isGit and isCopy', function() { + const patch = { + oldFileName: 'a/original.txt', + newFileName: 'b/copy.txt', + oldHeader: undefined, + newHeader: undefined, + isGit: true, + isCopy: true, + hunks: [] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/original.txt b/copy.txt\n' + + 'copy from original.txt\n' + + 'copy to copy.txt\n' + ); + }); + + it('should emit new file mode header for patches with isGit and isCreate', function() { + const patch = { + oldFileName: '/dev/null', + newFileName: 'b/newfile.txt', + oldHeader: '', + newHeader: '', + isGit: true, + isCreate: true, + hunks: [{ + oldStart: 1, oldLines: 0, + newStart: 1, newLines: 1, + lines: ['+hello'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git /dev/null b/newfile.txt\n' + + 'new file mode 100644\n' + + '--- /dev/null\t\n' + + '+++ b/newfile.txt\t\n' + + '@@ -0,0 +1,1 @@\n' + + '+hello\n' + ); + }); + + it('should emit deleted file mode header for patches with isGit and isDelete', function() { + const patch = { + oldFileName: 'a/doomed.txt', + newFileName: '/dev/null', + oldHeader: '', + newHeader: '', + isGit: true, + isDelete: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 0, + lines: ['-goodbye'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/doomed.txt /dev/null\n' + + 'deleted file mode 100644\n' + + '--- a/doomed.txt\t\n' + + '+++ /dev/null\t\n' + + '@@ -1,1 +0,0 @@\n' + + '-goodbye\n' + ); + }); + + it('should emit rename headers with file headers when hunks are present', function() { + const patch = { + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', + oldHeader: '', + newHeader: '', + isGit: true, + isRename: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-aaa', '+bbb'] + }] + }; + expect(formatPatch(patch)).to.equal( + 'diff --git a/old.txt b/new.txt\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + + '--- a/old.txt\t\n' + + '+++ b/new.txt\t\n' + + '@@ -1,1 +1,1 @@\n' + + '-aaa\n' + + '+bbb\n' + ); + }); + + it('should round-trip a Git rename patch through formatPatch and parsePatch', function() { + const original = { + oldFileName: 'a/old.txt', + newFileName: 'b/new.txt', + oldHeader: '', + newHeader: '', + isGit: true, + isRename: true, + hunks: [{ + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-aaa', '+bbb'] + }] + }; + const formatted = formatPatch(original); + const parsed = parsePatch(formatted); + expect(parsed).to.have.length(1); + expect(parsed[0].oldFileName).to.equal('a/old.txt'); + expect(parsed[0].newFileName).to.equal('b/new.txt'); + expect(parsed[0].isGit).to.equal(true); + expect(parsed[0].isRename).to.equal(true); + }); }); }); }); diff --git a/test/patch/parse.js b/test/patch/parse.js index aee9b7b1..d431dd76 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -727,6 +727,7 @@ index abc1234..def5678 100644 oldHeader: '', newFileName: 'b/file.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 3, @@ -767,6 +768,7 @@ index 1234567..abcdef0 100644 oldHeader: '', newFileName: 'b/file1.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 3, @@ -784,6 +786,7 @@ index 1234567..abcdef0 100644 oldHeader: '', newFileName: 'b/file2.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 3, @@ -808,7 +811,9 @@ rename to README-2.md`)) .to.eql([{ oldFileName: 'a/README.md', newFileName: 'b/README-2.md', - hunks: [] + isGit: true, + hunks: [], + isRename: true }]); }); @@ -831,6 +836,8 @@ index abc1234..def5678 100644 oldHeader: '', newFileName: 'b/new-name.txt', newHeader: '', + isGit: true, + isRename: true, hunks: [ { oldStart: 1, oldLines: 3, @@ -854,6 +861,9 @@ new mode 100755`)) .to.eql([{ oldFileName: 'a/script.sh', newFileName: 'b/script.sh', + isGit: true, + oldMode: '100644', + newMode: '100755', hunks: [] }]); }); @@ -866,6 +876,7 @@ Binary files a/image.png and b/image.png differ`)) .to.eql([{ oldFileName: 'a/image.png', newFileName: 'b/image.png', + isGit: true, hunks: [] }]); }); @@ -891,6 +902,7 @@ diff --git a/file3.txt b/file3.txt oldHeader: '', newFileName: 'b/file1.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -901,12 +913,14 @@ diff --git a/file3.txt b/file3.txt }, { oldFileName: 'a/image.png', newFileName: 'b/image.png', + isGit: true, hunks: [] }, { oldFileName: 'a/file3.txt', oldHeader: '', newFileName: 'b/file3.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -942,6 +956,7 @@ diff --git a/file3.txt b/file3.txt oldHeader: '', newFileName: 'b/file1.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 3, @@ -957,12 +972,16 @@ diff --git a/file3.txt b/file3.txt }, { oldFileName: 'a/script.sh', newFileName: 'b/script.sh', + isGit: true, + oldMode: '100644', + newMode: '100755', hunks: [] }, { oldFileName: 'a/file3.txt', oldHeader: '', newFileName: 'b/file3.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 2, @@ -986,7 +1005,9 @@ copy to copy.txt`)) .to.eql([{ oldFileName: 'a/original.txt', newFileName: 'b/copy.txt', - hunks: [] + isGit: true, + hunks: [], + isCopy: true }]); }); @@ -1005,6 +1026,9 @@ index 0000000..abc1234 oldHeader: '', newFileName: 'b/newfile.txt', newHeader: '', + isGit: true, + isCreate: true, + newMode: '100644', hunks: [ { oldStart: 1, oldLines: 0, @@ -1015,6 +1039,33 @@ index 0000000..abc1234 }]); }); + it('should parse a diff --git deleted file', function() { + expect(parsePatch( +`diff --git a/old.txt b/old.txt +deleted file mode 100644 +index ce01362..0000000 +--- a/old.txt ++++ /dev/null +@@ -1 +0,0 @@ +-goodbye`)) + .to.eql([{ + oldFileName: 'a/old.txt', + oldHeader: '', + newFileName: '/dev/null', + newHeader: '', + isGit: true, + isDelete: true, + oldMode: '100644', + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 0, + lines: ['-goodbye'] + } + ] + }]); + }); + it('should parse diff --git with quoted filenames containing spaces', function() { expect(parsePatch( `diff --git "a/file with spaces.txt" "b/file with spaces.txt" @@ -1029,6 +1080,7 @@ index abc1234..def5678 100644 oldHeader: '', newFileName: 'b/file with spaces.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -1048,7 +1100,9 @@ rename to new name.txt`)) .to.eql([{ oldFileName: 'a/old name.txt', newFileName: 'b/new name.txt', - hunks: [] + isGit: true, + hunks: [], + isRename: true }]); }); @@ -1067,6 +1121,7 @@ rename to new name.txt`)) oldHeader: '', newFileName: 'b/file.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -1092,12 +1147,15 @@ diff --git a/other.txt b/other.txt .to.eql([{ oldFileName: 'a/old.txt', newFileName: 'b/new.txt', - hunks: [] + isGit: true, + hunks: [], + isRename: true }, { oldFileName: 'a/other.txt', oldHeader: '', newFileName: 'b/other.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -1116,6 +1174,9 @@ new mode 100755`)) .to.eql([{ oldFileName: 'a/file with spaces.txt', newFileName: 'b/file with spaces.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', hunks: [] }]); }); @@ -1131,7 +1192,9 @@ rename to another file with spaces.txt`)) .to.eql([{ oldFileName: 'a/file with spaces.txt', newFileName: 'b/another file with spaces.txt', - hunks: [] + isGit: true, + hunks: [], + isRename: true }]); }); @@ -1146,6 +1209,9 @@ new mode 100755`)) .to.eql([{ oldFileName: 'a/x b/y.txt', newFileName: 'b/x b/y.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', hunks: [] }]); }); @@ -1162,7 +1228,9 @@ rename to x b/new.txt`)) .to.eql([{ oldFileName: 'a/x b/old.txt', newFileName: 'b/x b/new.txt', - hunks: [] + isGit: true, + hunks: [], + isRename: true }]); }); @@ -1181,6 +1249,7 @@ rename to x b/new.txt`)) oldHeader: '', newFileName: 'b/x b/new.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -1209,6 +1278,7 @@ rename to x b/new.txt`)) oldHeader: '', newFileName: 'b/file.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -1232,6 +1302,7 @@ rename to x b/new.txt`)) oldHeader: '', newFileName: 'b/file.txt', newHeader: '', + isGit: true, hunks: [ { oldStart: 1, oldLines: 1, @@ -1250,6 +1321,9 @@ rename to x b/new.txt`)) old mode 100644 new mode 100755`)) .to.eql([{ + isGit: true, + oldMode: '100644', + newMode: '100755', hunks: [] }]); }); diff --git a/test/patch/reverse.js b/test/patch/reverse.js index d151b439..bd8e6213 100644 --- a/test/patch/reverse.js +++ b/test/patch/reverse.js @@ -61,7 +61,7 @@ describe('patch/reverse', function() { '+bar\n' ); expect(formatPatch(reversePatch(patch))).to.equal( - '===================================================================\n' + + 'diff --git b/README.md a/README.md\n' + '--- b/README.md\t\n' + '+++ a/README.md\t\n' + '@@ -1,7 +1,5 @@\n' + @@ -79,7 +79,7 @@ describe('patch/reverse', function() { '-\n' + '-bar\n' + '\n' + - '===================================================================\n' + + 'diff --git b/CONTRIBUTING.md a/CONTRIBUTING.md\n' + '--- b/CONTRIBUTING.md\t\n' + '+++ a/CONTRIBUTING.md\t\n' + '@@ -2,8 +2,6 @@\n' + @@ -93,5 +93,90 @@ describe('patch/reverse', function() { ' Generally we like to see pull requests that\n' ); }); + + it('should reverse a rename patch into a rename in the opposite direction', function() { + const patch = parsePatch( + 'diff --git a/old.txt b/new.txt\n' + + 'similarity index 85%\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + + '--- a/old.txt\n' + + '+++ b/new.txt\n' + + '@@ -1,3 +1,3 @@\n' + + ' line1\n' + + '-line2\n' + + '+line2modified\n' + + ' line3\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + expect(reversed[0].oldFileName).to.equal('b/new.txt'); + expect(reversed[0].newFileName).to.equal('a/old.txt'); + expect(reversed[0].isRename).to.equal(true); + expect(reversed[0].isCopy).to.equal(undefined); + expect(reversed[0].hunks).to.have.length(1); + expect(reversed[0].hunks[0].lines).to.eql([ + ' line1', + '+line2', + '-line2modified', + ' line3' + ]); + }); + + it('should reverse a copy patch into a deletion', function() { + const patch = parsePatch( + 'diff --git a/original.txt b/copy.txt\n' + + 'similarity index 85%\n' + + 'copy from original.txt\n' + + 'copy to copy.txt\n' + + '--- a/original.txt\n' + + '+++ b/copy.txt\n' + + '@@ -1,3 +1,3 @@\n' + + ' line1\n' + + '-line2\n' + + '+line2modified\n' + + ' line3\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + // Reversing a copy means deleting the copy destination + expect(reversed[0].oldFileName).to.equal('b/copy.txt'); + expect(reversed[0].newFileName).to.equal('/dev/null'); + expect(reversed[0].newHeader).to.equal(undefined); + expect(reversed[0].isRename).to.equal(undefined); + expect(reversed[0].isCopy).to.equal(undefined); + }); + + it('should reverse a hunk-less copy into a deletion', function() { + const patch = parsePatch( + 'diff --git a/original.txt b/copy.txt\n' + + 'similarity index 100%\n' + + 'copy from original.txt\n' + + 'copy to copy.txt\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + expect(reversed[0].oldFileName).to.equal('b/copy.txt'); + expect(reversed[0].newFileName).to.equal('/dev/null'); + expect(reversed[0].isRename).to.equal(undefined); + expect(reversed[0].isCopy).to.equal(undefined); + expect(reversed[0].hunks).to.eql([]); + }); + + it('should reverse a hunk-less rename', function() { + const patch = parsePatch( + 'diff --git a/old.txt b/new.txt\n' + + 'similarity index 100%\n' + + 'rename from old.txt\n' + + 'rename to new.txt\n' + ); + const reversed = reversePatch(patch); + expect(reversed).to.have.length(1); + expect(reversed[0].oldFileName).to.equal('b/new.txt'); + expect(reversed[0].newFileName).to.equal('a/old.txt'); + expect(reversed[0].isRename).to.equal(true); + expect(reversed[0].isCopy).to.equal(undefined); + expect(reversed[0].hunks).to.eql([]); + }); }); }); From e56667ae84fb142e402a7a53e4e3fd48c4f158a5 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 13:16:33 +0000 Subject: [PATCH 17/27] fix --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 3a721967..bfa86b04 100644 --- a/README.md +++ b/README.md @@ -373,8 +373,8 @@ const DELETE = Symbol('delete'); const pendingWrites = new Map(); // filePath → content (or DELETE sentinel) applyPatches(patch, { loadFile: (patch, callback) => { - // Git uses /dev/null as the old name when a file is newly created - if (patch.oldFileName === '/dev/null') { + if (patch.isCreate) { + // Newly created file — no old content to load callback(undefined, ''); return; } @@ -393,8 +393,7 @@ applyPatches(patch, { } const oldPath = patch.oldFileName.replace(/^a\//, ''); const newPath = patch.newFileName.replace(/^b\//, ''); - // Git uses /dev/null as the new name when a file is deleted - if (patch.newFileName === '/dev/null') { + if (patch.isDelete) { if (!pendingWrites.has(oldPath)) { pendingWrites.set(oldPath, DELETE); } From 774bfcb6e0742258d120af4009f4f8d54174f374 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 13:21:48 +0000 Subject: [PATCH 18/27] fixes --- README.md | 13 +++++---- test/patch/readme-rename-example.js | 41 +++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bfa86b04..ea682379 100644 --- a/README.md +++ b/README.md @@ -370,7 +370,7 @@ applyPatches(patch, { const {applyPatches} = require('diff'); const patch = fs.readFileSync("git-diff.patch").toString(); const DELETE = Symbol('delete'); -const pendingWrites = new Map(); // filePath → content (or DELETE sentinel) +const pendingWrites = new Map(); // filePath → {content, mode} or DELETE sentinel applyPatches(patch, { loadFile: (patch, callback) => { if (patch.isCreate) { @@ -398,7 +398,7 @@ applyPatches(patch, { pendingWrites.set(oldPath, DELETE); } } else { - pendingWrites.set(newPath, patchedContent); + pendingWrites.set(newPath, {content: patchedContent, mode: patch.newMode}); // For renames, delete the old file (but not for copies, // where the old file should be kept) if (patch.isRename && !pendingWrites.has(oldPath)) { @@ -412,11 +412,14 @@ applyPatches(patch, { console.log("Failed with error:", err); return; } - for (const [filePath, content] of pendingWrites) { - if (content === DELETE) { + for (const [filePath, entry] of pendingWrites) { + if (entry === DELETE) { fs.unlinkSync(filePath); } else { - fs.writeFileSync(filePath, content); + fs.writeFileSync(filePath, entry.content); + if (entry.mode) { + fs.chmodSync(filePath, parseInt(entry.mode, 8) & 0o777); + } } } } diff --git a/test/patch/readme-rename-example.js b/test/patch/readme-rename-example.js index 4535f7ca..833b255c 100644 --- a/test/patch/readme-rename-example.js +++ b/test/patch/readme-rename-example.js @@ -196,4 +196,45 @@ index 0000000..fa49b07 expect(fs.readFileSync('brand-new.txt', 'utf-8')).to.equal('hello world\n'); }); + + it('should create a new executable file with correct mode', function() { + const patch = +`diff --git a/run.sh b/run.sh +new file mode 100755 +index 0000000..abc1234 +--- /dev/null ++++ b/run.sh +@@ -0,0 +1,2 @@ ++#!/bin/bash ++echo hello +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('run.sh', 'utf-8')).to.equal('#!/bin/bash\necho hello\n'); + const mode = fs.statSync('run.sh').mode & 0o777; + expect(mode).to.equal(0o755); + }); + + it('should set the mode when a file is modified with a mode change', function() { + fs.writeFileSync('script.sh', 'echo old\n'); + fs.chmodSync('script.sh', 0o644); + + const patch = +`diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 +--- a/script.sh ++++ b/script.sh +@@ -1 +1 @@ +-echo old ++echo new +`; + + getReadmeExampleFn()(applyPatches, patch, fs, path); + + expect(fs.readFileSync('script.sh', 'utf-8')).to.equal('echo new\n'); + const mode = fs.statSync('script.sh').mode & 0o777; + expect(mode).to.equal(0o755); + }); }); From 426849539912cade37edda10bf90b58184f96086 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 14:08:07 +0000 Subject: [PATCH 19/27] active reading --- src/patch/parse.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index d0878134..12a106a9 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -69,10 +69,10 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { seenDiffHeader = true; index.isGit = true; - // Parse the old and new filenames from the diff --git header and + // Parse the old and new filenames from the `diff --git` header and // tentatively set oldFileName and newFileName from them. These may - // be overridden below by rename from / rename to or copy from / - // copy to extended headers, or by --- and +++ lines. But for Git + // be overridden below by `rename from` / `rename to` or `copy from` / + // `copy to` extended headers, or by --- and +++ lines. But for Git // diffs that lack all of those (e.g. mode-only changes, binary // file changes without rename), these are the only filenames we // get. @@ -86,8 +86,8 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { index.newFileName = paths.newFileName; } - // Consume Git extended headers (old mode, new mode, rename from, - // rename to, similarity index, index, Binary files ... differ, + // Consume Git extended headers (`old mode`, `new mode`, `rename from`, + // `rename to`, `similarity index`, `index`, `Binary files ... differ`, // etc.) i++; while (i < diffstr.length) { @@ -99,9 +99,9 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { break; } - // Parse rename from / rename to lines - these give us + // Parse `rename from` / `rename to` lines - these give us // unambiguous filenames. These lines don't include the - // a/ and b/ prefixes that appear in the diff --git header + // a/ and b/ prefixes that appear in the `diff --git` header // and --- / +++ lines, so we add them for consistency. const renameFromMatch = (/^rename from (.*)/).exec(extLine); if (renameFromMatch) { From ed4e4d6e47b5da7263ecff71a0d93f60ee78dc90 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 14:11:40 +0000 Subject: [PATCH 20/27] fix --- src/patch/parse.ts | 25 +++++++++++++++++++++---- test/patch/parse.js | 30 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 12a106a9..8f4898c6 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -103,26 +103,29 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // unambiguous filenames. These lines don't include the // a/ and b/ prefixes that appear in the `diff --git` header // and --- / +++ lines, so we add them for consistency. + // Git C-style quotes filenames containing special characters + // (tabs, newlines, backslashes, double quotes), so we must + // unquote them when present. const renameFromMatch = (/^rename from (.*)/).exec(extLine); if (renameFromMatch) { - index.oldFileName = 'a/' + renameFromMatch[1]; + index.oldFileName = 'a/' + unquoteIfQuoted(renameFromMatch[1]); index.isRename = true; } const renameToMatch = (/^rename to (.*)/).exec(extLine); if (renameToMatch) { - index.newFileName = 'b/' + renameToMatch[1]; + index.newFileName = 'b/' + unquoteIfQuoted(renameToMatch[1]); index.isRename = true; } // Parse copy from / copy to lines similarly const copyFromMatch = (/^copy from (.*)/).exec(extLine); if (copyFromMatch) { - index.oldFileName = 'a/' + copyFromMatch[1]; + index.oldFileName = 'a/' + unquoteIfQuoted(copyFromMatch[1]); index.isCopy = true; } const copyToMatch = (/^copy to (.*)/).exec(extLine); if (copyToMatch) { - index.newFileName = 'b/' + copyToMatch[1]; + index.newFileName = 'b/' + unquoteIfQuoted(copyToMatch[1]); index.isCopy = true; } @@ -301,6 +304,20 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { return null; } + /** + * If `s` starts with a double quote, unquotes it using C-style escape + * rules (as used by Git). Otherwise returns `s` as-is. + */ + function unquoteIfQuoted(s: string): string { + if (s.startsWith('"')) { + const parsed = parseQuotedFileName(s); + if (parsed) { + return parsed.fileName; + } + } + return s; + } + /** * Parses a C-style quoted filename as used by git. * Returns the unescaped filename and the raw length consumed (including quotes). diff --git a/test/patch/parse.js b/test/patch/parse.js index d431dd76..f63c5c0f 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -1106,6 +1106,36 @@ rename to new name.txt`)) }]); }); + it('should unquote C-style quoted filenames in rename from/to', function() { + expect(parsePatch( +`diff --git "a/file\\twith\\ttabs.txt" b/normal.txt +similarity index 100% +rename from "file\\twith\\ttabs.txt" +rename to normal.txt`)) + .to.eql([{ + oldFileName: 'a/file\twith\ttabs.txt', + newFileName: 'b/normal.txt', + isGit: true, + hunks: [], + isRename: true + }]); + }); + + it('should unquote C-style quoted filenames in copy from/to', function() { + expect(parsePatch( +`diff --git a/original.txt "b/copy\\nwith\\nnewlines.txt" +similarity index 100% +copy from original.txt +copy to "copy\\nwith\\nnewlines.txt"`)) + .to.eql([{ + oldFileName: 'a/original.txt', + newFileName: 'b/copy\nwith\nnewlines.txt', + isGit: true, + hunks: [], + isCopy: true + }]); + }); + it('should let --- and +++ lines override filenames from diff --git header', function() { // When --- and +++ are present, they should take precedence over // the filenames parsed from the diff --git header line. From 0b85103e55a6ac646b4e1054885b86d4468a6ad1 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 14:33:17 +0000 Subject: [PATCH 21/27] Simplifying refactor --- src/patch/parse.ts | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 8f4898c6..793e4ac8 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -39,6 +39,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { function parseIndex() { const index: Partial = {}; + index.hunks = []; list.push(index); // Parse diff metadata @@ -64,7 +65,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { if (isGitDiffHeader(line)) { if (seenDiffHeader) { - break; + return; } seenDiffHeader = true; index.isGit = true; @@ -153,7 +154,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { continue; } else if (isDiffHeader(line)) { if (seenDiffHeader) { - break; + return; } seenDiffHeader = true; @@ -186,9 +187,6 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { parseFileHeader(index); parseFileHeader(index); - // Parse hunks - index.hunks = []; - while (i < diffstr.length) { const line = diffstr[i]; if (isDiffHeader(line) || isFileHeader(line) || (/^===================================================================/).test(line)) { From cc8f5d62c79bdb938db9b80e9905835738c744ba Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 15:11:16 +0000 Subject: [PATCH 22/27] Improve a comment --- src/patch/parse.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 793e4ac8..00d4bd7b 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -207,8 +207,8 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { * The format is: * diff --git a/ b/ * - * When filenames contain characters like newlines, tabs, backslashes, or - * double quotes, Git quotes them with C-style escaping: + * When filenames contain special characters (including newlines, tabs, + * backslashes, or double quotes), Git quotes them with C-style escaping: * diff --git "a/file\twith\ttabs.txt" "b/file\twith\ttabs.txt" * * When filenames don't contain special characters and the old and new names From 328e76d90455a7f61e27ac7285f4112f0530f8c4 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 15:30:43 +0000 Subject: [PATCH 23/27] Improve comment --- src/patch/parse.ts | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 00d4bd7b..d6210594 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -214,10 +214,24 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { * When filenames don't contain special characters and the old and new names * are the same, we can unambiguously split on ` b/` by finding where the * two halves (including their a/ and b/ prefixes) yield matching bare names. - * When they differ AND contain spaces AND aren't quoted, parsing is - * inherently ambiguous, so we do our best. * - * Returns null if the header can't be parsed — e.g. if a quoted filename + * A pathological case exists in which we cannot reliably determine the paths + * from the `git --diff` header. This case is when the following are true: + * - the old and new file paths differ + * - they are both unquoted (i.e. contain no special characters) + * - at least one of the underlying file paths includes the substring ` b/` + * In this scenario, we do not know which occurrence of ` b/` indicates the + * start of the new file path, so the header is inherently ambiguous. We thus + * select a possible interpretation arbitrarily and return that. + * + * Fortunately, this ambiguity should never matter, because in any patch + * genuinely output by Git in which this pathological scenario occurs, there + * must also be `rename from`/`rename to` or `copy from`/`copy to` extended + * headers present below the `git --diff` header. `parseIndex` will parse + * THOSE headers, from which we CAN unambiguously determine the filenames, + * and will discard the result returned by this function. + * + * Returns null if the header can't be parsed at all — e.g. a quoted filename * has an unterminated quote, or if the unquoted header doesn't match the * expected `a/... b/...` format. In that case, the caller (parseIndex) * skips setting oldFileName/newFileName from this header, but they may From bf199ff73ad02beeefe7f068d7ab4e1240dfbd22 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 15:30:49 +0000 Subject: [PATCH 24/27] Claude-generated tests --- test/patch/parse.js | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/test/patch/parse.js b/test/patch/parse.js index f63c5c0f..b8059fec 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -1066,6 +1066,36 @@ index ce01362..0000000 }]); }); + it('should parse a diff --git empty file creation (no --- / +++ or hunks)', function() { + expect(parsePatch( +`diff --git a/empty.txt b/empty.txt +new file mode 100644 +index 0000000..e69de29`)) + .to.eql([{ + oldFileName: 'a/empty.txt', + newFileName: 'b/empty.txt', + isGit: true, + isCreate: true, + newMode: '100644', + hunks: [] + }]); + }); + + it('should parse a diff --git empty file deletion (no --- / +++ or hunks)', function() { + expect(parsePatch( +`diff --git a/empty.txt b/empty.txt +deleted file mode 100644 +index e69de29..0000000`)) + .to.eql([{ + oldFileName: 'a/empty.txt', + newFileName: 'b/empty.txt', + isGit: true, + isDelete: true, + oldMode: '100644', + hunks: [] + }]); + }); + it('should parse diff --git with quoted filenames containing spaces', function() { expect(parsePatch( `diff --git "a/file with spaces.txt" "b/file with spaces.txt" From 16b917dc85111b047173e5bfd07c382804e4e600 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 15:42:58 +0000 Subject: [PATCH 25/27] Simplify algo --- src/patch/parse.ts | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index d6210594..5c05562d 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -280,34 +280,28 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // Unquoted paths. Try to find the split point. // The format is: a/ b/ // - // Strategy: if the path starts with a/ and contains " b/", we try - // to find where to split. When old and new names are the same, there's - // a unique split where both halves (after stripping their respective - // a/ and b/ prefixes) match. When they differ, we try the last split. - // The returned filenames include the a/ and b/ prefixes. + // Note the potential ambiguity caused by the possibility of the file paths + // themselves containing the substring ` b/`, plus the pathological case + // described in the comment above. + // + // Strategy: find all occurrences of " b/" and split on the middle + // one. When old and new names are the same (which is the only case where + // we can't rely on extended headers later in the patch so HAVE to get + // this right), this will always be the correct split. if (rest.startsWith('a/')) { - // Try to find a " b/" separator. If the filename itself contains " b/", - // there could be multiple candidates. We try each one and pick the - // split where both halves look like valid prefixed paths. + const splits = []; let searchFrom = 2; // skip past initial "a/" - let bestSplit = -1; while (true) { const idx = rest.indexOf(' b/', searchFrom); if (idx === -1) { break; } - // Candidate: old = rest[0..idx), new = rest[idx+1..) - const candidateOldBare = rest.substring(2, idx); // strip "a/" for comparison - const candidateNewBare = rest.substring(idx + 3); // strip " b/" for comparison - if (candidateOldBare === candidateNewBare) { - // Perfect match - unambiguous - return { oldFileName: rest.substring(0, idx), newFileName: rest.substring(idx + 1) }; - } - bestSplit = idx; + splits.push(idx); searchFrom = idx + 3; } - if (bestSplit !== -1) { + if (splits.length > 0) { + const mid = splits[Math.floor(splits.length / 2)]; return { - oldFileName: rest.substring(0, bestSplit), - newFileName: rest.substring(bestSplit + 1) + oldFileName: rest.substring(0, mid), + newFileName: rest.substring(mid + 1) }; } } From c48eb993bcd41530ccb9ba0afd23889e33d597b1 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 15:46:09 +0000 Subject: [PATCH 26/27] Slight simplification --- src/patch/parse.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/patch/parse.ts b/src/patch/parse.ts index 5c05562d..ed8515b4 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -290,12 +290,11 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { // this right), this will always be the correct split. if (rest.startsWith('a/')) { const splits = []; - let searchFrom = 2; // skip past initial "a/" + let idx = 0; while (true) { - const idx = rest.indexOf(' b/', searchFrom); + idx = rest.indexOf(' b/', idx + 1); if (idx === -1) { break; } splits.push(idx); - searchFrom = idx + 3; } if (splits.length > 0) { const mid = splits[Math.floor(splits.length / 2)]; From ffb87bc1df5c87667a1f418f3f00736509b38bb0 Mon Sep 17 00:00:00 2001 From: Mark Amery Date: Tue, 10 Mar 2026 16:55:42 +0000 Subject: [PATCH 27/27] latest changes, manual todos --- release-notes.md | 8 +++++ src/patch/parse.ts | 36 +++++++++++++++++-- test/patch/parse.js | 86 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 127 insertions(+), 3 deletions(-) diff --git a/release-notes.md b/release-notes.md index d7cb4d14..580ebc09 100644 --- a/release-notes.md +++ b/release-notes.md @@ -2,6 +2,14 @@ ## 9.0.0 (prerelease) +TODO: +- Tidy up AI slop below the --- +- Note support for parsing quoted filenames in +++ and --- headers (even outside Git patches as `diff -u` outputs these) +- Note fixes to #640 and #648 + +--- + + - **`parsePatch` now robustly handles Git-style diffs.** Previously, `parsePatch` had inconsistent regex usage that caused several bugs when parsing `diff --git` output: * Multi-file Git diffs containing hunk-less entries (e.g. mode-only changes, binary files, rename-only entries without content changes) could cause file entries to be merged together or lost entirely. * Git extended headers (`rename from`/`rename to`, `copy from`/`copy to`, `old mode`/`new mode`, `index`, etc.) were not parsed and could cause parse errors. diff --git a/src/patch/parse.ts b/src/patch/parse.ts index ed8515b4..eb7a4aea 100755 --- a/src/patch/parse.ts +++ b/src/patch/parse.ts @@ -338,10 +338,38 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { if (s[j] === '\\' && j + 1 < s.length) { j++; switch (s[j]) { + case 'a': result += '\x07'; break; + case 'b': result += '\b'; break; + case 'f': result += '\f'; break; case 'n': result += '\n'; break; + case 'r': result += '\r'; break; case 't': result += '\t'; break; + case 'v': result += '\v'; break; case '\\': result += '\\'; break; case '"': result += '"'; break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': { + // Octal escapes in Git represent raw bytes, not Unicode + // code points. Multi-byte UTF-8 characters are emitted as + // multiple consecutive octal escapes (e.g. 🎉 = + // \360\237\216\211). Collect all consecutive octal-escaped + // bytes and decode them together as UTF-8. + // Validate that we have a full 3-digit octal escape + if (j + 2 >= s.length || s[j + 1] < '0' || s[j + 1] > '7' || s[j + 2] < '0' || s[j + 2] > '7') { + return null; + } + const bytes = [parseInt(s.substring(j, j + 3), 8)]; + j += 3; + while (s[j] === '\\' && s[j + 1] >= '0' && s[j + 1] <= '7') { + if (j + 3 >= s.length || s[j + 2] < '0' || s[j + 2] > '7' || s[j + 3] < '0' || s[j + 3] > '7') { + return null; + } + bytes.push(parseInt(s.substring(j + 1, j + 4), 8)); + j += 4; + } + result += new TextDecoder('utf-8').decode(new Uint8Array(bytes)); + continue; // j already points at the next character + } default: result += '\\' + s[j]; break; } } else { @@ -361,9 +389,11 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { const prefix = fileHeaderMatch[1], data = diffstr[i].substring(3).trim().split('\t', 2), header = (data[1] || '').trim(); - let fileName = data[0].replace(/\\\\/g, '\\'); - if (fileName.startsWith('"') && fileName.endsWith('"')) { - fileName = fileName.substr(1, fileName.length - 2); + let fileName = data[0]; + if (fileName.startsWith('"')) { + fileName = unquoteIfQuoted(fileName); + } else { + fileName = fileName.replace(/\\\\/g, '\\'); } if (prefix === '---') { index.oldFileName = fileName; diff --git a/test/patch/parse.js b/test/patch/parse.js index b8059fec..f5c86616 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -1151,6 +1151,65 @@ rename to normal.txt`)) }]); }); + it('should handle all Git C-style escape sequences in quoted filenames', function() { + expect(parsePatch( +`diff --git "a/\\a\\b\\f\\r\\v\\001file.txt" "b/\\a\\b\\f\\r\\v\\001file.txt" +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'a/\x07\b\f\r\v\x01file.txt', + newFileName: 'b/\x07\b\f\r\v\x01file.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); + + it('should handle multi-byte UTF-8 octal escapes in quoted filenames (emoji)', function() { + // 🎉 is U+1F389, UTF-8 bytes F0 9F 8E 89 = octal 360 237 216 211 + expect(parsePatch( +`diff --git "a/file\\360\\237\\216\\211.txt" "b/file\\360\\237\\216\\211.txt" +new file mode 100644 +index 0000000..ce01362 +--- /dev/null ++++ "b/file\\360\\237\\216\\211.txt" +@@ -0,0 +1 @@ ++hello`)) + .to.eql([{ + oldFileName: '/dev/null', + oldHeader: '', + newFileName: 'b/file🎉.txt', + newHeader: '', + isGit: true, + isCreate: true, + newMode: '100644', + hunks: [ + { + oldStart: 1, oldLines: 0, + newStart: 1, newLines: 1, + lines: ['+hello'] + } + ] + }]); + }); + + it('should handle multi-byte UTF-8 octal escapes in quoted filenames (accented latin)', function() { + // é is U+00E9, UTF-8 bytes C3 A9 = octal 303 251 + expect(parsePatch( +`diff --git "a/caf\\303\\251.txt" "b/caf\\303\\251.txt" +old mode 100644 +new mode 100755`)) + .to.eql([{ + oldFileName: 'a/café.txt', + newFileName: 'b/café.txt', + isGit: true, + oldMode: '100644', + newMode: '100755', + hunks: [] + }]); + }); + it('should unquote C-style quoted filenames in copy from/to', function() { expect(parsePatch( `diff --git a/original.txt "b/copy\\nwith\\nnewlines.txt" @@ -1373,6 +1432,33 @@ rename to x b/new.txt`)) }]); }); + it('should handle an incomplete octal escape in a quoted filename', function() { + // The quoted filename has a truncated octal escape (\36 instead of \360). + // parseQuotedFileName should return null, so parseGitDiffHeader returns + // null and we fall back to --- / +++ lines for filenames. + expect(parsePatch( +`diff --git "a/file\\36" "b/file\\36" +--- a/file.txt ++++ b/file.txt +@@ -1 +1 @@ +-old ++new`)) + .to.eql([{ + oldFileName: 'a/file.txt', + oldHeader: '', + newFileName: 'b/file.txt', + newHeader: '', + isGit: true, + hunks: [ + { + oldStart: 1, oldLines: 1, + newStart: 1, newLines: 1, + lines: ['-old', '+new'] + } + ] + }]); + }); + it('should handle an unparseable diff --git header with no --- or +++ fallback', function() { // When both the diff --git header is unparseable AND there are no // --- / +++ lines, filenames remain undefined.