/**
 * Single-character C escapes that may appear after a backslash inside a
 * quoted Git path, mapped to the character each one denotes.
 */
const GIT_SIMPLE_ESCAPES: ReadonlyMap<string, string> = new Map<string, string>([
  ['n', '\n'],
  ['t', '\t'],
  ['r', '\r'],
  ['b', '\b'],
  ['f', '\f'],
  ['a', '\u0007'],
  ['v', '\u000b'],
  ['\\', '\\'],
  ['"', '"']
]);

/** Matches exactly two hexadecimal digits (the payload of a `\xHH` escape). */
const GIT_HEX_BYTE = /^[0-9a-fA-F]{2}$/;

/**
 * Decode a run of bytes collected from consecutive `\ooo` / `\xHH` escapes.
 *
 * Git escapes the raw bytes of a path, so a non-ASCII character shows up as
 * several escapes in a row (e.g. `\303\251` for "é"). The run is decoded as
 * UTF-8; if it is not valid UTF-8, each byte is mapped to the code unit of the
 * same value so that no information is lost.
 *
 * @param bytes - Byte values (0-255) in the order they were escaped.
 * @returns The decoded text.
 */
function decodeGitEscapedBytes(bytes: readonly number[]): string {
  const byteForByte = String.fromCharCode(...bytes);
  if (bytes.every((byte) => byte < 0x80)) {
    // Pure ASCII: UTF-8 and the byte-for-byte mapping agree.
    return byteForByte;
  }
  try {
    // Percent-encode the bytes and let the runtime's strict UTF-8 decoder
    // validate and decode them; it throws URIError on malformed sequences.
    const percentEncoded = bytes
      .map((byte) => (byte < 0x10 ? '%0' : '%') + byte.toString(16))
      .join('');
    return decodeURIComponent(percentEncoded);
  } catch {
    return byteForByte;
  }
}

/**
 * Parse a single Git path token starting at the provided index.
 *
 * Leading spaces are skipped. An unquoted token runs up to the next space. A
 * token starting with `"` is treated as a C-style quoted path, as emitted by
 * Git when a path contains special characters: simple escapes (`\n`, `\t`,
 * `\"`, ...), octal escapes of one to three digits and `\xHH` hex escapes are
 * decoded. Consecutive octal/hex escapes are decoded together as UTF-8 bytes.
 * An unknown escape `\q` yields `q`; `\x` without two hex digits yields `x`.
 *
 * @param input - Text containing the token.
 * @param startIndex - Index at which to start scanning.
 * @returns The decoded token and the index just past it, or `null` when there
 *   is no token or a quoted token is not terminated.
 */
function parseGitPathToken(input: string, startIndex: number): { value: string; nextIndex: number } | null {
  let i = startIndex;
  while (i < input.length && input[i] === ' ') {
    i++;
  }
  if (i >= input.length) {
    return null;
  }

  if (input[i] !== '"') {
    const start = i;
    while (i < input.length && input[i] !== ' ') {
      i++;
    }
    return { value: input.substring(start, i), nextIndex: i };
  }

  i++; // skip the opening quote
  let value = '';
  let pendingBytes: number[] = [];
  // Escaped bytes are buffered so that a multi-byte UTF-8 sequence is decoded
  // as a whole; anything that is not a byte escape ends the run.
  const flushBytes = (): void => {
    if (pendingBytes.length) {
      value += decodeGitEscapedBytes(pendingBytes);
      pendingBytes = [];
    }
  };

  while (i < input.length) {
    const ch = input[i];
    if (ch === '"') {
      flushBytes();
      return { value, nextIndex: i + 1 };
    }
    if (ch !== '\\') {
      flushBytes();
      value += ch;
      i++;
      continue;
    }

    i++; // move onto the character after the backslash
    if (i >= input.length) {
      return null;
    }
    const esc = input[i];

    if (esc >= '0' && esc <= '7') {
      let octal = esc;
      while (octal.length < 3 && i + 1 < input.length && input[i + 1] >= '0' && input[i + 1] <= '7') {
        i++;
        octal += input[i];
      }
      const code = parseInt(octal, 8);
      if (code <= 0xff) {
        pendingBytes.push(code);
      } else {
        // Three octal digits can exceed a byte (e.g. \777); Git never emits
        // that, so keep it as a single code unit rather than as a byte.
        flushBytes();
        value += String.fromCharCode(code);
      }
      i++;
      continue;
    }

    if (esc === 'x') {
      const hex = input.substring(i + 1, i + 3);
      if (GIT_HEX_BYTE.test(hex)) {
        pendingBytes.push(parseInt(hex, 16));
        i += 3;
      } else {
        flushBytes();
        value += 'x';
        i++;
      }
      continue;
    }

    flushBytes();
    const simple = GIT_SIMPLE_ESCAPES.get(esc);
    value += simple === undefined ? esc : simple;
    i++;
  }

  // Ran off the end of the input without seeing the closing quote.
  return null;
}

/**
 * Parse a fixed number of Git path tokens from a string.
 *
 * @param input - Text containing space-separated (optionally quoted) tokens.
 * @param count - Number of tokens that must be present.
 * @returns The decoded tokens in order, or `null` if fewer than `count`
 *   tokens could be parsed.
 */
function parseGitPathTokens(input: string, count: number): string[] | null {
  const paths: string[] = [];
  let index = 0;
  while (paths.length < count) {
    const token = parseGitPathToken(input, index);
    if (!token) {
      return null;
    }
    paths.push(token.value);
    index = token.nextIndex;
  }
  return paths;
}

/**
 * Parse unquoted Git diff paths separated by the ` b/` marker.
 *
 * Unquoted paths may themselves contain spaces (and even ` b/`), so the split
 * point can be ambiguous. If multiple splits are possible, the last split
 * where the old and new paths match (ignoring the `a/` / `b/` prefixes) is
 * preferred; if no split matches, the last possible split is used.
 *
 * @param input - The text after `diff --git `, expected to start with `a/`.
 * @returns The two paths with their `a/` / `b/` prefixes still attached, or
 *   `null` when the input does not start with `a/` or has no ` b/` marker.
 */
function parseGitUnquotedDiffPaths(input: string): { oldPath: string; newPath: string } | null {
  if (!input.startsWith('a/')) {
    return null;
  }
  let lastSplit: { oldPath: string; newPath: string } | null = null;
  let lastMatchingSplit: { oldPath: string; newPath: string } | null = null;

  // Every ` b/` occurrence is a valid split: the left side keeps the leading
  // `a/` of the input and the right side starts at the `b/` of the marker.
  let separatorIndex = input.indexOf(' b/');
  while (separatorIndex !== -1) {
    const split = {
      oldPath: input.substring(0, separatorIndex),
      newPath: input.substring(separatorIndex + 1)
    };
    lastSplit = split;
    if (split.oldPath.substring(2) === split.newPath.substring(2)) {
      lastMatchingSplit = split;
    }
    separatorIndex = input.indexOf(' b/', separatorIndex + 1);
  }
  return lastMatchingSplit || lastSplit;
}

/**
 * Split the path portion of a `diff --git` header into its two paths,
 * handling every combination of quoted and unquoted old/new path.
 *
 * @param rawPaths - The header text after `diff --git `.
 * @returns The decoded paths, prefixes still attached, or `null` if they
 *   cannot be determined.
 */
function splitGitDiffHeaderPaths(rawPaths: string): { oldPath: string; newPath: string } | null {
  if (rawPaths.startsWith('"')) {
    const oldToken = parseGitPathToken(rawPaths, 0);
    if (!oldToken) {
      return null;
    }
    const remainder = rawPaths.substring(oldToken.nextIndex).trimStart();
    if (!remainder.length) {
      return null;
    }
    if (!remainder.startsWith('"')) {
      // Unquoted new path: it may contain spaces, so take the rest verbatim.
      return { oldPath: oldToken.value, newPath: remainder };
    }
    const newToken = parseGitPathToken(remainder, 0);
    return newToken ? { oldPath: oldToken.value, newPath: newToken.value } : null;
  }

  // Unquoted old path followed by a quoted new path. Git quotes any path that
  // contains `"`, so within an unquoted old path the first ` "` can only be
  // the start of the new path. This must be tried before looking for ` b/`,
  // which could otherwise match inside the quoted path or split an old path
  // that contains spaces.
  const quoteIndex = rawPaths.indexOf(' "');
  if (quoteIndex > 0 && rawPaths[0] !== ' ') {
    const newToken = parseGitPathToken(rawPaths, quoteIndex + 1);
    if (newToken && rawPaths.substring(newToken.nextIndex).trim() === '') {
      return { oldPath: rawPaths.substring(0, quoteIndex), newPath: newToken.value };
    }
  }

  const unquoted = parseGitUnquotedDiffPaths(rawPaths);
  if (unquoted) {
    return unquoted;
  }

  // Last resort: treat the header as two whitespace-separated tokens.
  const tokens = parseGitPathTokens(rawPaths, 2);
  return tokens ? { oldPath: tokens[0], newPath: tokens[1] } : null;
}

/**
 * Parse a Git `diff --git a/... b/...` header into old/new file names.
 *
 * Either path may be C-quoted; unquoted paths may contain spaces. A leading
 * `a/` on the old name and `b/` on the new name are removed.
 *
 * @param line - A single line of the patch.
 * @returns The file names, or `null` if the line is not a `diff --git`
 *   header or its paths cannot be parsed.
 */
function parseGitDiffHeader(line: string): { oldFileName?: string; newFileName?: string } | null {
  const prefix = 'diff --git ';
  if (!line.startsWith(prefix)) {
    return null;
  }
  const paths = splitGitDiffHeaderPaths(line.substring(prefix.length));
  if (!paths) {
    return null;
  }
  const oldFileName = paths.oldPath.startsWith('a/') ? paths.oldPath.substring(2) : paths.oldPath;
  const newFileName = paths.newPath.startsWith('b/') ? paths.newPath.substring(2) : paths.newPath;
  return { oldFileName, newFileName };
}

/**
 * Parse extended Git headers like `rename from`, `rename to`, `copy from`, and `copy to`.
 *
 * @param line - A single line of the patch.
 * @param prefix - The header keyword including its trailing space, e.g.
 *   `'rename from '`.
 * @returns The path that follows the prefix (decoded if it is C-quoted, taken
 *   verbatim otherwise), or `null` if the line does not start with `prefix`,
 *   has no path, or has a malformed quoted path.
 */
function parseGitExtendedPath(line: string, prefix: string): string | null {
  if (!line.startsWith(prefix)) {
    return null;
  }
  const remainder = line.substring(prefix.length).trimStart();
  if (!remainder.length) {
    return null;
  }
  if (!remainder.startsWith('"')) {
    return remainder;
  }
  const token = parseGitPathToken(remainder, 0);
  return token ? token.value : null;
}
* @@ -12,6 +239,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { function parseIndex() { const index: Partial = {}; + let seenGitHeader = false; list.push(index); // Parse diff metadata @@ -23,6 +251,56 @@ export function parsePatch(uniDiff: string): StructuredPatch[] { break; } + const gitHeader = parseGitDiffHeader(line); + if (gitHeader) { + if (seenGitHeader || index.index || index.oldFileName || index.newFileName) { + break; + } + seenGitHeader = true; + if (gitHeader.oldFileName) { + index.oldFileName = gitHeader.oldFileName; + } + if (gitHeader.newFileName) { + index.newFileName = gitHeader.newFileName; + if (!index.index) { + index.index = gitHeader.newFileName; + } + } + i++; + continue; + } + + const renameFrom = parseGitExtendedPath(line, 'rename from '); + if (renameFrom) { + index.oldFileName = renameFrom; + i++; + continue; + } + const renameTo = parseGitExtendedPath(line, 'rename to '); + if (renameTo) { + index.newFileName = renameTo; + if (!index.index) { + index.index = renameTo; + } + i++; + continue; + } + const copyFrom = parseGitExtendedPath(line, 'copy from '); + if (copyFrom) { + index.oldFileName = copyFrom; + i++; + continue; + } + const copyTo = parseGitExtendedPath(line, 'copy to '); + if (copyTo) { + index.newFileName = copyTo; + if (!index.index) { + index.index = copyTo; + } + i++; + continue; + } + // Try to parse the line as a diff header, like // Index: README.md // or diff --git a/test/patch/parse.js b/test/patch/parse.js index ed604ab1..89661af8 100644 --- a/test/patch/parse.js +++ b/test/patch/parse.js @@ -299,6 +299,122 @@ diff -r 9117c6561b0b -r 273ce12ad8f1 README ]); }); + it('should parse generic diff headers', function() { + const patchStr = `diff --git a/foo b/foo +--- a/foo ++++ b/foo +@@ -1 +1 @@ +-old ++new`; + + expect(parsePatch(patchStr)).to.eql([{ + index: 'foo', + oldFileName: 'a/foo', + oldHeader: '', + newFileName: 'b/foo', + newHeader: '', + hunks: [ + { oldStart: 1, oldLines: 1, newStart: 
1, newLines: 1, lines: ['-old', '+new'] } + ] + }]); + }); + + it('should parse Git rename-only patches', function() { + const patchStr = `diff --git a/README.md b/README-2.md +similarity index 100% +rename from README.md +rename to README-2.md`; + + expect(parsePatch(patchStr)).to.eql([{ + index: 'README-2.md', + oldFileName: 'README.md', + newFileName: 'README-2.md', + hunks: [] + }]); + }); + + it('should parse Git C-quoted paths in headers', function() { + const patchStr = `diff --git "a/old\\040name\\tfile" "b/new\\x20name\\"file" +rename from "old\\040name\\tfile" +rename to "new\\x20name\\"file"`; + + expect(parsePatch(patchStr)).to.eql([{ + index: 'new name"file', + oldFileName: 'old name\tfile', + newFileName: 'new name"file', + hunks: [] + }]); + }); + + it('should handle edge cases in Git C-quoted escapes', function() { + const patchStr = `diff --git "a/odd\\qpath\\7" "b/new\\xZZname\\077" +rename from "odd\\qpath\\7" +rename to "new\\xZZname\\077"`; + + expect(parsePatch(patchStr)).to.eql([{ + index: 'newxZZname?', + oldFileName: 'oddqpath\u0007', + newFileName: 'newxZZname?', + hunks: [] + }]); + }); + + it('should parse Git diff headers with unquoted spaces', function() { + const patchStr = `diff --git a/foo b/x b/foo b/x +new file mode 100644 +index 0000000..e69de29 +diff --git a/name with spaces in it b/name with spaces in it +new file mode 100644 +index 0000000..e69de29`; + + expect(parsePatch(patchStr)).to.eql([{ + index: 'foo b/x', + oldFileName: 'foo b/x', + newFileName: 'foo b/x', + hunks: [] + }, { + index: 'name with spaces in it', + oldFileName: 'name with spaces in it', + newFileName: 'name with spaces in it', + hunks: [] + }]); + }); + + it('should parse Git diff headers with mixed quoting', function() { + const patchStr = `diff --git "a/old name.txt" b/new name.txt +new file mode 100644 +index 0000000..e69de29 +diff --git a/simple.txt "b/new name.txt" +new file mode 100644 +index 0000000..e69de29`; + + 
expect(parsePatch(patchStr)).to.eql([{ + index: 'new name.txt', + oldFileName: 'old name.txt', + newFileName: 'new name.txt', + hunks: [] + }, { + index: 'new name.txt', + oldFileName: 'simple.txt', + newFileName: 'new name.txt', + hunks: [] + }]); + }); + + it('should parse unquoted rename-from with spaces', function() { + const patchStr = `diff --git a/foo bar "b/baz\\t" +similarity index 100% +rename from foo bar +rename to "baz\\t"`; + + expect(parsePatch(patchStr)).to.eql([{ + index: 'baz\t', + oldFileName: 'foo bar', + newFileName: 'baz\t', + hunks: [] + }]); + }); + it('should parse multiple files without the Index line', function() { expect(parsePatch( `--- from\theader1