Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
278 changes: 278 additions & 0 deletions src/patch/parse.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,232 @@
import type { StructuredPatch } from '../types.js';

/**
* Parse a single Git path token starting at the provided index.
* Supports C-style quoted paths used by Git when `core.quotePath` is enabled.
*/
function parseGitPathToken(input: string, startIndex: number): { value: string; nextIndex: number } | null {
let i = startIndex;
while (i < input.length && input[i] === ' ') {
i++;
}
if (i >= input.length) {
return null;
}
if (input[i] === '"') {
i++;
let value = '';
while (i < input.length) {
const ch = input[i];
if (ch === '"') {
return { value, nextIndex: i + 1 };
}
if (ch === '\\') {
i++;
if (i >= input.length) {
return null;
}
const esc = input[i];
if (esc >= '0' && esc <= '7') {
let octal = esc;
for (let count = 0; count < 2; count++) {
const next = input[i + 1];
if (next >= '0' && next <= '7') {
i++;
octal += next;
} else {
break;
}
}
value += String.fromCharCode(parseInt(octal, 8));
i++;
continue;
}
if (esc === 'x') {
const hex = input.substring(i + 1, i + 3);
if (/^[0-9a-fA-F]{2}$/.test(hex)) {
value += String.fromCharCode(parseInt(hex, 16));
i += 3;
continue;
}
value += 'x';
i++;
continue;
}
switch (esc) {
case 'n':
value += '\n';
break;
case 't':
value += '\t';
break;
case 'r':
value += '\r';
break;
case 'b':
value += '\b';
break;
case 'f':
value += '\f';
break;
case 'a':
value += '\u0007';
break;
case 'v':
value += '\u000b';
break;
case '\\':
value += '\\';
break;
case '"':
value += '"';
break;
default:
value += esc;
}
i++;
continue;
}
value += ch;
i++;
}
return null;
}
const start = i;
while (i < input.length && input[i] !== ' ') {
i++;
}
return { value: input.substring(start, i), nextIndex: i };
}

/**
* Parse a fixed number of Git path tokens from a string.
*/
function parseGitPathTokens(input: string, count: number): string[] | null {
let index = 0;
const paths: string[] = [];
for (let parsed = 0; parsed < count; parsed++) {
const token = parseGitPathToken(input, index);
if (!token) {
return null;
}
paths.push(token.value);
index = token.nextIndex;
}
return paths;
}

/**
* Parse unquoted Git diff paths separated by the ` b/` marker.
* If multiple splits are possible, prefer a split where old and new paths match.
* If no matching split exists, use the last possible split.
*/
function parseGitUnquotedDiffPaths(input: string): { oldPath: string; newPath: string } | null {
if (!input.startsWith('a/')) {
return null;
}
const candidates: Array<{ oldPath: string; newPath: string }> = [];
const matchingCandidates: Array<{ oldPath: string; newPath: string }> = [];
let searchIndex = 0;
while (searchIndex < input.length) {
const separatorIndex = input.indexOf(' b/', searchIndex);
if (separatorIndex === -1) {
break;
}
const oldPath = input.substring(0, separatorIndex);
const newPath = input.substring(separatorIndex + 1);
if (oldPath.startsWith('a/') && newPath.startsWith('b/')) {
const candidate = { oldPath, newPath };
candidates.push(candidate);
if (oldPath.substring(2) === newPath.substring(2)) {
matchingCandidates.push(candidate);
}
}
searchIndex = separatorIndex + 1;
}
if (matchingCandidates.length === 1) {
return matchingCandidates[0];
}
if (matchingCandidates.length > 1) {
return matchingCandidates[matchingCandidates.length - 1];
}
if (candidates.length) {
return candidates[candidates.length - 1];
}
return null;
}

/**
* Parse a Git `diff --git a/... b/...` header into old/new file names.
*/
function parseGitDiffHeader(line: string): { oldFileName?: string; newFileName?: string } | null {
const prefix = 'diff --git ';
if (!line.startsWith(prefix)) {
return null;
}
const rawPaths = line.substring(prefix.length);
let oldFileName: string | undefined;
let newFileName: string | undefined;

if (rawPaths.startsWith('"')) {
const firstToken = parseGitPathToken(rawPaths, 0);
if (!firstToken) {
return null;
}
oldFileName = firstToken.value;
const remainder = rawPaths.substring(firstToken.nextIndex).trimStart();
if (!remainder.length) {
return null;
}
if (remainder.startsWith('"')) {
const secondToken = parseGitPathToken(remainder, 0);
if (!secondToken) {
return null;
}
newFileName = secondToken.value;
} else {
newFileName = remainder;
}
} else {
const unquoted = parseGitUnquotedDiffPaths(rawPaths);
if (unquoted) {
oldFileName = unquoted.oldPath;
newFileName = unquoted.newPath;
} else {
const paths = parseGitPathTokens(rawPaths, 2);
if (!paths) {
return null;
}
[oldFileName, newFileName] = paths;
}
}

if (oldFileName.startsWith('a/')) {
oldFileName = oldFileName.substring(2);
}
if (newFileName.startsWith('b/')) {
newFileName = newFileName.substring(2);
}
return { oldFileName, newFileName };
}

/**
* Parse extended Git headers like `rename from`, `rename to`, `copy from`, and `copy to`.
*/
function parseGitExtendedPath(line: string, prefix: string): string | null {
if (!line.startsWith(prefix)) {
return null;
}
const remainder = line.substring(prefix.length).trimStart();
if (!remainder.length) {
return null;
}
if (remainder.startsWith('"')) {
const token = parseGitPathToken(remainder, 0);
return token ? token.value : null;
}
return remainder;
}

/**
* Parses a patch into structured data, in the same structure returned by `structuredPatch`.
*
Expand All @@ -12,6 +239,7 @@ export function parsePatch(uniDiff: string): StructuredPatch[] {

function parseIndex() {
const index: Partial<StructuredPatch> = {};
let seenGitHeader = false;
list.push(index);

// Parse diff metadata
Expand All @@ -23,6 +251,56 @@ export function parsePatch(uniDiff: string): StructuredPatch[] {
break;
}

const gitHeader = parseGitDiffHeader(line);
if (gitHeader) {
if (seenGitHeader || index.index || index.oldFileName || index.newFileName) {
break;
}
seenGitHeader = true;
if (gitHeader.oldFileName) {
index.oldFileName = gitHeader.oldFileName;
}
if (gitHeader.newFileName) {
index.newFileName = gitHeader.newFileName;
if (!index.index) {
index.index = gitHeader.newFileName;
}
}
i++;
continue;
}

const renameFrom = parseGitExtendedPath(line, 'rename from ');
if (renameFrom) {
index.oldFileName = renameFrom;
i++;
continue;
}
const renameTo = parseGitExtendedPath(line, 'rename to ');
if (renameTo) {
index.newFileName = renameTo;
if (!index.index) {
index.index = renameTo;
}
i++;
continue;
}
const copyFrom = parseGitExtendedPath(line, 'copy from ');
if (copyFrom) {
index.oldFileName = copyFrom;
i++;
continue;
}
const copyTo = parseGitExtendedPath(line, 'copy to ');
if (copyTo) {
index.newFileName = copyTo;
if (!index.index) {
index.index = copyTo;
}
i++;
continue;
}

// Try to parse the line as a diff header, like
// Index: README.md
// or
Expand Down
Loading