Skip to content

Commit 68378a3

Browse files
committed
getPullRequestEditedDiffRanges: use GitHub API
1 parent b7ff308 commit 68378a3

File tree

2 files changed

+57
-101
lines changed

2 files changed

+57
-101
lines changed

src/analyze-action.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ async function run() {
268268
pull_request &&
269269
(await setupDiffInformedQueryRun(
270270
pull_request.base.ref as string,
271-
pull_request.head.ref as string,
271+
pull_request.head.label as string,
272272
codeql,
273273
logger,
274274
features,

src/analyze.ts

+56-100
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import del from "del";
77
import * as yaml from "js-yaml";
88

99
import * as actionsUtil from "./actions-util";
10+
import { getApiClient } from "./api-client";
1011
import { setupCppAutobuild } from "./autobuild";
1112
import {
1213
CODEQL_VERSION_ANALYSIS_SUMMARY_V2,
@@ -17,7 +18,6 @@ import * as configUtils from "./config-utils";
1718
import { addDiagnostic, makeDiagnostic } from "./diagnostics";
1819
import { EnvVar } from "./environment";
1920
import { FeatureEnablement, Feature } from "./feature-flags";
20-
import * as gitUtils from "./git-utils";
2121
import { isScannedLanguage, Language } from "./languages";
2222
import { Logger, withGroupAsync } from "./logging";
2323
import { DatabaseCreationTimings, EventReport } from "./status-report";
@@ -240,7 +240,8 @@ async function finalizeDatabaseCreation(
240240
* Set up the diff-informed analysis feature.
241241
*
242242
* @param baseRef The base branch name, used for calculating the diff range.
243-
* @param headRef The head branch name, used for calculating the diff range.
243+
* @param headLabel The label that uniquely identifies the head branch across
244+
* repositories, used for calculating the diff range.
244245
* @param codeql
245246
* @param logger
246247
* @param features
@@ -249,7 +250,7 @@ async function finalizeDatabaseCreation(
249250
*/
250251
export async function setupDiffInformedQueryRun(
251252
baseRef: string,
252-
headRef: string,
253+
headLabel: string,
253254
codeql: CodeQL,
254255
logger: Logger,
255256
features: FeatureEnablement,
@@ -262,7 +263,7 @@ export async function setupDiffInformedQueryRun(
262263
async () => {
263264
const diffRanges = await getPullRequestEditedDiffRanges(
264265
baseRef,
265-
headRef,
266+
headLabel,
266267
logger,
267268
);
268269
return writeDiffRangeDataExtensionPack(logger, diffRanges);
@@ -280,7 +281,8 @@ interface DiffThunkRange {
280281
* Return the file line ranges that were added or modified in the pull request.
281282
*
282283
* @param baseRef The base branch name, used for calculating the diff range.
283-
* @param headRef The head branch name, used for calculating the diff range.
284+
* @param headLabel The label that uniquely identifies the head branch across
285+
* repositories, used for calculating the diff range.
284286
* @param logger
285287
* @returns An array of tuples, where each tuple contains the absolute path of a
286288
* file, the start line and the end line (both 1-based and inclusive) of an
@@ -289,107 +291,61 @@ interface DiffThunkRange {
289291
*/
290292
async function getPullRequestEditedDiffRanges(
291293
baseRef: string,
292-
headRef: string,
294+
headLabel: string,
293295
logger: Logger,
294296
): Promise<DiffThunkRange[] | undefined> {
295-
const checkoutPath = actionsUtil.getOptionalInput("checkout_path");
296-
if (checkoutPath === undefined) {
297-
return undefined;
298-
}
299-
300-
// To compute the merge bases between the base branch and the PR topic branch,
301-
// we need to fetch the commit graph from the branch heads to those merge
302-
// babes. The following 6-step procedure does so while limiting the amount of
303-
// history fetched.
304-
305-
// Step 1: Deepen from the PR merge commit to the base branch head and the PR
306-
// topic branch head, so that the PR merge commit is no longer considered a
307-
// grafted commit.
308-
await gitUtils.deepenGitHistory();
309-
// Step 2: Fetch the base branch shallow history. This step ensures that the
310-
// base branch name is present in the local repository. Normally the base
311-
// branch name would be added by Step 4. However, if the base branch head is
312-
// an ancestor of the PR topic branch head, Step 4 would fail without doing
313-
// anything, so we need to fetch the base branch explicitly.
314-
await gitUtils.gitFetch(baseRef, ["--depth=1"]);
315-
// Step 3: Fetch the PR topic branch history, stopping when we reach commits
316-
// that are reachable from the base branch head.
317-
await gitUtils.gitFetch(headRef, [`--shallow-exclude=${baseRef}`]);
318-
// Step 4: Fetch the base branch history, stopping when we reach commits that
319-
// are reachable from the PR topic branch head.
320-
await gitUtils.gitFetch(baseRef, [`--shallow-exclude=${headRef}`]);
321-
// Step 5: Repack the history to remove the shallow grafts that were added by
322-
// the previous fetches. This step works around a bug that causes subsequent
323-
// deepening fetches to fail with "fatal: error in object: unshallow <SHA>".
324-
// See https://stackoverflow.com/q/63878612
325-
await gitUtils.gitRepack(["-d"]);
326-
// Step 6: Deepen the history so that we have the merge bases between the base
327-
// branch and the PR topic branch.
328-
await gitUtils.deepenGitHistory();
329-
330-
// To compute the exact same diff as GitHub would compute for the PR, we need
331-
// to use the same merge base as GitHub. That is easy to do if there is only
332-
// one merge base, which is by far the most common case. If there are multiple
333-
// merge bases, we stop without producing a diff range.
334-
const mergeBases = await gitUtils.getAllGitMergeBases([baseRef, headRef]);
335-
logger.info(`Merge bases: ${mergeBases.join(", ")}`);
336-
if (mergeBases.length !== 1) {
337-
logger.info(
338-
"Cannot compute diff range because baseRef and headRef " +
339-
`have ${mergeBases.length} merge bases (instead of exactly 1).`,
340-
);
341-
return undefined;
342-
}
343-
344-
const diffHunkHeaders = await gitUtils.getGitDiffHunkHeaders(
345-
mergeBases[0],
346-
headRef,
347-
);
348-
if (diffHunkHeaders === undefined) {
349-
return undefined;
350-
}
297+
await getFileDiffsWithBasehead(baseRef, headLabel, logger);
298+
return undefined;
299+
}
351300

352-
const results = new Array<DiffThunkRange>();
353-
354-
let changedFile = "";
355-
for (const line of diffHunkHeaders) {
356-
if (line.startsWith("+++ ")) {
357-
const filePath = gitUtils.decodeGitFilePath(line.substring(4));
358-
if (filePath.startsWith("b/")) {
359-
// The file was edited: track all hunks in the file
360-
changedFile = filePath.substring(2);
361-
} else if (filePath === "/dev/null") {
362-
// The file was deleted: skip all hunks in the file
363-
changedFile = "";
364-
} else {
365-
logger.warning(`Failed to parse diff hunk header line: ${line}`);
366-
return undefined;
367-
}
368-
continue;
369-
}
370-
if (line.startsWith("@@ ")) {
371-
if (changedFile === "") continue;
301+
/**
302+
* This interface is an abbreviated version of the file diff object returned by
303+
* the GitHub API.
304+
*/
305+
interface FileDiff {
306+
filename: string;
307+
changes: number;
308+
// A patch may be absent if the file is binary, if the file diff is too large,
309+
// or if the file is unchanged.
310+
patch?: string | undefined;
311+
}
372312

373-
const match = line.match(/^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@/);
374-
if (match === null) {
375-
logger.warning(`Failed to parse diff hunk header line: ${line}`);
376-
return undefined;
377-
}
378-
const startLine = parseInt(match[1], 10);
379-
const numLines = parseInt(match[2], 10);
380-
if (numLines === 0) {
381-
// The hunk was a deletion: skip it
382-
continue;
383-
}
384-
const endLine = startLine + (numLines || 1) - 1;
385-
results.push({
386-
path: path.join(checkoutPath, changedFile),
387-
startLine,
388-
endLine,
389-
});
313+
async function getFileDiffsWithBasehead(
314+
baseRef: string,
315+
headLabel: string,
316+
logger: Logger,
317+
): Promise<FileDiff[] | undefined> {
318+
const ownerRepo = util.getRequiredEnvParam("GITHUB_REPOSITORY").split("/");
319+
const owner = ownerRepo[0];
320+
const repo = ownerRepo[1];
321+
const basehead = `${baseRef}...${headLabel}`;
322+
try {
323+
const response = await getApiClient().rest.repos.compareCommitsWithBasehead(
324+
{
325+
owner,
326+
repo,
327+
basehead,
328+
per_page: 1,
329+
},
330+
);
331+
logger.debug(
332+
`Response from compareCommitsWithBasehead(${basehead}):` +
333+
`\n${JSON.stringify(response, null, 2)}`,
334+
);
335+
return response.data.files;
336+
} catch (error: any) {
337+
if (error.status) {
338+
logger.warning(`Error retrieving diff ${basehead}: ${error.message}`);
339+
logger.debug(
340+
`Error running compareCommitsWithBasehead(${basehead}):` +
341+
`\nRequest: ${JSON.stringify(error.request, null, 2)}` +
342+
`\nError Response: ${JSON.stringify(error.response, null, 2)}`,
343+
);
344+
return undefined;
345+
} else {
346+
throw error;
390347
}
391348
}
392-
return results;
393349
}
394350

395351
/**

0 commit comments

Comments
 (0)