diff --git a/.github/shared/README.md b/.github/shared/README.md new file mode 100644 index 000000000000..64add81cf5be --- /dev/null +++ b/.github/shared/README.md @@ -0,0 +1,3 @@ +# `.github/shared` + +Copied from https://github.com/Azure/azure-rest-api-specs/tree/main/.github/shared diff --git a/.github/shared/src/cache.js b/.github/shared/src/cache.js new file mode 100644 index 000000000000..444a7ce2f932 --- /dev/null +++ b/.github/shared/src/cache.js @@ -0,0 +1,61 @@ +/** + * Caches values in memory with a single key of any type. + * + * @template K, V + */ +export class KeyedCache { + /** @type {Map} */ + #map = new Map(); + + /** + * Returns cached value, initializing if necessary + * + * @param {K} key + * @param {() => V} factory + * @returns {V} cached value + * + * @example + * const result = cache.getOrCreate(42, async () => await doWork(42)); + */ + getOrCreate(key, factory) { + let value = this.#map.get(key); + + if (value === undefined) { + value = factory(); + this.#map.set(key, value); + } + + return value; + } +} + +/** + * Caches values in memory with an ordered pair of keys of any types. + * + * @template K1, K2, V + */ +export class KeyedPairCache { + // Two-layer nested cache + /** @type {KeyedCache>} */ + #cache1 = new KeyedCache(); + + /** + * Returns cached value, initializing if necessary. + * Keys are ordered, so (key1, key2) != (key2, key1). 
+ * + * @param {K1} key1 + * @param {K2} key2 + * @param {() => V} factory + * @returns {V} cached value + * + * @example + * const result = cache.getOrCreate(42, 7, async () => await doWork(42, 7)); + */ + getOrCreate(key1, key2, factory) { + // key1 => cache for the next layer + const cache2 = this.#cache1.getOrCreate(key1, () => new KeyedCache()); + + // key2 => final value + return cache2.getOrCreate(key2, factory); + } +} diff --git a/.github/shared/src/exec.js b/.github/shared/src/exec.js new file mode 100644 index 000000000000..1edc25700919 --- /dev/null +++ b/.github/shared/src/exec.js @@ -0,0 +1,152 @@ +import child_process from "child_process"; +import { dirname, join } from "path"; +import { promisify } from "util"; +const execFileImpl = promisify(child_process.execFile); + +/** + * @typedef {Object} ExecOptions + * @property {string} [cwd] Current working directory. Default: process.cwd(). + * @property {import('./logger.js').ILogger} [logger] + * @property {boolean} [logOutput] Log captured stdout/stderr at info level. Default: false. + * @property {number} [maxBuffer] Max bytes allowed on stdout or stderr. Default: 16 * 1024 * 1024. + */ + +/** + * @typedef {Object} NpmPrefixOptions + * @property {string} [prefix] Prefix to pass to npm via "--prefix". + */ + +/** + * @typedef {ExecOptions & NpmPrefixOptions} ExecNpmOptions + */ + +/** + * @typedef {Object} ExecResult + * @property {string} stdout + * @property {string} stderr + */ + +/** + * @typedef {Error & { stdout?: string, stderr?: string, code?: number }} ExecError + */ + +/** + * Checks whether an unknown error object is an ExecError. + * @param {unknown} error + * @returns {error is ExecError} + */ +export function isExecError(error) { + if (!(error instanceof Error)) return false; + + const e = /** @type {ExecError} */ (error); + return typeof e.stdout === "string" || typeof e.stderr === "string"; +} + +/** + * Wraps `child_process.execFile()`, adding logging and a larger default maxBuffer. 
+ * + * @param {string} file + * @param {string[]} [args] + * @param {ExecOptions} [options] + * @returns {Promise} + * @throws {ExecError} + */ +export async function execFile(file, args, options = {}) { + const { + cwd, + logger, + logOutput = false, + // Node default is 1024 * 1024, which is too small for some git commands returning many entities or large file content. + // To support "git show", should be larger than the largest swagger file in the repo (2.5 MB as of 2/28/2025). + maxBuffer = 16 * 1024 * 1024, + } = options; + + logger?.info(`execFile("${file}", ${JSON.stringify(args)})`); + + try { + // execFile(file, args) is more secure than exec(cmd), since the latter is vulnerable to shell injection + const result = await execFileImpl(file, args, { + cwd, + maxBuffer, + }); + + logger?.debug(`stdout: '${result.stdout}'`); + logger?.debug(`stderr: '${result.stderr}'`); + if (logOutput) { + if (result.stdout) { + logger?.info(result.stdout.trimEnd()); + } + if (result.stderr) { + logger?.info(result.stderr.trimEnd()); + } + } + + return result; + } catch (error) { + /* v8 ignore next */ + logger?.debug(`error: '${JSON.stringify(error)}'`); + if (logOutput && isExecError(error)) { + if (error.stdout) { + logger?.info(error.stdout.trimEnd()); + } + if (error.stderr) { + logger?.info(error.stderr.trimEnd()); + } + } + + throw error; + } +} + +/** + * Calls `execFile()` with appropriate arguments to run `npm` on all platforms + * + * @param {string[]} args + * @param {ExecNpmOptions} [options] + * @returns {Promise} + * @throws {ExecError} + */ +export async function execNpm(args, options = {}) { + const { prefix } = options; + + // Exclude platform-specific code from coverage + /* v8 ignore start */ + const { file, defaultArgs } = + process.platform === "win32" + ? { + // Only way I could find to run "npm" on Windows, without using the shell (e.g. 
"cmd /c npm ...") + // + // "node.exe", ["--", "npm-cli.js", ...args] + // + // The "--" MUST come BEFORE "npm-cli.js", to ensure args are sent to the script unchanged. + // If the "--" comes after "npm-cli.js", the args sent to the script will be ["--", ...args], + // which is NOT equivalent, and can break if args itself contains another "--". + + // example: "C:\Program Files\nodejs\node.exe" + file: process.execPath, + + // example: "C:\Program Files\nodejs\node_modules\npm\bin\npm-cli.js" + defaultArgs: [ + "--", + join(dirname(process.execPath), "node_modules", "npm", "bin", "npm-cli.js"), + ], + } + : { file: "npm", defaultArgs: [] }; + /* v8 ignore stop */ + + const prefixArgs = prefix ? ["--prefix", prefix] : []; + + return await execFile(file, [...defaultArgs, ...prefixArgs, ...args], options); +} + +/** + * Calls `execNpm()` with arguments ["exec", "--no", "--"] prepended. + * + * @param {string[]} args + * @param {ExecNpmOptions} [options] + * @returns {Promise} + * @throws {ExecError} + */ +export async function execNpmExec(args, options = {}) { + return await execNpm(["exec", "--no", "--", ...args], options); +} diff --git a/.github/shared/src/logger.js b/.github/shared/src/logger.js new file mode 100644 index 000000000000..dc43a06d9782 --- /dev/null +++ b/.github/shared/src/logger.js @@ -0,0 +1,64 @@ +/** + * @typedef {Object} ILogger + * @property {(message:string) => void} debug + * @property {(message:string) => void} error + * @property {(message:string) => void} info + * @property {(message:string) => void} warning + * @property {() => boolean} isDebug + */ + +/** + * @implements {ILogger} + */ +export class ConsoleLogger { + /** @type {boolean} */ + #isDebug; + + /** + * @param {boolean} [isDebug] - If true, debug logs will be printed. Default: false. 
+ */ + constructor(isDebug = false) { + this.#isDebug = isDebug; + } + + /** + * @param {string} message + */ + debug(message) { + if (this.isDebug()) { + console.debug(message); + } + } + + /** + * @param {string} message + */ + error(message) { + console.error(message); + } + + /** + * @param {string} message + */ + info(message) { + console.log(message); + } + + /** + * @returns {boolean} + */ + isDebug() { + return this.#isDebug; + } + + /** + * @param {string} message + */ + warning(message) { + console.warn(message); + } +} + +// Singleton loggers +export const defaultLogger = new ConsoleLogger(); +export const debugLogger = new ConsoleLogger(/*isDebug*/ true); diff --git a/.github/shared/src/path.js b/.github/shared/src/path.js new file mode 100644 index 000000000000..929116c931b6 --- /dev/null +++ b/.github/shared/src/path.js @@ -0,0 +1,104 @@ +import { basename, dirname, resolve } from "path"; + +import { KeyedCache, KeyedPairCache } from "./cache.js"; + +/** @type {KeyedCache} */ +const resolveCache = new KeyedCache(); + +/** @type {KeyedPairCache} */ +const resolvePairCache = new KeyedPairCache(); + +/** + * + * @param {string} path Absolute or relative path + * @param {string} segment File or folder + * @returns {boolean} True if resolved path contains segment + * + * @example + * includesSegment("stable/2025-01-01/examples/foo.json", "examples") + * // -> true + */ +export function includesSegment(path, segment) { + return untilLastSegment(path, segment) !== ""; +} + +/** + * Wraps `path.resolve(path)` with a cache to improve performance + * + * @param {string} path + * @returns {string} + */ +export function resolveCached(path) { + return resolveCache.getOrCreate(path, () => resolve(path)); +} + +/** + * Wraps `path.resolve(from, to)` with a cache to improve performance + +* @param {string} from + * @param {string} to + * @returns {string} + */ +export function resolvePairCached(from, to) { + return resolvePairCache.getOrCreate(from, to, () => 
resolve(from, to)); +} + +/** + * @param {string} path Absolute or relative path + * @param {string} segment File or folder + * @returns {string} Portion of resolved path up to (and including) the last occurrence of segment + * + * @example + * untilLastSegment("stable/2025-01-01/examples/foo.json", "examples") + * // -> "{cwd}/stable/2025-01-01/examples" + */ +export function untilLastSegment(path, segment) { + // Shares code with `untilLastSegmentWithParent()`, but not worth refactoring yet + + let current = resolveCached(path); + + while (true) { + const parent = dirname(current); + + if (basename(current) === segment) { + // Found the target folder. Return it. + return current; + } else if (parent === current) { + // Reached the filesystem root (folder not found). Return empty string. + return ""; + } else { + // Keep walking upward + current = parent; + } + } +} + +/** + * @param {string} path Absolute or relative path + * @param {string} segment File or folder + * @returns {string} Portion of resolved path up to (and including) the last segment with the specified parent + * + * @example + * untilLastSegmentWithParent("specification/foo/data-plane/stable/2025-01-01/foo.json", "specification") + * // -> "{cwd}/specification/foo" + */ +export function untilLastSegmentWithParent(path, segment) { + // Shares code with `untilLastSegment()`, but not worth refactoring yet + + let current = resolveCached(path); + + while (true) { + const parent = dirname(current); + + if (basename(parent) === segment) { + // Found the target parent. Return current; + return current; + } else if (parent === current) { + // Reached the filesystem root (folder not found). Return empty string. 
+ return ""; + } else { + // Keep walking upward + current = parent; + } + } +} diff --git a/.github/skills/create-api-review-pr/SKILL.md b/.github/skills/create-api-review-pr/SKILL.md new file mode 100644 index 000000000000..f0b999ed3c8e --- /dev/null +++ b/.github/skills/create-api-review-pr/SKILL.md @@ -0,0 +1,92 @@ +--- +name: create-api-review-pr +description: Create a GitHub PR for API review by comparing a baseline API surface against a target tag or branch. Use this when the user wants to create an API review PR, compare API changes between versions, or review API surface differences for a package. +--- + +# Create API Review PR + +Creates a dedicated API review PR that shows the diff between a baseline release and a target tag or branch's API surface using `scripts/api_md_workflow/create_api_review_pr.py`. + +## Unsupported Requests + +If the user asks to create an API review PR for a new package, explain that new packages do not use API review PRs and stop. Do not gather script inputs or run `create_api_review_pr.py` for new packages. + +## Prerequisites + +1. The user must have `gh` CLI installed and authenticated (`gh auth login`), or `GITHUB_TOKEN`/`GH_TOKEN` set with permission to create and update pull requests in this repository. +2. The working tree must be clean (no uncommitted changes). +3. Python 3.10 or later must be available. +4. `azpysdk` must be installed (`pip install -e ./eng/tools/azure-sdk-tools`). +5. ApiView stub generator dependencies must be installed (`pip install -r ./eng/apiview_reqs.txt`). + +## Information to Gather + +Ask the user for the following using `vscode_askQuestions`: + +### 1. Package Name (required) +The Azure SDK package name (e.g. `azure-storage-blob`, `azure-ai-projects`, `azure-servicebus`, `azure-planetarycomputer`). + +### 2. Baseline (required) +The release tag to use as the baseline for comparison. Tags follow the format `<package-name>_<version>` (e.g. `azure-storage-blob_12.29.0`). 
+ +- If the user provides a package name and version separately, construct the tag as `<package-name>_<version>`. + +### 3. Target (optional) +The tag, branch, or fork reference to generate the "current" API surface from. Can be: +- A package release tag (e.g. `azure-storage-blob_12.30.0`) — used directly as a tag ref +- A branch name (e.g. `main`, `feature-branch`) — fetched from `origin` +- An `owner:branch` reference (e.g. `someone:their-branch`) — fetched from the fork +- If omitted, defaults to `origin/main` + +## Validation Steps + +Before running the script: + +1. **Validate the package exists**: Confirm a directory matching `sdk/*/<package-name>/` exists with a `pyproject.toml` or `setup.py`. +2. **Validate the baseline tag**: Run `git tag -l "<tag>"` to confirm the tag exists. If the user provided a version like `12.29.0`, construct the full tag as `<package-name>_<version>` and validate that. +3. **Validate the target tag when applicable**: If the user provided a target version or tag, construct or validate the full tag as `<package-name>_<version>` and run `git tag -l "<tag>"`. +4. **Confirm the working tree is clean**: Run `git status --porcelain` and warn if there are uncommitted changes. + +## Execution + +This is a long-running operation. The script may take several minutes because it generates API surfaces for both the baseline and target, creates or reuses review branches, pushes branches, and then opens the draft PR. Do not treat quiet terminal periods during `apistub` generation as failure unless the command exits, prints an error, or waits for input. + +If `create_api_review_pr.py` fails while running this skill, do not patch the script, modify package files, retry with workaround edits, or try to manually complete branch/PR creation. Stop the workflow, report the failure clearly, include the relevant error details, and suggest practical next steps. + +If the script reports that there are no API differences, relay that message to the user and stop. Do not create branches or a PR manually. 
+ +Run the following command from the repository root: + +```bash +python scripts/api_md_workflow/create_api_review_pr.py --package-name <package-name> --base <baseline-tag> [--target <target>] +``` + +### Examples + +**Standard review (comparing a release tag to a PR branch):** +```bash +python scripts/api_md_workflow/create_api_review_pr.py --package-name azure-storage-blob --base azure-storage-blob_12.29.0 --target someone:feature-branch +``` + +**Release-to-release review (comparing two package tags):** +```bash +python scripts/api_md_workflow/create_api_review_pr.py --package-name azure-ai-projects --base azure-ai-projects_2.1.0 --target azure-ai-projects_2.2.0 +``` + +**Review against main (no target specified):** +```bash +python scripts/api_md_workflow/create_api_review_pr.py --package-name azure-cosmos --base azure-cosmos_4.14.0 +``` + +## Post-Execution + +The script will: +1. Generate `api.md` for both baseline and target +2. Push `apireview/base__` and `apireview/review__` branches +3. Open a draft PR (or print a compare URL if `gh pr create` fails) + +During execution, report progress at major phases: baseline generation, target generation, branch creation or reuse, branch push, and PR creation. If the terminal is quiet, check whether the process is still running before assuming it is hung. + +When the target is a tag, the PR body labels it as `Target tag`. Branch and fork targets are labeled as `Working branch`. + +Report the PR URL to the user when complete. diff --git a/.github/skills/generate-api-markdown/SKILL.md b/.github/skills/generate-api-markdown/SKILL.md index f3f96e32c839..dbf28fcbb551 100644 --- a/.github/skills/generate-api-markdown/SKILL.md +++ b/.github/skills/generate-api-markdown/SKILL.md @@ -7,7 +7,7 @@ description: Generate an API markdown file and token file using ApiView. Use thi ## Prerequisites -1. Activate your virtual environment with a Python version that is strictly less than the version limit specified in `eng/tools/azure-sdk-tools/azpysdk/apistub.py`. +1. 
Activate your virtual environment. 2. Install the required dependencies: ```bash cd @@ -19,5 +19,6 @@ description: Generate an API markdown file and token file using ApiView. Use thi 1. Navigate to the desired package directory 2. Run the command: ```bash - azpysdk apistub --md . + azpysdk apistub --md --extract-metadata --install-deps --dest-dir . . + ``` 3. The command outputs the location of the generated markdown file. Provide this file to the user for review. \ No newline at end of file diff --git a/.github/workflows/api-consistency.yml b/.github/workflows/api-consistency.yml new file mode 100644 index 000000000000..e57e8c539b58 --- /dev/null +++ b/.github/workflows/api-consistency.yml @@ -0,0 +1,61 @@ +name: API.md Consistency + +on: + pull_request: + types: + # default + - opened + - synchronize + - reopened + # re-run if base branch is changed, since previous merge commit may generate incorrect diff + - edited + # re-run if PR changes to/from draft + - converted_to_draft + - ready_for_review + paths: + - "sdk/**" + +permissions: + contents: read + +jobs: + consistency: + if: ${{ !github.event.pull_request.draft }} + runs-on: ubuntu-latest + outputs: + changed_count: ${{ steps.consistency.outputs.changed_count || '0' }} + mismatch_count: ${{ steps.consistency.outputs.mismatch_count || '0' }} + missing_count: ${{ steps.consistency.outputs.missing_count || '0' }} + issue_count: ${{ steps.consistency.outputs.issue_count || '0' }} + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 2 + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Install azpysdk + shell: bash + run: | + python -m pip install --upgrade pip + python -m pip install -r eng/apiview_reqs.txt --index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/ + python -m pip install ./eng/tools/azure-sdk-tools + + - name: Run API.md consistency checks + id: consistency + uses: 
actions/github-script@v8 + env: + API_MD_BASE_REF: ${{ github.event.pull_request.base.ref }} + API_MD_CHANGED_FILE: .artifacts/changed_package_dirs.txt + API_MD_PACKAGES_FILE: .artifacts/affected_package_dirs.txt + API_MD_MISMATCHES_FILE: .artifacts/mismatched_api_files.txt + API_MD_MISSING_FILE: .artifacts/missing_api_files.txt + with: + script: | + const { default: apiMdConsistency } = + await import('${{ github.workspace }}/.github/workflows/src/api-md-consistency/api-md-consistency.js'); + return await apiMdConsistency({ github, context, core }); diff --git a/.github/workflows/api-md-workflow-tests.yml b/.github/workflows/api-md-workflow-tests.yml new file mode 100644 index 000000000000..12c4b34515c9 --- /dev/null +++ b/.github/workflows/api-md-workflow-tests.yml @@ -0,0 +1,26 @@ +name: API.md Workflow Unit Tests + +on: + workflow_dispatch: + pull_request: + branches: [ main ] + paths: + - "scripts/api_md_workflow/**" + - ".github/workflows/api-md-workflow-tests.yml" + +permissions: + contents: read + +jobs: + unit-tests: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - name: Run API.md workflow unit tests + run: python -m unittest discover scripts/api_md_workflow "*_test.py" diff --git a/.github/workflows/src/api-md-consistency/adapter_config.js b/.github/workflows/src/api-md-consistency/adapter_config.js new file mode 100644 index 000000000000..89f562f4c50d --- /dev/null +++ b/.github/workflows/src/api-md-consistency/adapter_config.js @@ -0,0 +1,46 @@ +#!/usr/bin/env node + +import fs from "fs"; +import path from "path"; +import { fileURLToPath, pathToFileURL } from "url"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +const DEFAULT_CONFIG = { + adapter: "python", +}; + +export function loadWorkflowConfig() { + const configPath = path.join(__dirname, "api_md_workflow.config.json"); + if (!fs.existsSync(configPath)) { + return { 
...DEFAULT_CONFIG }; + } + + const raw = fs.readFileSync(configPath, "utf-8"); + let parsed; + try { + parsed = JSON.parse(raw); + } catch (error) { + throw new Error( + `ERROR: invalid JSON in ${configPath}: ${error instanceof Error ? error.message : String(error)}`, + ); + } + + if (!parsed || typeof parsed !== "object") { + throw new Error(`ERROR: ${configPath} must contain a JSON object.`); + } + + return { + ...DEFAULT_CONFIG, + ...parsed, + }; +} + +export async function loadAdapter(name) { + const adapterPath = path.join(__dirname, "adapters", `${name}.js`); + if (!fs.existsSync(adapterPath)) { + throw new Error(`ERROR: adapter '${name}' not found at ${adapterPath}`); + } + + return import(pathToFileURL(adapterPath).href); +} diff --git a/.github/workflows/src/api-md-consistency/adapters/python.js b/.github/workflows/src/api-md-consistency/adapters/python.js new file mode 100644 index 000000000000..aace3e8fbea9 --- /dev/null +++ b/.github/workflows/src/api-md-consistency/adapters/python.js @@ -0,0 +1,164 @@ +#!/usr/bin/env node + +import fs from "fs"; +import path from "path"; +import { spawnSync } from "child_process"; + +function run(cmd, args, options = {}) { + const logger = options.logger || console; + const printable = [cmd, ...args].join(" "); + logger.info(`$ ${printable}`); + const result = spawnSync(cmd, args, { + cwd: options.cwd, + env: options.env, + encoding: "utf-8", + stdio: options.capture ? "pipe" : "inherit", + shell: options.shell ?? false, + }); + + if (result.error) { + const errorMessage = result.error instanceof Error ? result.error.message : String(result.error); + throw new Error(`Command failed to start: ${printable}\n${errorMessage}`); + } + + if ((options.check ?? 
true) && result.status !== 0) { + throw new Error(`Command failed (${result.status}): ${printable}`); + } + + return result; +} + +function findPackageDir(repoRoot, packageName) { + const sdkDir = path.join(repoRoot, "sdk"); + const serviceDirs = fs.readdirSync(sdkDir, { withFileTypes: true }); + const matches = []; + + for (const serviceDir of serviceDirs) { + if (!serviceDir.isDirectory()) { + continue; + } + + const candidate = path.join(sdkDir, serviceDir.name, packageName); + if (!fs.existsSync(candidate) || !fs.statSync(candidate).isDirectory()) { + continue; + } + + const hasBuildFile = fs.existsSync(path.join(candidate, "pyproject.toml")) || fs.existsSync(path.join(candidate, "setup.py")); + if (hasBuildFile) { + matches.push(candidate); + } + } + + if (matches.length === 0) { + throw new Error(`ERROR: package '${packageName}' not found under sdk/*/`); + } + + if (matches.length > 1) { + throw new Error(`ERROR: multiple matches for '${packageName}': ${matches.join(", ")}`); + } + + return matches[0]; +} + +function isPackageDir(repoRoot, packageDirRelative) { + const candidate = path.join(repoRoot, packageDirRelative); + if (!fs.existsSync(candidate) || !fs.statSync(candidate).isDirectory()) { + return false; + } + + return fs.existsSync(path.join(candidate, "pyproject.toml")) || fs.existsSync(path.join(candidate, "setup.py")); +} + +function* walkFiles(startDir) { + const entries = fs.readdirSync(startDir, { withFileTypes: true }); + for (const entry of entries) { + const fullPath = path.join(startDir, entry.name); + if (entry.isDirectory()) { + yield* walkFiles(fullPath); + } else { + yield fullPath; + } + } +} + +function readVersion(packageDir) { + const versionRegex = /^\s*VERSION\s*[:=]\s*["']([^"']+)["']/m; + const candidates = []; + + for (const file of walkFiles(packageDir)) { + const name = path.basename(file); + if (name === "_version.py" || name === "version.py") { + // Skip generated code directories — they often contain stale versions + const 
relative = path.relative(packageDir, file); + if (relative.includes("_generated") || relative.includes("generated_")) { + continue; + } + candidates.push(file); + } + } + + for (const candidate of candidates) { + let text; + try { + text = fs.readFileSync(candidate, "utf-8"); + } catch { + continue; + } + + const match = text.match(versionRegex); + if (match) { + return match[1]; + } + } + + throw new Error(`ERROR: could not find a version string in ${packageDir}`); +} + +function generateApiForPackage({ + repoRoot, + packageName, + runtimeExecutable, + logger, + refLabel, +}) { + const activeLogger = logger || console; + if (refLabel) { + activeLogger.info(`--- Generating api.md on ${refLabel} ---`); + } + + const packageDir = findPackageDir(repoRoot, packageName); + if (runtimeExecutable || process.env.RUNTIME_EXECUTABLE) { + const pythonExecutable = runtimeExecutable || process.env.RUNTIME_EXECUTABLE; + run( + pythonExecutable, + ["-m", "azpysdk.main", "apistub", "--md", "--extract-metadata", "--dest-dir", packageDir, packageName], + { + cwd: repoRoot, + check: true, + logger: activeLogger, + }, + ); + return; + } + + run("azpysdk", ["apistub", "--md", "--extract-metadata", "--dest-dir", packageDir, packageName], { + cwd: repoRoot, + check: true, + logger: activeLogger, + shell: process.platform === "win32", + }); +} + +// Fields in api.metadata.yml that must match between working tree and committed version. +// pythonVersion is excluded because it varies across CI environments. 
+const metadataFieldsToValidate = ["apiMdSha256", "parserVersion"]; +const name = "python"; + +export { + name, + isPackageDir, + findPackageDir, + readVersion, + generateApiForPackage, + metadataFieldsToValidate, +}; diff --git a/.github/workflows/src/api-md-consistency/api-md-consistency.js b/.github/workflows/src/api-md-consistency/api-md-consistency.js new file mode 100644 index 000000000000..e7635e68b199 --- /dev/null +++ b/.github/workflows/src/api-md-consistency/api-md-consistency.js @@ -0,0 +1,112 @@ +import fs from "fs"; +import path from "path"; +import { execFile } from "../../../shared/src/exec.js"; + +const ANSI = { + bold: "\u001b[1m", + cyan: "\u001b[36m", + yellow: "\u001b[33m", + reset: "\u001b[0m", +}; + +function styleLog(text, ...styles) { + return `${styles.join("")}${text}${ANSI.reset}`; +} + +async function runNode(scriptRelativePath, workspace, core) { + await execFile("node", [scriptRelativePath], { + cwd: workspace, + logger: core, + logOutput: true, + }); +} + +function readLines(fileRelativePath, workspace) { + const fullPath = path.join(workspace, fileRelativePath); + if (!fs.existsSync(fullPath)) { + return []; + } + + return fs + .readFileSync(fullPath, "utf-8") + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => Boolean(line)); +} + +function formatIssueSection(title, apiFiles) { + if (!apiFiles.length) { + return ""; + } + + const lines = [title]; + for (const apiFile of apiFiles) { + const packageDir = apiFile.replace(/\/(api\.md|api\.metadata\.yml)$/, ""); + const packageName = path.basename(packageDir); + lines.push("============================================================"); + lines.push(styleLog(`PACKAGE: ${packageName}`, ANSI.bold, ANSI.cyan)); + lines.push(`PATH: ${packageDir}`); + lines.push(`API FILE: ${apiFile}`); + lines.push(styleLog("Regenerate from the repository root:", ANSI.bold, ANSI.yellow)); + lines.push(styleLog(` azpysdk apistub --md --extract-metadata ${packageName} --dest-dir .`, ANSI.bold, 
ANSI.yellow)); + lines.push("============================================================"); + } + lines.push(""); + return lines.join("\n"); +} + +export default async function apiMdConsistency({ core }) { + const workspace = process.env.GITHUB_WORKSPACE || process.cwd(); + + await runNode(".github/workflows/src/api-md-consistency/find_affected.js", workspace, core); + + const affected = readLines(process.env.API_MD_PACKAGES_FILE, workspace); + const changedCount = affected.length; + core.setOutput("changed_count", String(changedCount)); + + if (changedCount === 0) { + core.setOutput("mismatch_count", "0"); + core.setOutput("missing_count", "0"); + core.setOutput("issue_count", "0"); + return { + changedCount, + mismatchCount: 0, + missingCount: 0, + issueCount: 0, + }; + } + + await runNode(".github/workflows/src/api-md-consistency/regenerate.js", workspace, core); + await runNode(".github/workflows/src/api-md-consistency/find_mismatches.js", workspace, core); + + const mismatches = readLines(process.env.API_MD_MISMATCHES_FILE, workspace); + const missing = readLines(process.env.API_MD_MISSING_FILE, workspace); + + const mismatchCount = mismatches.length; + const missingCount = missing.length; + const issueCount = mismatchCount + missingCount; + + core.setOutput("mismatch_count", String(mismatchCount)); + core.setOutput("missing_count", String(missingCount)); + core.setOutput("issue_count", String(issueCount)); + + if (issueCount > 0) { + const messageParts = [ + "Generated api.md or api.metadata.yml does not match the committed files, or required API files are missing, for one or more affected packages.", + "api.metadata.yml must be committed alongside api.md, and selected metadata fields are part of pass/fail gating.", + "", + formatIssueSection("Mismatched packages:", mismatches), + formatIssueSection("Missing required API files:", missing), + "To regenerate api.md locally, run the command shown for each package from the repository root.", + ].filter((part) => 
part !== ""); + + core.setFailed(messageParts.join("\n")); + } + + return { + changedCount, + mismatchCount, + missingCount, + issueCount, + }; +}; diff --git a/.github/workflows/src/api-md-consistency/api_md_workflow.config.json b/.github/workflows/src/api-md-consistency/api_md_workflow.config.json new file mode 100644 index 000000000000..34b0ae2b8ce7 --- /dev/null +++ b/.github/workflows/src/api-md-consistency/api_md_workflow.config.json @@ -0,0 +1,3 @@ +{ + "adapter": "python" +} \ No newline at end of file diff --git a/.github/workflows/src/api-md-consistency/common.js b/.github/workflows/src/api-md-consistency/common.js new file mode 100644 index 000000000000..9b1df6f0f490 --- /dev/null +++ b/.github/workflows/src/api-md-consistency/common.js @@ -0,0 +1,100 @@ +#!/usr/bin/env node + +import fs from "fs"; +import path from "path"; +import { fileURLToPath } from "url"; +import { execFile, isExecError } from "../../../shared/src/exec.js"; +import { defaultLogger } from "../../../shared/src/logger.js"; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const REPO_ROOT = path.resolve(__dirname, "..", "..", "..", ".."); + +async function getDefaultLogger() { + return defaultLogger; +} + +async function runAsync(cmd, args, options = {}) { + const check = options.check ?? true; + const logger = options.logger ?? (await getDefaultLogger()); + + try { + const result = await execFile(cmd, args, { + cwd: options.cwd, + logger, + maxBuffer: options.maxBuffer, + }); + + return { + status: 0, + stdout: result.stdout ?? "", + stderr: result.stderr ?? "", + }; + } catch (error) { + if (!isExecError(error)) { + throw error; + } + + const status = Number.isInteger(error.code) ? error.code : 1; + const stdout = error.stdout ?? ""; + const stderr = error.stderr ?? 
""; + + if (!check) { + return { status, stdout, stderr }; + } + + throw new Error(`Command failed (${status}): ${[cmd, ...args].join(" ")}`); + } +} + +function readLines(filePath) { + if (!fs.existsSync(filePath)) { + return []; + } + + return fs + .readFileSync(filePath, "utf-8") + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => Boolean(line)); +} + +function writeLines(filePath, lines) { + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + if (!lines.length) { + fs.writeFileSync(filePath, "", "utf-8"); + return; + } + fs.writeFileSync(filePath, `${lines.join("\n")}\n`, "utf-8"); +} + +function appendGithubOutput(key, value) { + const outputPath = process.env.GITHUB_OUTPUT; + if (!outputPath) { + return; + } + + fs.appendFileSync(outputPath, `${key}=${value}\n`, "utf-8"); +} + +function envPath(name, fallback) { + return process.env[name] || fallback; +} + +function requireEnv(name) { + const value = process.env[name]; + if (!value) { + throw new Error(`Environment variable ${name} is required`); + } + return value; +} + +export { + REPO_ROOT, + getDefaultLogger, + runAsync, + readLines, + writeLines, + appendGithubOutput, + envPath, + requireEnv, +}; diff --git a/.github/workflows/src/api-md-consistency/find_affected.js b/.github/workflows/src/api-md-consistency/find_affected.js new file mode 100644 index 000000000000..9ba4edaa7e40 --- /dev/null +++ b/.github/workflows/src/api-md-consistency/find_affected.js @@ -0,0 +1,72 @@ +#!/usr/bin/env node + +import { + REPO_ROOT, + appendGithubOutput, + envPath, + getDefaultLogger, + requireEnv, + runAsync, + writeLines, +} from "./common.js"; +import { loadAdapter, loadWorkflowConfig } from "./adapter_config.js"; +import { includesSegment } from "../../../shared/src/path.js"; + +async function main() { + const config = loadWorkflowConfig(); + const adapterName = config.adapter; + const adapter = await loadAdapter(adapterName); + if (typeof adapter.isPackageDir !== "function") { + throw new 
Error(`ERROR: adapter '${adapterName}' does not implement isPackageDir(repoRoot, packageDirRelative).`); + } + + const baseRef = requireEnv("API_MD_BASE_REF"); + const packagesFile = envPath("API_MD_PACKAGES_FILE", ".artifacts/affected_package_dirs.txt"); + const changedFile = envPath("API_MD_CHANGED_FILE", ".artifacts/changed_package_dirs.txt"); + + await runAsync("git", ["fetch", "--no-tags", "--depth=1", "origin", baseRef], { + cwd: REPO_ROOT, + }); + const diff = ( + await runAsync("git", ["diff", "--name-only", `origin/${baseRef}..HEAD`], { + cwd: REPO_ROOT, + }) + ).stdout; + + const changedDirs = new Set(); + for (const filePath of diff.split(/\r?\n/)) { + const trimmed = filePath.trim(); + if (!trimmed) { + continue; + } + if (!includesSegment(trimmed, "sdk")) { + continue; + } + + const parts = trimmed.split("/"); + if (parts.length < 3 || parts[0] !== "sdk") { + continue; + } + + changedDirs.add(parts.slice(0, 3).join("/")); + } + + const sortedChanged = [...changedDirs].sort(); + writeLines(changedFile, sortedChanged); + + const affected = []; + for (const packageDir of sortedChanged) { + if (adapter.isPackageDir(REPO_ROOT, packageDir)) { + affected.push(packageDir); + } + } + + writeLines(packagesFile, affected); + appendGithubOutput("count", affected.length); +} + +main().catch(async (error) => { + const logger = await getDefaultLogger(); + logger.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +}); diff --git a/.github/workflows/src/api-md-consistency/find_mismatches.js b/.github/workflows/src/api-md-consistency/find_mismatches.js new file mode 100644 index 000000000000..fff3cbc06017 --- /dev/null +++ b/.github/workflows/src/api-md-consistency/find_mismatches.js @@ -0,0 +1,99 @@ +#!/usr/bin/env node + +import fs from "fs"; + +import { appendGithubOutput, envPath, getDefaultLogger, readLines, runAsync, writeLines } from "./common.js"; +import { loadAdapter, loadWorkflowConfig } from "./adapter_config.js"; + +/** + * Parse a simple key: value YAML file into an object. + * Only handles flat scalar mappings (no nesting, no multi-line values). + */ +function parseSimpleYaml(text) { + const result = {}; + for (const line of text.split(/\r?\n/)) { + const match = line.match(/^(\w+)\s*:\s*(.*)$/); + if (match) { + result[match[1]] = match[2].trim(); + } + } + return result; +} + +async function main() { + const config = loadWorkflowConfig(); + const adapter = await loadAdapter(config.adapter); + + // Fields to compare in api.metadata.yml. If the adapter doesn't specify, + // compare all fields (strict default for languages that don't opt out). + const fieldsToValidate = adapter.metadataFieldsToValidate || null; + + const packagesFile = envPath("API_MD_PACKAGES_FILE", ".artifacts/affected_package_dirs.txt"); + const mismatchesFile = envPath("API_MD_MISMATCHES_FILE", ".artifacts/mismatched_api_files.txt"); + const missingFile = envPath("API_MD_MISSING_FILE", ".artifacts/missing_api_files.txt"); + const packages = readLines(packagesFile); + + const mismatches = []; + const missing = []; + for (const pkgDir of packages) { + const apiFile = `${pkgDir}/api.md`; + const metadataFile = `${pkgDir}/api.metadata.yml`; + + // Enforce that each affected package has a committed api.md file. 
+ if (!fs.existsSync(apiFile) || !fs.statSync(apiFile).isFile()) { + missing.push(apiFile); + continue; + } + + const diffResult = await runAsync("git", ["ls-files", "--error-unmatch", "--", apiFile], { + check: false, + }); + if (diffResult.status !== 0) { + missing.push(apiFile); + continue; + } + + // api.metadata.yml must be present alongside api.md. + if (!fs.existsSync(metadataFile) || !fs.statSync(metadataFile).isFile()) { + missing.push(metadataFile); + } else { + const committedMeta = await runAsync("git", ["show", `HEAD:${metadataFile}`], { + check: false, + }); + if (committedMeta.status !== 0) { + // Not yet committed — treat as missing + missing.push(metadataFile); + } else { + const current = parseSimpleYaml(fs.readFileSync(metadataFile, "utf-8")); + const committed = parseSimpleYaml(committedMeta.stdout); + + // Compare only adapter-specified fields, or all fields if not specified. + const keys = fieldsToValidate || Object.keys({ ...committed, ...current }); + const mismatch = keys.some((key) => current[key] !== committed[key]); + if (mismatch) { + mismatches.push(metadataFile); + } + } + } + + // Diff-gate the full api.md content; metadata is field-gated above. + const quietDiffResult = await runAsync("git", ["diff", "--quiet", "--", apiFile], { + check: false, + }); + if (quietDiffResult.status !== 0) { + mismatches.push(apiFile); + } + } + + writeLines(mismatchesFile, mismatches); + writeLines(missingFile, missing); + appendGithubOutput("mismatch_count", mismatches.length); + appendGithubOutput("missing_count", missing.length); + appendGithubOutput("issue_count", mismatches.length + missing.length); +} + +main().catch(async (error) => { + const logger = await getDefaultLogger(); + logger.error(error instanceof Error ? 
error.message : String(error)); + process.exit(1); +}); diff --git a/.github/workflows/src/api-md-consistency/regenerate.js b/.github/workflows/src/api-md-consistency/regenerate.js new file mode 100644 index 000000000000..498ca5d4e8ec --- /dev/null +++ b/.github/workflows/src/api-md-consistency/regenerate.js @@ -0,0 +1,41 @@ +#!/usr/bin/env node + +import path from "path"; + +import { REPO_ROOT, envPath, getDefaultLogger, readLines } from "./common.js"; +import { loadAdapter, loadWorkflowConfig } from "./adapter_config.js"; + +async function main() { + const logger = await getDefaultLogger(); + const config = loadWorkflowConfig(); + const adapter = await loadAdapter(config.adapter); + if (typeof adapter.generateApiForPackage !== "function") { + throw new Error( + `ERROR: adapter '${config.adapter}' does not implement generateApiForPackage({ repoRoot, packageName, runtimeExecutable }).`, + ); + } + + const packagesFile = envPath("API_MD_PACKAGES_FILE", ".artifacts/affected_package_dirs.txt"); + const packages = readLines(packagesFile); + if (!packages.length) { + return; + } + + const runtimeExecutable = process.env.RUNTIME_EXECUTABLE || null; + for (const pkgDir of packages) { + const packageName = path.basename(pkgDir); + logger.info(`Generating api.md for ${packageName}`); + await adapter.generateApiForPackage({ + repoRoot: REPO_ROOT, + packageName, + runtimeExecutable, + logger, + }); + } +} + +main().catch(async (error) => { + const logger = await getDefaultLogger(); + logger.error(error instanceof Error ? error.message : String(error)); + process.exit(1); +}); diff --git a/doc/eng_sys_checks.md b/doc/eng_sys_checks.md index 3515f24c3f8c..28839bf6d6af 100644 --- a/doc/eng_sys_checks.md +++ b/doc/eng_sys_checks.md @@ -177,8 +177,6 @@ analyze_python_version = "3.11" This setting is read by `eng/scripts/dispatch_checks.py` and is passed to `azpysdk` via the `--python` flag (which requires `--isolate` and `uv`). 
This is useful for packages that use newer syntax or type features that require a more recent Python interpreter. > **Note:** This setting only affects the Python interpreter version used for the analyze venv; it does not change the minimum supported Python version declared in `setup.py`/`pyproject.toml`. -> -> **Warning:** This override applies to _all_ analyze checks dispatched by `dispatch_checks.py`, including `apistub`. The `apistub` tool currently requires Python < 3.11 (`PYTHON_VERSION_LIMIT = (3, 11)` in `azpysdk/apistub.py`). Do not set `analyze_python_version` to `3.11` or higher for packages that still run `apistub` through the standard dispatched analyze flow. ## Environment variables important to CI diff --git a/eng/scripts/Extract-APIViewMetadata-Python.ps1 b/eng/scripts/Extract-APIViewMetadata-Python.ps1 new file mode 100644 index 000000000000..abf162e71f20 --- /dev/null +++ b/eng/scripts/Extract-APIViewMetadata-Python.ps1 @@ -0,0 +1,153 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. See License.txt in the project root for license information. + +<# +.SYNOPSIS +Extracts Python APIView metadata from API markdown and writes api.metadata.yml. + +.DESCRIPTION +Reads an API markdown file, extracts parser and Python runtime versions from the +Python APIView metadata header, removes that header from the markdown, trims leading +blank lines from the markdown body, and writes api.metadata.yml beside the markdown file. + +.PARAMETER ApiMarkdownPath +Optional. Path to API markdown file. If omitted, api.md will be resolved from OutputPath. + +.PARAMETER OutputPath +Optional. Directory containing API markdown output. Defaults to current directory. 
+ +.EXAMPLE +./Extract-APIViewMetadata-Python.ps1 -OutputPath ./sdk/template/azure-template + +.EXAMPLE +./Extract-APIViewMetadata-Python.ps1 -ApiMarkdownPath ./sdk/template/azure-template/api.md +#> + +[CmdletBinding()] +param( + [Parameter(Mandatory = $false)] + [string]$ApiMarkdownPath, + + [Parameter(Mandatory = $false)] + [string]$OutputPath = "." +) + +Set-StrictMode -Version 3 +$ErrorActionPreference = 'Stop' + +function Resolve-ApiMarkdownPath { + param( + [string]$ProvidedPath, + [string]$OutputDirectory + ) + + if ($ProvidedPath) { + return $ProvidedPath + } + + $resolvedOutput = Resolve-Path -LiteralPath $OutputDirectory -ErrorAction Stop + $apiLower = Join-Path $resolvedOutput.Path "api.md" + if (Test-Path -LiteralPath $apiLower -PathType Leaf) { + return $apiLower + } + + throw "Could not find API markdown file in '$OutputDirectory'. Expected api.md." +} + +function Trim-LeadingBlankLines { + param([string[]]$Lines) + + $start = 0 + while ($start -lt $Lines.Count -and [string]::IsNullOrWhiteSpace($Lines[$start])) { + $start++ + } + + if ($start -eq 0) { + return $Lines + } + + if ($start -ge $Lines.Count) { + return @() + } + + return $Lines[$start..($Lines.Count - 1)] +} + +function Get-Sha256Hex { + param([string]$Text) + + $sha256 = [System.Security.Cryptography.SHA256]::Create() + try { + $bytes = [System.Text.Encoding]::UTF8.GetBytes($Text) + $hashBytes = $sha256.ComputeHash($bytes) + return ([System.BitConverter]::ToString($hashBytes)).Replace("-", "").ToLowerInvariant() + } + finally { + $sha256.Dispose() + } +} + +$resolvedApiPath = Resolve-ApiMarkdownPath -ProvidedPath $ApiMarkdownPath -OutputDirectory $OutputPath +if (-not (Test-Path -LiteralPath $resolvedApiPath -PathType Leaf)) { + throw "API markdown file not found: $resolvedApiPath" +} + +$metadataPattern = '^# Package is parsed using apiview-stub-generator\(version:([^\)]+)\), Python version:\s*([^\s]+)\s*$' + +$fileText = Get-Content -LiteralPath $resolvedApiPath -Raw +$lineEnding = if 
($fileText -match "`r`n") { "`r`n" } else { "`n" } +$lines = $fileText -split '\r?\n' + +$metadata = [ordered]@{} +$filtered = [System.Collections.Generic.List[string]]::new() + +foreach ($line in $lines) { + $match = [regex]::Match($line, $metadataPattern) + if ($match.Success) { + # Alphabetical keys in output YAML. + $metadata['parserVersion'] = $match.Groups[1].Value + $metadata['pythonVersion'] = $match.Groups[2].Value + continue + } + + $filtered.Add($line) +} + +# Remove blank lines after opening fence so markdown body starts at namespace. +if ($filtered.Count -gt 0 -and $filtered[0].StartsWith('```')) { + $fence = $filtered[0] + $body = Trim-LeadingBlankLines -Lines @($filtered | Select-Object -Skip 1) + $rewritten = [System.Collections.Generic.List[string]]::new() + $rewritten.Add($fence) + foreach ($line in $body) { + $rewritten.Add($line) + } + $filtered = $rewritten +} +else { + $trimmed = Trim-LeadingBlankLines -Lines @($filtered) + $filtered = [System.Collections.Generic.List[string]]::new($trimmed) +} + +$normalizedLinesForHash = @($filtered | ForEach-Object { $_.TrimEnd() }) +$newlineForHash = [string][char]10 +$normalizedTextForHash = $normalizedLinesForHash -join $newlineForHash +$metadata['apiMdSha256'] = Get-Sha256Hex -Text $normalizedTextForHash + +Set-Content -LiteralPath $resolvedApiPath -Value ($filtered -join $lineEnding) -NoNewline -Encoding utf8 +Write-Host "Updated markdown: $resolvedApiPath" + +$metadataPath = Join-Path (Split-Path -Parent $resolvedApiPath) "api.metadata.yml" +if ($metadata.Count -gt 0) { + $yamlLines = [System.Collections.Generic.List[string]]::new() + foreach ($key in ($metadata.Keys | Sort-Object)) { + $yamlLines.Add(("{0}: {1}" -f $key, $metadata[$key])) + } + + Set-Content -LiteralPath $metadataPath -Value ($yamlLines -join $lineEnding) -Encoding utf8 + Write-Host "Generated metadata: $metadataPath" +} +elseif (Test-Path -LiteralPath $metadataPath) { + Remove-Item -LiteralPath $metadataPath -Force + Write-Host 
"Removed stale metadata: $metadataPath" +} diff --git a/eng/scripts/dispatch_checks.py b/eng/scripts/dispatch_checks.py index 3f6c56b6b7cc..df490a56cde3 100644 --- a/eng/scripts/dispatch_checks.py +++ b/eng/scripts/dispatch_checks.py @@ -214,6 +214,8 @@ async def run_check( async with semaphore: start = time.time() cmd = base_args + [check, "--isolate", package] + if check == "apistub": + cmd += ["--install-deps"] if python_version: cmd += ["--python", python_version] if service: diff --git a/eng/tools/azure-sdk-tools/azpysdk/apistub.py b/eng/tools/azure-sdk-tools/azpysdk/apistub.py index 4465562950c3..84e2cd8c964f 100644 --- a/eng/tools/azure-sdk-tools/azpysdk/apistub.py +++ b/eng/tools/azure-sdk-tools/azpysdk/apistub.py @@ -13,7 +13,6 @@ from ci_tools.parsing import ParsedSetup REPO_ROOT = discover_repo_root() -PYTHON_VERSION_LIMIT = (3, 11) # apistub doesn't support Python 3.11+ def get_package_wheel_path(pkg_root: str) -> str: @@ -70,16 +69,31 @@ def register( action="store_true", help="Generate api.md from the JSON token file using Export-APIViewMarkdown.ps1. Output directory for api.md is the same as the generated token file.", ) + p.add_argument( + "--extract-metadata", + dest="extract_metadata", + default=False, + action="store_true", + help="Extract language-specific metadata from generated api.md into api.metadata.yml and remove metadata header from api.md.", + ) + p.add_argument( + "--install-deps", + dest="install_deps", + default=False, + action="store_true", + help=( + "Install dev requirements and apiview dependencies before running. " + "Skipped by default for faster local iteration." + ), + ) p.set_defaults(func=self.run) def run(self, args: argparse.Namespace) -> int: """Run the apistub check command.""" logger.info("Running apistub check...") - if sys.version_info >= PYTHON_VERSION_LIMIT: - logger.error( - f"Python version {sys.version_info.major}.{sys.version_info.minor} is not supported. 
Version must be less than {PYTHON_VERSION_LIMIT[0]}.{PYTHON_VERSION_LIMIT[1]}." - ) + if getattr(args, "extract_metadata", False) and not getattr(args, "generate_md", False): + logger.error("--extract-metadata requires --md.") return 1 set_envvar_defaults() @@ -101,22 +115,25 @@ def run(self, args: argparse.Namespace) -> int: ) logger.info(f"Processing {package_name} for apistub check") - # install dependencies - self.install_dev_reqs(executable, args, package_dir) - - try: - install_into_venv( - executable, - [ - "-r", - os.path.join(REPO_ROOT, "eng", "apiview_reqs.txt"), - "--index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/", - ], - package_dir, - ) - except CalledProcessError as e: - logger.error(f"Failed to install dependencies: {e}") - return e.returncode + install_deps = getattr(args, "install_deps", False) + + if install_deps: + # install dependencies + self.install_dev_reqs(executable, args, package_dir) + + try: + install_into_venv( + executable, + [ + "-r", + os.path.join(REPO_ROOT, "eng", "apiview_reqs.txt"), + "--index-url=https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/", + ], + package_dir, + ) + except CalledProcessError as e: + logger.error(f"Failed to install dependencies: {e}") + return e.returncode if not os.getenv("PREBUILT_WHEEL_DIR"): create_package_and_install( @@ -131,14 +148,15 @@ def run(self, args: argparse.Namespace) -> int: python_executable=executable, ) - self.pip_freeze(executable) + if install_deps: + self.pip_freeze(executable) pkg_path = get_package_wheel_path(package_dir) pkg_path = os.path.abspath(pkg_path) dest_dir = getattr(args, "dest_dir", None) if dest_dir: - out_token_path = os.path.join(os.path.abspath(dest_dir), package_name) + out_token_path = os.path.abspath(dest_dir) os.makedirs(out_token_path, exist_ok=True) else: out_token_path = os.path.abspath(staging_directory) @@ -164,6 +182,7 @@ def run(self, args: argparse.Namespace) -> 
int: if getattr(args, "generate_md", False): token_json_path = os.path.join(out_token_path, f"{package_name}_python.json") md_script = os.path.join(REPO_ROOT, "eng", "common", "scripts", "Export-APIViewMarkdown.ps1") + metadata_script = os.path.join(REPO_ROOT, "eng", "scripts", "Extract-APIViewMetadata-Python.ps1") logger.info(f"Generating api.md for {package_name}") try: result = run( @@ -175,11 +194,22 @@ def run(self, args: argparse.Namespace) -> int: # pwsh script logs the api.md location if result.stdout: logger.info(result.stdout) + + if getattr(args, "extract_metadata", False): + logger.info(f"Extracting API metadata for {package_name}") + metadata_result = run( + ["pwsh", metadata_script, "-OutputPath", out_token_path], + check=True, + capture_output=True, + text=True, + ) + if metadata_result.stdout: + logger.info(metadata_result.stdout) except FileNotFoundError: logger.error("Failed to generate api.md: pwsh (PowerShell) is not installed or not on PATH.") results.append(1) except CalledProcessError as e: - logger.error(f"Failed to generate api.md (exit code {e.returncode}):") + logger.error(f"Failed to generate api.md or extract metadata (exit code {e.returncode}):") if e.stderr: logger.error(e.stderr) if e.stdout: diff --git a/eng/tools/azure-sdk-tools/azpysdk/changelog.py b/eng/tools/azure-sdk-tools/azpysdk/changelog.py index 0f996bf99509..9a69ff4c69b0 100644 --- a/eng/tools/azure-sdk-tools/azpysdk/changelog.py +++ b/eng/tools/azure-sdk-tools/azpysdk/changelog.py @@ -9,10 +9,10 @@ from ci_tools.functions import get_package_from_repo from ci_tools.logging import logger -# Chronus is pinned as a dev dependency in .github/chronus/package.json with -# a committed lockfile so both the top-level version and all transitive +# Chronus is pinned as a dev dependency in .github/package.json with a +# committed lockfile so both the top-level version and all transitive # dependencies are reproducible. 
-_CHRONUS_INSTALL_DIR = os.path.join(".github", "chronus") +_CHRONUS_INSTALL_DIR = ".github" _CHRONUS_BIN_NAME = "chronus.cmd" if os.name == "nt" else "chronus" _CHRONUS_BIN_PATH = os.path.join(_CHRONUS_INSTALL_DIR, "node_modules", ".bin", _CHRONUS_BIN_NAME) @@ -127,7 +127,7 @@ def _is_chronus_installed() -> bool: def _ensure_chronus_installed(self) -> None: """Verify Chronus is installed locally, offering to install if not. - Runs ``npm ci`` against ``.github/chronus`` so only the exact + Runs ``npm ci`` against ``.github`` so only the exact versions recorded in ``package-lock.json`` are installed. Raises ``SystemExit`` if the user declines or installation fails. """ @@ -198,9 +198,15 @@ def _resolve_package(package_arg: Optional[str]) -> Optional[str]: """Resolve a package argument to a Chronus package name.""" if not package_arg: return None - # Resolve relative paths (e.g. ".") to absolute so get_package_from_repo - # doesn't accidentally glob against the repo root. - target = os.path.abspath(package_arg) if os.path.exists(package_arg) else package_arg + path_like = ( + os.path.isabs(package_arg) or package_arg.startswith(".") or os.sep in package_arg or "/" in package_arg + ) + if os.path.isabs(package_arg): + target = os.path.abspath(package_arg) + elif path_like: + target = os.path.join(REPO_ROOT, package_arg) + else: + target = package_arg try: parsed = get_package_from_repo(target, REPO_ROOT) return parsed.name if parsed else package_arg diff --git a/eng/tools/azure-sdk-tools/tests/test_apistub.py b/eng/tools/azure-sdk-tools/tests/test_apistub.py index 85a60a407794..26af6468945e 100644 --- a/eng/tools/azure-sdk-tools/tests/test_apistub.py +++ b/eng/tools/azure-sdk-tools/tests/test_apistub.py @@ -73,29 +73,101 @@ def test_no_prebuilt_dir_falls_back_to_pkg_root(self, mock_find_whl, mock_parsed class TestRunOutputDirectory: """Verify that dest_dir controls where the output token path ends up.""" - def _make_args(self, dest_dir=None, generate_md=False): + def 
_make_args(self, dest_dir=None, generate_md=False, isolate=False, install_deps=False): return argparse.Namespace( target=".", - isolate=False, + isolate=isolate, command="apistub", service=None, dest_dir=dest_dir, generate_md=generate_md, + install_deps=install_deps, ) @patch( "azpysdk.apistub.REPO_ROOT", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) ) - @patch("azpysdk.apistub.PYTHON_VERSION_LIMIT", (99, 99)) @patch("azpysdk.apistub.get_cross_language_mapping_path", return_value=None) @patch("azpysdk.apistub.get_package_wheel_path", return_value="/fake/pkg.whl") @patch("azpysdk.apistub.create_package_and_install") @patch("azpysdk.apistub.install_into_venv") @patch("azpysdk.apistub.set_envvar_defaults") - def test_dest_dir_creates_package_subfolder( + def test_isolate_does_not_install_dependencies( + self, _env, install_into_venv, _create, _get_whl, _get_mapping, tmp_path, monkeypatch + ): + """When only --isolate is passed, apistub should not install dependencies.""" + monkeypatch.chdir(os.getcwd()) + stub = apistub() + staging = str(tmp_path / "staging") + os.makedirs(staging, exist_ok=True) + fake_parsed = MagicMock() + fake_parsed.folder = str(tmp_path) + fake_parsed.name = "azure-core" + + with patch.object(stub, "get_targeted_directories", return_value=[fake_parsed]), patch.object( + stub, "get_executable", return_value=(sys.executable, staging) + ), patch.object(stub, "install_dev_reqs") as install_dev_reqs, patch.object( + stub, "pip_freeze" + ) as pip_freeze, patch.object( + stub, "run_venv_command" + ): + stub.run(self._make_args(isolate=True)) + + install_dev_reqs.assert_not_called() + install_into_venv.assert_not_called() + pip_freeze.assert_not_called() + + @patch( + "azpysdk.apistub.REPO_ROOT", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) + ) + @patch("azpysdk.apistub.get_cross_language_mapping_path", return_value=None) + @patch("azpysdk.apistub.get_package_wheel_path", 
return_value="/fake/pkg.whl") + @patch("azpysdk.apistub.create_package_and_install") + @patch("azpysdk.apistub.install_into_venv") + @patch("azpysdk.apistub.set_envvar_defaults") + def test_install_deps_installs_dependencies( + self, _env, install_into_venv, _create, _get_whl, _get_mapping, tmp_path, monkeypatch + ): + """When --install-deps is passed, apistub should install dependencies.""" + monkeypatch.chdir(os.getcwd()) + stub = apistub() + staging = str(tmp_path / "staging") + os.makedirs(staging, exist_ok=True) + fake_parsed = MagicMock() + fake_parsed.folder = str(tmp_path) + fake_parsed.name = "azure-core" + + with patch.object(stub, "get_targeted_directories", return_value=[fake_parsed]), patch.object( + stub, "get_executable", return_value=(sys.executable, staging) + ), patch.object(stub, "install_dev_reqs") as install_dev_reqs, patch.object( + stub, "pip_freeze" + ) as pip_freeze, patch.object( + stub, "run_venv_command" + ): + args = self._make_args(install_deps=True) + stub.run(args) + + install_dev_reqs.assert_called_once_with(sys.executable, args, str(tmp_path)) + install_into_venv.assert_called_once() + repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) + assert install_into_venv.call_args.args[1][0:2] == [ + "-r", + os.path.join(repo_root, "eng", "apiview_reqs.txt"), + ] + pip_freeze.assert_called_once_with(sys.executable) + + @patch( + "azpysdk.apistub.REPO_ROOT", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) + ) + @patch("azpysdk.apistub.get_cross_language_mapping_path", return_value=None) + @patch("azpysdk.apistub.get_package_wheel_path", return_value="/fake/pkg.whl") + @patch("azpysdk.apistub.create_package_and_install") + @patch("azpysdk.apistub.install_into_venv") + @patch("azpysdk.apistub.set_envvar_defaults") + def test_dest_dir_uses_destination_directory( self, _env, _install, _create, _get_whl, _get_mapping, tmp_path, monkeypatch ): - """When --dest-dir is given, 
output should go to <dest_dir>/<package_name>/.""" + """When --dest-dir is given, output should go directly to <dest_dir>/.""" monkeypatch.chdir(os.getcwd()) dest = tmp_path / "output" dest.mkdir() @@ -131,7 +203,7 @@ def fake_pwsh(cmd, **kwargs): stub.run(self._make_args(dest_dir=str(dest), generate_md=True)) - expected_out = os.path.join(str(dest), "azure-core") + expected_out = str(dest) assert os.path.isdir(expected_out) assert os.path.exists(os.path.join(expected_out, "api.md")) assert os.path.exists(os.path.join(expected_out, "azure-core_python.json")) @@ -139,7 +211,6 @@ def fake_pwsh(cmd, **kwargs): @patch( "azpysdk.apistub.REPO_ROOT", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) ) - @patch("azpysdk.apistub.PYTHON_VERSION_LIMIT", (99, 99)) @patch("azpysdk.apistub.get_cross_language_mapping_path", return_value=None) @patch("azpysdk.apistub.get_package_wheel_path", return_value="/fake/pkg.whl") @patch("azpysdk.apistub.create_package_and_install") @@ -191,7 +262,6 @@ def fake_pwsh(cmd, **kwargs): @patch( "azpysdk.apistub.REPO_ROOT", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) ) - @patch("azpysdk.apistub.PYTHON_VERSION_LIMIT", (99, 99)) @patch("azpysdk.apistub.get_cross_language_mapping_path", return_value=None) @patch("azpysdk.apistub.get_package_wheel_path", return_value="/fake/pkg.whl") @patch("azpysdk.apistub.create_package_and_install") @@ -235,7 +305,6 @@ def fake_pwsh(cmd, **kwargs): @patch( "azpysdk.apistub.REPO_ROOT", os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..", "..")) ) - @patch("azpysdk.apistub.PYTHON_VERSION_LIMIT", (99, 99)) @patch("azpysdk.apistub.get_cross_language_mapping_path", return_value=None) @patch("azpysdk.apistub.get_package_wheel_path", return_value="/fake/pkg.whl") @patch("azpysdk.apistub.create_package_and_install") diff --git a/eng/tools/azure-sdk-tools/tests/test_changelog_commands.py b/eng/tools/azure-sdk-tools/tests/test_changelog_commands.py index
7e07d9ff177a..ef02b7a69ad8 100644 --- a/eng/tools/azure-sdk-tools/tests/test_changelog_commands.py +++ b/eng/tools/azure-sdk-tools/tests/test_changelog_commands.py @@ -161,7 +161,7 @@ class TestChangelogExecution: All tests in this class patch ``_is_chronus_installed`` to return True so the installation check is bypassed. Chronus is invoked via the - pinned binary at ``.github/chronus/node_modules/.bin/chronus``. + pinned binary at ``.github/node_modules/.bin/chronus``. """ @patch("azpysdk.changelog.changelog._is_chronus_installed", return_value=True) @@ -303,7 +303,7 @@ def test_explicit_path_resolves_name(self, mock_get_pkg): mock_get_pkg.return_value = SimpleNamespace(name="azure-core") result = changelog._resolve_package("sdk/core/azure-core") assert result == "azure-core" - mock_get_pkg.assert_called_once_with("sdk/core/azure-core", REPO_ROOT) + mock_get_pkg.assert_called_once_with(os.path.join(REPO_ROOT, "sdk/core/azure-core"), REPO_ROOT) @patch("azpysdk.changelog.get_package_from_repo") def test_bare_name_resolves(self, mock_get_pkg): @@ -356,9 +356,9 @@ def test_non_interactive_with_auto_install_env(self, mock_stdin, mock_which, moc mock_call.assert_called_once() cmd = mock_call.call_args[0][0] assert cmd == ["/usr/bin/npm", "ci"] - # And it must run from the .github/chronus directory, not repo root. + # And it must run from the .github directory, not repo root. 
_, kwargs = mock_call.call_args - assert kwargs["cwd"].endswith(os.path.join(".github", "chronus")) + assert kwargs["cwd"].endswith(".github") @patch("azpysdk.changelog.changelog._is_chronus_installed", return_value=False) @patch("azpysdk.changelog.shutil.which", return_value="/usr/bin/npm") diff --git a/scripts/api_md_workflow/README.md b/scripts/api_md_workflow/README.md new file mode 100644 index 000000000000..4d96401c29a5 --- /dev/null +++ b/scripts/api_md_workflow/README.md @@ -0,0 +1,21 @@ +# API Review PR Helper + +This folder contains the standalone Python helper used to create API review PRs from generated `api.md` files. + +## Purpose + +`create_api_review_pr.py` compares a baseline package release tag with a target API surface, creates or reuses dedicated API review branches, and opens a draft PR that shows the `api.md` diff. + +The API consistency workflow helpers live under `.github/workflows/src/api-md-consistency`. + +## Usage + +The script includes Python package discovery, version parsing, `api.md` generation, git branch orchestration, and GitHub PR creation in one file. + +`create_api_review_pr.py` compares a baseline package release tag with a target API surface. The target can be a package release tag, an `origin` branch, or an `owner:branch` fork reference. When the target is a tag, the generated PR body identifies it as a target tag instead of a working branch. 
+ +Example comparing two package release tags: + +```bash +python scripts/api_md_workflow/create_api_review_pr.py --package-name azure-ai-projects --base azure-ai-projects_2.1.0 --target azure-ai-projects_2.2.0 +``` diff --git a/scripts/api_md_workflow/create_api_review_pr.py b/scripts/api_md_workflow/create_api_review_pr.py new file mode 100644 index 000000000000..e9e823682cf5 --- /dev/null +++ b/scripts/api_md_workflow/create_api_review_pr.py @@ -0,0 +1,994 @@ +#!/usr/bin/env python3 + +import argparse +import json +import os +import re +import shutil +import subprocess +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Callable +from urllib.error import HTTPError, URLError +from urllib.parse import quote, urlencode +from urllib.request import Request, urlopen + + +REPO_ROOT = Path(__file__).resolve().parents[2] +REPO_OWNER = "Azure" +REPO_NAME = "azure-sdk-for-python" +REPO_SLUG = f"{REPO_OWNER}/{REPO_NAME}" +REMOTE = "origin" +MAIN_REF = f"{REMOTE}/main" +SYNC_METADATA_MARKER = "api-md-review-sync" +SYNC_METADATA_WARNING = "DO NOT MODIFY THESE CONTENTS!" 
+GITHUB_API_TIMEOUT_SECONDS = 30 + + +class GitHubApiError(Exception): + def __init__(self, status: int, message: str): + super().__init__(message) + self.status = status + + +@dataclass +class CommandResult: + status: int + stdout: str = "" + stderr: str = "" + + +@dataclass +class ApiResult: + api_md: bytes + metadata: bytes | None + version: str + + +@dataclass +class BranchState: + has_api_md: bool + has_metadata: bool + api_md_sha256: str | None + + +@dataclass +class BranchSelection: + branch_name: str + reused: bool + remote_ref: str | None + + +GitRunner = Callable[[list[str], bool], CommandResult] +_git_runner: GitRunner | None = None +_github_api: "GitHubApi | None" = None + + +def set_command_runner_for_test(git_runner: GitRunner | None) -> None: + global _git_runner + _git_runner = git_runner + + +def set_github_api_for_test(api: "GitHubApi | None") -> None: + global _github_api + _github_api = api + + +def log_info(message: str) -> None: + print(message) + + +def log_warning(message: str) -> None: + print(message, file=sys.stderr) + + +def log_error(message: str) -> None: + print(message, file=sys.stderr) + + +def run(args: list[str], *, cwd: Path = REPO_ROOT, check: bool = True, capture: bool = False, shell: bool = False) -> CommandResult: + printable = " ".join(args) + log_info(f"$ {printable}") + completed = subprocess.run( + args, + cwd=cwd, + check=False, + capture_output=capture, + text=True, + shell=shell, + ) + result = CommandResult(completed.returncode, completed.stdout or "", completed.stderr or "") + if check and result.status != 0: + raise RuntimeError(f"Command failed ({result.status}): {printable}") + return result + + +def git(args: list[str], *, check: bool = True) -> CommandResult: + if _git_runner: + result = _git_runner(args, check) + if check and result.status != 0: + raise RuntimeError(f"Command failed ({result.status}): {' '.join(['git', *args])}") + return result + return run(["git", *args], check=check, capture=True) + + +def 
git_out(args: list[str]) -> str: + return git(args).stdout.strip() + + +def resolve_github_token() -> str | None: + token = os.environ.get("GITHUB_TOKEN") or os.environ.get("GH_TOKEN") + if token: + return token + + gh = shutil.which("gh") + if not gh: + return None + + try: + return subprocess.run( + [gh, "auth", "token"], + cwd=REPO_ROOT, + check=True, + capture_output=True, + text=True, + ).stdout.strip() + except subprocess.SubprocessError: + return None + + +def normalize_pull_request(pr: dict[str, Any] | None) -> dict[str, Any] | None: + if not isinstance(pr, dict): + return None + + owner_login = None + if isinstance(pr.get("headRepositoryOwner"), dict): + owner_login = pr["headRepositoryOwner"].get("login") + elif isinstance(pr.get("head"), dict): + repo = pr["head"].get("repo") if isinstance(pr["head"].get("repo"), dict) else {} + owner = repo.get("owner") if isinstance(repo.get("owner"), dict) else {} + owner_login = owner.get("login") + + return { + "number": pr.get("number"), + "url": pr.get("url") or pr.get("html_url"), + "state": pr.get("state"), + "updatedAt": pr.get("updatedAt") or pr.get("updated_at"), + "body": pr.get("body"), + "headRefName": pr.get("headRefName") or (pr.get("head") or {}).get("ref"), + "headRepositoryOwner": {"login": owner_login}, + } + + +class GitHubApi: + def __init__(self, token: str | None): + self.token = token + + def _request(self, method: str, url: str, payload: dict[str, Any] | None = None) -> Any: + data = json.dumps(payload).encode("utf-8") if payload is not None else None + headers = { + "Accept": "application/vnd.github+json", + "User-Agent": "azure-sdk-python-api-md-workflow", + "X-GitHub-Api-Version": "2022-11-28", + } + if self.token: + headers["Authorization"] = f"Bearer {self.token}" + if data is not None: + headers["Content-Type"] = "application/json" + + request = Request(url, data=data, headers=headers, method=method) + try: + with urlopen(request, timeout=GITHUB_API_TIMEOUT_SECONDS) as response: + body = 
response.read().decode("utf-8") + return json.loads(body) if body else None + except HTTPError as error: + details = error.read().decode("utf-8", errors="replace") + raise GitHubApiError(error.code, details or str(error)) from error + except TimeoutError as error: + raise GitHubApiError(1, f"GitHub API request timed out: {error}") from error + except URLError as error: + raise GitHubApiError(1, str(error)) from error + + def _rest_url(self, path: str, query: dict[str, Any] | None = None) -> str: + url = f"https://api.github.com{path}" + if query: + url = f"{url}?{urlencode(query)}" + return url + + def list_pull_requests_by_head(self, head: str, limit: int) -> list[dict[str, Any]]: + data = self._request( + "GET", + self._rest_url( + f"/repos/{REPO_OWNER}/{REPO_NAME}/pulls", + {"head": head, "state": "open", "per_page": limit}, + ), + ) + return [pr for pr in (normalize_pull_request(item) for item in data or []) if pr] + + def search_pull_requests(self, query: str, limit: int) -> list[dict[str, Any]]: + graphql_query = """ +query($query: String!, $first: Int!) { + search(query: $query, type: ISSUE, first: $first) { + nodes { + ... 
on PullRequest { + number + url + state + updatedAt + body + headRefName + headRepositoryOwner { login } + } + } + } +} +""" + data = self._request( + "POST", + "https://api.github.com/graphql", + {"query": graphql_query, "variables": {"query": query, "first": limit}}, + ) + nodes = ((data or {}).get("data") or {}).get("search", {}).get("nodes", []) + return [pr for pr in (normalize_pull_request(item) for item in nodes) if pr] + + def list_pull_requests_by_branches(self, base: str, head: str, limit: int) -> list[dict[str, Any]]: + data = self._request( + "GET", + self._rest_url( + f"/repos/{REPO_OWNER}/{REPO_NAME}/pulls", + {"base": base, "head": f"{REPO_OWNER}:{head}", "state": "open", "per_page": limit}, + ), + ) + return [pr for pr in (normalize_pull_request(item) for item in data or []) if pr] + + def update_pull_request_body(self, number: int, body: str) -> None: + self._request("PATCH", self._rest_url(f"/repos/{REPO_OWNER}/{REPO_NAME}/pulls/{number}"), {"body": body}) + + def create_draft_pull_request(self, base: str, head: str, title: str, body: str) -> dict[str, Any]: + return self._request( + "POST", + self._rest_url(f"/repos/{REPO_OWNER}/{REPO_NAME}/pulls"), + {"base": base, "head": head, "title": title, "body": body, "draft": True}, + ) + + +def get_github_api() -> GitHubApi: + global _github_api + if _github_api is None: + _github_api = GitHubApi(resolve_github_token()) + return _github_api + + +def ensure_clean_worktree() -> None: + status = git_out(["status", "--porcelain"]) + if status: + raise RuntimeError(f"ERROR: working tree is not clean. 
Commit or stash changes before running.\n{status}") + + +def current_branch() -> str: + return git_out(["rev-parse", "--abbrev-ref", "HEAD"]) + + +def tag_exists(tag: str) -> bool: + return git(["rev-parse", "--verify", "--quiet", f"refs/tags/{tag}"], check=False).status == 0 + + +def validate_base_tag(package_name: str, base_tag: str) -> str: + prefix = f"{package_name}_" + if not base_tag.startswith(prefix): + raise RuntimeError(f"ERROR: --base tag '{base_tag}' must start with '{prefix}'.") + + version = base_tag[len(prefix) :] + if not version: + raise RuntimeError(f"ERROR: --base tag '{base_tag}' is missing the version suffix.") + + if not tag_exists(base_tag): + raise RuntimeError(f"ERROR: tag '{base_tag}' does not exist in this repository.") + + return version + + +def is_explicit_package_tag(target: str, package_name: str | None = None) -> bool: + if ":" in target: + return False + if package_name: + return target.startswith(f"{package_name}_") + return "_" in target + + +def resolve_target_tag(target: str, package_name: str | None = None) -> str | None: + if not is_explicit_package_tag(target, package_name): + return None + if tag_exists(target): + return target + git(["fetch", REMOTE, "tag", target], check=False) + return target if tag_exists(target) else None + + +def try_remote_branch_ref(branch: str) -> str | None: + remote_ref = f"refs/remotes/{REMOTE}/{branch}" + result = git(["fetch", REMOTE, f"{branch}:{remote_ref}"], check=False) + return f"{REMOTE}/{branch}" if result.status == 0 else None + + +def fork_url(owner: str) -> str: + return f"https://github.com/{owner}/{REPO_NAME}.git" + + +def try_fork_branch_ref(owner: str, branch: str) -> str | None: + result = git(["fetch", fork_url(owner), branch], check=False) + return "FETCH_HEAD" if result.status == 0 else None + + +def resolve_target_ref(target: str, package_name: str | None = None) -> str: + if ":" not in target: + target_tag = resolve_target_tag(target, package_name) + if target_tag: + 
return target_tag + + branch_ref = try_remote_branch_ref(target) + if branch_ref: + return branch_ref + + raise RuntimeError(f"ERROR: --target '{target}' is neither a branch on {REMOTE} nor a tag in this repository.") + + owner, branch = target.split(":", 1) + if not owner or not branch: + raise RuntimeError(f"ERROR: invalid --target '{target}'. Expected 'tag', 'branch', or 'owner:branch'.") + + branch_ref = try_fork_branch_ref(owner, branch) + if not branch_ref: + raise RuntimeError(f"ERROR: branch '{branch}' does not exist in fork '{owner}'.") + return branch_ref + + +def walk_files(start_dir: Path): + for root, _, files in os.walk(start_dir): + for file_name in files: + yield Path(root) / file_name + + +def find_package_dir(package_name: str) -> Path: + sdk_dir = REPO_ROOT / "sdk" + matches: list[Path] = [] + for service_dir in sdk_dir.iterdir(): + if not service_dir.is_dir(): + continue + candidate = service_dir / package_name + if not candidate.is_dir(): + continue + if (candidate / "pyproject.toml").exists() or (candidate / "setup.py").exists(): + matches.append(candidate) + + if not matches: + raise RuntimeError(f"ERROR: package '{package_name}' not found under sdk/*/") + if len(matches) > 1: + raise RuntimeError(f"ERROR: multiple matches for '{package_name}': {', '.join(str(match) for match in matches)}") + return matches[0] + + +def read_version(package_dir: Path) -> str: + version_regex = re.compile(r"^\s*VERSION\s*[:=]\s*[\"']([^\"']+)[\"']", re.MULTILINE) + candidates: list[Path] = [] + for file_path in walk_files(package_dir): + if file_path.name not in {"_version.py", "version.py"}: + continue + relative = file_path.relative_to(package_dir).as_posix() + if "_generated" in relative or "generated_" in relative: + continue + candidates.append(file_path) + + for candidate in candidates: + try: + text = candidate.read_text(encoding="utf-8") + except OSError: + continue + match = version_regex.search(text) + if match: + return match.group(1) + + raise 
RuntimeError(f"ERROR: could not find a version string in {package_dir}") + + +def generate_api_for_package(package_name: str, runtime_executable: str | None, ref_label: str | None = None) -> None: + if ref_label: + log_info(f"--- Generating api.md on {ref_label} ---") + + package_dir = find_package_dir(package_name) + if runtime_executable or os.environ.get("RUNTIME_EXECUTABLE"): + python_executable = runtime_executable or os.environ["RUNTIME_EXECUTABLE"] + run( + [ + python_executable, + "-m", + "azpysdk.main", + "apistub", + "--md", + "--extract-metadata", + "--dest-dir", + str(package_dir), + package_name, + ], + check=True, + ) + return + + run( + ["azpysdk", "apistub", "--md", "--extract-metadata", "--dest-dir", str(package_dir), package_name], + check=True, + shell=sys.platform == "win32", + ) + + +def package_rel_dir(package_dir: Path) -> str: + return package_dir.relative_to(REPO_ROOT).as_posix() + + +def normalize_package_dir(package_dir: Path | str) -> str: + path_value = Path(package_dir) + if path_value.is_absolute(): + return path_value.relative_to(REPO_ROOT).as_posix() + return str(package_dir).replace("\\", "/") + + +def api_md_path(package_dir: Path) -> Path: + return package_dir / "api.md" + + +def api_md_rel(package_dir: Path) -> str: + return f"{package_rel_dir(package_dir)}/api.md" + + +def metadata_path(package_dir: Path) -> Path: + return package_dir / "api.metadata.yml" + + +def metadata_rel(package_dir: Path) -> str: + return f"{package_rel_dir(package_dir)}/api.metadata.yml" + + +def api_review_branch_name(kind: str, package_name: str, version: str) -> str: + return f"apireview/{kind}_{package_name}_{version}" + + +def parse_simple_yaml(text: str) -> dict[str, str]: + result: dict[str, str] = {} + for line in text.splitlines(): + match = re.match(r"^(\w+)\s*:\s*(.*)$", line) + if match: + result[match.group(1)] = match.group(2).strip() + return result + + +def metadata_sha_or_none(metadata_bytes: bytes | None) -> str | None: + if not 
metadata_bytes: + return None + metadata = parse_simple_yaml(metadata_bytes.decode("utf-8")) + return metadata.get("apiMdSha256") + + +def branch_remote_ref(branch: str) -> str: + return f"{REMOTE}/{branch}" + + +def list_remote_branches_with_prefix(prefix: str) -> list[str]: + result = git(["ls-remote", "--heads", REMOTE, f"refs/heads/{prefix}*"], check=False) + if result.status != 0 or not result.stdout.strip(): + return [] + + branches = [] + for line in result.stdout.splitlines(): + parts = line.strip().split(None, 1) + if len(parts) < 2 or not parts[1].startswith("refs/heads/"): + continue + branch = parts[1][len("refs/heads/") :] + if branch == prefix or branch.startswith(f"{prefix}_"): + branches.append(branch) + return branches + + +def fetch_remote_branch(branch: str) -> str: + git(["fetch", REMOTE, branch]) + return branch_remote_ref(branch) + + +def read_ref_file_bytes(ref: str, relative_path: str) -> bytes | None: + result = git(["show", f"{ref}:{relative_path}"], check=False) + if result.status != 0: + return None + return result.stdout.encode("utf-8") + + +def desired_branch_state(result: ApiResult | None) -> BranchState: + if result is None: + return BranchState(False, False, None) + return BranchState(True, bool(result.metadata), metadata_sha_or_none(result.metadata)) + + +def api_results_have_api_diff(base_result: ApiResult, target_result: ApiResult) -> bool: + return base_result.api_md != target_result.api_md + + +def branch_state_matches_desired(actual: BranchState, desired: BranchState) -> bool: + return actual == desired + + +def read_branch_state(ref: str, api_relative: str, meta_relative: str) -> BranchState: + metadata_bytes = read_ref_file_bytes(ref, meta_relative) + api_md_bytes = read_ref_file_bytes(ref, api_relative) + return BranchState(bool(api_md_bytes), bool(metadata_bytes), metadata_sha_or_none(metadata_bytes)) + + +def branch_suffix_from_index(index: int) -> str: + value = index + suffix = "" + while True: + suffix = chr(97 + 
(value % 26)) + suffix + value = value // 26 - 1 + if value < 0: + return suffix + + +def next_available_branch_name(preferred_branch: str, existing_branches: set[str]) -> str: + if preferred_branch not in existing_branches: + return preferred_branch + + index = 0 + while f"{preferred_branch}_{branch_suffix_from_index(index)}" in existing_branches: + index += 1 + return f"{preferred_branch}_{branch_suffix_from_index(index)}" + + +def is_ancestor_ref(ancestor_ref: str, branch_ref: str) -> bool: + return git(["merge-base", "--is-ancestor", ancestor_ref, branch_ref], check=False).status == 0 + + +def resolve_branch_selection( + *, + preferred_branch: str, + desired_state: BranchState, + api_relative: str, + meta_relative: str, + required_ancestor_ref: str | None = None, +) -> BranchSelection: + existing_branches = set(list_remote_branches_with_prefix(preferred_branch)) + ordered_candidates = sorted(existing_branches, key=lambda branch: (branch != preferred_branch, branch)) + + for candidate_branch in ordered_candidates: + remote_ref = fetch_remote_branch(candidate_branch) + actual_state = read_branch_state(remote_ref, api_relative, meta_relative) + if not branch_state_matches_desired(actual_state, desired_state): + continue + if required_ancestor_ref and not is_ancestor_ref(required_ancestor_ref, remote_ref): + continue + return BranchSelection(candidate_branch, True, remote_ref) + + return BranchSelection(next_available_branch_name(preferred_branch, existing_branches), False, None) + + +def ensure_branch_state_has_metadata_sha(branch_label: str, state: BranchState) -> None: + if state.has_api_md and not state.api_md_sha256: + raise RuntimeError(f"ERROR: {branch_label} is missing apiMdSha256 in api.metadata.yml.") + + +def select_best_pr(prs: list[dict[str, Any]]) -> dict[str, Any] | None: + candidates = [pr for pr in prs if pr.get("number") is not None and pr.get("url") and pr.get("state") and pr.get("updatedAt")] + if not candidates: + return None + open_prs = [pr 
for pr in candidates if str(pr.get("state", "")).lower() == "open"] + pool = open_prs or candidates + return sorted(pool, key=lambda pr: str(pr.get("updatedAt") or ""), reverse=True)[0] + + +def branch_reference_parts(head_selector: str) -> dict[str, str]: + if head_selector == MAIN_REF: + return {"owner": REPO_OWNER, "branch": "main", "display": head_selector} + if ":" in head_selector: + owner, branch = head_selector.split(":", 1) + return {"owner": owner, "branch": branch, "display": head_selector} + return {"owner": REPO_OWNER, "branch": head_selector, "display": head_selector} + + +def target_branch_exists(head_selector: str) -> bool: + parts = branch_reference_parts(head_selector) + if parts["owner"] == REPO_OWNER: + return bool(try_remote_branch_ref(parts["branch"])) + return bool(try_fork_branch_ref(parts["owner"], parts["branch"])) + + +def sync_working_branch_info(head_selector: str | None, package_name: str | None = None) -> dict[str, str] | None: + if not head_selector: + return None + if resolve_target_tag(head_selector, package_name): + return None + if target_branch_exists(head_selector): + parts = branch_reference_parts(head_selector) + return {"owner": parts["owner"], "branch": parts["branch"]} + return None + + +def build_sync_metadata_object( + *, + package_name: str, + package_dir: Path | str, + base_branch: str, + review_branch: str, + head_selector: str, +) -> dict[str, Any] | None: + working_branch = sync_working_branch_info(head_selector, package_name) + if not working_branch: + return None + + metadata: dict[str, Any] = { + "schemaVersion": 1, + "repository": REPO_SLUG, + "packageName": package_name, + "packageDir": normalize_package_dir(package_dir), + "baseBranch": base_branch, + "reviewBranch": review_branch, + "workingOwner": working_branch["owner"], + "workingBranch": working_branch["branch"], + } + working_pr = find_open_pr_for_head(head_selector) + metadata["workingPrNumber"] = working_pr.get("number") if working_pr and 
isinstance(working_pr.get("number"), int) else None + return metadata + + +def build_sync_metadata_block(metadata: dict[str, Any] | None) -> str | None: + if not metadata: + return None + return "\n".join( + [ + f"", + ] + ) + + +def replace_sync_metadata_block(body: str | None, metadata_block: str | None) -> str: + cleaned_body = re.sub(rf"\s*", "", str(body or "")).rstrip() + if not metadata_block: + return cleaned_body + return f"{cleaned_body}\n\n{metadata_block}" + + +def build_review_pr_body( + *, + package_name: str, + target_version: str, + base_version: str, + working_reference: dict[str, str], + baseline_ref: str, + sync_metadata_block: str | None, +) -> str: + lines = [ + f"Automated API review PR for {package_name}.", + "", + f"- **{working_reference['label']}:** {working_reference['markdown']} (version {target_version})", + f"- **Baseline:** {baseline_ref} (version {base_version})", + ] + if working_reference["label"] == "Target tag": + lines.extend( + [ + "", + "> [!WARNING]", + "> Static tag-to-tag review; this PR cannot be automatically updated from a working branch.", + ] + ) + lines.extend(["", "Generated by scripts/api_md_workflow/create_api_review_pr.py."]) + return replace_sync_metadata_block("\n".join(lines), sync_metadata_block) + + +def update_pr_body(pr_number: int, body: str) -> None: + get_github_api().update_pull_request_body(pr_number, body) + + +def ensure_pr_body_sync_metadata(pr: dict[str, Any] | None, metadata_block: str | None) -> None: + if not metadata_block or not pr or not isinstance(pr.get("number"), int): + return + desired_body = replace_sync_metadata_block(pr.get("body") or "", metadata_block) + if desired_body == (pr.get("body") or ""): + return + try: + update_pr_body(pr["number"], desired_body) + log_info(f"Updated API review sync metadata on PR #{pr['number']}.") + except Exception as error: # pylint: disable=broad-except + details = str(error) + log_warning(f"WARNING: failed to update API review sync metadata on PR 
#{pr['number']}." + (f"\n {details}" if details else "")) + + +def find_open_pr_for_head(head_selector: str) -> dict[str, Any] | None: + parts = branch_reference_parts(head_selector) + selector = f"{parts['owner']}:{parts['branch']}" + all_prs: list[dict[str, Any]] = [] + github = get_github_api() + + try: + all_prs.extend(github.list_pull_requests_by_head(selector, 50)) + except Exception: # pylint: disable=broad-except + pass + + try: + all_prs.extend(github.search_pull_requests(f"repo:{REPO_SLUG} is:pr is:open head:{parts['branch']}", 50)) + except Exception: # pylint: disable=broad-except + pass + + deduped: dict[int, dict[str, Any]] = {} + for pr in all_prs: + if ( + pr.get("number") is not None + and pr.get("headRefName") == parts["branch"] + and (pr.get("headRepositoryOwner") or {}).get("login") == parts["owner"] + ): + deduped[int(pr["number"])] = pr + return select_best_pr(list(deduped.values())) + + +def find_open_pr_for_branches(base_branch: str, head_branch: str) -> dict[str, Any] | None: + github = get_github_api() + try: + prs = github.list_pull_requests_by_branches(base_branch, head_branch, 20) + if prs: + return select_best_pr(prs) + except Exception: # pylint: disable=broad-except + pass + + try: + prs = github.search_pull_requests(f"repo:{REPO_SLUG} is:pr is:open head:{head_branch} base:{base_branch}", 20) + return select_best_pr(prs) + except Exception: # pylint: disable=broad-except + return None + + +def create_draft_pr(base_branch: str, head_branch: str, title: str, body: str) -> dict[str, Any]: + try: + created_pr = get_github_api().create_draft_pull_request(base_branch, head_branch, title, body) + return {"ok": True, "url": created_pr.get("html_url") or created_pr.get("url") or "", "stderr": "", "stdout": ""} + except GitHubApiError as error: + return {"ok": False, "status": error.status, "stdout": "", "stderr": str(error)} + + +def branch_reference_markdown(head_selector: str) -> str: + parts = branch_reference_parts(head_selector) + 
branch_url = f"https://github.com/{parts['owner']}/{REPO_NAME}/tree/{quote(parts['branch'], safe='')}" + return f"[branch `{parts['display']}`]({branch_url})" + + +def baseline_reference_markdown(base_tag: str | None) -> str: + if not base_tag: + return "empty" + commit_sha = git_out(["rev-list", "-n", "1", base_tag]) + commit_url = f"https://github.com/{REPO_SLUG}/commit/{commit_sha}" + return f"[tag `{base_tag}`]({commit_url})" + + +def target_reference_info(head_selector: str, package_name: str | None = None) -> dict[str, str]: + target_tag = resolve_target_tag(head_selector, package_name) + if target_tag: + return {"label": "Target tag", "markdown": baseline_reference_markdown(target_tag)} + + if target_branch_exists(head_selector): + pr = find_open_pr_for_head(head_selector) + if pr: + return {"label": "Working PR", "markdown": f"[PR #{pr['number']}]({pr['url']})"} + return {"label": "Working branch", "markdown": branch_reference_markdown(head_selector)} + + return {"label": "Working branch", "markdown": branch_reference_markdown(head_selector)} + + +def write_bytes(file_path: Path, contents: bytes) -> None: + file_path.parent.mkdir(parents=True, exist_ok=True) + file_path.write_bytes(contents) + + +def generate_api_bytes_for_ref( + *, + package_name: str, + package_dir: Path, + runtime_executable: str | None, + ref: str, + ref_label: str, +) -> ApiResult: + package_relative = package_rel_dir(package_dir) + log_info(f"Overlaying package source from {ref_label} ({ref})") + + git(["checkout", ref, "--", package_relative]) + try: + version = read_version(package_dir) + generate_api_for_package(package_name, runtime_executable, ref_label) + + output_path = api_md_path(package_dir) + if not output_path.exists(): + raise RuntimeError(f"ERROR: did not produce {output_path}") + + metadata = metadata_path(package_dir).read_bytes() if metadata_path(package_dir).exists() else None + return ApiResult(output_path.read_bytes(), metadata, version) + finally: + git(["reset", 
"--", package_relative], check=False) + git(["checkout", "HEAD", "--", package_relative]) + git(["clean", "-fd", "--", package_relative], check=False) + + +def parse_args(argv: list[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Create an API review PR for a Python package api.md diff.") + parser.add_argument("--package-name", required=True) + parser.add_argument("--base", required=True) + parser.add_argument("--target") + parser.add_argument("--python", "--runtime", dest="runtime_executable", default=os.environ.get("RUNTIME_EXECUTABLE")) + return parser.parse_args(argv) + + +def main(argv: list[str] | None = None) -> int: + args = parse_args(argv or sys.argv[1:]) + package_dir = find_package_dir(args.package_name) + log_info(f"Found package at: {package_dir}") + + ensure_clean_worktree() + original_branch = current_branch() + if original_branch == "HEAD": + raise RuntimeError("ERROR: refusing to run from a detached HEAD.") + + git(["fetch", REMOTE, "main"]) + base_version = validate_base_tag(args.package_name, args.base) + target_ref = resolve_target_ref(args.target, args.package_name) if args.target else MAIN_REF + + try: + log_info(f"\n=== Capturing baseline api.md from tag {args.base} ===") + base_result = generate_api_bytes_for_ref( + package_name=args.package_name, + package_dir=package_dir, + runtime_executable=args.runtime_executable, + ref=args.base, + ref_label=args.base, + ) + + log_info(f"\n=== Capturing target api.md from {target_ref} ===") + target_result = generate_api_bytes_for_ref( + package_name=args.package_name, + package_dir=package_dir, + runtime_executable=args.runtime_executable, + ref=target_ref, + ref_label=target_ref, + ) + target_version = target_result.version + + if not api_results_have_api_diff(base_result, target_result): + log_info( + f"\nNo API differences found for {args.package_name} between {args.base} " + f"(version {base_version}) and {target_ref} (version {target_version}). 
" + "No API review branches or PR were created." + ) + return 0 + + api_path = api_md_path(package_dir) + api_relative = api_md_rel(package_dir) + meta_file_path = metadata_path(package_dir) + meta_relative = metadata_rel(package_dir) + desired_base_state = desired_branch_state(base_result) + desired_review_state = desired_branch_state(target_result) + + ensure_branch_state_has_metadata_sha("baseline API result", desired_base_state) + ensure_branch_state_has_metadata_sha("target API result", desired_review_state) + + base_selection = resolve_branch_selection( + preferred_branch=api_review_branch_name("base", args.package_name, base_version), + desired_state=desired_base_state, + api_relative=api_relative, + meta_relative=meta_relative, + ) + base_branch = base_selection.branch_name + + if base_selection.reused: + log_info(f"\n=== Reusing base branch {base_branch} ===") + git(["checkout", "-B", base_branch, base_selection.remote_ref or ""]) + else: + log_info(f"\n=== Creating base branch {base_branch} ===") + git(["checkout", "-B", base_branch, MAIN_REF]) + write_bytes(api_path, base_result.api_md) + git(["add", api_relative]) + if base_result.metadata: + write_bytes(meta_file_path, base_result.metadata) + git(["add", meta_relative]) + git(["commit", "-m", f"[API Review] Baseline api.md for {args.package_name} {base_version}"]) + git(["push", "--force-with-lease", REMOTE, base_branch]) + + review_selection = resolve_branch_selection( + preferred_branch=api_review_branch_name("review", args.package_name, target_version), + desired_state=desired_review_state, + api_relative=api_relative, + meta_relative=meta_relative, + required_ancestor_ref=base_branch, + ) + review_branch = review_selection.branch_name + + if review_selection.reused: + log_info(f"\n=== Reusing review branch {review_branch} ===") + git(["checkout", "-B", review_branch, review_selection.remote_ref or ""]) + else: + log_info(f"\n=== Creating review branch {review_branch} ===") + git(["checkout", "-B", 
review_branch, base_branch]) + write_bytes(api_path, target_result.api_md) + git(["add", api_relative]) + if target_result.metadata: + write_bytes(meta_file_path, target_result.metadata) + git(["add", meta_relative]) + git(["commit", "-m", f"[API Review] api.md for {args.package_name} {target_version}"]) + git(["push", "--force-with-lease", REMOTE, review_branch]) + + title = f"[API Review] {args.package_name} {target_version} (base {base_version})" + working_selector = args.target or "main" + working_reference = target_reference_info(working_selector, args.package_name) + baseline_ref = baseline_reference_markdown(args.base) + sync_metadata = build_sync_metadata_object( + package_name=args.package_name, + package_dir=package_dir, + base_branch=base_branch, + review_branch=review_branch, + head_selector=working_selector, + ) + sync_metadata_block = build_sync_metadata_block(sync_metadata) + body = build_review_pr_body( + package_name=args.package_name, + target_version=target_version, + base_version=base_version, + working_reference=working_reference, + baseline_ref=baseline_ref, + sync_metadata_block=sync_metadata_block, + ) + + if base_selection.reused and review_selection.reused: + existing_pr = find_open_pr_for_branches(base_branch, review_branch) + if existing_pr: + ensure_pr_body_sync_metadata(existing_pr, sync_metadata_block) + log_info(f"\n=== Reusing existing PR #{existing_pr['number']} ===") + log_info(existing_pr["url"]) + return 0 + + log_info("\n=== Opening PR ===") + compare_url = f"https://github.com/{REPO_SLUG}/compare/{base_branch}...{review_branch}?expand=1" + pr_create = create_draft_pr(base_branch, review_branch, title, body) + if pr_create["ok"]: + if pr_create.get("url"): + log_info(pr_create["url"]) + else: + existing_pr = find_open_pr_for_branches(base_branch, review_branch) + if existing_pr: + ensure_pr_body_sync_metadata(existing_pr, sync_metadata_block) + log_info(f"\n=== Reusing existing PR #{existing_pr['number']} ===") + 
log_info(existing_pr["url"]) + return 0 + + error_details = "\n ".join( + item + for item in [ + f"Exit code: {pr_create.get('status')}", + f"stderr: {str(pr_create.get('stderr') or '').replace(chr(10), ' ').replace(chr(13), ' ').strip()}" + if pr_create.get("stderr") + else "", + f"stdout: {str(pr_create.get('stdout') or '').replace(chr(10), ' ').replace(chr(13), ' ').strip()}" + if pr_create.get("stdout") + else "", + f"Debug repro: use the GitHub REST API endpoint POST /repos/{REPO_SLUG}/pulls with base/head/title/body/draft=true.", + ] + if item + ) + log_warning( + "\nWARNING: GitHub PR creation failed. Both branches were pushed successfully -- open the PR manually here:\n" + f" {compare_url}\n" + f" Title: {title}" + + (f"\n {error_details}" if error_details else "") + ) + return 0 + finally: + git(["checkout", original_branch], check=False) + + +if __name__ == "__main__": + try: + sys.exit(main()) + except Exception as error: # pylint: disable=broad-except + log_error(str(error)) + sys.exit(1) diff --git a/scripts/api_md_workflow/create_api_review_pr_test.py b/scripts/api_md_workflow/create_api_review_pr_test.py new file mode 100644 index 000000000000..89dd5211d2b4 --- /dev/null +++ b/scripts/api_md_workflow/create_api_review_pr_test.py @@ -0,0 +1,324 @@ +import json +import unittest +from unittest.mock import MagicMock, patch + +from scripts.api_md_workflow import create_api_review_pr as workflow + + +def command_result(stdout="", status=0): + return workflow.CommandResult(status=status, stdout=stdout, stderr="") + + +def stub_git_branches(branches): + branch_set = set(branches) + + def runner(args, check): + if args[0] == "fetch" and len(args) > 2 and args[2].split(":", 1)[0] in branch_set: + return command_result() + return command_result(status=1) + + return runner + + +class StubGithubApi: + def __init__(self, head_results=None, search_results=None, on_lookup=None): + self.head_results = head_results or [] + self.search_results = search_results or [] + 
self.on_lookup = on_lookup + + def _lookup(self, results): + if self.on_lookup: + self.on_lookup() + return results + + def list_pull_requests_by_head(self, _head, _limit): + return self._lookup(self.head_results) + + def search_pull_requests(self, _query, _limit): + return self._lookup(self.search_results) + + def list_pull_requests_by_branches(self, _base, _head, _limit): + return [] + + def update_pull_request_body(self, _number, _body): + return None + + def create_draft_pull_request(self, _base, _head, _title, _body): + return {"html_url": "https://github.com/Azure/azure-sdk-for-python/pull/1"} + + +class ApiReviewPrTests(unittest.TestCase): + def tearDown(self): + workflow.set_command_runner_for_test(None) + workflow.set_github_api_for_test(None) + + def test_github_api_request_uses_timeout(self): + response = MagicMock() + response.read.return_value = b'{"ok": true}' + response_context = MagicMock() + response_context.__enter__.return_value = response + + with patch.object(workflow, "urlopen", return_value=response_context) as urlopen: + self.assertEqual(workflow.GitHubApi(None)._request("GET", "https://example.test"), {"ok": True}) + + urlopen.assert_called_once() + self.assertEqual(urlopen.call_args.kwargs["timeout"], workflow.GITHUB_API_TIMEOUT_SECONDS) + + def test_target_reference_info_links_matching_open_pr_from_direct_head_query(self): + workflow.set_command_runner_for_test(stub_git_branches(["users/example/direct-feature"])) + workflow.set_github_api_for_test(StubGithubApi( + head_results=[ + { + "number": 45678, + "url": "https://github.com/Azure/azure-sdk-for-python/pull/45678", + "state": "OPEN", + "updatedAt": "2026-06-05T00:00:00Z", + "headRefName": "users/example/direct-feature", + "headRepositoryOwner": {"login": "example"}, + } + ] + )) + + self.assertEqual( + workflow.target_reference_info("example:users/example/direct-feature"), + { + "label": "Working PR", + "markdown": "[PR 
#45678](https://github.com/Azure/azure-sdk-for-python/pull/45678)", + }, + ) + + def test_target_reference_info_keeps_origin_main_as_branch(self): + workflow.set_command_runner_for_test(stub_git_branches(["main"])) + workflow.set_github_api_for_test(StubGithubApi( + search_results=[ + { + "number": 23456, + "url": "https://github.com/Azure/azure-sdk-for-python/pull/23456", + "state": "OPEN", + "updatedAt": "2026-06-05T00:00:00Z", + "headRefName": "main", + "headRepositoryOwner": {"login": "example"}, + } + ] + )) + + self.assertEqual( + workflow.target_reference_info("origin/main"), + { + "label": "Working branch", + "markdown": "[branch `origin/main`](https://github.com/Azure/azure-sdk-for-python/tree/main)", + }, + ) + + def test_target_reference_info_treats_existing_target_tag_as_tag(self): + pr_lookup_count = 0 + + def runner(args, check): + if args[0] == "rev-parse" and "refs/tags/azure-example_1.2.3" in args: + return command_result() + if args[0] == "rev-list": + return command_result("abc123def456\n") + return command_result(status=1) + + def on_lookup(): + nonlocal pr_lookup_count + pr_lookup_count += 1 + + workflow.set_command_runner_for_test(runner) + workflow.set_github_api_for_test(StubGithubApi(on_lookup=on_lookup)) + + self.assertEqual( + workflow.target_reference_info("azure-example_1.2.3"), + { + "label": "Target tag", + "markdown": "[tag `azure-example_1.2.3`](https://github.com/Azure/azure-sdk-for-python/commit/abc123def456)", + }, + ) + self.assertEqual(pr_lookup_count, 0) + + def test_explicit_package_tag_target_wins_over_same_named_remote_branch(self): + pr_lookup_count = 0 + + def runner(args, check): + if args == ["rev-parse", "--verify", "--quiet", "refs/tags/azure-example_1.2.3"]: + return command_result() + if args == ["rev-list", "-n", "1", "azure-example_1.2.3"]: + return command_result("abc123def456\n") + if args == ["fetch", "origin", "azure-example_1.2.3:refs/remotes/origin/azure-example_1.2.3"]: + return command_result() + return 
command_result(status=1) + + def on_lookup(): + nonlocal pr_lookup_count + pr_lookup_count += 1 + + workflow.set_command_runner_for_test(runner) + workflow.set_github_api_for_test(StubGithubApi(on_lookup=on_lookup)) + + self.assertEqual(workflow.resolve_target_ref("azure-example_1.2.3", "azure-example"), "azure-example_1.2.3") + self.assertIsNone(workflow.sync_working_branch_info("azure-example_1.2.3", "azure-example")) + self.assertEqual( + workflow.target_reference_info("azure-example_1.2.3", "azure-example"), + { + "label": "Target tag", + "markdown": "[tag `azure-example_1.2.3`](https://github.com/Azure/azure-sdk-for-python/commit/abc123def456)", + }, + ) + self.assertEqual(pr_lookup_count, 0) + + def test_build_sync_metadata_object_records_fork_owner_and_branch(self): + workflow.set_command_runner_for_test(stub_git_branches(["users/example/feature"])) + workflow.set_github_api_for_test(StubGithubApi( + search_results=[ + { + "number": 47204, + "url": "https://github.com/Azure/azure-sdk-for-python/pull/47204", + "state": "OPEN", + "updatedAt": "2026-06-05T00:00:00Z", + "headRefName": "users/example/feature", + "headRepositoryOwner": {"login": "example"}, + } + ] + )) + + metadata = workflow.build_sync_metadata_object( + package_name="azure-example", + package_dir="sdk/service/azure-example", + base_branch="apireview/base_azure-example_1.0.0", + review_branch="apireview/review_azure-example_1.1.0", + head_selector="example:users/example/feature", + ) + + self.assertEqual(metadata["workingOwner"], "example") + self.assertEqual(metadata["workingBranch"], "users/example/feature") + self.assertEqual(metadata["workingPrNumber"], 47204) + + def test_build_sync_metadata_object_omits_metadata_for_tag_targets(self): + pr_lookup_count = 0 + + def runner(args, check): + if args[0] == "rev-parse" and "refs/tags/azure-example_1.2.3" in args: + return command_result() + return command_result(status=1) + + def on_lookup(): + nonlocal pr_lookup_count + pr_lookup_count += 1 + + 
workflow.set_command_runner_for_test(runner) + workflow.set_github_api_for_test(StubGithubApi(on_lookup=on_lookup)) + + self.assertIsNone( + workflow.build_sync_metadata_object( + package_name="azure-example", + package_dir="sdk/service/azure-example", + base_branch="apireview/base_azure-example_1.0.0", + review_branch="apireview/review_azure-example_1.1.0", + head_selector="azure-example_1.2.3", + ) + ) + self.assertEqual(pr_lookup_count, 0) + + def test_build_review_pr_body_calls_out_static_tag_to_tag_reviews(self): + body = workflow.build_review_pr_body( + package_name="azure-example", + target_version="1.2.3", + base_version="1.2.2", + working_reference={ + "label": "Target tag", + "markdown": "[tag `azure-example_1.2.3`](https://github.com/Azure/azure-sdk-for-python/commit/abc123)", + }, + baseline_ref="[tag `azure-example_1.2.2`](https://github.com/Azure/azure-sdk-for-python/commit/def456)", + sync_metadata_block=None, + ) + + self.assertIn( + "> [!WARNING]\n" + "> Static tag-to-tag review; this PR cannot be automatically updated from a working branch.", + body, + ) + self.assertNotIn("Update behavior", body) + self.assertNotIn("api-md-review-sync", body) + + def test_build_review_pr_body_includes_sync_metadata_for_working_branch_reviews(self): + metadata_block = workflow.build_sync_metadata_block( + { + "schemaVersion": 1, + "repository": "Azure/azure-sdk-for-python", + "packageName": "azure-example", + "packageDir": "sdk/service/azure-example", + "baseBranch": "apireview/base_azure-example_1.0.0", + "reviewBranch": "apireview/review_azure-example_1.1.0", + "workingOwner": "Azure", + "workingBranch": "main", + "workingPrNumber": None, + } + ) + + body = workflow.build_review_pr_body( + package_name="azure-example", + target_version="1.1.0b1", + base_version="1.0.0", + working_reference={ + "label": "Working branch", + "markdown": "[branch `main`](https://github.com/Azure/azure-sdk-for-python/tree/main)", + }, + baseline_ref="[tag 
`azure-example_1.0.0`](https://github.com/Azure/azure-sdk-for-python/commit/def456)", + sync_metadata_block=metadata_block, + ) + + self.assertIn("- **Working branch:**", body) + self.assertNotIn("Static tag-to-tag review", body) + self.assertIn("", "") + self.assertEqual(json.loads(json_text), {"schemaVersion": 1, "workingPrNumber": None}) + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file