diff --git a/src/cfengine_cli/changelog.py b/src/cfengine_cli/changelog.py new file mode 100644 index 0000000..7dde025 --- /dev/null +++ b/src/cfengine_cli/changelog.py @@ -0,0 +1,432 @@ +"""Changelog generator for CFEngine repositories. + +Auto-detects which repos to include based on the current working directory (core, masterfiles, enterprise). + +Enterprise's changelog also reflects changes in mission-portal, nova and buildscripts (dependency-updates). +Core and Masterfiles only reflect themselves. + +Usage: + cfengine dev generate-changelog [options] [commit-range] + +Examples: + - cfengine dev generate-changelog + on 3.27.x this will check changelog for latest known (e.g. 3.27.1) and update the changelog for 3.27.1 -> 3.27.2 + + - cfengine dev generate-changelog -o 3.26.0..3.27.0 + on any branch will print changelog from version 3.26.0 -> 3.27.0 to stdout +""" + +import os +import re +import subprocess +import logging + +# --------------------------------------------------------------------------- +# Regex +# --------------------------------------------------------------------------- + +JIRA_REGEX = r"(?:Jira:? *)?(?:https?://northerntech\.atlassian\.net/browse/)?((?:CFE|ENT|INF|ARCHIVE|MEN|QA)-[0-9]+)" +JIRA_TITLE_REGEX = r"^(?:CFE|ENT|INF|ARCHIVE|MEN|QA)-[0-9]+" +TRACKER_REGEX = r"\(?(?:Ref:? *)?%s\)?:? 
*" % JIRA_REGEX + +DEP_RE = r"Updated dependency '([^']+)' from version (\S+) to (\S+)" +REVERT_RE = r'^Revert "Updated dependency \'([^\']+)\' from version (\S+) to (\S+)"' +REAPPLY_RE = r'^Reapply "Updated dependency \'([^\']+)\' from version (\S+) to (\S+)"' + + +def fetch_git_output(args): + return subprocess.run( + args, + stdout=subprocess.PIPE, + check=True, + ).stdout.splitlines(keepends=True) + + +# --------------------------------------------------------------------------- +# Fetch and merge dependency upgrades / reverts +# --------------------------------------------------------------------------- +def collect_version_updates(repos, git_args): + dep_history: dict[str, list[tuple[str, str]]] = {} + for repo in repos: + for raw in fetch_git_output( + [ + "git", + "-C", + f"{os.path.join(os.getcwd(), repo)}", + "log", + "--no-merges", + "--reverse", + "--pretty=format:%s", + ] + + git_args, + ): + subject = raw.decode().strip() + + m = re.match(REVERT_RE, subject) + if m: + dep, frm, to = m.group(1), m.group(2), m.group(3) + history = dep_history.get(dep, []) + if history and history[-1] == (frm, to): + history.pop() + continue + + m = re.match(REAPPLY_RE, subject) or re.search(DEP_RE, subject) + if m: + dep, frm, to = m.group(1), m.group(2), m.group(3) + dep_history.setdefault(dep, []).append((frm, to)) + continue + + # Collapse dep chains + results = [] + for dep, history in dep_history.items(): + if not history: + continue + first_from = history[0][0] + last_to = history[-1][1] + results.append( + f"Updated dependency '{dep}' from version {first_from} to {last_to}" + ) + + results.sort() + return results + + +# --------------------------------------------------------------------------- +# Git-log parsing +# --------------------------------------------------------------------------- +def parse_sha(raw_sha, entries, sha_to_tracker, linked_shas, repo): + def add_entry(sha, msg): + if msg.lower().strip() == "none": + return + entries.setdefault(sha, 
[]).append(msg) + + sha = raw_sha.decode().rstrip("\n") + subject = "".join( + line.decode() + for line in fetch_git_output( + [ + "git", + "-C", + f"{os.path.join(os.getcwd(), repo)}", + "log", + "--format=%B", + "-n", + "1", + sha, + ] + ) + ) + + for match in re.finditer(TRACKER_REGEX, subject, re.IGNORECASE): + sha_to_tracker.setdefault(sha, set()).add("".join(match.groups(""))) + + commit_stripped = re.sub(TRACKER_REGEX, "", subject, flags=re.IGNORECASE) + parts = commit_stripped.split("\n", 1) + title = parts[0].strip() + body = parts[1].strip() if len(parts) > 1 else "" + + TOKEN_PATTERNS = [ + r"^Changelog:", + r"^Signed-off-by:", + r"^Co-authored-by:", + r"^Ticket:", + r"^\(cherry picked from commit [0-9a-f]+\)", + r"^Cancel-Changelog:\s*[0-9a-f]+", + r"^This reverts commit [0-9a-f]+", + ] + token_re = re.compile("|".join(TOKEN_PATTERNS), re.IGNORECASE) + + trailers = {} + current_token = None + collected_lines = [] + + for line in body.splitlines(): + stripped = line.strip() + if token_re.match(stripped): + if current_token: + trailers[current_token] = "\n".join(collected_lines).strip() + + if re.match(r"^Changelog:", stripped, re.IGNORECASE): + current_token = "Changelog" + first_val = re.sub(r"^Changelog:\s*", "", stripped, flags=re.IGNORECASE) + collected_lines = [first_val] if first_val else [] + + elif m := re.match( + r"^\(cherry picked from commit ([0-9a-f]+)\)", stripped, re.IGNORECASE + ): + trailers["CherryPick"] = m and m.group(1) + current_token = None + collected_lines = [] + + elif m := re.match( + r"^Cancel-Changelog:\s*([0-9a-f]+)", stripped, re.IGNORECASE + ): + trailers["Cancel"] = m and m.group(1) + current_token = None + collected_lines = [] + + elif m := re.match( + r"^This reverts commit ([0-9a-f]+)", stripped, re.IGNORECASE + ): + trailers["Cancel"] = m and m.group(1) + current_token = None + collected_lines = [] + + else: + current_token = "Other" + collected_lines = [] + else: + if current_token: + collected_lines.append(line) + + 
if current_token: + trailers[current_token] = "\n".join(collected_lines).strip() + + body_lines = [] + for line in body.splitlines(): + if token_re.match(line.strip()): + break + body_lines.append(line) + clean_commit_body = "\n".join(body_lines).strip() + + if "Cancel" in trailers: + target = trailers["Cancel"] + linked = [target] + linked_shas.get(target, []) + for lsha in linked: + linked_shas.pop(lsha, None) + entries.pop(lsha, None) + return + + if "CherryPick" in trailers: + other = trailers["CherryPick"] + linked_shas.setdefault(sha, []).append(other) + linked_shas.setdefault(other, []).append(sha) + + if "Changelog" in trailers: + changelog_val = trailers["Changelog"] + if re.match(r"^Title[ .]*$", changelog_val, re.IGNORECASE): + add_entry(sha, title) + elif re.match(r"^(Commit|Body)[ .]*$", changelog_val, re.IGNORECASE): + add_entry(sha, clean_commit_body) + elif re.match(r"^None[ .]*$", changelog_val, re.IGNORECASE): + pass + else: + add_entry(sha, changelog_val) + elif re.match(JIRA_TITLE_REGEX, title): + add_entry(sha, title) + + +def parse_git_log(repos, git_args): + """Walk git history across repos and return (entry_list, missed_tickets).""" + entries = {} # sha -> [msg, ...] + linked_shas = {} # sha -> [linked_sha, ...] 
+ sha_to_tracker = {} # sha -> set of ticket strings + + for repo in repos: + for raw_sha in fetch_git_output( + [ + "git", + "-C", + f"{os.path.join(os.getcwd(), repo)}", + "rev-list", + "--no-merges", + "--reverse", + ] + + git_args, + ): + parse_sha(raw_sha, entries, sha_to_tracker, linked_shas, repo) + + entry_list = [] + missed_tickets = {} + + for sha, msgs in entries.items(): + tracker = "" + if sha_to_tracker.get(sha): + jiras = sorted(t.upper() for t in sha_to_tracker[sha]) + tracker = "(" + ", ".join(jiras) + ")" + + for entry in msgs: + m = re.search(r"[0-9]{4,}", entry) + if m: + missed_tickets[sha] = m.group(0) + entry = entry.strip("\n") + if tracker: + sep = ( + "\n" + if (len(entry) - entry.rfind("\n") + len(tracker)) >= 70 + else " " + ) + entry += sep + tracker + entry_list.append(entry) + + return entry_list, missed_tickets + + +# --------------------------------------------------------------------------- +# Repostuff +# --------------------------------------------------------------------------- +REPO_MAP = { + "core": [ + "../core", + ], + "enterprise": [ + "../enterprise", + "../nova", + "../mission-portal", + ], + "masterfiles": [ + "../masterfiles", + ], + "packaging": [ + "../buildscripts", + ], +} + + +def detect_repos(): + curr_dir = os.path.basename(os.path.abspath(os.curdir)) + repos = REPO_MAP.get(curr_dir) + if repos is None: + logging.error( + f" current directory '{curr_dir}' is not a recognised repository " + "(expected: core, enterprise or masterfiles).", + ) + exit(1) + return repos + + +def get_current_branch(): + return subprocess.check_output( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], text=True + ).strip() + + +# --------------------------------------------------------------------------- +# Versionstuff +# --------------------------------------------------------------------------- +def read_cfversion(): + try: + with open(".CFVERSION") as f: + parts = f.readline().strip().split(".") + return int(parts[0]), int(parts[1]), 
int(parts[2]) + except FileNotFoundError: + logging.error(" .CFVERSION not found in current directory.") + exit(1) + except (ValueError, IndexError): + logging.error( + " .CFVERSION has unexpected format (expected MAJOR.MINOR.PATCH).", + ) + exit(1) + + +def get_next_version(old_version, branch): + if branch == "HEAD": + logging.error( + " Cannot generate changelog whilst in 'detached HEAD' state", + ) + exit(1) + + version_parts = old_version.split(".") + if branch == "master": + version_parts[1] = str(int(version_parts[1]) + 1) + version_parts[2] = "0" + else: + if branch == f"{version_parts[0]}.{version_parts[1]}.x": + version_parts[2] = str(int(version_parts[2]) + 1) + else: + branch_name_parts = branch.split(".") + version_parts = branch_name_parts[0:2] + ["0"] + return ".".join(version_parts) + + +# --------------------------------------------------------------------------- +# Generate changelog +# --------------------------------------------------------------------------- +def generate_changelog_impl(args): + major, minor, patch = read_cfversion() + if args.show_version: + print(f"{major}.{minor}.{patch}") + return 0 + + repos = detect_repos() + + try: + with open("CHANGELOG.md") as f: + prev = f.readline().strip(":;# \n") + except FileNotFoundError as e: + print(e) + exit(1) + + assert prev, "Could not read previous version from CHANGELOG.md" + + branch = get_current_branch() + expected = get_next_version(prev, branch) + actual = f"{major}.{minor}.{patch}" + + assert actual == expected, ( + f"Version mismatch: CHANGELOG has {prev}, branch '{branch}' expects " + f"next version to be {expected}, but cfversion says {actual}" + ) + + if args.git_args: + versions = args.git_args + else: + # Assumes tag exists and origin follows the same naming scheme for this release + versions = [f"{prev}..origin/{branch}"] + + entry_list, missed_tickets = parse_git_log(repos, versions) + entry_list.sort() + + output = "" + + lines = [] + for entry in entry_list: + entry = "- " 
+ entry + entry = re.sub(r"\n\n+", "\n", entry) # collapse blank lines + entry = entry.replace("\n", "\n ") # indent continuations + lines.append(entry) + if lines: + output = ( + f"## {actual}\n" + if not args.git_args + else f"## {args.git_args[0].split('..')[-1]}\n" + ) + + if ( + os.path.basename(os.path.abspath(os.curdir)) == "enterprise" + ): # packaging changes only included in enterprise + pkg_changes, missed_pkg_tickets = parse_git_log(REPO_MAP["packaging"], versions) + missed_tickets.update(missed_pkg_tickets) + pkg_changes += collect_version_updates(REPO_MAP["packaging"], versions) + pkg_changes.sort() + if pkg_changes: + lines.append("\n**Packaging changes:**") + for entry in pkg_changes: + entry = "- " + entry + entry = re.sub(r"\n\n+", "\n", entry) # collapse blank lines + entry = entry.replace("\n", "\n ") # indent continuations + lines.append(entry) + + if output: + output += "\n".join(lines) + output += "\n\n" + + if args.output and output: + print(output) + elif output: + try: + with open("CHANGELOG.md", "r+") as f: + old = f.read() + f.seek(0, 0) + f.write(output + old) + except FileNotFoundError as e: + print(e) + exit(1) + + for sha, number in missed_tickets.items(): + logging.warning( + f" *** Commit {sha} had a number `{number}` which may be a missed " + "ticket reference.", + ) + + return 0 diff --git a/src/cfengine_cli/dev.py b/src/cfengine_cli/dev.py index b242815..ddbcbb9 100644 --- a/src/cfengine_cli/dev.py +++ b/src/cfengine_cli/dev.py @@ -3,6 +3,7 @@ generate_release_information_impl, ) from cfengine_cli.utils import UserError +from cfengine_cli.changelog import generate_changelog_impl from cfengine_cli.deptool import ( update_dependency_tables as _update_dependency_tables, print_release_dependency_tables, @@ -79,5 +80,7 @@ def dispatch_dev_subcommand(subcommand, args) -> int: return generate_release_information( args.omit_download, args.check_against_git, args.minimum_version ) + if subcommand == "generate-changelog": + return 
generate_changelog_impl(args) raise UserError("Invalid cfengine dev subcommand - " + subcommand) diff --git a/src/cfengine_cli/main.py b/src/cfengine_cli/main.py index 2132df7..5af2a6b 100644 --- a/src/cfengine_cli/main.py +++ b/src/cfengine_cli/main.py @@ -126,6 +126,41 @@ def _get_arg_parser(): up_parser.add_argument( "--validate", action="store_true", help="Validate the given config" ) + parser = dev_subparsers.add_parser( + "generate-changelog", + description="""Changelog generator for CFEngine repositories. + +Auto-detects which repos to include based on the current working directory (core, masterfiles, enterprise) + +Enterprise' changelog also reflects changes in mission-portal, nova and buildscripts (dependency-updates) +Core and Masterfiles only reflect themselves""", + epilog="""Examples: + - cfengine dev generate-changelog + on 3.27.x this will check changelog for latest known (e.g. 3.27.1) and update the changelog for 3.27.1 -> 3.27.2 + + - cfengine dev generate-changelog -o 3.26.0..3.27.0 + on any branch will print changelog from version 2.26.0 -> 3.27.0 to stdout""", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "-o", + "--output", + action="store_true", + default=False, + help="Write changelog to stdout instead of prepending to CHANGELOG.md", + ) + parser.add_argument( + "--show-version", + action="store_true", + dest="show_version", + help="Print the version from .CFVERSION and exit", + ) + parser.add_argument( + "git_args", + nargs="*", + metavar="GIT_ARG", + help="Commit range [other optional args], e.g. 
3.27.0..origin/3.27.x", + ) return ap diff --git a/tests/unit/test_changelog.py b/tests/unit/test_changelog.py new file mode 100644 index 0000000..9b37ab2 --- /dev/null +++ b/tests/unit/test_changelog.py @@ -0,0 +1,147 @@ +"""Unit tests for cfengine_cli/changelog.py""" + +import unittest +from unittest.mock import patch +import cfengine_cli.changelog as cg + + +def _lines(*texts): + """Return list of byte-lines as fetch_git_output would.""" + return [t.encode() for t in texts] + + +# =========================================================================== +# collect_version_updates +# =========================================================================== +class TestCollectVersionUpdates(unittest.TestCase): + + def _run(self, subjects): + lines = _lines(*subjects) + with patch.object(cg, "fetch_git_output", return_value=lines): + return cg.collect_version_updates(["../somerepo"], ["3.27.0..3.27.1"]) + + def test_single_update(self): + result = self._run(["Updated dependency 'openssl' from version 1.1.1 to 3.0.0"]) + self.assertEqual( + result, ["Updated dependency 'openssl' from version 1.1.1 to 3.0.0"] + ) + + def test_revert_cancels_update(self): + result = self._run( + [ + "Updated dependency 'openssl' from version 1.1.1 to 3.0.0", + "Revert \"Updated dependency 'openssl' from version 1.1.1 to 3.0.0\"", + ] + ) + self.assertEqual(result, []) + + def test_reapply_after_revert(self): + result = self._run( + [ + "Updated dependency 'openssl' from version 1.1.1 to 3.0.0", + "Revert \"Updated dependency 'openssl' from version 1.1.1 to 3.0.0\"", + "Reapply \"Updated dependency 'openssl' from version 1.1.1 to 3.0.0\"", + ] + ) + self.assertEqual( + result, ["Updated dependency 'openssl' from version 1.1.1 to 3.0.0"] + ) + + def test_chain_collapses_to_first_from_last_to(self): + result = self._run( + [ + "Updated dependency 'zlib' from version 1.0 to 1.1", + "Updated dependency 'zlib' from version 1.1 to 1.2", + ] + ) + self.assertEqual(result, ["Updated dependency 
'zlib' from version 1.0 to 1.2"]) + + def test_multiple_deps_sorted(self): + result = self._run( + [ + "Updated dependency 'zlib' from version 1.0 to 1.1", + "Updated dependency 'openssl' from version 1.1.1 to 3.0.0", + ] + ) + self.assertEqual(result[0].split("'")[1], "openssl") + self.assertEqual(result[1].split("'")[1], "zlib") + + def test_no_deps(self): + result = self._run(["Fix some bug", "Add a feature"]) + self.assertEqual(result, []) + + +# =========================================================================== +# Main logic / git parsing tests +# =========================================================================== +class TestParseSha(unittest.TestCase): + def __init__(self, *args, **kwargs): + super(TestParseSha, self).__init__(*args, **kwargs) + self.repo = "enterprise" + self.SHA = b"TEST" + self.SHA_STR = "TEST" + + def _run(self, commit_text, entries=None, sha_to_tracker=None, linked_shas=None): + entries = {} if entries is None else entries + sha_to_tracker = {} if sha_to_tracker is None else sha_to_tracker + linked_shas = {} if linked_shas is None else linked_shas + + lines = _lines(commit_text) + with patch.object(cg, "fetch_git_output", return_value=lines): + cg.parse_sha(self.SHA, entries, sha_to_tracker, linked_shas, self.repo) + + return entries, sha_to_tracker, linked_shas + + def test_body_keyword(self): + commit = "fix: something\n\nThis is the body text.\nChangelog: Body" + entries, *_ = self._run(commit) + self.assertIn("This is the body text.", entries[self.SHA_STR]) + + def test_commit_keyword(self): + commit = "fix: something\n\nThis is the body text.\nChangelog: Commit" + entries, *_ = self._run(commit) + self.assertIn("This is the body text.", entries[self.SHA_STR]) + + def test_changelog_none(self): + lines = "fix: something\n\nA thing was fixed.\nChangelog: none\nSigned-off-by: someone" + self.assertDictEqual(self._run(lines)[0], {}) + + def test_custom_multiline(self): + commit = "fix: something\n\nChangelog: Line one\n 
line two continued\nSigned-off-by: x" + entries, *_ = self._run(commit) + entry_text = entries[self.SHA_STR][0] + self.assertIn("Line one", entry_text) + self.assertIn("line two continued", entry_text) + + def test_uses_title(self): + commit = "feat: shiny new feature\n\nChangelog: Title" + entries, *_ = self._run(commit) + self.assertIn("feat: shiny new feature", entries[self.SHA_STR]) + + def test_title_with_trailing_period(self): + commit = "feat: shiny new feature\n\nChangelog: Title." + entries, *_ = self._run(commit) + self.assertIn("feat: shiny new feature", entries[self.SHA_STR]) + + def test_cherry_pick_links_shas(self): + other = "aabbccdd" + commit = ( + f"fix: something\n\nChangelog: Title\n(cherry picked from commit {other})" + ) + _, _, linked_shas = self._run(commit) + self.assertIn(other, linked_shas.get(self.SHA_STR, [])) + self.assertIn(self.SHA_STR, linked_shas.get(other, [])) + + def test_no_tracker_no_entry(self): + commit = "fix: plain commit\n\nChangelog: Title" + _, sha_to_tracker, _ = self._run(commit) + self.assertNotIn(self.SHA_STR, sha_to_tracker) + + def test_jira_tracker_extracted(self): + commit = "CFE-456 fix something\n\nChangelog: Title" + _, sha_to_tracker, _ = self._run(commit) + self.assertIn(self.SHA_STR, sha_to_tracker) + self.assertTrue( + any("CFE-456" in t for t in sha_to_tracker[self.SHA_STR]), + f"Expected CFE-456 in tracker set, got: {sha_to_tracker}", + )