From d052906af9d627659ece8614fc963f302f6005be Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 27 Oct 2024 12:12:22 +0100 Subject: [PATCH] Fix generation of commit history after moving providers Another teething problem after moving providers in #42505. After moving providers, the history of the current folder in "providers" only contains changes after the move - it does not include changes from before the move - and since we always regenerate the full list of commits - they were missing. We cannot use `--follow` - because `git log --follow` only works for single files, not directories, but since the move was very predictable ("airflow/providers/nnn" -> "airflow/providers/src/airflow/providers/nnn") we can add the old path to `git log` command to get both - pre and post move commit history. --- .../provider_documentation.py | 33 +++++++++++++------ .../src/airflow_breeze/utils/packages.py | 7 ++++ .../src/airflow_breeze/utils/path_utils.py | 1 + .../tests/test_provider_documentation.py | 28 +++++++++++++--- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py index dc14ea6d08dbf..fc2d55f86e28a 100644 --- a/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py +++ b/dev/breeze/src/airflow_breeze/prepare_providers/provider_documentation.py @@ -49,6 +49,7 @@ refresh_provider_metadata_with_provider_id, render_template, ) +from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT from airflow_breeze.utils.run_utils import run_command from airflow_breeze.utils.shared_options import get_verbose from airflow_breeze.utils.versions import get_version_tag @@ -186,11 +187,14 @@ class PrepareReleaseDocsUserQuitException(Exception): } -def _get_git_log_command(from_commit: str | None = None, to_commit: str | None = None) -> list[str]: +def _get_git_log_command( + folder_paths: list[Path] | None = None, from_commit: str | None = None, to_commit: str | None = None +) -> list[str]: """Get git command to run for the current repo from the current folder. The current directory should always be the package folder. + :param folder_paths: list of folder paths to check for changes :param from_commit: if present - base commit from which to start the log from :param to_commit: if present - final commit which should be the start of the log :return: git command to run @@ -207,7 +211,8 @@ def _get_git_log_command(from_commit: str | None = None, to_commit: str | None = git_cmd.append(from_commit) elif to_commit: raise ValueError("It makes no sense to specify to_commit without from_commit.") - git_cmd.extend(["--", "."]) + folders = [folder_path.as_posix() for folder_path in folder_paths] if folder_paths else ["."] + git_cmd.extend(["--", *folders]) return git_cmd @@ -307,18 +312,24 @@ def _get_all_changes_for_package( get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}' exist.") result = run_command( ["git", "rev-parse", current_tag_no_suffix], - cwd=provider_details.source_provider_package_path, + cwd=AIRFLOW_SOURCES_ROOT, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False, ) + providers_folder_paths = [ + provider_details.source_provider_package_path, + provider_details.old_source_provider_package_path, + ] if not reapply_templates_only and result.returncode == 0: if get_verbose(): get_console().print(f"[info]The tag {current_tag_no_suffix} exists.") # The tag already exists result = run_command( - _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix), - cwd=provider_details.source_provider_package_path, + _get_git_log_command( + providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix + ), + cwd=AIRFLOW_SOURCES_ROOT, capture_output=True, text=True, check=True, @@ -333,8 +344,10 @@ def _get_all_changes_for_package( last_doc_only_hash = doc_only_change_file.read_text().strip() try: result = run_command( - _get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash), - cwd=provider_details.source_provider_package_path, + _get_git_log_command( + providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash + ), + cwd=AIRFLOW_SOURCES_ROOT, capture_output=True, text=True, check=True, @@ -387,8 +400,8 @@ def _get_all_changes_for_package( for version in provider_details.versions[1:]: version_tag = get_version_tag(version, provider_package_id) result = run_command( - _get_git_log_command(next_version_tag, version_tag), - cwd=provider_details.source_provider_package_path, + _get_git_log_command(providers_folder_paths, next_version_tag, version_tag), + cwd=AIRFLOW_SOURCES_ROOT, capture_output=True, text=True, check=True, @@ -402,7 +415,7 @@ def _get_all_changes_for_package( next_version_tag = version_tag current_version = version result = run_command( - _get_git_log_command(next_version_tag), + _get_git_log_command(providers_folder_paths, next_version_tag), cwd=provider_details.source_provider_package_path, capture_output=True, text=True, diff --git a/dev/breeze/src/airflow_breeze/utils/packages.py b/dev/breeze/src/airflow_breeze/utils/packages.py index 5ee2c2a3edfc3..9f1eae36938ee 100644 --- a/dev/breeze/src/airflow_breeze/utils/packages.py +++ b/dev/breeze/src/airflow_breeze/utils/packages.py @@ -36,6 +36,7 @@ ) from airflow_breeze.utils.console import get_console from airflow_breeze.utils.path_utils import ( + AIRFLOW_OLD_PROVIDERS_DIR, AIRFLOW_PROVIDERS_NS_PACKAGE, BREEZE_SOURCES_ROOT, DOCS_ROOT, @@ -75,6 +76,7 @@ class ProviderPackageDetails(NamedTuple): full_package_name: str pypi_package_name: str source_provider_package_path: Path + old_source_provider_package_path: Path documentation_provider_package_path: Path changelog_path: Path provider_description: str @@ -385,6 +387,10 @@ def get_source_package_path(provider_id: str) -> Path: return AIRFLOW_PROVIDERS_NS_PACKAGE.joinpath(*provider_id.split(".")) +def get_old_source_package_path(provider_id: str) -> Path: + return AIRFLOW_OLD_PROVIDERS_DIR.joinpath(*provider_id.split(".")) + + def get_documentation_package_path(provider_id: str) -> Path: return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.', '-')}" @@ -515,6 +521,7 @@ def get_provider_details(provider_id: str) -> ProviderPackageDetails: full_package_name=f"airflow.providers.{provider_id}", pypi_package_name=f"apache-airflow-providers-{provider_id.replace('.', '-')}", source_provider_package_path=get_source_package_path(provider_id), + old_source_provider_package_path=get_old_source_package_path(provider_id), documentation_provider_package_path=get_documentation_package_path(provider_id), changelog_path=get_source_package_path(provider_id) / "CHANGELOG.rst", provider_description=provider_info["description"], diff --git a/dev/breeze/src/airflow_breeze/utils/path_utils.py b/dev/breeze/src/airflow_breeze/utils/path_utils.py index 4e510cb768009..0feba56356bac 100644 --- a/dev/breeze/src/airflow_breeze/utils/path_utils.py +++ b/dev/breeze/src/airflow_breeze/utils/path_utils.py @@ -281,6 +281,7 @@ def find_airflow_sources_root_to_operate_on() -> Path: AIRFLOW_SOURCES_ROOT = find_airflow_sources_root_to_operate_on().resolve() AIRFLOW_WWW_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "www" AIRFLOW_UI_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "ui" +AIRFLOW_OLD_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "providers" AIRFLOW_PROVIDERS_PROJECT = AIRFLOW_SOURCES_ROOT / "providers" AIRFLOW_PROVIDERS_SRC = AIRFLOW_PROVIDERS_PROJECT / "src" AIRFLOW_PROVIDERS_NS_PACKAGE = AIRFLOW_PROVIDERS_SRC / "airflow" / "providers" diff --git a/dev/breeze/tests/test_provider_documentation.py b/dev/breeze/tests/test_provider_documentation.py index db770b7856a2c..228fec4ebd1e7 100644 --- a/dev/breeze/tests/test_provider_documentation.py +++ b/dev/breeze/tests/test_provider_documentation.py @@ -18,6 +18,7 @@ import random import string +from pathlib import Path import pytest @@ -97,23 +98,42 @@ def test_get_version_tag(version: str, provider_id: str, suffix: str, tag: str): @pytest.mark.parametrize( - "from_commit, to_commit, git_command", + "folder_paths, from_commit, to_commit, git_command", [ - (None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]), + (None, None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]), ( + None, "from_tag", None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag", "--", "."], ), ( + None, "from_tag", "to_tag", ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag...to_tag", "--", "."], ), + ( + [Path("a"), Path("b")], + "from_tag", + "to_tag", + [ + "git", + "log", + "--pretty=format:%H %h %cd %s", + "--date=short", + "from_tag...to_tag", + "--", + "a", + "b", + ], + ), ], ) -def test_get_git_log_command(from_commit: str | None, to_commit: str | None, git_command: list[str]): - assert _get_git_log_command(from_commit, to_commit) == git_command +def test_get_git_log_command( + folder_paths: list[str] | None, from_commit: str | None, to_commit: str | None, git_command: list[str] +): + assert _get_git_log_command(folder_paths, from_commit, to_commit) == git_command def test_get_git_log_command_wrong():