Skip to content

Commit

Permalink
Fix generation of commit history after moving providers
Browse files Browse the repository at this point in the history
Another teething problem after moving providers in apache#42505. After
moving providers, the history of the current folder in "providers"
only contains changes after the move - it does not include
changes from before the move - and since we always regenerate
the full list of commits - they were missing.

We cannot use `--follow` - because `git log --follow` only
works for single files, not directories, but since the move
was very predictable ("airflow/providers/nnn" ->
"airflow/providers/src/airflow/providers/nnn") we can add the old
path to `git log` command to get both - pre and post move commit
history.
  • Loading branch information
potiuk committed Oct 27, 2024
1 parent 77783e0 commit bf88970
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
refresh_provider_metadata_with_provider_id,
render_template,
)
from airflow_breeze.utils.path_utils import AIRFLOW_SOURCES_ROOT
from airflow_breeze.utils.run_utils import run_command
from airflow_breeze.utils.shared_options import get_verbose
from airflow_breeze.utils.versions import get_version_tag
Expand Down Expand Up @@ -186,11 +187,14 @@ class PrepareReleaseDocsUserQuitException(Exception):
}


def _get_git_log_command(from_commit: str | None = None, to_commit: str | None = None) -> list[str]:
def _get_git_log_command(
folder_paths: list[Path] | None = None, from_commit: str | None = None, to_commit: str | None = None
) -> list[str]:
"""Get git command to run for the current repo from the current folder.
The current directory should always be the package folder.
:param folder_paths: list of folder paths to check for changes
:param from_commit: if present - base commit from which to start the log from
:param to_commit: if present - final commit which should be the start of the log
:return: git command to run
Expand All @@ -207,7 +211,8 @@ def _get_git_log_command(from_commit: str | None = None, to_commit: str | None =
git_cmd.append(from_commit)
elif to_commit:
raise ValueError("It makes no sense to specify to_commit without from_commit.")
git_cmd.extend(["--", "."])
folders = [folder_path.as_posix() for folder_path in folder_paths] if folder_paths else ["."]
git_cmd.extend(["--", *folders])
return git_cmd


Expand Down Expand Up @@ -307,18 +312,24 @@ def _get_all_changes_for_package(
get_console().print(f"[info]Checking if tag '{current_tag_no_suffix}' exist.")
result = run_command(
["git", "rev-parse", current_tag_no_suffix],
cwd=provider_details.source_provider_package_path,
cwd=AIRFLOW_SOURCES_ROOT,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
check=False,
)
providers_folder_paths = [
provider_details.source_provider_package_path,
provider_details.old_source_provider_package_path,
]
if not reapply_templates_only and result.returncode == 0:
if get_verbose():
get_console().print(f"[info]The tag {current_tag_no_suffix} exists.")
# The tag already exists
result = run_command(
_get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix),
cwd=provider_details.source_provider_package_path,
_get_git_log_command(
providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}", current_tag_no_suffix
),
cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
Expand All @@ -333,8 +344,10 @@ def _get_all_changes_for_package(
last_doc_only_hash = doc_only_change_file.read_text().strip()
try:
result = run_command(
_get_git_log_command(f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash),
cwd=provider_details.source_provider_package_path,
_get_git_log_command(
providers_folder_paths, f"{HTTPS_REMOTE}/{base_branch}", last_doc_only_hash
),
cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
Expand Down Expand Up @@ -387,8 +400,8 @@ def _get_all_changes_for_package(
for version in provider_details.versions[1:]:
version_tag = get_version_tag(version, provider_package_id)
result = run_command(
_get_git_log_command(next_version_tag, version_tag),
cwd=provider_details.source_provider_package_path,
_get_git_log_command(providers_folder_paths, next_version_tag, version_tag),
cwd=AIRFLOW_SOURCES_ROOT,
capture_output=True,
text=True,
check=True,
Expand All @@ -402,7 +415,7 @@ def _get_all_changes_for_package(
next_version_tag = version_tag
current_version = version
result = run_command(
_get_git_log_command(next_version_tag),
_get_git_log_command(providers_folder_paths, next_version_tag),
cwd=provider_details.source_provider_package_path,
capture_output=True,
text=True,
Expand Down
7 changes: 7 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
)
from airflow_breeze.utils.console import get_console
from airflow_breeze.utils.path_utils import (
AIRFLOW_OLD_PROVIDERS_DIR,
AIRFLOW_PROVIDERS_NS_PACKAGE,
BREEZE_SOURCES_ROOT,
DOCS_ROOT,
Expand Down Expand Up @@ -75,6 +76,7 @@ class ProviderPackageDetails(NamedTuple):
full_package_name: str
pypi_package_name: str
source_provider_package_path: Path
old_source_provider_package_path: Path
documentation_provider_package_path: Path
changelog_path: Path
provider_description: str
Expand Down Expand Up @@ -385,6 +387,10 @@ def get_source_package_path(provider_id: str) -> Path:
return AIRFLOW_PROVIDERS_NS_PACKAGE.joinpath(*provider_id.split("."))


def get_old_source_package_path(provider_id: str) -> Path:
return AIRFLOW_OLD_PROVIDERS_DIR.joinpath(*provider_id.split("."))


def get_documentation_package_path(provider_id: str) -> Path:
return DOCS_ROOT / f"apache-airflow-providers-{provider_id.replace('.', '-')}"

Expand Down Expand Up @@ -515,6 +521,7 @@ def get_provider_details(provider_id: str) -> ProviderPackageDetails:
full_package_name=f"airflow.providers.{provider_id}",
pypi_package_name=f"apache-airflow-providers-{provider_id.replace('.', '-')}",
source_provider_package_path=get_source_package_path(provider_id),
old_source_provider_package_path=get_old_source_package_path(provider_id),
documentation_provider_package_path=get_documentation_package_path(provider_id),
changelog_path=get_source_package_path(provider_id) / "CHANGELOG.rst",
provider_description=provider_info["description"],
Expand Down
1 change: 1 addition & 0 deletions dev/breeze/src/airflow_breeze/utils/path_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ def find_airflow_sources_root_to_operate_on() -> Path:
AIRFLOW_SOURCES_ROOT = find_airflow_sources_root_to_operate_on().resolve()
AIRFLOW_WWW_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "www"
AIRFLOW_UI_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "ui"
AIRFLOW_OLD_PROVIDERS_DIR = AIRFLOW_SOURCES_ROOT / "airflow" / "providers"
AIRFLOW_PROVIDERS_PROJECT = AIRFLOW_SOURCES_ROOT / "providers"
AIRFLOW_PROVIDERS_SRC = AIRFLOW_PROVIDERS_PROJECT / "src"
AIRFLOW_PROVIDERS_NS_PACKAGE = AIRFLOW_PROVIDERS_SRC / "airflow" / "providers"
Expand Down
30 changes: 25 additions & 5 deletions dev/breeze/tests/test_provider_documentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import random
import string
from pathlib import Path

import pytest

Expand Down Expand Up @@ -97,28 +98,47 @@ def test_get_version_tag(version: str, provider_id: str, suffix: str, tag: str):


@pytest.mark.parametrize(
"from_commit, to_commit, git_command",
"folder_paths, from_commit, to_commit, git_command",
[
(None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]),
(None, None, None, ["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "--", "."]),
(
None,
"from_tag",
None,
["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag", "--", "."],
),
(
None,
"from_tag",
"to_tag",
["git", "log", "--pretty=format:%H %h %cd %s", "--date=short", "from_tag...to_tag", "--", "."],
),
(
[Path("a"), Path("b")],
"from_tag",
"to_tag",
[
"git",
"log",
"--pretty=format:%H %h %cd %s",
"--date=short",
"from_tag...to_tag",
"--",
"a",
"b",
],
),
],
)
def test_get_git_log_command(from_commit: str | None, to_commit: str | None, git_command: list[str]):
assert _get_git_log_command(from_commit, to_commit) == git_command
def test_get_git_log_command(
folder_paths: list[str] | None, from_commit: str | None, to_commit: str | None, git_command: list[str]
):
assert _get_git_log_command(folder_paths, from_commit, to_commit) == git_command


def test_get_git_log_command_wrong():
with pytest.raises(ValueError, match=r"to_commit without from_commit"):
_get_git_log_command(None, "to_commit")
_get_git_log_command(None, None, "to_commit")


@pytest.mark.parametrize(
Expand Down

0 comments on commit bf88970

Please sign in to comment.