From 7ca71bd14239d26c274fffdec6bf4f89f340c030 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Fri, 1 Dec 2023 12:15:43 +0100 Subject: [PATCH] Add support for chicken-egg providers to dockerhub release process --- .github/workflows/release_dockerhub_image.yml | 17 +++++ Dockerfile | 7 +++ .../MANAGING_PROVIDERS_LIFECYCLE.rst | 58 +++++++++++++++++ .../commands/release_management_commands.py | 33 +++++++--- .../release_management_commands_config.py | 1 + .../airflow_breeze/utils/common_options.py | 8 +++ .../src/airflow_breeze/utils/versions.py | 6 ++ ...release-management_release-prod-images.svg | 62 ++++++++++++------- ...release-management_release-prod-images.txt | 2 +- .../install_from_docker_context_files.sh | 7 +++ 10 files changed, 170 insertions(+), 31 deletions(-) diff --git a/.github/workflows/release_dockerhub_image.yml b/.github/workflows/release_dockerhub_image.yml index 6889539387f6a..3d6d4e065e0a0 100644 --- a/.github/workflows/release_dockerhub_image.yml +++ b/.github/workflows/release_dockerhub_image.yml @@ -35,6 +35,7 @@ concurrency: cancel-in-progress: true env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VERBOSE: true jobs: build-info: timeout-minutes: 10 @@ -46,6 +47,7 @@ jobs: pythonVersions: ${{ steps.selective-checks.outputs.python-versions }} allPythonVersions: ${{ steps.selective-checks.outputs.all-python-versions }} defaultPythonVersion: ${{ steps.selective-checks.outputs.default-python-version }} + chicken-egg-providers: ${{ steps.selective-checks.outputs.chicken-egg-providers }} skipLatest: ${{ github.event.inputs.skipLatest == '' && ' ' || '--skip-latest' }} limitPlatform: ${{ github.repository == 'apache/airflow' && ' ' || '--limit-platform linux/amd64' }} env: @@ -107,6 +109,20 @@ jobs: run: > echo ${{ secrets.DOCKERHUB_TOKEN }} | docker login --password-stdin --username ${{ secrets.DOCKERHUB_USER }} + - name: "Prepare chicken-eggs provider packages" + # In case of provider packages which use latest dev0 version of providers, we 
should prepare them + # from the source code, not from the PyPI because they have apache-airflow>=X.Y.Z dependency + # And when we prepare them from sources they will have apache-airflow>=X.Y.Z.dev0 + shell: bash + run: > + breeze release-management prepare-provider-packages + --package-format wheel + --version-suffix-for-pypi dev0 ${{ needs.build-info.outputs.chicken-egg-providers }} + if: needs.build-info.outputs.chicken-egg-providers != '' + - name: "Copy dist packages to docker-context files" + shell: bash + run: cp -v --no-preserve=mode,ownership ./dist/*.whl ./docker-context-files + if: needs.build-info.outputs.chicken-egg-providers != '' - name: > Release regular images: ${{ github.event.inputs.airflowVersion }}, ${{ matrix.python-version }} run: > @@ -116,6 +132,7 @@ jobs: ${{ needs.build-info.outputs.skipLatest }} ${{ needs.build-info.outputs.limitPlatform }} --limit-python ${{ matrix.python-version }} + --chicken-egg-providers "${{ needs.build-info.outputs.chicken-egg-providers }}" env: COMMIT_SHA: ${{ github.sha }} - name: > diff --git a/Dockerfile b/Dockerfile index 549fe0668c3dc..b2789e3b25ba0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -627,6 +627,13 @@ function install_airflow_and_providers_from_docker_context_files(){ reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==$ver" fi + if [[ -z "${reinstalling_apache_airflow_package}" && ${AIRFLOW_VERSION=} != "" ]]; then + # When we install only provider packages from docker-context files, we need to still + # install airflow from PyPI when AIRFLOW_VERSION is set. 
This handles the case where + # pre-release dockerhub image of airflow is built, but we want to install some providers from + # docker-context files + reinstalling_apache_airflow_package="apache-airflow[${AIRFLOW_EXTRAS}]==${AIRFLOW_VERSION}" + fi # Find Apache Airflow packages in docker-context files local reinstalling_apache_airflow_providers_packages reinstalling_apache_airflow_providers_packages=$(ls \ diff --git a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst index 0fd2e16215c17..40cbd5ff617d3 100644 --- a/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst +++ b/airflow/providers/MANAGING_PROVIDERS_LIFECYCLE.rst @@ -109,6 +109,64 @@ breeze and I'll run unit tests for my Hook. root@fafd8d630e46:/opt/airflow# python -m pytest tests/providers//hook/.py +Adding chicken-egg providers +---------------------------- + +Sometimes we want to release provider that depends on the version of airflow that has not yet been released +- for example when we released ``common.io`` provider it had ``apache-airflow>=2.8.0`` dependency. This +creates problem when generating constraints and building docker image for pre-release versions of +Airflow - because ``pip`` does not recognize the ``.dev0`` or ``.b1`` suffixes of those packages as +valid in the ``>=X.Y.Z`` comparison. So when you want to install a provider package with +``apache-airflow>=2.8.0`` requirement and you have ``2.8.0.dev0`` airflow package, ``pip`` will not +install the package, because it does not recognize ``2.8.0.dev0`` as a valid version for ``>=2.8.0`` +dependency. 
This is because ``pip`` currently implements the minimum version selection algorithm +requirement specified in packaging as described in the packaging version specification +https://packaging.python.org/en/latest/specifications/version-specifiers/#handling-of-pre-releases +Currently ``pip`` only allows to include pre-release versions for all installed packages using ``--pre`` +flag, but it does not have the possibility of selectively using this flag to only one package. +In order to implement our desired behaviour, we need the case where only ``apache-airflow`` is considered +as pre-release version while all the other dependencies only have stable versions and this is currently +not possible. + +To work around this limitation, we have introduced the concept of "chicken-egg" providers. Those providers +are providers that are released together with the version of Airflow they depend on. They are released +with the same version number as the Airflow version they depend on, but with a different suffix. For example +``apache-airflow-providers-common-io==2.8.0.dev0`` is a chicken-egg provider for ``apache-airflow==2.8.0.dev0``. + +However - we should not release those providers to ``pypi``, so in order to allow our CI to work with +pre-release versions and perform both - constraint generation and image releasing, we introduced workarounds +in our tooling where in case we build a pre-release version of Airflow, we will locally build the +chicken-egg providers from sources and they are installed from local directory instead of from PyPI. + +This is controlled by ``chicken_egg_providers`` property in Selective Checks - and our CI will automatically +build and use those chicken-egg providers during the CI process if pre-release version of Airflow is built. + +The code responsible for that is in ``src/airflow_breeze/utils/selective_checks.py``: + +.. 
code-block:: python + + @cached_property + def chicken_egg_providers(self) -> str: + """Space separated list of providers with chicken-egg problem and should be built from sources.""" + return "common.io" + +This list should be kept here until the official version of Airflow the chicken-egg-providers depend on +is released and the version of airflow is updated in the ``main`` and ``v2-X-Y`` branch to ``2.X+1.0.dev0`` +and ``2.X.1.dev0`` respectively. After that the chicken-egg providers will be correctly installed because +both ``2.X.1.dev0`` and ``2.X+1.0.dev0`` are considered by ``pip`` as ``>2.X.0`` (unlike ``2.X.0.dev0``). + +This workaround might be removed if ``pip`` implements the possibility of selectively using ``--pre`` flag +for only one package (which is foreseen as a possibility in the packaging specification but not implemented +by ``pip``). + +.. note:: + + The current solution of building pre-release images will not work well if the chicken-egg-provider is + a pre-installed package because slim images will not use the chicken-egg-provider. This could be solved + by adding ``--chicken-egg-providers`` flag to slim image building step in ``release_dockerhub_image.yml`` + but it would also require filtering out the non-pre-installed packages from it, so the current solution + is to assume pre-installed packages are not chicken-egg providers. 
+ Integration tests ----------------- diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py index 42a55111ea551..3822d9da4b6fd 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands.py @@ -71,6 +71,7 @@ option_airflow_extras, option_airflow_site_directory, option_answer, + option_chicken_egg_providers, option_commit_sha, option_debug_resources, option_dry_run, @@ -139,6 +140,7 @@ run_compile_www_assets, ) from airflow_breeze.utils.shared_options import get_dry_run, get_verbose +from airflow_breeze.utils.versions import is_pre_release option_debug_release_management = click.option( "--debug", @@ -685,14 +687,7 @@ def run_generate_constraints_in_parallel( @option_image_tag_for_running @option_debug_release_management @option_airflow_constraints_mode_ci -@click.option( - "--chicken-egg-providers", - default="", - help="List of chicken-egg provider packages - " - "those that have airflow_version >= current_version and should " - "be installed in CI from locally built packages with >= current_version.dev0 ", - envvar="CHICKEN_EGG_PROVIDERS", -) +@option_chicken_egg_providers @option_github_repository @option_verbose @option_dry_run @@ -1207,6 +1202,18 @@ def add_back_references( start_generating_back_references(site_path, list(expand_all_provider_packages(doc_packages))) +def _add_chicken_egg_providers_to_build_args( + python_build_args: dict[str, str], chicken_egg_providers: str, airflow_version: str +): + if chicken_egg_providers and is_pre_release(airflow_version): + get_console().print( + f"[info]Adding chicken egg providers to build args as {airflow_version} is " + f"pre release and we have chicken-egg packages '{chicken_egg_providers}' defined[/]" + ) + python_build_args["INSTALL_PACKAGES_FROM_CONTEXT"] = "true" + python_build_args["DOCKER_CONTEXT_FILES"] = 
"./docker-context-files" + + @release_management.command( name="release-prod-images", help="Release production images to DockerHub (needs DockerHub permissions)." ) @@ -1243,6 +1250,7 @@ def add_back_references( "rc/alpha/beta images are built.", ) @option_commit_sha +@option_chicken_egg_providers @option_verbose @option_dry_run def release_prod_images( @@ -1253,10 +1261,11 @@ def release_prod_images( limit_python: str | None, commit_sha: str | None, skip_latest: bool, + chicken_egg_providers: str, ): perform_environment_checks() check_remote_ghcr_io_commands() - if not re.match(r"^\d*\.\d*\.\d*$", airflow_version): + if is_pre_release(airflow_version): get_console().print( f"[warning]Skipping latest image tagging as this is a pre-release version: {airflow_version}" ) @@ -1308,6 +1317,9 @@ def release_prod_images( get_console().print(f"[info]Building slim {airflow_version} image for Python {python}[/]") python_build_args = deepcopy(slim_build_args) slim_image_name = f"{dockerhub_repo}:slim-{airflow_version}-python{python}" + _add_chicken_egg_providers_to_build_args( + python_build_args, chicken_egg_providers, airflow_version + ) docker_buildx_command = [ "docker", "buildx", @@ -1337,6 +1349,9 @@ def release_prod_images( } if commit_sha: regular_build_args["COMMIT_SHA"] = commit_sha + _add_chicken_egg_providers_to_build_args( + regular_build_args, chicken_egg_providers, airflow_version + ) docker_buildx_command = [ "docker", "buildx", diff --git a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py index ff0c9cdcd0426..fb60e12463e9b 100644 --- a/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py +++ b/dev/breeze/src/airflow_breeze/commands/release_management_commands_config.py @@ -161,6 +161,7 @@ "--limit-platform", "--skip-latest", "--commit-sha", + "--chicken-egg-providers", ], } ], diff --git 
a/dev/breeze/src/airflow_breeze/utils/common_options.py b/dev/breeze/src/airflow_breeze/utils/common_options.py index cf59df8bc09a3..d053a00514195 100644 --- a/dev/breeze/src/airflow_breeze/utils/common_options.py +++ b/dev/breeze/src/airflow_breeze/utils/common_options.py @@ -787,3 +787,11 @@ def _set_default_from_parent(ctx: click.core.Context, option: click.core.Option, is_flag=True, envvar="RESTART", ) +option_chicken_egg_providers = click.option( + "--chicken-egg-providers", + default="", + help="List of chicken-egg provider packages - " + "those that have airflow_version >= current_version and should " + "be installed in CI from locally built packages with >= current_version.dev0 ", + envvar="CHICKEN_EGG_PROVIDERS", +) diff --git a/dev/breeze/src/airflow_breeze/utils/versions.py b/dev/breeze/src/airflow_breeze/utils/versions.py index 88c5986f7d975..70dc6ad77d38b 100644 --- a/dev/breeze/src/airflow_breeze/utils/versions.py +++ b/dev/breeze/src/airflow_breeze/utils/versions.py @@ -34,3 +34,9 @@ def get_version_tag(version: str, provider_package_id: str, version_suffix: str if version_suffix is None: version_suffix = "" return f"providers-{provider_package_id.replace('.','-')}/{version}{version_suffix}" + + +def is_pre_release(version: str) -> bool: + from packaging.version import Version + + return Version(version).is_prerelease diff --git a/images/breeze/output_release-management_release-prod-images.svg b/images/breeze/output_release-management_release-prod-images.svg index 76922057e1f60..f1e6d24f2c072 100644 --- a/images/breeze/output_release-management_release-prod-images.svg +++ b/images/breeze/output_release-management_release-prod-images.svg @@ -1,4 +1,4 @@ - +