From fc9083b35f982d3f1c5579cc0d06e9ae87689f77 Mon Sep 17 00:00:00 2001 From: Damon Date: Wed, 20 Nov 2024 09:19:50 -0800 Subject: [PATCH] Create Python SDK Distroless variant (#33160) * Create Python SDK Distroless variant * Trigger Python PostCommit * Add support for LANG UTF8 --- .../trigger_files/beam_PostCommit_Python.json | 2 +- sdks/python/container/Dockerfile-distroless | 50 +++++++++++++++++++ .../python/test-suites/dataflow/common.gradle | 44 ++++++++++++++++ 3 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 sdks/python/container/Dockerfile-distroless diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json index 9e1d1e1b80dd..2d7af65a3815 100644 --- a/.github/trigger_files/beam_PostCommit_Python.json +++ b/.github/trigger_files/beam_PostCommit_Python.json @@ -1,5 +1,5 @@ { "comment": "Modify this file in a trivial way to cause this test suite to run.", - "modification": 4 + "modification": 5 } diff --git a/sdks/python/container/Dockerfile-distroless b/sdks/python/container/Dockerfile-distroless new file mode 100644 index 000000000000..2799bd2be81f --- /dev/null +++ b/sdks/python/container/Dockerfile-distroless @@ -0,0 +1,50 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+
+ARG BASE
+FROM ${BASE} AS base
+ARG TARGETARCH
+LABEL Author="Apache Beam "
+
+RUN if [ -z "${TARGETARCH}" ]; then echo "fatal: TARGETARCH not set; run as docker buildx build or use --build-arg=TARGETARCH=amd64|arm64" >&2; exit 1; fi
+
+FROM gcr.io/distroless/python3-debian12:latest-${TARGETARCH} AS distroless
+
+# Prevents internal errors found with distroless container images and Flex templates.
+COPY --from=base /usr/lib/locale /usr/lib/locale
+
+# Contains header files needed by the Python interpreter.
+COPY --from=base /usr/local/include /usr/local/include
+
+# Contains the Python interpreter executables.
+COPY --from=base /usr/local/bin /usr/local/bin
+
+# Contains the Python library dependencies.
+COPY --from=base /usr/local/lib /usr/local/lib
+
+# Python standard library modules.
+COPY --from=base /usr/lib/python* /usr/lib/.
+
+# Contains the boot entrypoint and related files such as licenses.
+COPY --from=base /opt /opt
+
+# ENV set in the base stage does not reach this stage, so LANG is set here, in the final image.
+ENV LANG=C.UTF8 PATH="$PATH:/usr/local/bin"
+
+# The base image's ENTRYPOINT is not inherited because this stage derives from a different image, so it is set again.
+ENTRYPOINT ["/opt/apache/beam/boot"]
diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle
index 71d44652bc7e..e851a5420673 100644
--- a/sdks/python/test-suites/dataflow/common.gradle
+++ b/sdks/python/test-suites/dataflow/common.gradle
@@ -380,6 +380,50 @@ task validatesContainer() {
   }
 }
 
+// Builds the distroless Python SDK image, pushes it, and then runs the
+// wordcount integration test with that image as the SDK container image.
+tasks.register('validatesDistrolessContainer', Task.class) {
+  dependsOn 'initializeForDataflowJob'
+  // The image path is namespaced by the USER environment variable.
+  def userRepo = "us.gcr.io/apache-beam-testing/${System.getenv('USER')}"
+  // The tag is the current time in epoch seconds.
+  def epochTag = java.time.Instant.now().getEpochSecond()
+  def distrolessImage = "${userRepo}/beam_python${pythonVersion}_sdk_distroless:${epochTag}"
+  doLast {
+    // Runs from rootDir: the -f path and the "." context are relative to it.
+    exec {
+      workingDir rootDir
+      commandLine(
+        'docker', 'buildx', 'build', '-t', distrolessImage,
+        '-f', 'sdks/python/container/Dockerfile-distroless',
+        "--build-arg=BASE=gcr.io/apache-beam-testing/beam-sdk/beam_python${pythonVersion}_sdk",
+        "."
+      )
+    }
+    // Pushes the image that was just built.
+    exec {
+      commandLine 'docker', 'push', distrolessImage
+    }
+    // Sources the envdir activate script and runs the test from sdks/python.
+    exec {
+      def wordcountTest = "apache_beam/examples/wordcount_it_test.py::WordCountIT::test_wordcount_it"
+      def pipelineOptions = [
+        "output"             : "gs://temp-storage-for-end-to-end-tests/py-it-cloud/output",
+        "project"            : "apache-beam-testing",
+        "region"             : "us-central1",
+        "runner"             : "TestDataflowRunner",
+        "sdk_container_image": "${distrolessImage}",
+        "sdk_location"       : "container",
+        "staging_location"   : "gs://temp-storage-for-end-to-end-tests/staging-it",
+        "temp_location"      : "gs://temp-storage-for-end-to-end-tests/temp-it",
+      ]
+      def optionString = mapToArgString(pipelineOptions)
+      workingDir = "${rootDir}/sdks/python"
+      commandLine 'sh', '-c', ". ${envdir}/bin/activate && pytest ${wordcountTest} --test-pipeline-options=\"${optionString}\""
+    }
+  }
+}
+
 task validatesContainerARM() {
   def pyversion = "${project.ext.pythonVersion.replace('.', '')}"
   dependsOn 'initializeForDataflowJob'