diff --git a/.github/actions/setup-action/action.yml b/.github/actions/setup-action/action.yml index cb24a065f98c..da69dd9a97dd 100644 --- a/.github/actions/setup-action/action.yml +++ b/.github/actions/setup-action/action.yml @@ -70,3 +70,5 @@ runs: shell: bash run: | echo KUBELET_GCLOUD_CONFIG_PATH=/var/lib/kubelet/pods/$POD_UID/volumes/kubernetes.io~empty-dir/gcloud >> $GITHUB_ENV + - name: Setup environment + uses: ./.github/actions/setup-environment-action diff --git a/.github/actions/setup-self-hosted-action/action.yml b/.github/actions/setup-environment-action/action.yml similarity index 50% rename from .github/actions/setup-self-hosted-action/action.yml rename to .github/actions/setup-environment-action/action.yml index ba3bf8d0d5d8..3452a16c132c 100644 --- a/.github/actions/setup-self-hosted-action/action.yml +++ b/.github/actions/setup-environment-action/action.yml @@ -15,47 +15,42 @@ # specific language governing permissions and limitations # under the License. -name: 'Setup environment for self-hosted runners' -description: 'Setup action to run jobs in a self-hosted runner' +name: 'Setup environment action' +description: 'Setup environment to run jobs' inputs: - requires-py-38: + python-version: required: false - description: 'Set as false if does not require py38 setup' - default: 'true' - requires-py-39: + description: 'Install Python version' + default: '' + java-version: required: false - description: 'Set as false if does not require py39 setup' - default: 'true' - requires-java-8: + description: 'Install Java version' + default: '' + go-version: required: false - description: 'Set as false if does not require java-8 setup' - default: 'true' - requires-go: - required: false - description: 'Set as false if does not require go setup' - default: 'true' + description: 'Install Go version' + default: '' runs: using: "composite" steps: - - name: Install python 3.8 - if: ${{ inputs.requires-py-38 == 'true' }} - uses: actions/setup-python@v4 - with: - python-version: "3.8" - - name: Install python 3.9 - if: ${{ inputs.requires-py-39 == 'true' }} + - name: Install Python + if: ${{ inputs.python-version != '' }} uses: actions/setup-python@v4 with: - python-version: "3.9" - - name: Set Java Version - if: ${{ inputs.requires-java-8 == 'true' }} + python-version: ${{ inputs.python-version }} + - name: Install Java + if: ${{ inputs.java-version != '' }} uses: actions/setup-java@v3 with: distribution: 'temurin' - java-version: 8 - - name: Set Go Version - if: ${{ inputs.requires-go == 'true' }} + java-version: ${{ inputs.java-version }} + - name: Setup Gradle + uses: gradle/gradle-build-action@v2 + with: + cache-read-only: false + - name: Install Go + if: ${{ inputs.go-version != '' }} uses: actions/setup-go@v3 with: - go-version: '1.21' # never set patch, to get latest patch releases. + go-version: ${{ inputs.go-version }} # never set patch, to get latest patch releases. 
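The rename above also changes the action's contract: callers now pass version strings instead of boolean opt-out flags, each toolchain step is skipped when its input is left at the empty default, and the Gradle setup step runs unconditionally. A minimal caller sketch, assuming illustrative version values (nothing in the diff mandates these particular versions):

```yaml
# Hypothetical caller workflow snippet; the versions shown are illustrative.
steps:
  - uses: actions/checkout@v4
  - name: Setup environment
    uses: ./.github/actions/setup-environment-action
    with:
      python-version: '3.8'  # omitted/empty input skips the Python install
      java-version: '11'     # Temurin distribution, via actions/setup-java@v3
      go-version: '1.21'     # via actions/setup-go@v3
```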
diff --git a/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile b/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile index 41f8061d44a9..2cbfea75ab55 100644 --- a/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile +++ b/.github/gh-actions-self-hosted-runners/arc/images/Dockerfile @@ -59,10 +59,15 @@ RUN curl -OL https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-c rm google-cloud-sdk-367.0.0-linux-x86_64.tar.gz && \ mv google-cloud-sdk /usr/local/google-cloud-sdk && \ /usr/local/google-cloud-sdk/install.sh --quiet && \ - /usr/local/google-cloud-sdk/bin/gcloud components install kubectl && \ + /usr/local/google-cloud-sdk/bin/gcloud components install gke-gcloud-auth-plugin && \ #revert permission chown -R runner:runner /home/runner/.config +ENV USE_GKE_GCLOUD_AUTH_PLUGIN=True ENV PATH="${PATH}:/usr/local/google-cloud-sdk/bin" +#Install Kubectl +RUN curl -OL https://dl.k8s.io/release/v1.28.1/bin/linux/amd64/kubectl && \ + chmod +x ./kubectl && \ + mv ./kubectl /usr/local/bin/kubectl #Install Apache Maven RUN curl -OL https://dlcdn.apache.org/maven/maven-3/3.9.4/binaries/apache-maven-3.9.4-bin.tar.gz && \ tar -xvf apache-maven-3.9.4-bin.tar.gz && \ @@ -73,4 +78,5 @@ ENV MAVEN_HOME="/usr/local/maven" # Needed to transfer path additions to runner environment RUN echo PATH=$PATH >> /runnertmp/.env +RUN echo USE_GKE_GCLOUD_AUTH_PLUGIN=$USE_GKE_GCLOUD_AUTH_PLUGIN >> /runnertmp/.env USER runner diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 8636f513b71b..0d0277bd478d 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -180,36 +180,90 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex ```Run Python PreCommit (3.8)``` | Workflow name | Matrix | Trigger Phrase | Cron Status | |:-------------:|:------:|:--------------:|:-----------:| -| [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) -| [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) -| [ PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`Run Go PostCommit`| [![.github/workflows/beam_PostCommit_Go](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | -| [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`|
[![.github/workflows/beam_PostCommit_Go_Dataflow_ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | -| [ PostCommit Go VR Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`Run Go Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | -| [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`Run Go Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | -| [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`Run Go Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | -| [ PostCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | N/A |`Run Java PostCommit`| [![PostCommit Java](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | -| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`Run Java Avro Versions PostCommit`| [![PostCommit Java Avro Versions](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | -| [ PostCommit Java Dataflow V1 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| [![PostCommit Java Dataflow V1](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | -| [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![PostCommit Java Dataflow V2](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | -| [ PostCommit Java Examples 
Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`| [![PostCommit Java Examples Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | -| [ PostCommit Java Examples Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | N/A |`Run Java examples on Dataflow Java 11`| [![PostCommit Java Examples Dataflow Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml) | -| [ PostCommit Java Examples Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | N/A |`Run Java examples on Dataflow Java 17`| [![PostCommit Java Examples Dataflow Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml) | +| [ Load Tests CoGBK Dataflow Streaming Java ](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) | N/A |`Run Load Tests Java CoGBK Dataflow Streaming`| [![.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml) +| [ Performance Tests BigQueryIO Batch Java Avro ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Avro`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml) +| [ Performance Tests BigQueryIO Batch Java Json ](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) | N/A |`Run BigQueryIO Batch Performance Test Java Json`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml) +| [ Performance Tests BigQueryIO Streaming Java 
](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) | N/A |`Run BigQueryIO Streaming Performance Test Java`| [![.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml) +| [ PostCommit BeamMetrics Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) | N/A |`Run Beam Metrics Deployment`| [![.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_BeamMetrics_Publish.yml) +| [ PostCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | N/A |`Run Go PostCommit`| [![.github/workflows/beam_PostCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go.yml) | +| [ PostCommit Go Dataflow ARM](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | N/A |`Run Go PostCommit Dataflow ARM`| [![.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_Dataflow_ARM.yml) | +| [ PostCommit Go VR Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | N/A |`Run Go Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Flink.yml) | +| [ PostCommit Go VR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | N/A |`Run Go Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Samza.yml) | +| [ PostCommit Go VR Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | N/A |`Run Go Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Go_VR_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Go_VR_Spark.yml) | +| [ PostCommit Java Avro Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | N/A |`Run Java Avro Versions PostCommit`| [![.github/workflows/beam_PostCommit_Java_Avro_Versions.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Avro_Versions.yml) | +| [ PostCommit Java Dataflow V1
](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | N/A |`Run PostCommit_Java_Dataflow`| [![.github/workflows/beam_PostCommit_Java_DataflowV1.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV1.yml) | +| [ PostCommit Java Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | N/A |`Run PostCommit_Java_DataflowV2`| [![.github/workflows/beam_PostCommit_Java_DataflowV2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_DataflowV2.yml) | +| [ PostCommit Java Examples Dataflow ARM ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | N/A |`Run Java_Examples_Dataflow_ARM PostCommit`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml) | +| [ PostCommit Java Examples Dataflow Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml) | ['11','17'] |`Run Java examples on Dataflow Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml) | +| [ PostCommit Java Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml) | N/A |`Run Java Examples_Direct`| [![.github/workflows/beam_PostCommit_Java_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Direct.yml) | +| [ PostCommit Java Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml) | N/A |`Run Java Examples_Flink`| [![.github/workflows/beam_PostCommit_Java_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Flink.yml) | +| [ PostCommit Java Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml) | N/A |`Run Java Examples_Spark`| [![.github/workflows/beam_PostCommit_Java_Examples_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Examples_Spark.yml) | +| [ PostCommit Java Hadoop Versions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Hadoop_Versions.yml) | N/A |`Run PostCommit_Java_Hadoop_Versions`| 
[![.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Hadoop_Versions.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Hadoop_Versions.yml) | | [ PostCommit Java Jpms Dataflow Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | N/A |`Run Jpms Dataflow Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml) | | [ PostCommit Java Jpms Dataflow Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | N/A |`Run Jpms Dataflow Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml) | | [ PostCommit Java Jpms Direct Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | N/A |`Run Jpms Direct Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml) | | [ PostCommit Java Jpms Direct Java17 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | N/A |`Run Jpms Direct Java 17 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml) | | [ PostCommit Java Jpms Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | N/A |`Run Jpms Flink Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml) | | [ PostCommit Java Jpms Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | N/A |`Run Jpms Spark Java 11 PostCommit`| [![.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml) | -| [ PostCommit Java Sickbay ](https://github.com/apache/beam/actions/workflows/bbeam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![PostCommit Java Sickbay](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/bbeam_PostCommit_Java_Sickbay.yml) | -| [ PostCommit Python Examples Dataflow 
](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python Examples_Dataflow`| [![PostCommit Python Examples Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | -| [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | N/A |`Run Python Examples_Direct`| [![PostCommit Python Examples Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | -| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | N/A |`Run Python Examples_Flink`| [![PostCommit Python Examples Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | -| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | N/A |`Run Python Examples_Spark`| [![PostCommit Python Examples Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | -| [ PostCommit Website Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![.github/workflows/beam_PostCommit_Website_Publish](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | +| [ PostCommit Java Nexmark Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml) | N/A |`Run Dataflow Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml) | +| [ PostCommit Java Nexmark Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml) | N/A |`Run Dataflow Runner V2 Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml) | +| [ PostCommit Java Nexmark Dataflow V2 Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml) | ['11','17'] |`Run Dataflow Runner V2 Java (matrix) Nexmark Tests`| 
[![.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml) | +| [ PostCommit Java Nexmark Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Direct.yml) | N/A |`Run Direct Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Direct.yml) | +| [ PostCommit Java Nexmark Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Flink.yml) | N/A |`Run Flink Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Flink.yml) | +| [ PostCommit Java Nexmark Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml) | N/A |`Run Spark Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Nexmark_Spark.yml) | +| [ PostCommit Java PVR Flink Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml) | N/A |`Run Java Flink PortableValidatesRunner Streaming`| [![PostCommit Java PVR Flink Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml) | +| [ PostCommit Java PVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml) | N/A |`Run Java Samza PortableValidatesRunner`| [![PostCommit Java PVR Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Samza.yml) | +| [ PostCommit Java PVR Spark3 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml) | N/A |`Run Java Spark v3 PortableValidatesRunner Streaming`| [![PostCommit Java PVR Spark3 Streaming](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml) | +| [ PostCommit Java PVR Spark Batch ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml) | N/A |`Run Java Spark PortableValidatesRunner Batch`| [![PostCommit Java PVR Spark Batch](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml) | +| [ PostCommit Java Sickbay 
](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | N/A |`Run Java Sickbay`| [![.github/workflows/beam_PostCommit_Java_Sickbay.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_Sickbay.yml) | +| [ PostCommit Java ValidatesRunner Dataflow JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | ['11','17'] |`Run Dataflow ValidatesRunner Java (matrix_element)`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Java.yml) | +| [ PostCommit Java ValidatesRunner Dataflow Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | N/A |`Run Dataflow Streaming ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml) | +| [ PostCommit Java ValidatesRunner Dataflow V2 Streaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner Streaming`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml) | +| [ PostCommit Java ValidatesRunner Dataflow V2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | N/A |`Run Java Dataflow V2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml) | +| [ PostCommit Java ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | N/A |`Run Dataflow ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml) | +| [ PostCommit Java ValidatesRunner Direct JavaVersions ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | ['11','17'] |`Run Direct ValidatesRunner Java (matrix_element)`| 
[![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_Java.yml) | +| [ PostCommit Java ValidatesRunner Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | N/A |`Run Direct ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml) | +| [ PostCommit Java ValidatesRunner Flink Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | N/A |`Run Flink ValidatesRunner Java 11`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml) | +| [ PostCommit Java ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | N/A |`Run Flink ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml) | +| [ PostCommit Java ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | N/A |`Run Samza ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml) | +| [ PostCommit Java ValidatesRunner Spark Java11 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | N/A |`Run Spark ValidatesRunner Java 11`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml) | +| [ PostCommit Java ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | N/A |`Run Spark ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml) | +| [ PostCommit Java ValidatesRunner SparkStructuredStreaming ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | N/A |`Run Spark StructuredStreaming ValidatesRunner`| 
[![.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml) | +| [ PostCommit Java ValidatesRunner Twister2 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | N/A |`Run Twister2 ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml) | +| [ PostCommit Java ValidatesRunner ULR ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | N/A |`Run ULR Loopback ValidatesRunner`| [![.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml) | +| [ PostCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | N/A |`Run Java PostCommit`| [![.github/workflows/beam_PostCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Java.yml) | +| [ PostCommit Javadoc ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Javadoc.yml) | N/A |`Run Javadoc PostCommit`| [![.github/workflows/beam_PostCommit_Javadoc.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Javadoc.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Javadoc.yml) | +| [ PostCommit PortableJar Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml) | N/A |`Run PortableJar_Flink PostCommit`| [![.github/workflows/beam_PostCommit_PortableJar_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Flink.yml) | +| [ PostCommit PortableJar Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml) | N/A |`Run PortableJar_Spark PostCommit`| [![.github/workflows/beam_PostCommit_PortableJar_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_PortableJar_Spark.yml) | +| [ PostCommit Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python PostCommit (matrix_element)`| [![.github/workflows/beam_PostCommit_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python.yml) | +| [ PostCommit Python Examples Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | N/A |`Run Python 
Examples_Dataflow`| [![.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Dataflow.yml) | +| [ PostCommit Python Examples Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Examples_Direct (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Direct.yml) | +| [ PostCommit Python Examples Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | ['3.8','3.11'] |`Run Python Examples_Flink (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Flink.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Flink.yml) | +| [ PostCommit Python Examples Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | ['3.8','3.11'] |`Run Python Examples_Spark (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_Examples_Spark.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Examples_Spark.yml) | +| [ PostCommit Python MongoDBIO IT ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml) | N/A |`Run Python MongoDBIO_IT`| [![.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml) | +| [ PostCommit Python Nexmark Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Nexmark_Direct.yml) | N/A |`Run Python Direct Runner Nexmark Tests`| [![.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Nexmark_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Nexmark_Direct.yml) | +| [ PostCommit Python ValidatesContainer Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python Dataflow ValidatesContainer (matrix_element)`| [![.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml) | +| [ PostCommit Python ValidatesContainer Dataflow With RC ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python RC Dataflow ValidatesContainer (matrix_element)`| 
[![.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml) | +| [ PostCommit Python ValidatesRunner Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml) | ['3.8','3.11'] |`Run Python Dataflow ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml) | +| [ PostCommit Python ValidatesRunner Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | ['3.8','3.11'] |`Run Python Flink ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml) | +| [ PostCommit Python ValidatesRunner Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | ['3.8','3.11'] |`Run Python Samza ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml) | +| [ PostCommit Python ValidatesRunner Spark ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml) | ['3.8','3.9','3.11'] |`Run Python Spark ValidatesRunner (matrix_element)`| [![PostCommit Python ValidatesRunner Spark](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml) | +| [ PostCommit Python Xlang Gcp Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | N/A |`Run Python_Xlang_Gcp_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml) | +| [ PostCommit Python Xlang Gcp Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | N/A |`Run Python_Xlang_Gcp_Direct PostCommit`| [![.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml) | +| [ PostCommit Python Xlang IO Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml) | N/A |`Run Python_Xlang_IO_Dataflow PostCommit`| 
[![.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml) | +| [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python (matrix_element) PostCommit Sickbay`| [![.github/workflows/beam_PostCommit_Sickbay_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | +| [ PostCommit SQL ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml) | N/A |`Run SQL PostCommit`| [![.github/workflows/beam_PostCommit_SQL.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_SQL.yml) | +| [ PostCommit TransformService Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) | N/A |`Run TransformService_Direct PostCommit`| [![.github/workflows/beam_PostCommit_TransformService_Direct.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_TransformService_Direct.yml) +| [ PostCommit Website Publish ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | N/A | N/A | [![.github/workflows/beam_PostCommit_Website_Publish.yml](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Publish.yml) | +| [ PostCommit Website Test](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml) | N/A |`Run Full Website Test`| [![.github/workflows/beam_PostCommit_Website_Test](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Website_Test.yml) | | [ PostCommit XVR GoUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | N/A |`Run XVR_GoUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml) | -| [ PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | ['3.8','3.9','3.10','3.11'] |`Run Python PostCommit Sickbay tests (matrix_element)Upda`| [![PostCommit Sickbay Python ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_Sickbay_Python.yml) | +| [ PostCommit XVR Direct ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml) | N/A |`Run XVR_Direct PostCommit`| 
[![.github/workflows/beam_PostCommit_XVR_Direct](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Direct.yml) | +| [ PostCommit XVR Flink ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml) | N/A |`Run XVR_Flink PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Flink](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Flink.yml) | +| [ PostCommit XVR JavaUsingPython Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml) | N/A |`Run XVR_JavaUsingPython_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml) | +| [ PostCommit XVR PythonUsingJava Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml) | N/A |`Run XVR_PythonUsingJava_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml) | +| [ PostCommit XVR PythonUsingJavaSQL Dataflow ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | N/A |`Run XVR_PythonUsingJavaSQL_Dataflow PostCommit`| [![.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml) | +| [ PostCommit XVR Samza ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | N/A |`Run XVR_Samza PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Samza](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Samza.yml) | +| [ PostCommit XVR Spark3 ](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | N/A |`Run XVR_Spark3 PostCommit`| [![.github/workflows/beam_PostCommit_XVR_Spark3](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PostCommit_XVR_Spark3.yml) | | [ PreCommit Community Metrics ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | N/A |`Run CommunityMetrics PreCommit`| [![.github/workflows/beam_PreCommit_CommunityMetrics.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_CommunityMetrics.yml) | | [ PreCommit Go ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | N/A |`Run Go PreCommit`| 
[![.github/workflows/beam_PreCommit_Go.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Go.yml) | | [ PreCommit Java ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | N/A |`Run Java PreCommit`| [![.github/workflows/beam_PreCommit_Java.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Java.yml) | @@ -280,3 +334,4 @@ Please note that jobs with matrix need to have matrix element in the comment. Ex | [ PreCommit GoPortable ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_GoPortable.yml) | N/A |`Run GoPortable PreCommit`| [![.github/workflows/beam_PreCommit_GoPortable.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_GoPortable.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_GoPortable.yml) | | [ PreCommit Kotlin Examples ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Kotlin_Examples.yml) | N/A | `Run Kotlin_Examples PreCommit` | [![.github/workflows/beam_PreCommit_Kotlin_Examples.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Kotlin_Examples.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Kotlin_Examples.yml) | | [ PreCommit Portable Python ](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Portable_Python.yml) | ['3.8','3.11'] | `Run Portable_Python PreCommit` | [![.github/workflows/beam_PreCommit_Portable_Python.yml](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Portable_Python.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_PreCommit_Portable_Python.yml) | +| [ Cancel Stale Dataflow Jobs ](https://github.com/apache/beam/actions/workflows/beam_CancelStaleDataflowJobs.yml) | N/A | `Run Cancel Stale Dataflow Jobs` | [![.github/workflows/beam_CancelStaleDataflowJobs.yml](https://github.com/apache/beam/actions/workflows/beam_CancelStaleDataflowJobs.yml/badge.svg?event=schedule)](https://github.com/apache/beam/actions/workflows/beam_CancelStaleDataflowJobs.yml) | diff --git a/.github/workflows/assign_milestone.yml b/.github/workflows/assign_milestone.yml index 9be68ecd3d0f..17cc167ebcbb 100644 --- a/.github/workflows/assign_milestone.yml +++ b/.github/workflows/assign_milestone.yml @@ -31,7 +31,7 @@ jobs: issues: write runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 2 diff --git a/.github/workflows/beam_CancelStaleDataflowJobs.yml b/.github/workflows/beam_CancelStaleDataflowJobs.yml new file mode 100644 index 000000000000..df896528c84e --- /dev/null +++ b/.github/workflows/beam_CancelStaleDataflowJobs.yml @@ -0,0 +1,84 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Cancel Stale Dataflow Jobs + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */4 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +jobs: + beam_CancelStaleDataflowJobs: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_CancelStaleDataflowJobs] + job_phrase: [Run Cancel Stale Dataflow Jobs] + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run Cancel Stale Dataflow Jobs' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Authenticate on GCP + id: auth + uses: google-github-actions/auth@v1 + with: + credentials_json: ${{ secrets.GCP_SA_KEY }} + project_id: ${{ secrets.GCP_PROJECT_ID }} + - name: run cancel stale dataflow jobs + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :beam-test-tools:cancelStaleDataflowJobs + diff --git a/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml new file mode 100644 index 000000000000..0cb601522a81 --- /dev/null +++ b/.github/workflows/beam_LoadTests_Java_CoGBK_Dataflow_Streaming.yml @@ -0,0 +1,126 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
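Every workflow added in this PR, including the Cancel Stale Dataflow Jobs job above, follows one trigger pattern: `issue_comment`, `schedule`, and `workflow_dispatch` all feed a single job whose `if:` guard compares the comment body against the trigger phrase listed in the README table. A minimal sketch of that pattern (the workflow name, phrase, and cron below are hypothetical, not part of this diff):

```yaml
# Hypothetical illustration of the comment-phrase trigger pattern used
# throughout this PR; not a file added by this diff.
name: Example Phrase-Triggered Job

on:
  issue_comment:
    types: [created]
  schedule:
    - cron: '0 */6 * * *'
  workflow_dispatch:

jobs:
  example_job:
    # Run on the cron schedule, on manual dispatch, or when a PR comment
    # matches the trigger phrase exactly.
    if: |
      github.event_name == 'schedule' ||
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Example Job'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - run: echo "triggered via ${{ github.event_name }}"
```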
+# See the License for the specific language governing permissions and +# limitations under the License. + +name: Load Tests CoGBK Dataflow Streaming Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '50 10 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_LoadTests_Java_CoGBK_Dataflow_Streaming: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Load Tests Java CoGBK Dataflow Streaming' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: [ "beam_LoadTests_Java_CoGBK_Dataflow_Streaming" ] + job_phrase: ["Run Load Tests Java CoGBK Dataflow Streaming"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare configs + id: set_configs + shell: bash + run: | + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt | tr '\n' ' ') + echo "prepared_config_1=$CURCONFIG" >> $GITHUB_OUTPUT + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt | tr '\n' ' ') + echo "prepared_config_2=$CURCONFIG" >> $GITHUB_OUTPUT + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt | tr '\n' ' ') + echo "prepared_config_3=$CURCONFIG" >> $GITHUB_OUTPUT + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt | tr '\n' ' ') + echo "prepared_config_4=$CURCONFIG" >> $GITHUB_OUTPUT + - name: run CoGBK Dataflow Streaming Java Load Test 1 (single key) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_1 }}' \ + - name: run CoGBK Dataflow Streaming Java Load Test 2 (multiple key) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ 
steps.set_configs.outputs.prepared_config_2 }}' \ + - name: run CoGBK Dataflow Streaming Java Load Test 3 (reiteration 10KB value) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_3 }}' \ + - name: run CoGBK Dataflow Streaming Java Load Test 4 (reiteration 2MB value) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:load-tests:run + arguments: | + -PloadTest.mainClass=org.apache.beam.sdk.loadtests.CoGroupByKeyLoadTest \ + -Prunner=:runners:google-cloud-dataflow-java \ + '-PloadTest.args=${{ steps.set_configs.outputs.prepared_config_4 }}' \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml new file mode 100644 index 000000000000..d29acbfc765f --- /dev/null +++ b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Avro.yml @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
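The `Prepare configs` step in the CoGBK load-test job above turns each checked-in config file into a one-line argument string: `grep -v "^#.*"` drops comment lines and `tr '\n' ' '` joins what remains with spaces, so the whole file can be passed as a single `-PloadTest.args=` value (the performance-test workflows below use the same idiom, plus a date-suffix substitution). A sketch of the transformation with a hypothetical config file; the real files live under `.github/workflows/load-tests-job-configs/`:

```yaml
# Hypothetical illustration of the config-preparation idiom; file name and
# flags are made up for the example.
- name: Prepare config (sketch)
  id: set_configs
  shell: bash
  run: |
    # Given a config file such as:
    #   # pipeline options for one test case
    #   --iterations=1
    #   --numWorkers=5
    # strip the comment lines and flatten the flags onto one line.
    CURCONFIG=$(grep -v "^#.*" ./example_config.txt | tr '\n' ' ')
    echo "prepared_config_1=$CURCONFIG" >> $GITHUB_OUTPUT
```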
+ +name: Performance Tests BigQueryIO Batch Java Avro + +on: + issue_comment: + types: [created] + schedule: + - cron: '10 1,13 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PerformanceTests_BigQueryIO_Batch_Java_Avro: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run BigQueryIO Batch Performance Test Java Avro' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PerformanceTests_BigQueryIO_Batch_Java_Avro"] + job_phrase: ["Run BigQueryIO Batch Performance Test Java Avro"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare config + id: set_config + shell: bash + run: | + CURDATE=$(date '+%m%d%H%M%S' --utc) + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt | tr '\n' ' ') + CONFIGWITHDATE=$(echo "${CURCONFIG/bqio_write_10GB_java_avro_/bqio_write_10GB_java_avro_$CURDATE}") + echo "prepared_config=$CONFIGWITHDATE" >> $GITHUB_OUTPUT + - name: run Java BigQueryIO Batch Avro Performance Test + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:io:bigquery-io-perf-tests:integrationTest + arguments: | + --tests org.apache.beam.sdk.bigqueryioperftests.BigQueryIOIT \ + --info \ + -DintegrationTestRunner=dataflow \ + -DintegrationTestPipelineOptions=${{ steps.set_config.outputs.prepared_config }} \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml new file mode 100644 index 000000000000..067d0e4b95b8 --- /dev/null +++ b/.github/workflows/beam_PerformanceTests_BigQueryIO_Batch_Java_Json.yml @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license 
agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Performance Tests BigQueryIO Batch Java Json + +on: + issue_comment: + types: [created] + schedule: + - cron: '30 8,20 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PerformanceTests_BigQueryIO_Batch_Java_Json: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run BigQueryIO Batch Performance Test Java Json' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PerformanceTests_BigQueryIO_Batch_Java_Json"] + job_phrase: ["Run BigQueryIO Batch Performance Test Java Json"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare config + id: set_config + shell: bash + run: | + CURDATE=$(date '+%m%d%H%M%S' --utc) + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt | tr '\n' ' ') + CONFIGWITHDATE=$(echo "${CURCONFIG/bqio_write_10GB_java_json_/bqio_write_10GB_java_json_$CURDATE}") + echo "prepared_config=$CONFIGWITHDATE" >> $GITHUB_OUTPUT + - name: run Java BigQueryIO Batch Json Performance Test + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:io:bigquery-io-perf-tests:integrationTest + arguments: | + --tests org.apache.beam.sdk.bigqueryioperftests.BigQueryIOIT \ + --info \ + -DintegrationTestRunner=dataflow \ + -DintegrationTestPipelineOptions=${{ steps.set_config.outputs.prepared_config }} \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test 
Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml b/.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml new file mode 100644 index 000000000000..bf10d4be522e --- /dev/null +++ b/.github/workflows/beam_PerformanceTests_BigQueryIO_Streaming_Java.yml @@ -0,0 +1,101 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Performance Tests BigQueryIO Streaming Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '20 15,22 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login }}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PerformanceTests_BigQueryIO_Streaming_Java: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run BigQueryIO Streaming Performance Test Java' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PerformanceTests_BigQueryIO_Streaming_Java"] + job_phrase: ["Run BigQueryIO Streaming Performance Test Java"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Prepare config + id: set_config + shell: bash + run: | + CURDATE=$(date '+%m%d%H%M%S' --utc) + CURCONFIG=$(grep -v "^#.*" ./.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt | tr '\n' ' ') + CONFIGWITHDATE=$(echo 
"${CURCONFIG/bqio_write_10GB_java_stream_/bqio_write_10GB_java_stream_$CURDATE}") + echo "prepared_config=$CONFIGWITHDATE" >> $GITHUB_OUTPUT + - name: run Java BigQueryIO Streaming Performance Test + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:io:bigquery-io-perf-tests:integrationTest + arguments: | + --tests org.apache.beam.sdk.bigqueryioperftests.BigQueryIOIT \ + --info \ + -DintegrationTestRunner=dataflow \ + -DintegrationTestPipelineOptions=${{ steps.set_config.outputs.prepared_config }} \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml b/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml index dfc86953d4b9..3ce0a48824aa 100644 --- a/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml +++ b/.github/workflows/beam_PostCommit_BeamMetrics_Publish.yml @@ -66,7 +66,7 @@ jobs: job_name: ["beam_PostCommit_BeamMetrics_Publish"] job_phrase: ["Run Beam Metrics Deployment"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go.yml b/.github/workflows/beam_PostCommit_Go.yml index 1b27c006125e..dc1180314d67 100644 --- a/.github/workflows/beam_PostCommit_Go.yml +++ b/.github/workflows/beam_PostCommit_Go.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go"] job_phrase: ["Run Go PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml b/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml index 223640bbbaa3..8e5651e29279 100644 --- a/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml +++ b/.github/workflows/beam_PostCommit_Go_Dataflow_ARM.yml @@ -66,26 +66,18 @@ jobs: job_name: ["beam_PostCommit_Go_Dataflow_ARM"] job_phrase: ["Run Go PostCommit Dataflow ARM"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Install Go - uses: actions/setup-go@v4 - with: - go-version: '1.21' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 + go-version: 1.21 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v1 - name: Authenticate on GCP diff --git a/.github/workflows/beam_PostCommit_Go_VR_Flink.yml b/.github/workflows/beam_PostCommit_Go_VR_Flink.yml index 0f7d9b0c5355..21dcf7f8e72a 100644 --- a/.github/workflows/beam_PostCommit_Go_VR_Flink.yml +++ b/.github/workflows/beam_PostCommit_Go_VR_Flink.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go_VR_Flink"] job_phrase: ["Run Go Flink 
ValidatesRunner"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go_VR_Samza.yml b/.github/workflows/beam_PostCommit_Go_VR_Samza.yml index e199d04b11f2..4e647962ccac 100644 --- a/.github/workflows/beam_PostCommit_Go_VR_Samza.yml +++ b/.github/workflows/beam_PostCommit_Go_VR_Samza.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go_VR_Samza"] job_phrase: ["Run Go Samza ValidatesRunner"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Go_VR_Spark.yml b/.github/workflows/beam_PostCommit_Go_VR_Spark.yml index eed15c1fe966..0fc0f8d400de 100644 --- a/.github/workflows/beam_PostCommit_Go_VR_Spark.yml +++ b/.github/workflows/beam_PostCommit_Go_VR_Spark.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Go_VR_Spark"] job_phrase: ["Run Go Spark ValidatesRunner"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Java.yml b/.github/workflows/beam_PostCommit_Java.yml index 4cc7f638e1c1..3e3cd79e8477 100644 --- a/.github/workflows/beam_PostCommit_Java.yml +++ b/.github/workflows/beam_PostCommit_Java.yml @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java PostCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -75,8 +75,16 @@ jobs: uses: ./.github/actions/gradle-command-self-hosted-action with: gradle-command: :javaPostCommit - - name: Upload test report + - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 + if: failure() with: - name: java-code-coverage-report - path: "**/build/test-results/**/*.xml" \ No newline at end of file + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml b/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml index b18beb51beed..cf3064a6a7de 100644 --- a/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml +++ b/.github/workflows/beam_PostCommit_Java_Avro_Versions.yml @@ -45,6 +45,11 @@ permissions: security-events: read statuses: read +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PostCommit_Java_Avro_Versions: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) @@ -59,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java Avro Versions PostCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -80,4 +85,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: 
'**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_DataflowV1.yml b/.github/workflows/beam_PostCommit_Java_DataflowV1.yml index 1a3f07201fe8..02896bd3a383 100644 --- a/.github/workflows/beam_PostCommit_Java_DataflowV1.yml +++ b/.github/workflows/beam_PostCommit_Java_DataflowV1.yml @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run PostCommit_Java_Dataflow' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -92,4 +92,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_DataflowV2.yml b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml index 4b06592a972c..56b1e6dc91bb 100644 --- a/.github/workflows/beam_PostCommit_Java_DataflowV2.yml +++ b/.github/workflows/beam_PostCommit_Java_DataflowV2.yml @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run PostCommit_Java_DataflowV2' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -85,4 +85,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml index 596b7f35ab5f..5a42b9f95237 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_ARM.yml @@ -78,28 +78,17 @@ jobs: github. 
event_name == 'workflow_dispatch' || startswith(github.event.comment.body, 'Run Java_Examples_Dataflow_ARM PostCommit') steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{matrix.java_version}} - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{matrix.java_version}}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - requires-py-38: false - requires-py-39: false - requires-go: false - - name: Set up Java${{ matrix.java_version }} - uses: actions/setup-java@v3.8.0 - with: - distribution: 'temurin' java-version: ${{ matrix.java_version }} - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Authenticate on GCP @@ -140,4 +129,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml new file mode 100644 index 000000000000..f50ae9d30f6c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java.yml @@ -0,0 +1,100 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
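One detail worth noting in the ARM hunk above: the `${{matrix.java_version}}` token moves inside the parentheses of `github_job`, presumably so the string handed to the setup-action renders to exactly the same text as the job's `name` field. A sketch of that invariant, with hypothetical job and phrase names:

```yaml
# Hypothetical matrix job showing the name <-> github_job pairing that
# ./.github/actions/setup-action appears to rely on: both must render to
# the same string, parentheses included.
jobs:
  example_matrix_job:
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }})
    runs-on: ubuntu-latest
    strategy:
      matrix:
        job_name: [example_matrix_job]
        job_phrase: [Run Example]
        java_version: ['11', '17']
    steps:
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.java_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          # must equal the rendered job name above
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }})
```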
+ +name: PostCommit Java Examples Dataflow Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Dataflow_Java: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Examples_Dataflow_Java] + job_phrase: [Run Java examples on Dataflow Java] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Java examples on Dataflow Java') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.java_version }}) + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + ${{ matrix.java_version }} + 8 + - name: run java${{ matrix.java_version }}PostCommit script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:examples:java${{ matrix.java_version }}PostCommit + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2.yml new file mode 100644 index 000000000000..af01933be446 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
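The `Set up Java` step in the job above passes a multi-line `java-version`, so both the matrix JDK and Java 8 are installed in one step. With `actions/setup-java`, each installed version is also exposed through an environment variable of the form `JAVA_HOME_<major>_X64` (the Dataflow V2 Java workflow later in this diff relies on exactly that via `-Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64`), and, as far as I can tell, the last version listed becomes the default `JAVA_HOME`. A sketch under those assumptions:

```yaml
# Sketch of multi-version setup-java; assumes the JAVA_HOME_<major>_X64
# variables exist and that the last listed version (8 here) ends up as the
# default JAVA_HOME.
- name: Set up Java 11 and 8
  uses: actions/setup-java@v3
  with:
    distribution: 'temurin'
    java-version: |
      11
      8
- name: Inspect installed JDKs
  shell: bash
  run: |
    echo "default JAVA_HOME: $JAVA_HOME"
    echo "Java 11 home:      $JAVA_HOME_11_X64"
    echo "Java 8 home:       $JAVA_HOME_8_X64"
```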
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Dataflow V2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Dataflow_V2: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Dataflow_V2] + job_phrase: [Run Java Examples on Dataflow Runner V2] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples on Dataflow Runner V2' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Examples Dataflow V2 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:examplesJavaRunnerV2IntegrationTest + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml new file mode 100644 index 000000000000..0dfea680f824 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_V2_Java.yml @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
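The `concurrency` block shared by these workflows leans on the fact that `||` in GitHub Actions expressions returns its first truthy operand, so one group expression degrades gracefully across event types: a comment-triggered run keys on the issue number, while pushed and scheduled runs fall back to the head label, SHA, or ref, and the suffix separates schedule, comment, and manual triggers so they do not cancel one another. The block as it appears in the files above:

```yaml
# The shared concurrency expression from this PR. Each `||` chain picks the
# first non-empty value for the current event payload, and cancel-in-progress
# lets a newer run of the same group interrupt an older one.
concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
  cancel-in-progress: true
```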
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Dataflow V2 Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Dataflow_V2_Java: + name: ${{matrix.job_name}} (${{matrix.job_phrase_1}} ${{matrix.java_version}} ${{matrix.job_phrase_2}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Examples_Dataflow_V2_Java] + job_phrase_1: [Run Java ] + job_phrase_2: [Examples on Dataflow Runner V2] + java_version: ['11', '17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + (contains(github.event.comment.body, 'Run Java') && + contains(github.event.comment.body, 'Examples on Dataflow Runner V2')) + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ matrix.job_phrase_2 }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ matrix.job_phrase_2 }}) + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: ${{ matrix.java_version }} + - name: run PostCommit Java Examples Dataflow V2 Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:examplesJavaRunnerV2IntegrationTest + arguments: | + -PdisableSpotlessCheck=true \ + -PdisableCheckStyle=true \ + -PskipCheckerFramework \ + -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml b/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml new file mode 100644 index 000000000000..b8014342042c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Direct.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Direct: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Direct] + job_phrase: [Run Java Examples_Direct] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Direct' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:direct-java:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + 
files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml b/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml new file mode 100644 index 000000000000..3aec68316f81 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Flink.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Flink: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Flink] + job_phrase: [Run Java Examples_Flink] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Flink' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: 3.8 + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:1.15:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || 
env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml b/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml new file mode 100644 index 000000000000..fd5c60952b7b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Examples_Spark.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Examples Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Examples_Spark: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Examples_Spark] + job_phrase: [Run Java Examples_Spark] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Examples_Spark' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run examplesIntegrationTest script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:examplesIntegrationTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: 
EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml b/.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml new file mode 100644 index 000000000000..da4606400e8b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Hadoop_Versions.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java Hadoop Versions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_Hadoop_Versions: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_Hadoop_Versions] + job_phrase: [Run PostCommit_Java_Hadoop_Versions] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run PostCommit_Java_Hadoop_Versions' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :javaHadoopVersionsTest + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + 
path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml index ffd5751fd8b3..471782621fa7 100644 --- a/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml +++ b/.github/workflows/beam_PostCommit_Java_IO_Performance_Tests.yml @@ -67,7 +67,7 @@ jobs: job_phrase: ["Run Java PostCommit IO Performance Tests"] test_case: ["GCSPerformanceTest", "BigTablePerformanceTest"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -76,19 +76,14 @@ jobs: github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.test_case }}) - name: Checkout release branch if: github.event_name == 'schedule' #This has scheduled runs run against the latest release - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: v2.50.0 #TODO(https://github.com/apache/beam/issues/28330) automate updating this repository: apache/beam - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: Authenticate on GCP uses: google-github-actions/setup-gcloud@v0 with: @@ -111,4 +106,17 @@ jobs: gradle-command: :it:${{ matrix.test_case }} env: exportDataset: performance_tests - exportTable: io_performance_metrics_test \ No newline at end of file + exportTable: io_performance_metrics_test + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml index 925942d46b90..6c35a10e5e92 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java11.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Dataflow_Java11"] job_phrase: ["Run Jpms Dataflow Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml index 6094cdc7e4e6..5d0d2273cf55 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml 
+++ b/.github/workflows/beam_PostCommit_Java_Jpms_Dataflow_Java17.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Dataflow_Java17"] job_phrase: ["Run Jpms Dataflow Java 17 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -94,4 +94,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml index 0fa065edd34b..4e26f29276b2 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java11.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Direct_Java11"] job_phrase: ["Run Jpms Direct Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml index f85ad12437bc..0a4801d46c6c 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Direct_Java17.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Direct_Java17"] job_phrase: ["Run Jpms Direct Java 17 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -94,4 +94,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml index 84f294de5c21..aa5e3b40c9dd 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Flink_Java11.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_Java_Jpms_Flink_Java11"] job_phrase: ["Run Jpms Flink Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml b/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml index 56ec589b38d7..a81df741d9cc 100644 --- a/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_Jpms_Spark_Java11.yml @@ -62,7 +62,7 @@ 
jobs: job_name: ["beam_PostCommit_Java_Jpms_Spark_Java11"] job_phrase: ["Run Jpms Spark Java 11 PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml new file mode 100644 index 000000000000..ef90fbad5bf0 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow.yml @@ -0,0 +1,115 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --region=us-central1 + --suite=STRESS + --numWorkers=4 + --maxNumWorkers=4 + --autoscalingAlgorithm=NONE + --nexmarkParallel=16 + --enforceEncodability=true + --enforceImmutability=true + --runner=DataflowRunner + +jobs: + beam_PostCommit_Java_Nexmark_Dataflow: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: 
[self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Dataflow] + job_phrase: [Run Dataflow Runner Nexmark Tests] + streaming: [false, true] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Dataflow (${{ matrix.streaming }} ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Dataflow (${{ matrix.streaming }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml new file mode 100644 index 000000000000..3eb93e6687f8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2.yml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
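An editorial note on a change repeated throughout this section: every `EnricoMi/publish-unit-test-result-action@v2` step gains `commit:` and `comment_mode:` inputs. `comment_mode` posts a PR comment only when the run was phrase-triggered via `issue_comment`, and `commit` attaches the check to `env.prsha`, which appears to be the PR head SHA recorded by the repository's setup action (an assumption; on scheduled or manual runs it is empty and the expression falls back to `env.GITHUB_SHA`). A minimal sketch of the resulting step:

```yaml
# Sketch of the publish step as wired in these workflows. env.prsha is
# assumed to be exported by ./.github/actions/setup-action for
# comment-triggered runs; otherwise the fallback SHA is used and PR
# commenting is switched off.
- name: Publish JUnit Test Results
  uses: EnricoMi/publish-unit-test-result-action@v2
  if: always()   # publish whether or not the test step failed
  with:
    commit: '${{ env.prsha || env.GITHUB_SHA }}'
    comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
    files: '**/build/test-results/**/*.xml'
```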
+ +name: PostCommit Java Nexmark Dataflow V2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=false + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --influxTags={"runnerVersion":"V2","javaVersion":"8"} + --region=us-central1 + --suite=STRESS + --numWorkers=4 + --maxNumWorkers=4 + --autoscalingAlgorithm=NONE + --nexmarkParallel=16 + --enforceEncodability=true + --enforceImmutability=true + --runner=DataflowRunner + +jobs: + beam_PostCommit_Java_Nexmark_Dataflow_V2: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Dataflow_V2] + job_phrase: [Run Dataflow Runner V2 Nexmark Tests] + streaming: [false, true] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow Runner V2 Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Dataflow V2 (streaming = ${{ matrix.streaming }}) script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + '${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml new file mode 100644 index 000000000000..06438510400b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Dataflow_V2_Java.yml @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Dataflow V2 Java + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=false + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --region=us-central1 + --suite=STRESS + --numWorkers=4 + --maxNumWorkers=4 + --autoscalingAlgorithm=NONE + --nexmarkParallel=16 + --enforceEncodability=true + --enforceImmutability=true + --runner=DataflowRunner + +jobs: + beam_PostCommit_Java_Nexmark_Dataflow_V2_Java: + name: ${{matrix.job_name}} (${{matrix.job_phrase_1}} ${{matrix.java_version}} ${{matrix.job_phrase_2}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Dataflow_V2_Java] + job_phrase_1: [Run Dataflow Runner V2 Java] + job_phrase_2: [Nexmark Tests] + streaming: [false, true] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + (contains(github.event.comment.body, 'Run Dataflow Runner V2 Java') && + contains(github.event.comment.body, 'Nexmark Tests')) + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ matrix.job_phrase_2 }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} ${{ matrix.job_phrase_1 }} ${{matrix.java_version}} ${{ 
matrix.job_phrase_2 }} + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: ${{ matrix.java_version }} + - name: run PostCommit Java ${{matrix.java_version}} Nexmark Dataflow V2 (streaming = ${{ matrix.streaming }}) script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -PcompileAndRunTestsWithJava${{ matrix.java_version }} \ + -Pjava${{ matrix.java_version }}Home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + -Pnexmark.runner.version=V2 \ + -Pnexmark.runner=:runners:google-cloud-dataflow-java \ + '${{ env.GRADLE_COMMAND_ARGUMENTS }}--influxTags={"runnerVersion":"V2","javaVersion":"${{matrix.java_version}}"}--streaming=${{ matrix.streaming }}' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml new file mode 100644 index 000000000000..2386d7e26f38 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Direct.yml @@ -0,0 +1,110 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
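The Nexmark workflows above and below all assemble their Gradle invocation the same way: a job-level `GRADLE_COMMAND_ARGUMENTS` literal block carries the common `-Pnexmark.args=` flags, and each step appends its matrix-specific flags immediately after the interpolated value. Because a YAML `|` block scalar keeps a trailing newline, the appended flag is still separated from the last line of the block. A sketch with the flag list abbreviated (the real jobs pass many more options):

```yaml
# Abbreviated sketch of the shared-arguments pattern. The whole quoted
# string becomes one -Pnexmark.args=... property that is handed to the
# Nexmark launcher; entries are separated by the block scalar's newlines.
env:
  GRADLE_COMMAND_ARGUMENTS: |
    -Pnexmark.args=--manageResources=false
    --monitorJobs=true
steps:
  - name: run Nexmark suite (sketch)
    uses: ./.github/actions/gradle-command-self-hosted-action
    with:
      gradle-command: :sdks:java:testing:nexmark:run
      arguments: |
        -Pnexmark.runner=:runners:google-cloud-dataflow-java \
        "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}"
```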
+ +name: PostCommit Java Nexmark Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --enforceEncodability=true + --enforceImmutability=true + --runner=DirectRunner + +jobs: + beam_PostCommit_Java_Nexmark_Direct: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Direct] + job_phrase: [Run Direct Runner Nexmark Tests] + streaming: [false, true] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Direct Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Direct (${{ matrix.streaming }} ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:direct-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }} --streaming=${{ matrix.streaming }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Direct (${{ matrix.streaming }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:direct-java \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml new file mode 100644 index 000000000000..9123c9079605 --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_Java_Nexmark_Flink.yml @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --streamTimeout=60 + --runner=FlinkRunner + +jobs: + beam_PostCommit_Java_Nexmark_Flink: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_Nexmark_Flink] + job_phrase: [Run Flink Runner Nexmark Tests] + streaming: [false, true] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Flink Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Flink (${{ matrix.streaming }} ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: 
:sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:flink:1.15 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }} --streaming=${{ matrix.streaming }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Flink (${{ matrix.streaming }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:flink:1.15 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--streaming=${{ matrix.streaming }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml b/.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml new file mode 100644 index 000000000000..7492eb9b8262 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_Nexmark_Spark.yml @@ -0,0 +1,109 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java Nexmark Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_COMMAND_ARGUMENTS: | + -Pnexmark.args=--manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --streamTimeout=60 + --streaming=false + +jobs: + beam_PostCommit_Java_Nexmark_Spark: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: 
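As in the Dataflow job earlier, the Direct and Flink Nexmark jobs use `none` as a sentinel in the `queryLanguage` axis: instead of passing a literal `--queryLanguage=none`, each job defines two mutually exclusive steps and lets the matrix value choose which one runs. Reduced to a sketch (the `run: echo` bodies stand in for the real Gradle steps):

```yaml
# Sketch: exactly one of these two steps executes per matrix combination.
- name: run with an explicit query language
  if: matrix.queryLanguage != 'none'
  run: echo "would pass --queryLanguage=${{ matrix.queryLanguage }}"
- name: run with the default query language
  if: matrix.queryLanguage == 'none'
  run: echo "would omit --queryLanguage entirely"
```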
[beam_PostCommit_Java_Nexmark_Spark] + job_phrase: [Run Spark Runner Nexmark Tests] + runner: [SparkRunner, SparkStructuredStreamingRunner --skipQueries=3] + queryLanguage: [sql, zetasql, none] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Spark Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Nexmark Spark (runner = ${{ matrix.runner }} queryLanguage = ${{ matrix.queryLanguage }}) script + if: matrix.queryLanguage != 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:spark:3 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }} --runner=${{ matrix.runner }} --queryLanguage=${{ matrix.queryLanguage }}" \ + - name: run PostCommit Java Nexmark Spark (${{ matrix.runner }}) script + if: matrix.queryLanguage == 'none' + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:spark:3 \ + "${{ env.GRADLE_COMMAND_ARGUMENTS }}--runner=${{ matrix.runner }}" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml b/.github/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml new file mode 100644 index 000000000000..709ec8215983 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Flink_Streaming.yml @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
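One detail worth pausing on in the Spark job above: its `runner` axis does double duty. Because the matrix value is interpolated verbatim into `--runner=${{ matrix.runner }}`, the entry `SparkStructuredStreamingRunner --skipQueries=3` carries an extra flag along with the runner name. Schematically:

```yaml
# Sketch: a matrix value interpolated verbatim can smuggle in extra flags.
matrix:
  runner: [SparkRunner, SparkStructuredStreamingRunner --skipQueries=3]
# "--runner=${{ matrix.runner }}" then expands to either
#   --runner=SparkRunner
# or
#   --runner=SparkStructuredStreamingRunner --skipQueries=3
```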
+ +name: PostCommit Java PVR Flink Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Flink_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Flink_Streaming] + job_phrase: [Run Java Flink PortableValidatesRunner Streaming] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Flink PortableValidatesRunner Streaming' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Flink PortableValidatesRunner Streaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: runners:flink:1.15:job-server:validatesPortableRunnerStreaming diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml b/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml new file mode 100644 index 000000000000..06b88ee5bcb8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Samza.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
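The PortableValidatesRunner (PVR) workflows that begin here are deliberately thin wrappers: each resolves to a single Gradle target on the runner's job-server project, so the Samza and Spark variants that follow differ from the Flink one above only in the project path. A sketch of the common shape:

```yaml
# Sketch: one Gradle target per runner and mode; only the path varies, e.g.
#   :runners:flink:1.15:job-server:validatesPortableRunnerStreaming
#   :runners:samza:job-server:validatesPortableRunner
#   :runners:spark:3:job-server:validatesPortableRunnerStreaming
- name: run PVR suite (sketch)
  uses: ./.github/actions/gradle-command-self-hosted-action
  with:
    gradle-command: :runners:flink:1.15:job-server:validatesPortableRunnerStreaming
```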
+ +name: PostCommit Java PVR Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Samza: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Samza] + job_phrase: [Run Java Samza PortableValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Samza PortableValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesPortableRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml b/.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml new file mode 100644 index 000000000000..e87c62674c40 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Spark3_Streaming.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
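The Samza job above also shows the reporting tail most workflows in this section share: the raw HTML reports are uploaded as an artifact only on failure (presumably to limit artifact storage), while the JUnit publisher runs under `if: always()` so the check gets a pass/fail summary either way:

```yaml
# The shared reporting tail: archive raw reports only when a step failed,
# but always publish the JUnit XML summary to the check run.
- name: Archive JUnit Test Results
  uses: actions/upload-artifact@v3
  if: failure()
  with:
    name: JUnit Test Results
    path: "**/build/reports/tests/"
- name: Publish JUnit Test Results
  uses: EnricoMi/publish-unit-test-result-action@v2
  if: always()
  with:
    files: '**/build/test-results/**/*.xml'
```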
+ +name: PostCommit Java PVR Spark3 Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Spark3_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Spark3_Streaming] + job_phrase: [Run Java Spark v3 PortableValidatesRunner Streaming] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Spark v3 PortableValidatesRunner Streaming' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java PortableValidatesRunner Spark3 Streaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesPortableRunnerStreaming + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' diff --git a/.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml b/.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml new file mode 100644 index 000000000000..ce4e17dc25ea --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_PVR_Spark_Batch.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
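Every workflow in this batch repeats the same `concurrency` stanza, which is worth unpacking once. The first fallback chain identifies what is being tested (issue number for comment triggers, then PR head label, SHA, or ref), and the suffix identifies the trigger itself, so a rerun cancels an in-flight run only when both halves of the key match. A skeleton with the chains shortened:

```yaml
# Skeleton of the shared concurrency key, read as
# "<workflow> @ <what is being tested>-<which trigger fired>".
concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha }}-${{ github.event.comment.body || github.event.sender.login }}'
  cancel-in-progress: true
```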
+ +name: PostCommit Java PVR Spark Batch + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_PVR_Spark_Batch: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Java_PVR_Spark_Batch] + job_phrase: [Run Java Spark PortableValidatesRunner Batch] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Java Spark PortableValidatesRunner Batch' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Java PortableValidatesRunner Spark Batch script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: | + :runners:spark:3:job-server:validatesPortableRunnerBatch + :runners:spark:3:job-server:validatesPortableRunnerDocker + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_Sickbay.yml b/.github/workflows/beam_PostCommit_Java_Sickbay.yml index ad437500dba4..dc719c2d53dd 100644 --- a/.github/workflows/beam_PostCommit_Java_Sickbay.yml +++ b/.github/workflows/beam_PostCommit_Java_Sickbay.yml @@ -21,7 +21,7 @@ on: issue_comment: types: [created] schedule: - - cron: '29 6 * * *' + - cron: '0 0 * * *' workflow_dispatch: # This allows a subsequently queued workflow run to interrupt previous runs @@ -64,7 +64,7 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java Sickbay' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -85,4 +85,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml similarity index 78% rename from .github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml rename to .github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml index bc26da173042..92ddeeb8663e 100644 --- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java11.yml +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow.yml @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -name: PostCommit Java Examples Dataflow Java11 +name: PostCommit Java ValidatesRunner Dataflow on: issue_comment: @@ -51,40 +51,35 @@ env: GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} jobs: - beam_PostCommit_Java_Examples_Dataflow_Java11: + beam_PostCommit_Java_ValidatesRunner_Dataflow: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) runs-on: [self-hosted, ubuntu-20.04, main] - timeout-minutes: 180 + timeout-minutes: 480 strategy: - fail-fast: false matrix: - job_name: [beam_PostCommit_Java_Examples_Dataflow_Java11] - job_phrase: [Run Java examples on Dataflow Java 11] + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow] + job_phrase: [Run Dataflow ValidatesRunner] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Java examples on Dataflow Java 11' + github.event.comment.body == 'Run Dataflow ValidatesRunner' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - # The test requires Java 11 and Java 8 versions. - # Java 8 is installed second because JAVA_HOME needs to point to Java 8. - - name: Set up Java + - name: Install Java uses: actions/setup-java@v3.8.0 with: - distribution: 'temurin' - java-version: | - 11 - 8 - - name: run PostCommit Java Examples Dataflow Java11 script + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script uses: ./.github/actions/gradle-command-self-hosted-action with: - gradle-command: :runners:google-cloud-dataflow-java:examples:java11PostCommit + gradle-command: :runners:google-cloud-dataflow-java:validatesRunner max-workers: 12 - name: Archive JUnit Test Results uses: actions/upload-artifact@v3 @@ -96,4 +91,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml new file mode 100644 index 000000000000..37ec6b145f35 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions.yml @@ -0,0 +1,116 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow JavaVersions + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions: + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.java_version}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 480 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_JavaVersions] + job_phrase: [Run Dataflow ValidatesRunner Java] + java_version: ['11','17'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Dataflow ValidatesRunner Java') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{matrix.java_version}} + - name: Set up Java${{ matrix.java_version }} + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + ${{ matrix.java_version }} + 8 + - name: run jar Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:testJar :runners:google-cloud-dataflow-java:worker:shadowJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java${{ matrix.java_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes \ + -x testClasses \ + -Dorg.gradle.java.home=$JAVA_HOME_${{ matrix.java_version }}_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + 
uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml new file mode 100644 index 000000000000..708ef0b7ad16 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming.yml @@ -0,0 +1,96 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Dataflow Streaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 720 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_Streaming] + job_phrase: [Run Dataflow Streaming ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Dataflow Streaming ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunnerStreaming script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: 
:runners:google-cloud-dataflow-java:validatesRunnerStreaming
+          max-workers: 12
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml
new file mode 100644
index 000000000000..aa665508ddee
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2.yml
@@ -0,0 +1,96 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: PostCommit Java ValidatesRunner Dataflow V2
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '0 */8 * * *'
+  workflow_dispatch:
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
+  cancel-in-progress: true
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PostCommit_Java_ValidatesRunner_Dataflow_V2:
+    name: ${{matrix.job_name}} (${{matrix.job_phrase}})
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 390
+    strategy:
+      matrix:
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_V2]
+        job_phrase: [Run Java Dataflow V2 ValidatesRunner]
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java Dataflow V2 ValidatesRunner'
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Install Java
+        uses: actions/setup-java@v3.8.0
+        with:
+          distribution: 'zulu'
+          java-version: '8'
+      - name: run validatesRunnerV2 script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:google-cloud-dataflow-java:validatesRunnerV2
+          max-workers: 12
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml
new file mode 100644
index 000000000000..ca380efa5cb4
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming.yml
@@ -0,0 +1,97 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
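+# Validates the Dataflow runner V2 in streaming mode; runs every 8 hours or on the trigger phrase below.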
+
+name: PostCommit Java ValidatesRunner Dataflow V2 Streaming
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '0 */8 * * *'
+  workflow_dispatch:
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
+  cancel-in-progress: true
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming:
+    name: ${{matrix.job_name}} (${{matrix.job_phrase}})
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 510
+    strategy:
+      matrix:
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Dataflow_V2_Streaming]
+        job_phrase: [Run Java Dataflow V2 ValidatesRunner Streaming]
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Java Dataflow V2 ValidatesRunner Streaming'
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Install Java
+        uses: actions/setup-java@v3.8.0
+        with:
+          distribution: 'zulu'
+          java-version: '8'
+      - name: run validatesRunnerV2Streaming script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:google-cloud-dataflow-java:validatesRunnerV2Streaming
+          max-workers: 12
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml
similarity index 81%
rename from .github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml
rename to .github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml
index f61cc41d5222..24e2c8014336 100644
--- a/.github/workflows/beam_PostCommit_Java_Examples_Dataflow_Java17.yml
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct.yml
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-name: PostCommit Java Examples Dataflow Java17
+name: PostCommit Java ValidatesRunner Direct
 
 on:
   issue_comment:
@@ -51,38 +51,35 @@ env:
   GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
 
 jobs:
-  beam_PostCommit_Java_Examples_Dataflow_Java17:
+  beam_PostCommit_Java_ValidatesRunner_Direct:
     name: ${{matrix.job_name}} (${{matrix.job_phrase}})
     runs-on: [self-hosted, ubuntu-20.04, main]
     timeout-minutes: 180
     strategy:
       matrix:
-        job_name: [beam_PostCommit_Java_Examples_Dataflow_Java17]
-        job_phrase: [Run Java examples on Dataflow Java 17]
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Direct]
+        job_phrase: [Run Direct ValidatesRunner]
     if: |
       github.event_name == 'workflow_dispatch' ||
       github.event_name == 'schedule' ||
-      github.event.comment.body == 'Run Java examples on Dataflow Java 17'
+      github.event.comment.body == 'Run Direct ValidatesRunner'
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Setup repository
         uses: ./.github/actions/setup-action
         with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Set up Java
+      - name: Install Java
        uses: actions/setup-java@v3.8.0
        with:
-          distribution: 'temurin'
-          java-version: |
-            17
-            8
-      - name: run PostCommit Java Examples Dataflow Java17 script
+          distribution: 'zulu'
+          java-version: '8'
+      - name: run validatesRunner script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
-          gradle-command: :runners:google-cloud-dataflow-java:examples:java17PostCommit
-          max-workers: 12
+          gradle-command: :runners:direct-java:validatesRunner
       - name: Archive JUnit Test Results
         uses: actions/upload-artifact@v3
         if: failure()
@@ -93,4 +90,6 @@ jobs:
         uses: EnricoMi/publish-unit-test-result-action@v2
         if: always()
         with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
           files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml
new file mode 100644
index 000000000000..168cd245e0db
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions.yml
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
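+# Runs the Direct runner ValidatesRunner suite on Java 11 and 17, using jars built with Java 8.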
+
+name: PostCommit Java ValidatesRunner Direct JavaVersions
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
+  cancel-in-progress: true
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions:
+    name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.java_version}})
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 480
+    strategy:
+      fail-fast: false
+      matrix:
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Direct_JavaVersions]
+        job_phrase: [Run Direct ValidatesRunner Java]
+        java_version: ['11','17']
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      startswith(github.event.comment.body, 'Run Direct ValidatesRunner Java')
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }} ${{matrix.java_version}}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) ${{matrix.java_version}}
+      - name: Set up Java${{ matrix.java_version }}
+        uses: actions/setup-java@v3.8.0
+        with:
+          distribution: 'temurin'
+          java-version: |
+            ${{ matrix.java_version }}
+            8
+      - name: run jar Java${{ matrix.java_version }} script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:direct-java:shadowJar :runners:direct-java:shadowTestJar
+          arguments: |
+            -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \
+      - name: run validatesRunner Java${{ matrix.java_version }} script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:direct-java:validatesRunner
+          arguments: |
+            -x shadowJar \
+            -x shadowTestJar \
+            -x compileJava \
+            -x compileTestJava \
+            -Dorg.gradle.java.home=$JAVA_HOME_${{ matrix.java_version }}_X64 \
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml
new file mode 100644
index 000000000000..20aac8a30608
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink.yml
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PostCommit Java ValidatesRunner Flink
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PostCommit_Java_ValidatesRunner_Flink:
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    strategy:
+      matrix:
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Flink]
+        job_phrase: [Run Flink ValidatesRunner]
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Flink ValidatesRunner'
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
+        with:
+          java-version: 8
+          python-version: 3.8
+      - name: run validatesRunner script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:flink:1.15:validatesRunner
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml
new file mode 100644
index 000000000000..d5de5fbfa3ff
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Flink_Java11.yml
@@ -0,0 +1,114 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: PostCommit Java ValidatesRunner Flink Java11
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
+  cancel-in-progress: true
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PostCommit_Java_ValidatesRunner_Flink_Java11:
+    name: ${{matrix.job_name}} (${{matrix.job_phrase}})
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 270
+    strategy:
+      matrix:
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Flink_Java11]
+        job_phrase: [Run Flink ValidatesRunner Java 11]
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      startswith(github.event.comment.body, 'Run Flink ValidatesRunner Java 11')
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Set up Java
+        uses: actions/setup-java@v3.8.0
+        with:
+          distribution: 'temurin'
+          java-version: |
+            11
+            8
+      - name: run jar Java8 script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:flink:1.15:jar :runners:flink:1.15:testJar
+          arguments: |
+            -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \
+      - name: run validatesRunner Java11 script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:flink:1.15:validatesRunner
+          arguments: |
+            -x shadowJar \
+            -x shadowTestJar \
+            -x compileJava \
+            -x compileTestJava \
+            -x jar \
+            -x testJar \
+            -x classes \
+            -x testClasses \
+            -Dorg.gradle.java.home=$JAVA_HOME_11_X64 \
+          max-workers: 12
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml
new file mode 100644
index 000000000000..0613f794a4f4
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Samza.yml
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PostCommit Java ValidatesRunner Samza
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PostCommit_Java_ValidatesRunner_Samza:
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 100
+    strategy:
+      matrix:
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Samza]
+        job_phrase: [Run Samza ValidatesRunner]
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Samza ValidatesRunner'
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Install Java
+        uses: actions/setup-java@v3.8.0
+        with:
+          distribution: 'zulu'
+          java-version: '8'
+      - name: run validatesRunner script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:samza:validatesRunner
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml
new file mode 100644
index 000000000000..146a88c921d2
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark.yml
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: PostCommit Java ValidatesRunner Spark
+
+on:
+  issue_comment:
+    types: [created]
+  schedule:
+    - cron: '0 */6 * * *'
+  workflow_dispatch:
+
+#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
+permissions:
+  actions: write
+  pull-requests: write
+  checks: write
+  contents: read
+  deployments: read
+  id-token: none
+  issues: write
+  discussions: read
+  packages: read
+  pages: read
+  repository-projects: read
+  security-events: read
+  statuses: read
+
+# This allows a subsequently queued workflow run to interrupt previous runs
+concurrency:
+  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
+  cancel-in-progress: true
+
+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
+jobs:
+  beam_PostCommit_Java_ValidatesRunner_Spark:
+    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 120
+    strategy:
+      matrix:
+        job_name: [beam_PostCommit_Java_ValidatesRunner_Spark]
+        job_phrase: [Run Spark ValidatesRunner]
+    if: |
+      github.event_name == 'workflow_dispatch' ||
+      github.event_name == 'schedule' ||
+      github.event.comment.body == 'Run Spark ValidatesRunner'
+    steps:
+      - uses: actions/checkout@v4
+      - name: Setup repository
+        uses: ./.github/actions/setup-action
+        with:
+          comment_phrase: ${{ matrix.job_phrase }}
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
+      - name: Install Java
+        uses: actions/setup-java@v3.8.0
+        with:
+          distribution: 'zulu'
+          java-version: '8'
+      - name: run validatesRunner script
+        uses: ./.github/actions/gradle-command-self-hosted-action
+        with:
+          gradle-command: :runners:spark:3:validatesRunner
+      - name: Archive JUnit Test Results
+        uses: actions/upload-artifact@v3
+        if: failure()
+        with:
+          name: JUnit Test Results
+          path: "**/build/reports/tests/"
+      - name: Publish JUnit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v2
+        if: always()
+        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
+          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml
new file mode 100644
index 000000000000..fcaa3c6eabd4
--- /dev/null
+++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming.yml
@@ -0,0 +1,94 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
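+# Runs the batch ValidatesRunner suite against the Spark 3 structured streaming runner.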
+ +name: PostCommit Java ValidatesRunner SparkStructuredStreaming + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_SparkStructuredStreaming] + job_phrase: [Run Spark StructuredStreaming ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Spark StructuredStreaming ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesStructuredStreamingRunnerBatch script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesStructuredStreamingRunnerBatch + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml new file mode 100644 index 000000000000..e6fe19393d13 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Spark_Java11.yml @@ -0,0 +1,114 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Java ValidatesRunner Spark Java11 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Spark_Java11: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 270 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Spark_Java11] + job_phrase: [Run Spark ValidatesRunner Java 11] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + startswith(github.event.comment.body, 'Run Spark ValidatesRunner Java 11') + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Set up Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'temurin' + java-version: | + 11 + 8 + - name: run jar Java8 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:jar :runners:spark:3:testJar + arguments: | + -Dorg.gradle.java.home=$JAVA_HOME_8_X64 \ + - name: run validatesRunner Java11 script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:validatesRunner + arguments: | + -x shadowJar \ + -x shadowTestJar \ + -x compileJava \ + -x compileTestJava \ + -x jar \ + -x testJar \ + -x classes \ + -x testClasses \ + -Dorg.gradle.java.home=$JAVA_HOME_11_X64 \ + max-workers: 12 + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml new file mode 100644 index 000000000000..89364f32865b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_Twister2.yml @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner Twister2 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_Twister2: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_Twister2] + job_phrase: [Run Twister2 ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Twister2 ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: run validatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:twister2:validatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ 
github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml new file mode 100644 index 000000000000..e5e397c8c9d3 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Java_ValidatesRunner_ULR.yml @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Java ValidatesRunner ULR + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Java_ValidatesRunner_ULR: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + strategy: + matrix: + job_name: [beam_PostCommit_Java_ValidatesRunner_ULR] + job_phrase: [Run ULR Loopback ValidatesRunner] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run ULR Loopback ValidatesRunner' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Java + uses: actions/setup-java@v3.8.0 + with: + distribution: 'zulu' + java-version: '8' + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: '3.8' + - name: run ulrLoopbackValidatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:portability:java:ulrLoopbackValidatesRunner + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit 
Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Javadoc.yml b/.github/workflows/beam_PostCommit_Javadoc.yml new file mode 100644 index 000000000000..240b0e43d271 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Javadoc.yml @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Javadoc + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Javadoc: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_Javadoc] + job_phrase: [Run Javadoc PostCommit] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Javadoc PostCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run aggregateJavadoc script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:java:javadoc:aggregateJavadoc + - name: Upload Javadoc Results + uses: actions/upload-artifact@v3 + with: + name: Javadoc Results + path: '**/sdks/java/javadoc/build/docs/javadoc/**' \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_PortableJar_Flink.yml b/.github/workflows/beam_PostCommit_PortableJar_Flink.yml new file mode 100644 index 000000000000..38c68059bdbe --- /dev/null +++ b/.github/workflows/beam_PostCommit_PortableJar_Flink.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit PortableJar Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_PortableJar_Flink: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run PortableJar_Flink PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + strategy: + matrix: + job_name: ["beam_PostCommit_PortableJar_Flink"] + job_phrase: ["Run PortableJar_Flink PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run testPipelineJarFlinkRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py38:testPipelineJarFlinkRunner + arguments: | + -PpythonVersion=3.8 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_PortableJar_Spark.yml 
b/.github/workflows/beam_PostCommit_PortableJar_Spark.yml new file mode 100644 index 000000000000..4cb99f85f0fb --- /dev/null +++ b/.github/workflows/beam_PostCommit_PortableJar_Spark.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit PortableJar Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_PortableJar_Spark: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run PortableJar_Spark PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: ["beam_PostCommit_PortableJar_Spark"] + job_phrase: ["Run PortableJar_Spark PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run testPipelineJarSparkRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py38:testPipelineJarSparkRunner + arguments: | + -PpythonVersion=3.8 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python.yml b/.github/workflows/beam_PostCommit_Python.yml new file mode 100644 index 000000000000..7dde6ed150df --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_Python.yml @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit Python + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python: + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.python_version}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Python] + job_phrase: [Run Python PostCommit] + python_version: ['3.8', '3.9', '3.10', '3.11'] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python PostCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: ${{matrix.python_version}} + - name: Install docker compose + run: | + sudo curl -L https://github.com/docker/compose/releases/download/1.22.0/docker-compose-$(uname -s)-$(uname -m) -o /usr/local/bin/docker-compose + sudo chmod +x /usr/local/bin/docker-compose + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: run PostCommit Python ${{ matrix.python_version }} script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :python${{steps.set_py_ver_clean.outputs.py_ver_clean}}PostCommit + arguments: | + 
-PuseWheelDistribution \ + -PpythonVersion=${{ matrix.python_version }} \ + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml index 2aba1760606c..14cdecb356bd 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Dataflow.yml @@ -58,31 +58,22 @@ jobs: runs-on: [self-hosted, ubuntu-20.04, main] timeout-minutes: 180 strategy: - fail-fast: false matrix: job_name: ["beam_PostCommit_Python_Examples_Dataflow"] job_phrase: ["Run Python Examples_Dataflow"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: 3.11 - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Run examplesPostCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml b/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml index 913bac96a174..7c792d7a3c27 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Direct.yml @@ -64,26 +64,18 @@ jobs: job_phrase: ["Run Python Examples_Direct"] python_version: ['3.8','3.9','3.10','3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: ${{ matrix.python_version }} - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set PY_VER_CLEAN id: set_py_ver_clean run: | diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml b/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml index 8d2bc47f267e..ccf03918f29d 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Flink.yml @@ -64,26 +64,18 @@ jobs: job_phrase: ["Run Python Examples_Flink"] python_version: ['3.8', '3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ 
matrix.python_version }}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: ${{ matrix.python_version }} - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set PY_VER_CLEAN id: set_py_ver_clean run: | diff --git a/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml b/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml index 9435720a1af9..073ed0aeda64 100644 --- a/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml +++ b/.github/workflows/beam_PostCommit_Python_Examples_Spark.yml @@ -64,26 +64,18 @@ jobs: job_phrase: ["Run Python Examples_Spark"] python_version: ['3.8', '3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) - - name: Install Python - uses: actions/setup-python@v4 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: + java-version: 8 python-version: ${{ matrix.python_version }} - - name: Install Java - uses: actions/setup-java@v3 - with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - name: Set PY_VER_CLEAN id: set_py_ver_clean run: | diff --git a/.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml b/.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml new file mode 100644 index 000000000000..be8f0e10dc18 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_MongoDBIO_IT.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
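
The `Set PY_VER_CLEAN` step that recurs in the hunks above leans on a bash parameter expansion: `${PY_VER//.}` deletes every dot, turning a dotted Python version into the suffix of a Gradle project name. A minimal standalone sketch of the same pattern (a hypothetical demo workflow, not part of this patch):

```yaml
name: Demo PY_VER_CLEAN
on: workflow_dispatch
jobs:
  demo:
    runs-on: ubuntu-latest
    steps:
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
          PY_VER=3.10
          # ${PY_VER//.} removes every "." from the value: "3.10" -> "310"
          PY_VER_CLEAN=${PY_VER//.}
          echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT
      - name: Use the cleaned value
        # Prints "py310", the shape used in task paths such as
        # :sdks:python:test-suites:dataflow:py310:validatesContainer
        run: echo "py${{ steps.set_py_ver_clean.outputs.py_ver_clean }}"
```
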
+ +name: PostCommit Python MongoDBIO IT + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_MongoDBIO_IT: + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run Python MongoDBIO_IT' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + strategy: + matrix: + job_name: ["beam_PostCommit_Python_MongoDBIO_IT"] + job_phrase: ["Run Python MongoDBIO_IT"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: 3.11 + - name: Run mongodbioIT script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:py311:mongodbioIT + arguments: | + -PpythonVersion=3.11 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml b/.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml new file mode 100644 index 000000000000..81c9b4a8b484 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Nexmark_Direct.yml @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
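
The MongoDBIO job above shows the trigger contract shared by every workflow in this patch: run on a cron schedule, on manual dispatch, or when a PR comment exactly matches the job's phrase, with the `setup-action` composite resolving the comment back to a commit. A reduced sketch of just the trigger wiring (names are illustrative):

```yaml
name: Demo PostCommit
on:
  issue_comment:
    types: [created]
  schedule:
    - cron: '0 */6 * * *'
  workflow_dispatch:
jobs:
  demo:
    # Fire on schedule, on manual dispatch, or on an exactly matching PR comment.
    if: |
      github.event_name == 'schedule' ||
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Demo PostCommit'
    runs-on: ubuntu-latest
    steps:
      - run: echo "triggered via ${{ github.event_name }}"
```
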
+ +name: PostCommit Python Nexmark Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + GRADLE_JAVA_COMMAND_ARGUMENTS: | + --manageResources=false + --monitorJobs=true + --bigQueryTable=nexmark + --bigQueryDataset=nexmark + --project=apache-beam-testing + --resourceNameMode=QUERY_RUNNER_AND_MODE + --exportSummaryToBigQuery=true + --tempLocation=gs://temp-storage-for-perf-tests/nexmark + --influxDatabase=beam_test_metrics + --influxHost=http://10.128.0.96:8086 + --baseInfluxMeasurement=nexmark + --exportSummaryToInfluxDB=true + --influxRetentionPolicy=forever + --suite=SMOKE + --enforceEncodability=true + --enforceImmutability=true + --runner=DirectRunner + --numEvents=100000 + GRADLE_PYTHON_COMMAND_ARGUMENTS: | + --monitor_jobs + --big_query_table=nexmark + --big_query_dataset=nexmark + --project=apache-beam-testing + --resource_name_mode=QUERY_RUNNER_AND_MODE + --export_summary_to_big_query + --temp_location=gs://temp-storage-for-perf-tests/nexmark + --influx_database=beam_test_metrics + --influx_host=http://10.128.0.96:8086 + --base_influx_measurement=nexmark + --export_summary_to_influx_db + --influx_retention_policy=forever + --suite=SMOKE + --enforce_encodability + --enforce_immutability + --runner=DirectRunner + --num_events=100000 + +jobs: + beam_PostCommit_Python_Nexmark_Direct: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + fail-fast: false + matrix: + job_name: [beam_PostCommit_Python_Nexmark_Direct] + job_phrase: [Run Python Direct Runner Nexmark Tests] + # Query numbers are listed explicitly due to issues with Python Nexmark Query: + # query = 1 - https://github.com/apache/beam/issues/24678 + # query = 4,6,9 - https://github.com/apache/beam/issues/24679 + # query = 12 - https://github.com/apache/beam/issues/24680 + query: [0, 2, 3, 5, 7, 8, 10, 11] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python Direct Runner Nexmark Tests' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.8 + - name: run Java Testing Nexmark (query ${{ matrix.query }}) + uses: ./.github/actions/gradle-command-self-hosted-action + 
with: + gradle-command: :sdks:java:testing:nexmark:run + arguments: | + -Pnexmark.runner=:runners:direct-java \ + "-Pnexmark.args=${{ env.GRADLE_JAVA_COMMAND_ARGUMENTS }} \ + --query=${{ matrix.query }} \ + --generateEventFilePathPrefix=gs://temp-storage-for-perf-tests/nexmark/eventFiles/beam_PostCommit_Python_Nexmark_Direct/query${{ matrix.query }}-" \ + - name: run Python Testing Benchmarks Nexmark (query ${{ matrix.query }}) + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:apache_beam:testing:benchmarks:nexmark:run + arguments: | + -PpythonVersion=3.8 \ + "-Pnexmark.args=${{ env.GRADLE_PYTHON_COMMAND_ARGUMENTS }} \ + --query=${{ matrix.query }} \ + --input=gs://temp-storage-for-perf-tests/nexmark/eventFiles/beam_PostCommit_Python_Nexmark_Direct/query${{ matrix.query }}-\*" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml new file mode 100644 index 000000000000..ca5753010d1c --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow.yml @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
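
A note on the `arguments: |` blocks in the Nexmark hunks above: assuming the `gradle-command-self-hosted-action` composite splices the block into a single shell invocation (its exact mechanics are not shown in this patch), the trailing backslashes are bash line continuations and the surrounding quotes keep a multi-line `-Pnexmark.args` value together as one argument. The collapsed command is roughly:

```yaml
# Illustrative equivalent of the interpolated step, under the assumption above.
- name: Run Nexmark smoke query (sketch)
  shell: bash
  run: |
    ./gradlew :sdks:java:testing:nexmark:run \
      -Pnexmark.runner=:runners:direct-java \
      "-Pnexmark.args=--suite=SMOKE --query=0"
```
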
+ +name: PostCommit Python ValidatesContainer Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesContainer_Dataflow: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Dataflow ValidatesContainer') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesContainer_Dataflow"] + job_phrase: ["Run Python Dataflow ValidatesContainer"] + python_version: ['3.8','3.9','3.10','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesContainer script + env: + USER: github-actions + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesContainer + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml new file mode 100644 index 000000000000..ded9ff0a4bd5 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC.yml @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
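
The ValidatesContainer job above ends, like most jobs in this patch, by archiving the pytest XML with `actions/upload-artifact@v3`; the Xlang variants further down add `if: always()` so the report is captured even when the test step fails. A minimal sketch of that tail (hypothetical job fragment):

```yaml
# Minimal sketch of the shared archiving tail (hypothetical job fragment).
- name: Archive code coverage results
  uses: actions/upload-artifact@v3
  if: always()   # keep the pytest XML even when the test step failed
  with:
    name: python-code-coverage-report
    path: "**/pytest*.xml"
```
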
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesContainer Dataflow With RC + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python RC Dataflow ValidatesContainer') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesContainer_Dataflow_With_RC"] + job_phrase: ["Run Python RC Dataflow ValidatesContainer"] + python_version: ['3.8','3.9','3.10','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesContainer script + env: + USER: github-actions + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesContainer + arguments: | + -PtestRCDependencies=true \ + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git
a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml new file mode 100644 index 000000000000..4119ddd56020 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Dataflow.yml @@ -0,0 +1,103 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesRunner Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Dataflow: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Dataflow ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 200 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Dataflow"] + job_phrase: ["Run Python Dataflow ValidatesRunner"] + python_version: ['3.8', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run validatesRunnerBatchTests script + uses: ./.github/actions/gradle-command-self-hosted-action + 
with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesRunnerBatchTests + arguments: | + -PuseWheelDistribution \ + -PpythonVersion=${{ matrix.python_version }} \ + - name: Run validatesRunnerStreamingTests script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:validatesRunnerStreamingTests + arguments: | + -PuseWheelDistribution \ + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml new file mode 100644 index 000000000000..608bba248b3b --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Flink.yml @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
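
The ValidatesRunner jobs above provision their toolchains through the repo-local `setup-environment-action` composite rather than separate per-tool setup steps; a suite passes only the versions it needs. A sketch of a typical invocation (versions illustrative):

```yaml
steps:
  - uses: actions/checkout@v4
  - name: Setup environment
    uses: ./.github/actions/setup-environment-action
    with:
      java-version: 8
      python-version: '3.11'  # omit any input a suite does not need
```
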
+ +name: PostCommit Python ValidatesRunner Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Flink: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Flink ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Flink"] + job_phrase: ["Run Python Flink ValidatesRunner"] + python_version: ['3.8', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run flinkValidatesRunner script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:flinkValidatesRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml new file mode 100644 index 000000000000..bd0bbe1d6ff1 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Samza.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesRunner Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Samza: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Samza ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Samza"] + job_phrase: ["Run Python Samza ValidatesRunner"] + python_version: ['3.8', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run samzaValidatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:samzaValidatesRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml new file mode 100644 index 000000000000..6fda3a210aaf --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_ValidatesRunner_Spark.yml @@ -0,0 +1,95 @@ +# Licensed to the Apache 
Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python ValidatesRunner Spark + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_ValidatesRunner_Spark: + if: | + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + startsWith(github.event.comment.body, 'Run Python Spark ValidatesRunner') + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + strategy: + fail-fast: false + matrix: + job_name: ["beam_PostCommit_Python_ValidatesRunner_Spark"] + job_phrase: ["Run Python Spark ValidatesRunner"] + python_version: ['3.8', '3.9', '3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + java-version: 8 + python-version: ${{ matrix.python_version }} + - name: Set PY_VER_CLEAN + id: set_py_ver_clean + run: | + PY_VER=${{ matrix.python_version }} + PY_VER_CLEAN=${PY_VER//.} + echo "py_ver_clean=$PY_VER_CLEAN" >> $GITHUB_OUTPUT + - name: Run sparkValidatesRunner script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:portable:py${{steps.set_py_ver_clean.outputs.py_ver_clean}}:sparkValidatesRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + with: + name: python-code-coverage-report + path: "**/pytest*.xml" \ No 
newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml new file mode 100644 index 000000000000..34364bb38bcc --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Dataflow.yml @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang Gcp Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_Gcp_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_Gcp_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_Gcp_Dataflow"] + job_phrase: ["Run Python_Xlang_Gcp_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang Gcp Dataflow script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:gcpCrossLanguagePostCommit + arguments: -PuseWheelDistribution + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git 
a/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml new file mode 100644 index 000000000000..f753b3cf15df --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_Gcp_Direct.yml @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang Gcp Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_Gcp_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_Gcp_Direct PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_Gcp_Direct"] + job_phrase: ["Run Python_Xlang_Gcp_Direct PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang Gcp Direct script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:gcpCrossLanguagePostCommit + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml b/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml new file mode 100644 
index 000000000000..6cc132935e66 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Python_Xlang_IO_Dataflow.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Python Xlang IO Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Python_Xlang_IO_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Python_Xlang_IO_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 180 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Python_Xlang_IO_Dataflow"] + job_phrase: ["Run Python_Xlang_IO_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit Python Xlang IO Dataflow script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:dataflow:ioCrossLanguagePostCommit + arguments: | + -PuseWheelDistribution \ + -PkafkaBootstrapServer=10.128.0.40:9094,10.128.0.28:9094,10.128.0.165:9094 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_SQL.yml b/.github/workflows/beam_PostCommit_SQL.yml new file mode 100644 index 000000000000..d27c5718d6f9 --- /dev/null +++ 
b/.github/workflows/beam_PostCommit_SQL.yml @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: PostCommit SQL + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: write + checks: write + contents: read + deployments: read + id-token: none + issues: write + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_SQL: + name: ${{matrix.job_name}} (${{matrix.job_phrase}}) + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 240 + strategy: + matrix: + job_name: [beam_PostCommit_SQL] + job_phrase: [Run SQL PostCommit] + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run SQL PostCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit SQL script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sqlPostCommit + - name: Archive JUnit Test Results + uses: actions/upload-artifact@v3 + if: failure() + with: + name: JUnit Test Results + path: "**/build/reports/tests/" + - name: Publish JUnit Test Results + uses: EnricoMi/publish-unit-test-result-action@v2 + if: always() + with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} + files: '**/build/test-results/**/*.xml' diff --git a/.github/workflows/beam_PostCommit_Sickbay_Python.yml b/.github/workflows/beam_PostCommit_Sickbay_Python.yml index af5fda81a4ec..465dc9966023 100644 --- a/.github/workflows/beam_PostCommit_Sickbay_Python.yml +++ b/.github/workflows/beam_PostCommit_Sickbay_Python.yml @@ -21,12 +21,12 @@ on: issue_comment: types: [created] schedule: - - cron: '0 */6 * * *' + - 
cron: '0 0 * * *' workflow_dispatch: # This allows a subsequently queued workflow run to interrupt previous runs concurrency: - group: '${{ github.workflow }} @ ${{ github.sha || github.head_ref || github.ref }}-${{ github.event.sender.login }}-${{ github.event.schedule }}' + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true #Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event @@ -52,27 +52,29 @@ env: jobs: beam_PostCommit_Sickbay_Python: - name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{matrix.python_version}}) + name: ${{matrix.job_name}} (${{matrix.job_phrase_1}} ${{matrix.python_version}} ${{matrix.job_phrase_2}}) runs-on: [self-hosted, ubuntu-20.04, main] timeout-minutes: 180 strategy: fail-fast: false matrix: job_name: [beam_PostCommit_Sickbay_Python] - job_phrase: [Run Python PostCommit Sickbay tests] + job_phrase_1: [Run Python] + job_phrase_2: [PostCommit Sickbay] python_version: ['3.8', '3.9', '3.10', '3.11'] if: | github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || - github.event.comment.body == 'Run Python PostCommit Sickbay tests' + (startswith(github.event.comment.body, 'Run Python') && + endswith(github.event.comment.body, 'PostCommit Sickbay')) steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: - comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + comment_phrase: ${{ matrix.job_phrase_1 }} ${{matrix.python_version}} ${{ matrix.job_phrase_2 }} github_token: ${{ secrets.GITHUB_TOKEN }} - github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase_1 }} ${{matrix.python_version}} ${{ matrix.job_phrase_2 }}) - name: Install Python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/beam_PostCommit_TransformService_Direct.yml b/.github/workflows/beam_PostCommit_TransformService_Direct.yml index 346abcc9951a..1d8b35098e45 100644 --- a/.github/workflows/beam_PostCommit_TransformService_Direct.yml +++ b/.github/workflows/beam_PostCommit_TransformService_Direct.yml @@ -63,7 +63,7 @@ jobs: job_phrase: ["Run TransformService_Direct PostCommit"] python_version: ['3.8','3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: diff --git a/.github/workflows/beam_PostCommit_Website_Publish.yml b/.github/workflows/beam_PostCommit_Website_Publish.yml index 1493c506772f..4cb99532543d 100644 --- a/.github/workflows/beam_PostCommit_Website_Publish.yml +++ b/.github/workflows/beam_PostCommit_Website_Publish.yml @@ -55,7 +55,7 @@ jobs: timeout-minutes: 30 name: beam_PostCommit_Website_Publish steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: run PostCommit Website Publish script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PostCommit_Website_Test.yml b/.github/workflows/beam_PostCommit_Website_Test.yml new file mode 100644 index 000000000000..6155a45ef6a6 --- /dev/null +++ b/.github/workflows/beam_PostCommit_Website_Test.yml @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one or
more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit Website Test + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_Website_Test: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run Full Website Test' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 30 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_Website_Test"] + job_phrase: ["Run Full Website Test"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: run PostCommit Website Test script + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :website:testWebsite + arguments: -PdisableExternal=false \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Direct.yml b/.github/workflows/beam_PostCommit_XVR_Direct.yml new file mode 100644 index 000000000000..988b153986e9 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Direct.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Direct + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Direct: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Direct PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 120 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Direct"] + job_phrase: ["Run XVR_Direct PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Direct script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:xlang:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Direct script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :sdks:python:test-suites:direct:xlang:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Flink.yml b/.github/workflows/beam_PostCommit_XVR_Flink.yml new file mode 100644 index 
000000000000..0ab819c57a02 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Flink.yml @@ -0,0 +1,105 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Flink + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + FlinkVersion: 1.15 + +jobs: + beam_PostCommit_XVR_Flink: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Flink PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Flink"] + job_phrase: ["Run XVR_Flink PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Flink script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:flink:${{ env.FlinkVersion }}:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Flink script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + 
gradle-command: :runners:flink:${{ env.FlinkVersion }}:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml index ee715495ab0e..687c77e572f2 100644 --- a/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml +++ b/.github/workflows/beam_PostCommit_XVR_GoUsingJava_Dataflow.yml @@ -62,7 +62,7 @@ jobs: job_name: ["beam_PostCommit_XVR_GoUsingJava_Dataflow"] job_phrase: ["Run XVR_GoUsingJava_Dataflow PostCommit"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -89,4 +89,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml new file mode 100644 index 000000000000..71b3d27473e1 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_JavaUsingPython_Dataflow.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
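
[The XVR job-server workflows introduced in this patch (Direct, Flink, Samza, Spark3) all share one skeleton, differing mainly in trigger phrase, timeout, and the Gradle target. A minimal sketch of that shared pattern follows; the <Runner> placeholders are illustrative only and are not part of the patch:

  name: PostCommit XVR <Runner>
  on:
    issue_comment:
      types: [created]
    schedule:
      - cron: '0 */6 * * *'
    workflow_dispatch:
  jobs:
    beam_PostCommit_XVR_<Runner>:
      # Triggered by the cron schedule, manual dispatch, or a
      # "Run XVR_<Runner> PostCommit" comment on an issue/PR.
      if: |
        github.event_name == 'workflow_dispatch' ||
        github.event_name == 'schedule' ||
        github.event.comment.body == 'Run XVR_<Runner> PostCommit'
      runs-on: [self-hosted, ubuntu-20.04, main]
      strategy:
        matrix:
          python_version: ['3.8', '3.11']
      steps:
        - uses: actions/checkout@v4
        - name: run validatesCrossLanguageRunner
          uses: ./.github/actions/gradle-command-self-hosted-action
          with:
            gradle-command: :runners:<runner>:job-server:validatesCrossLanguageRunner
            arguments: -PpythonVersion=${{ matrix.python_version }}

Note the split visible in those four files: the 3.8 shard passes -PskipNonPythonTask=false so the non-Python suites run exactly once, while the 3.11 shard passes -PskipNonPythonTask=true and runs only the Python side. The Dataflow XVR workflows below use the same trigger/matrix scaffolding but invoke runner-specific validatesCrossLanguageRunner* targets without that split.]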
+ +name: PostCommit XVR JavaUsingPython Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_JavaUsingPython_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_JavaUsingPython_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_JavaUsingPython_Dataflow"] + job_phrase: ["Run XVR_JavaUsingPython_Dataflow PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR JavaUsingPython Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerJavaUsingPython + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml new file mode 100644 index 000000000000..ea3fd2cb86b8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR PythonUsingJavaSQL Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_PythonUsingJavaSQL_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_PythonUsingJavaSQL_Dataflow"] + job_phrase: ["Run XVR_PythonUsingJavaSQL_Dataflow PostCommit"] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: 3.11 + - name: run PostCommit XVR PythonUsingJavaSQL Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerPythonUsingSql + arguments: | + -PpythonVersion=3.11 \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml b/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml new file mode 100644 index 000000000000..d575bbeabbc8 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_PythonUsingJava_Dataflow.yml @@ -0,0 +1,92 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR PythonUsingJava Dataflow + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_PythonUsingJava_Dataflow: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_PythonUsingJava_Dataflow PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_PythonUsingJava_Dataflow"] + job_phrase: ["Run XVR_PythonUsingJava_Dataflow PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR PythonUsingJava Dataflow script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:google-cloud-dataflow-java:validatesCrossLanguageRunnerPythonUsingJava + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/pytest*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Samza.yml b/.github/workflows/beam_PostCommit_XVR_Samza.yml new file mode 100644 index 000000000000..f808456b8c97 --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Samza.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Samza + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Samza: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Samza PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Samza"] + job_phrase: ["Run XVR_Samza PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Samza script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:samza:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: 
archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PostCommit_XVR_Spark3.yml b/.github/workflows/beam_PostCommit_XVR_Spark3.yml new file mode 100644 index 000000000000..8ca97bb23edb --- /dev/null +++ b/.github/workflows/beam_PostCommit_XVR_Spark3.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PostCommit XVR Spark3 + +on: + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +#Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +jobs: + beam_PostCommit_XVR_Spark3: + if: | + github.event_name == 'workflow_dispatch' || + github.event_name == 'schedule' || + github.event.comment.body == 'Run XVR_Spark3 PostCommit' + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 100 + name: ${{matrix.job_name}} (${{matrix.job_phrase}} ${{ matrix.python_version }}) + strategy: + matrix: + job_name: ["beam_PostCommit_XVR_Spark3"] + job_phrase: ["Run XVR_Spark3 PostCommit"] + python_version: ['3.8','3.11'] + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }}) + - name: Install Python + uses: actions/setup-python@v4 + with: + python-version: | + 3.8 + 3.11 + - name: run PostCommit XVR Spark3 script + env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version != '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=true \ + - name: run PostCommit XVR Spark3 script 
+ env: + CLOUDSDK_CONFIG: ${{ env.KUBELET_GCLOUD_CONFIG_PATH}} + if: ${{ matrix.python_version == '3.8' }} + uses: ./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :runners:spark:3:job-server:validatesCrossLanguageRunner + arguments: | + -PpythonVersion=${{ matrix.python_version }} \ + -PskipNonPythonTask=false \ + - name: Archive code coverage results + uses: actions/upload-artifact@v3 + if: always() + with: + name: archiveJunit + path: "**/build/test-results/**/*.xml" \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_CommunityMetrics.yml b/.github/workflows/beam_PreCommit_CommunityMetrics.yml index 2b59e6290a58..7a93873168e0 100644 --- a/.github/workflows/beam_PreCommit_CommunityMetrics.yml +++ b/.github/workflows/beam_PreCommit_CommunityMetrics.yml @@ -71,22 +71,17 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run CommunityMetrics PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{matrix.job_phrase}} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - name: Remove default github maven configuration diff --git a/.github/workflows/beam_PreCommit_Go.yml b/.github/workflows/beam_PreCommit_Go.yml index 50c2b3a265b1..227f3c7648ab 100644 --- a/.github/workflows/beam_PreCommit_Go.yml +++ b/.github/workflows/beam_PreCommit_Go.yml @@ -71,26 +71,18 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Go PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Install Go - uses: actions/setup-go@v4 - with: - go-version: '1.21' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 + go-version: 1.21 - name: run goPreCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PreCommit_GoPortable.yml b/.github/workflows/beam_PreCommit_GoPortable.yml index 397dac39b3f0..8156df15133c 100644 --- a/.github/workflows/beam_PreCommit_GoPortable.yml +++ b/.github/workflows/beam_PreCommit_GoPortable.yml @@ -71,18 +71,18 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run GoPortable PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{ matrix.job_phrase }} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + - name: Setup environment + uses: 
./.github/actions/setup-environment-action with: - requires-py-39: false - requires-go: false + python-version: 3.8 + java-version: 8 - name: Run goPortablePreCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.github/workflows/beam_PreCommit_GoPrism.yml b/.github/workflows/beam_PreCommit_GoPrism.yml new file mode 100644 index 000000000000..1a669c157007 --- /dev/null +++ b/.github/workflows/beam_PreCommit_GoPrism.yml @@ -0,0 +1,89 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: PreCommit GoPrism + +on: + push: + tags: ['v*'] + branches: ['master', 'release-*'] + paths: ['model/**', 'sdks/go.**', 'release/**','.github/workflows/beam_PreCommit_GoPrism.yml'] + pull_request_target: + branches: ['master', 'release-*'] + paths: ['model/**', 'sdks/go.**', 'release/**'] + issue_comment: + types: [created] + schedule: + - cron: '0 */6 * * *' + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' + cancel-in-progress: true + +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + +# Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event +permissions: + actions: write + pull-requests: read + checks: read + contents: read + deployments: read + id-token: none + issues: read + discussions: read + packages: read + pages: read + repository-projects: read + security-events: read + statuses: read + +jobs: + beam_PreCommit_GoPrism: + name: ${{matrix.job_name}} (${{ matrix.job_phrase }}) + runs-on: [self-hosted, ubuntu-20.04, main] + strategy: + matrix: + job_name: [beam_PreCommit_GoPrism] + job_phrase: [Run GoPrism PreCommit] + timeout-minutes: 120 + if: | + github.event_name == 'push' || + github.event_name == 'pull_request_target' || + github.event_name == 'schedule' || + github.event_name == 'workflow_dispatch' || + github.event.comment.body == 'Run GoPrism PreCommit' + steps: + - uses: actions/checkout@v4 + - name: Setup repository + uses: ./.github/actions/setup-action + with: + comment_phrase: ${{ matrix.job_phrase }} + github_token: ${{ secrets.GITHUB_TOKEN }} + github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) + - name: Setup environment + uses: ./.github/actions/setup-environment-action + with: + python-version: 3.8 + java-version: 8 + - name: Run goPrismPreCommit script + uses: 
./.github/actions/gradle-command-self-hosted-action + with: + gradle-command: :goPrismPreCommit \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_ItFramework.yml b/.github/workflows/beam_PreCommit_ItFramework.yml index 83f29211da35..e8ec1287be51 100644 --- a/.github/workflows/beam_PreCommit_ItFramework.yml +++ b/.github/workflows/beam_PreCommit_ItFramework.yml @@ -74,23 +74,17 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run It_Framework PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{matrix.job_phrase}} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - requires-py-38: false - requires-py-39: false - requires-go: false - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: run ItFrameworkPrecommit script run: ./gradlew -p it build - name: Archive JUnit Test Results @@ -103,4 +97,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java.yml b/.github/workflows/beam_PreCommit_Java.yml index 98022dba98ab..ab5c17c55f4c 100644 --- a/.github/workflows/beam_PreCommit_Java.yml +++ b/.github/workflows/beam_PreCommit_Java.yml @@ -141,6 +141,11 @@ permissions: security-events: read statuses: read +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java: name: ${{matrix.job_name}} (${{matrix.job_phrase}}) @@ -158,7 +163,7 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Java PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -182,6 +187,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml index ef1dcb636032..220dea3208cc 100644 --- a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services2_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_Amazon-Web-Services2_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -123,6 +123,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 
'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml index cf57623f0602..03ee673e2e24 100644 --- a/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Amazon-Web-Services_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_Amazon-Web-Services_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -123,6 +123,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml index 28aeae758376..fede8fa6d548 100644 --- a/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Amqp_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Amqp_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml index 05f1bb5ca53b..7e20dd043cad 100644 --- a/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Azure_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_Azure_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -116,6 +116,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml index 3c56c4f062a5..5f63a25440b8 100644 --- a/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Cassandra_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Cassandra_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() 
with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml index fa83a5958fe2..caa1c475f5b0 100644 --- a/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Cdap_IO_Direct.yml @@ -78,7 +78,7 @@ jobs: github.event.comment.body == 'Run Java_Cdap_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -102,6 +102,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml index 266e311c092a..8f2a5cde3749 100644 --- a/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Clickhouse_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Clickhouse_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml index 9b6d73395a1b..1c304500b567 100644 --- a/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Csv_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Csv_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml index 5d08a5e03e61..db348a7684af 100644 --- a/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Debezium_IO_Direct.yml @@ -53,6 +53,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + 
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_Debezium_IO_Direct: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -69,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Debezium_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -102,6 +107,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml index c58876de7ff0..27a3e175e7e1 100644 --- a/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_ElasticSearch_IO_Direct.yml @@ -55,6 +55,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_ElasticSearch_IO_Direct: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -71,7 +76,7 @@ jobs: github.event.comment.body == 'Run Java_ElasticSearch_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -106,6 +111,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml index 2d9e34eeebae..1dfd9ea1eb4d 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml +++ b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow.yml @@ -85,23 +85,17 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Java_Examples_Dataflow PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: ${{matrix.job_phrase}} github_token: ${{ secrets.GITHUB_TOKEN }} github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) - - name: Setup self-hosted - uses: ./.github/actions/setup-self-hosted-action + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - requires-py-38: false - requires-py-39: false - requires-go: false - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 - name: Authenticate on GCP uses: 
google-github-actions/setup-gcloud@v0 with: @@ -126,4 +120,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml index 5c7c81497756..8484360de7b1 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml +++ b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java11.yml @@ -86,7 +86,7 @@ jobs: github.event_name == 'workflow_dispatch' || github.event.comment.body == 'Run Java_Examples_Dataflow_Java11 PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -127,4 +127,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml index 724daf16d889..bc5c457eb761 100644 --- a/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml +++ b/.github/workflows/beam_PreCommit_Java_Examples_Dataflow_Java17.yml @@ -63,6 +63,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_Examples_Dataflow_Java17: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -79,7 +84,7 @@ jobs: github.event.comment.body == 'Run Java_Examples_Dataflow_Java17 PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -124,6 +129,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v2 diff --git a/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml index abb3c5d029a4..d256bca9ebaa 100644 --- a/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_File-schema-transform_IO_Direct.yml @@ -53,6 +53,11 @@ concurrency: group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}' cancel-in-progress: true +env: + GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + 
GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + jobs: beam_PreCommit_Java_File-schema-transform_IO_Direct: name: ${{ matrix.job_name }} (${{ matrix.job_phrase }}) @@ -69,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_File-schema-transform_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -94,6 +99,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml b/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml index 8fafdbcaa9e5..e4a04839cef4 100644 --- a/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml +++ b/.github/workflows/beam_PreCommit_Java_Flink_Versions.yml @@ -73,25 +73,17 @@ jobs: github.event_name == 'schedule' || github.event.comment.body == 'Run Java_Flink_Versions PreCommit' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: comment_phrase: 'Run Java_Flink_Versions PreCommit' github_token: ${{ secrets.GITHUB_TOKEN }} - - name: Install Java - uses: actions/setup-java@v3.8.0 + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' - java-version: '8' - - name: Install Python - uses: actions/setup-python@v4 - with: - python-version: '3.8' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + java-version: 8 + python-version: 3.8 - name: run Java Flink Versions PreCommit script uses: ./.github/actions/gradle-command-self-hosted-action with: @@ -108,4 +100,6 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' \ No newline at end of file diff --git a/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml index f567a4fd2a0d..840b753d8190 100644 --- a/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_GCP_IO_Direct.yml @@ -92,7 +92,7 @@ jobs: github.event.comment.body == 'Run Java_GCP_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -120,6 +120,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml index 0664f969996e..5734e0e9d453 100644 --- a/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_HBase_IO_Direct.yml @@ -76,7 +76,7 @@ jobs: 
github.event.comment.body == 'Run Java_HBase_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -100,6 +100,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml index 688a4ef2e5ff..6230c8dbae8a 100644 --- a/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_HCatalog_IO_Direct.yml @@ -76,7 +76,7 @@ jobs: github.event.comment.body == 'Run Java_HCatalog_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -100,6 +100,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml index cbfa9c371e38..ad69f5fb0895 100644 --- a/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Hadoop_IO_Direct.yml @@ -100,7 +100,7 @@ jobs: github.event.comment.body == 'Run Java_Hadoop_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -138,6 +138,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml b/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml index 8c7796da9a74..b20e9886e701 100644 --- a/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_IOs_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_IOs_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -99,6 +99,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml index 17f1676eb70f..9acbe5f8705a 100644 --- a/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml +++ 
b/.github/workflows/beam_PreCommit_Java_InfluxDb_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_InfluxDb_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -98,6 +98,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml index cbdfdaa5d304..869c189c1c9d 100644 --- a/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_JDBC_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_JDBC_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -105,6 +105,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml index 57aae16ebefc..160e01520845 100644 --- a/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Jms_IO_Direct.yml @@ -74,7 +74,7 @@ jobs: github.event.comment.body == 'Run Java_Jms_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -105,6 +105,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml index 003b56f80416..64fc4122923b 100644 --- a/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml +++ b/.github/workflows/beam_PreCommit_Java_Kafka_IO_Direct.yml @@ -82,7 +82,7 @@ jobs: github.event.comment.body == 'Run Java_Kafka_IO_Direct PreCommit' runs-on: [self-hosted, ubuntu-20.04, main] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup repository uses: ./.github/actions/setup-action with: @@ -107,6 +107,8 @@ jobs: uses: EnricoMi/publish-unit-test-result-action@v2 if: always() with: + commit: '${{ env.prsha || env.GITHUB_SHA }}' + comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }} files: '**/build/test-results/**/*.xml' - name: Archive SpotBugs Results uses: actions/upload-artifact@v3 diff --git a/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml index 3673fbcfef3a..d1788288d3d5 100644 --- 
diff --git a/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml
index 3673fbcfef3a..d1788288d3d5 100644
--- a/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Kinesis_IO_Direct.yml
@@ -92,7 +92,7 @@ jobs:
      github.event.comment.body == 'Run Java_Kinesis_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -130,6 +130,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml
index be65e5522761..693cead4b031 100644
--- a/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Kudu_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Kudu_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml
index e457693adfec..472b0f7f5531 100644
--- a/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_MongoDb_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_MongoDb_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml
index d01078fa08ca..ad0e5ac9feda 100644
--- a/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Mqtt_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Mqtt_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml
index 0a0ba8664a07..b6f51b243a8f 100644
--- a/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Neo4j_IO_Direct.yml
@@ -76,7 +76,7 @@ jobs:
      github.event.comment.body == 'Run Java_Neo4j_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -107,6 +107,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml
index b23d70c4f5af..9679b1825cf5 100644
--- a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml
+++ b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Batch.yml
@@ -59,6 +59,11 @@ permissions:
  security-events: read
  statuses: read

+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
 jobs:
  beam_PreCommit_Java_PVR_Flink_Batch:
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
@@ -75,7 +80,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Java_PVR_Flink_Batch PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,4 +103,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml
index 312210245477..30dd710e1848 100644
--- a/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml
+++ b/.github/workflows/beam_PreCommit_Java_PVR_Flink_Docker.yml
@@ -85,7 +85,7 @@ jobs:
      github.event.comment.body == 'Run Java_PVR_Flink_Docker PreCommit'
    timeout-minutes: 240
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -108,4 +108,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
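The env block added to PVR_Flink_Batch above (and to Spotless and Python_PVR_Flink later in this change) exposes Gradle Enterprise credentials to every step of the job, so Gradle invocations can publish build scans and read/write the remote build cache; the recurring pattern, verbatim:

```yaml
env:
  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
```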
diff --git a/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml
index 04468ae7588c..f2a27da20dd2 100644
--- a/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Parquet_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Parquet_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml
index 258e95e5d1b9..ed7c55c98f4d 100644
--- a/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Pulsar_IO_Direct.yml
@@ -92,7 +92,7 @@ jobs:
      github.event.comment.body == 'Run Java_Pulsar_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -116,6 +116,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml
index f137a5441bf3..962c1526c90a 100644
--- a/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_RabbitMq_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_RabbitMq_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml
index 7a90efe0ede2..efd8d472f8ae 100644
--- a/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Redis_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Redis_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml
index 26b7c81503bd..ad64ff286aab 100644
--- a/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_SingleStore_IO_Direct.yml
@@ -76,7 +76,7 @@ jobs:
      github.event.comment.body == 'Run Java_SingleStore_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -100,6 +100,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml
index 30cc95108239..f8eaec4c11c4 100644
--- a/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Snowflake_IO_Direct.yml
@@ -78,7 +78,7 @@ jobs:
      github.event.comment.body == 'Run Java_Snowflake_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -109,6 +109,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml
index a19db8a6faf4..8b834682ffd5 100644
--- a/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Solr_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Solr_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml b/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml
index b7b1dfa0ea29..2bb53629614d 100644
--- a/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml
+++ b/.github/workflows/beam_PreCommit_Java_Spark3_Versions.yml
@@ -76,7 +76,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Java_Spark3_Versions PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -112,4 +112,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml
index 4131bbdfa04f..ff0a31eb0d0e 100644
--- a/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Splunk_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Splunk_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml
index d8b275ccfefb..649e4ad28c65 100644
--- a/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Thrift_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Thrift_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml b/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml
index c3e340b3aa0e..42fe14cd8338 100644
--- a/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml
+++ b/.github/workflows/beam_PreCommit_Java_Tika_IO_Direct.yml
@@ -74,7 +74,7 @@ jobs:
      github.event.comment.body == 'Run Java_Tika_IO_Direct PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,6 +98,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_Kotlin_Examples.yml b/.github/workflows/beam_PreCommit_Kotlin_Examples.yml
index 892d0935a26a..b30163b12a47 100644
--- a/.github/workflows/beam_PreCommit_Kotlin_Examples.yml
+++ b/.github/workflows/beam_PreCommit_Kotlin_Examples.yml
@@ -86,7 +86,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Kotlin_Examples PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Portable_Python.yml b/.github/workflows/beam_PreCommit_Portable_Python.yml
index 7da598d58310..35e2c535f4ea 100644
--- a/.github/workflows/beam_PreCommit_Portable_Python.yml
+++ b/.github/workflows/beam_PreCommit_Portable_Python.yml
@@ -91,7 +91,7 @@ jobs:
      github.event_name == 'schedule' ||
      startsWith(github.event.comment.body, 'Run Portable_Python PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Python.yml b/.github/workflows/beam_PreCommit_Python.yml
index fe0a15aa651b..80c69afce6e2 100644
--- a/.github/workflows/beam_PreCommit_Python.yml
+++ b/.github/workflows/beam_PreCommit_Python.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_PythonDocker.yml b/.github/workflows/beam_PreCommit_PythonDocker.yml
index 94f3d0bc7592..669f316549c5 100644
--- a/.github/workflows/beam_PreCommit_PythonDocker.yml
+++ b/.github/workflows/beam_PreCommit_PythonDocker.yml
@@ -71,30 +71,19 @@ jobs:
      github.event_name == 'schedule' ||
      startsWith(github.event.comment.body, 'Run PythonDocker PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: '1.16'
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          go-version: 1.16
      - name: Setup Buildx
        uses: docker/setup-buildx-action@v2
        with:
diff --git a/.github/workflows/beam_PreCommit_PythonDocs.yml b/.github/workflows/beam_PreCommit_PythonDocs.yml
index e5e30c2a00cc..a67e8afa3a4e 100644
--- a/.github/workflows/beam_PreCommit_PythonDocs.yml
+++ b/.github/workflows/beam_PreCommit_PythonDocs.yml
@@ -71,26 +71,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run PythonDocs PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: run pythonDocsPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
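The Python workflows above and below all follow the same migration: three or four bespoke toolchain steps collapse into one call to the new composite action, which also configures Gradle. A representative invocation; judging by these diffs, each input is optional and a workflow passes only the toolchains it needs:

```yaml
- name: Setup environment
  uses: ./.github/actions/setup-environment-action
  with:
    java-version: 8                               # omit to skip Java setup
    python-version: ${{ matrix.python_version }}  # omit to skip Python setup
    go-version: 1.16                              # omit to skip Go setup
```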
diff --git a/.github/workflows/beam_PreCommit_PythonFormatter.yml b/.github/workflows/beam_PreCommit_PythonFormatter.yml
index 2bfdaff26f68..1a3335b370e9 100644
--- a/.github/workflows/beam_PreCommit_PythonFormatter.yml
+++ b/.github/workflows/beam_PreCommit_PythonFormatter.yml
@@ -70,26 +70,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run PythonFormatter PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: run pythonFormatterPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_PythonLint.yml b/.github/workflows/beam_PreCommit_PythonLint.yml
index 859c6532654d..9d290f1ba86d 100644
--- a/.github/workflows/beam_PreCommit_PythonLint.yml
+++ b/.github/workflows/beam_PreCommit_PythonLint.yml
@@ -70,30 +70,19 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run PythonLint PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Go
-        uses: actions/setup-go@v4
-        with:
-          go-version: '1.16'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
+          go-version: 1.16
      - name: run pythonLintPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Python_Coverage.yml b/.github/workflows/beam_PreCommit_Python_Coverage.yml
index 36cd2a6b42b4..65002f9da894 100644
--- a/.github/workflows/beam_PreCommit_Python_Coverage.yml
+++ b/.github/workflows/beam_PreCommit_Python_Coverage.yml
@@ -70,26 +70,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Python_Coverage PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          python-version: '3.8'
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: Run preCommitPyCoverage
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Python_Dataframes.yml b/.github/workflows/beam_PreCommit_Python_Dataframes.yml
index 5b1ec1492d34..a7a0ec1836ce 100644
--- a/.github/workflows/beam_PreCommit_Python_Dataframes.yml
+++ b/.github/workflows/beam_PreCommit_Python_Dataframes.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Dataframes PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase}} ${{ matrix.python_version}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name}} (${{ matrix.job_phrase}} ${{ matrix.python_version}})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_Examples.yml b/.github/workflows/beam_PreCommit_Python_Examples.yml
index b0dd7c16d296..1b03b4c0a35a 100644
--- a/.github/workflows/beam_PreCommit_Python_Examples.yml
+++ b/.github/workflows/beam_PreCommit_Python_Examples.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Examples PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_Integration.yml b/.github/workflows/beam_PreCommit_Python_Integration.yml
index ffe53bcfc323..5a1b7bc720f3 100644
--- a/.github/workflows/beam_PreCommit_Python_Integration.yml
+++ b/.github/workflows/beam_PreCommit_Python_Integration.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Integration PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml b/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml
index 840abc69a86a..a0011354749a 100644
--- a/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml
+++ b/.github/workflows/beam_PreCommit_Python_PVR_Flink.yml
@@ -71,6 +71,11 @@ concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
  cancel-in-progress: true

+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
 jobs:
  beam_PreCommit_Python_PVR_Flink:
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
@@ -87,7 +92,7 @@ jobs:
      github.event.comment.body == 'Run Python_PVR_Flink PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -98,9 +103,6 @@ jobs:
        uses: actions/setup-python@v4
        with:
          python-version: 3.11
-      - name: Configure passenv for tox
-        run: |
-          sed -i '/^\[testenv\]$/,/^\[/ s/^passenv=TERM/passenv=TERM,CLOUDSDK_CONFIG/' sdks/python/tox.ini
      - name: run Python PVR Flink PreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        env:
diff --git a/.github/workflows/beam_PreCommit_Python_Runners.yml b/.github/workflows/beam_PreCommit_Python_Runners.yml
index e317902a3eb8..775af7f39d24 100644
--- a/.github/workflows/beam_PreCommit_Python_Runners.yml
+++ b/.github/workflows/beam_PreCommit_Python_Runners.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Runners PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_Python_Transforms.yml b/.github/workflows/beam_PreCommit_Python_Transforms.yml
index 95f6945caad7..291dcde8665a 100644
--- a/.github/workflows/beam_PreCommit_Python_Transforms.yml
+++ b/.github/workflows/beam_PreCommit_Python_Transforms.yml
@@ -72,26 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.event.comment.body, 'Run Python_Transforms PreCommit')
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
+          java-version: 8
          python-version: ${{ matrix.python_version }}
-      - name: Install Java
-        uses: actions/setup-java@v3
-        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set PY_VER_CLEAN
        id: set_py_ver_clean
        run: |
diff --git a/.github/workflows/beam_PreCommit_RAT.yml b/.github/workflows/beam_PreCommit_RAT.yml
index 3a09b28af7da..390413fb7aca 100644
--- a/.github/workflows/beam_PreCommit_RAT.yml
+++ b/.github/workflows/beam_PreCommit_RAT.yml
@@ -69,22 +69,17 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run RAT PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
      - name: run RAT script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_SQL.yml b/.github/workflows/beam_PreCommit_SQL.yml
index 3b8ad98db155..18d84b219d5b 100644
--- a/.github/workflows/beam_PreCommit_SQL.yml
+++ b/.github/workflows/beam_PreCommit_SQL.yml
@@ -71,7 +71,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run SQL PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -101,6 +101,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v2
diff --git a/.github/workflows/beam_PreCommit_SQL_Java11.yml b/.github/workflows/beam_PreCommit_SQL_Java11.yml
index 2ff5e7459214..29a89d7f54a8 100644
--- a/.github/workflows/beam_PreCommit_SQL_Java11.yml
+++ b/.github/workflows/beam_PreCommit_SQL_Java11.yml
@@ -71,7 +71,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run SQL_Java11 PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -116,6 +116,8 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
      - name: Archive SpotBugs Results
        uses: actions/upload-artifact@v3
diff --git a/.github/workflows/beam_PreCommit_SQL_Java17.yml b/.github/workflows/beam_PreCommit_SQL_Java17.yml
index 15a223dcc19e..b493dd7f5fee 100644
--- a/.github/workflows/beam_PreCommit_SQL_Java17.yml
+++ b/.github/workflows/beam_PreCommit_SQL_Java17.yml
@@ -71,7 +71,7 @@ jobs:
      github.event.comment.body == 'Run SQL_Java17 PreCommit'
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -119,4 +119,6 @@ jobs:
        uses: EnricoMi/publish-unit-test-result-action@v2
        if: always()
        with:
+          commit: '${{ env.prsha || env.GITHUB_SHA }}'
+          comment_mode: ${{ github.event_name == 'issue_comment' && 'always' || 'off' }}
          files: '**/build/test-results/**/*.xml'
\ No newline at end of file
diff --git a/.github/workflows/beam_PreCommit_Spotless.yml b/.github/workflows/beam_PreCommit_Spotless.yml
index a703454a74fe..552a92e104a1 100644
--- a/.github/workflows/beam_PreCommit_Spotless.yml
+++ b/.github/workflows/beam_PreCommit_Spotless.yml
@@ -62,6 +62,11 @@ permissions:
  security-events: read
  statuses: read

+env:
+  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+
 jobs:
  beam_PreCommit_Spotless:
    name: ${{matrix.job_name}} (${{matrix.job_phrase}})
@@ -79,7 +84,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Spotless PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Typescript.yml b/.github/workflows/beam_PreCommit_Typescript.yml
index 728dede57106..21c760f2e525 100644
--- a/.github/workflows/beam_PreCommit_Typescript.yml
+++ b/.github/workflows/beam_PreCommit_Typescript.yml
@@ -72,22 +72,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Typescript PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{matrix.job_phrase}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          requires-py-39: false
-          requires-go: false
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          python-version: 3.8
+          java-version: 8
      - name: run typescriptPreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Website.yml b/.github/workflows/beam_PreCommit_Website.yml
index 7dd58b27768a..87218dc28033 100644
--- a/.github/workflows/beam_PreCommit_Website.yml
+++ b/.github/workflows/beam_PreCommit_Website.yml
@@ -71,22 +71,17 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Website PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
      - name: run websitePreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml b/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml
index da2ba216994e..f910f9a88da3 100644
--- a/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml
+++ b/.github/workflows/beam_PreCommit_Website_Stage_GCS.yml
@@ -38,6 +38,8 @@ env:
  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+  gcsbucket: apache-beam-website-pull-requests
+  ghprbPullId:

 #Setting explicit permissions for the action to avoid the default permissions which are `write-all` in case of pull_request_target event
 permissions:
@@ -55,10 +57,6 @@ permissions:
  security-events: read
  statuses: read

-env:
-  gcsbucket: apache-beam-website-pull-requests
-  ghprbPullId:
-
 jobs:
  beam_PreCommit_Website_Stage_GCS:
    name: ${{matrix.job_name}} (${{matrix.job_phrase}})
@@ -75,7 +73,7 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Website_Stage_GCS PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
@@ -84,15 +82,11 @@ jobs:
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      - name: Echo PR number
        run: echo "ghprbPullId=${{ github.event.pull_request.number || github.event.issue.number }}" >> $GITHUB_ENV
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
-        with:
-          requires-py-39: false
-          requires-go: false
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          cache-read-only: false
+          python-version: 3.8
+          java-version: 8
      - name: Authenticate on GCP
        uses: google-github-actions/setup-gcloud@v0
        with:
diff --git a/.github/workflows/beam_PreCommit_Whitespace.yml b/.github/workflows/beam_PreCommit_Whitespace.yml
index 04705d49928c..03a976cfe444 100644
--- a/.github/workflows/beam_PreCommit_Whitespace.yml
+++ b/.github/workflows/beam_PreCommit_Whitespace.yml
@@ -70,26 +70,18 @@ jobs:
      github.event_name == 'workflow_dispatch' ||
      github.event.comment.body == 'Run Whitespace PreCommit'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Install Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
+          python-version: 3.8
      - name: run whitespacePreCommit script
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
diff --git a/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml b/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml
index 7e929b751e35..d4c9178e6c91 100644
--- a/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml
+++ b/.github/workflows/beam_Python_ValidatesContainer_Dataflow_ARM.yml
@@ -69,21 +69,17 @@ jobs:
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }} ${{ matrix.python_version }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }} ${{ matrix.python_version }})
-      - name: Install Python
-        uses: actions/setup-python@v4
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
          python-version: ${{ matrix.python_version }}
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
      - name: Authenticate on GCP
diff --git a/.github/workflows/beam_Release_NightlySnapshot.yml b/.github/workflows/beam_Release_NightlySnapshot.yml
index 1b572f4aa0f3..a4be830cd3c5 100644
--- a/.github/workflows/beam_Release_NightlySnapshot.yml
+++ b/.github/workflows/beam_Release_NightlySnapshot.yml
@@ -54,22 +54,17 @@ jobs:
      github.event_name == 'schedule'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          github_job: ${{matrix.job_name}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          comment_phrase: "Release Nightly Snapshot"
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
+          java-version: 8
      - name: Auth on snapshot repository
        run: |
          mkdir -p ${HOME}/.m2
diff --git a/.github/workflows/beam_Release_Python_NightlySnapshot.yml b/.github/workflows/beam_Release_Python_NightlySnapshot.yml
index 2787de9eefef..62019c536969 100644
--- a/.github/workflows/beam_Release_Python_NightlySnapshot.yml
+++ b/.github/workflows/beam_Release_Python_NightlySnapshot.yml
@@ -53,26 +53,18 @@ jobs:
      github.event_name == 'schedule'
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          github_job: ${{matrix.job_name}}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          comment_phrase: ${{matrix.job_phrase}}
-      - name: Install Java
-        uses: actions/setup-java@v3.8.0
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          distribution: 'zulu'
-          java-version: '8'
-      - name: Setup Gradle
-        uses: gradle/gradle-build-action@v2
-        with:
-          cache-read-only: false
-      - name: Setup Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: '3.8'
+          java-version: 8
+          python-version: 3.8
      - name: Authenticate on GCP
        uses: google-github-actions/setup-gcloud@v0
        with:
diff --git a/.github/workflows/build_release_candidate.yml b/.github/workflows/build_release_candidate.yml
index 9e7cbe741a26..2d826d572d90 100644
--- a/.github/workflows/build_release_candidate.yml
+++ b/.github/workflows/build_release_candidate.yml
@@ -50,7 +50,7 @@ jobs:
    runs-on: [self-hosted, ubuntu-20.04, main]
    steps:
      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: "v${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
          repository: apache/beam
@@ -154,7 +154,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: "v${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
          repository: apache/beam
@@ -204,13 +204,13 @@ jobs:
      SITE_ROOT_DIR: ${{ github.workspace }}/beam-site
    steps:
      - name: Checkout Beam Repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: "v${{ github.event.inputs.RELEASE }}-RC${{ github.event.inputs.RC }}"
          repository: apache/beam
          path: beam
      - name: Checkout Beam Site Repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          repository: apache/beam-site
          path: beam-site
diff --git a/.github/workflows/build_runner_image.yml b/.github/workflows/build_runner_image.yml
index f64ada281d72..069b8b7db68c 100644
--- a/.github/workflows/build_runner_image.yml
+++ b/.github/workflows/build_runner_image.yml
@@ -36,7 +36,7 @@ jobs:
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Authenticate on GCP
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 1dd586082331..1028dd79af02 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -54,7 +54,7 @@ jobs:
      py-versions-full: ${{ steps.set-py-versions.outputs.py-versions-full }}
      py-versions-test: ${{ steps.set-py-versions.outputs.py-versions-test }}
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: "Check are GCP variables set"
        run: "./scripts/ci/ci_check_are_gcp_variables_set.sh"
        id: check_gcp_variables
@@ -87,7 +87,7 @@ jobs:
      rc_num: ${{ steps.get_rc_version.outputs.RC_NUM }}
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Install python
        uses: actions/setup-python@v4
        with:
@@ -269,7 +269,7 @@ jobs:
          # TODO: https://github.com/apache/beam/issues/23048
          CIBW_SKIP: "*-musllinux_*"
          CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib"
-          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy && pip install --upgrade setuptools
+          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
        run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
        shell: bash
      - name: install sha512sum on MacOS
@@ -295,7 +295,7 @@ jobs:
          # TODO: https://github.com/apache/beam/issues/23048
          CIBW_SKIP: "*-musllinux_*"
          CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib"
-          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy && pip install --upgrade setuptools
+          CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
        run: cibuildwheel --print-build-identifiers && cibuildwheel --output-dir wheelhouse
        shell: bash
      - name: Add RC checksums
@@ -385,7 +385,7 @@ jobs:
    if: github.repository_owner == 'apache' && github.event_name == 'schedule'
    steps:
      - name: Checkout code on master branch
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
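The CIBW_BEFORE_BUILD change above deserves a note: my reading is that recent numpy releases build with Meson, and pip's `--config-settings` forwards the `-Dallow-noblas=true` option to that build, so installing numpy from source inside the cibuildwheel images no longer fails on hosts without a BLAS library. The knob in isolation:

```yaml
env:
  # Build-time deps for the wheel build; the Meson flag lets numpy compile without BLAS.
  CIBW_BEFORE_BUILD: pip install cython==0.29.36 numpy --config-settings=setup-args="-Dallow-noblas=true" && pip install --upgrade setuptools
```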
diff --git a/.github/workflows/cancel.yml b/.github/workflows/cancel.yml
index df9172473f98..f826b22e043b 100644
--- a/.github/workflows/cancel.yml
+++ b/.github/workflows/cancel.yml
@@ -37,7 +37,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
diff --git a/.github/workflows/choose_rc_commit.yml b/.github/workflows/choose_rc_commit.yml
index d4de9d454a5c..0e51e5284a76 100644
--- a/.github/workflows/choose_rc_commit.yml
+++ b/.github/workflows/choose_rc_commit.yml
@@ -55,7 +55,7 @@ jobs:
      DEBUG: ""
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          ref: release-${{ github.event.inputs.RELEASE }}
      - name: Set git config
diff --git a/.github/workflows/code_completion_plugin_tests.yml b/.github/workflows/code_completion_plugin_tests.yml
index 9244608543db..38ffd2fbd3f4 100644
--- a/.github/workflows/code_completion_plugin_tests.yml
+++ b/.github/workflows/code_completion_plugin_tests.yml
@@ -56,13 +56,13 @@ jobs:
      # Check out beam repository
      - name: Fetch beam Sources
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          path: main
      # Check out intellij community repository for tests
      - name: Fetch intellij-community Sources
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          repository: JetBrains/intellij-community
          path: intellij
diff --git a/.github/workflows/cut_release_branch.yml b/.github/workflows/cut_release_branch.yml
index 4e104d78a445..4201d6018c60 100644
--- a/.github/workflows/cut_release_branch.yml
+++ b/.github/workflows/cut_release_branch.yml
@@ -68,7 +68,7 @@ jobs:
          exit 1
        fi
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
@@ -116,7 +116,7 @@ jobs:
          exit 1
        fi
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
@@ -184,7 +184,7 @@ jobs:
          tar zvxvf hub-linux-amd64-2.14.2.tgz
          sudo ./hub-linux-amd64-2.14.2/install
          echo "eval "$(hub alias -s)"" >> ~/.bashrc
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
diff --git a/.github/workflows/dask_runner_tests.yml b/.github/workflows/dask_runner_tests.yml
index c4d8c9a1cf9e..46d2707ca063 100644
--- a/.github/workflows/dask_runner_tests.yml
+++ b/.github/workflows/dask_runner_tests.yml
@@ -39,7 +39,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Install python
        uses: actions/setup-python@v4
        with:
@@ -73,7 +73,7 @@ jobs:
        ]
    steps:
      - name: Checkout code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Install python
        uses: actions/setup-python@v4
        with:
diff --git a/.github/workflows/git_tag_released_version.yml b/.github/workflows/git_tag_released_version.yml
index 871149bd26a1..0c6782603856 100644
--- a/.github/workflows/git_tag_released_version.yml
+++ b/.github/workflows/git_tag_released_version.yml
@@ -37,7 +37,7 @@ jobs:
      VERSION_PATH: ${{ github.event.inputs.VERSION_TAG }}
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: Set git config
        run: |
          git config user.name $GITHUB_ACTOR
diff --git a/.github/workflows/go_tests.yml b/.github/workflows/go_tests.yml
index 07e58bba2f1d..32b494da25be 100644
--- a/.github/workflows/go_tests.yml
+++ b/.github/workflows/go_tests.yml
@@ -29,6 +29,7 @@ on:
    branches: ['master', 'release-*']
    tags: ['v*']
    paths: ['sdks/go/pkg/**', 'sdks/go.mod', 'sdks/go.sum', 'sdks/go/container/*', 'sdks/java/container/*', 'sdks/python/container/*', 'sdks/typescript/container/*']
+  workflow_dispatch:
 # This allows a subsequently queued workflow run to interrupt previous runs
 concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.event.pull_request.head.label || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.body || github.event.sender.login}}'
@@ -39,7 +40,7 @@ jobs:
    name: Go Build
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          fetch-depth: 2
      - uses: actions/setup-go@v4
@@ -65,3 +66,8 @@ jobs:
          go install "honnef.co/go/tools/cmd/staticcheck@2023.1.3"
          cd sdks/go/pkg/beam
          $(go env GOPATH)/bin/staticcheck ./...
+      - uses: golang/govulncheck-action@v1.0.1
+        with:
+          work-dir: ./sdks
+          go-package: ./...
+          go-version-input: 1.21
\ No newline at end of file
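go_tests now finishes with a vulnerability scan. The added step shown standalone; as I understand the action, it installs the requested Go toolchain and runs `govulncheck` against the given package pattern from `work-dir`:

```yaml
- uses: golang/govulncheck-action@v1.0.1
  with:
    work-dir: ./sdks        # scan from the Go SDK module root
    go-package: ./...       # all packages beneath it
    go-version-input: 1.21  # toolchain used for the scan
```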
diff --git a/.github/workflows/issue-tagger.yml b/.github/workflows/issue-tagger.yml
index 39f92d87f788..dbfe2e996d5e 100644
--- a/.github/workflows/issue-tagger.yml
+++ b/.github/workflows/issue-tagger.yml
@@ -24,7 +24,7 @@ jobs:
    permissions:
      issues: write
    steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
      - uses: damccorm/tag-ur-it@6fa72bbf1a2ea157b533d7e7abeafdb5855dbea5
        with:
          repo-token: "${{ secrets.GITHUB_TOKEN }}"
diff --git a/.github/workflows/java_tests.yml b/.github/workflows/java_tests.yml
index 82e2d3ce3df5..ceff29b50d4f 100644
--- a/.github/workflows/java_tests.yml
+++ b/.github/workflows/java_tests.yml
@@ -52,7 +52,7 @@ jobs:
      gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }}
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
      - name: "Check are GCP variables set"
        run: "./scripts/ci/ci_check_are_gcp_variables_set.sh"
        id: check_gcp_variables
@@ -73,15 +73,15 @@ jobs:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          requires-py-38: false
-          requires-py-39: false
+          java-version: 8
+          go-version: 1.21
      - name: Remove default github maven configuration
        # This step is a workaround to avoid a decryption issue of Beam's
        # net.linguica.gradle.maven.settings plugin and github's provided maven
@@ -132,16 +132,15 @@ jobs:
        os: [ubuntu-latest, macos-latest, windows-latest]
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
        with:
-          requires-py-38: false
-          requires-py-39: false
-
+          java-version: 8
+          go-version: 1.21
      - name: Remove default github maven configuration
        # This step is a workaround to avoid a decryption issue of Beam's
        # net.linguica.gradle.maven.settings plugin and github's provided maven
@@ -176,15 +175,15 @@ jobs:
      )
    steps:
      - name: Check out code
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
        with:
          persist-credentials: false
          submodules: recursive
-      - name: Setup self-hosted
-        uses: ./.github/actions/setup-self-hosted-action
-        with:
-          requires-py-38: false
-          requires-py-39: false
+      - name: Setup environment
+        uses: ./.github/actions/setup-environment-action
+        with:
+          java-version: 8
+          go-version: 1.21
      - name: Authenticate on GCP
        uses: google-github-actions/setup-gcloud@v0
        with:
diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt
new file mode 100644
index 000000000000..5fd9518bc8d0
--- /dev/null
+++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_MultipleKey.txt
@@ -0,0 +1,34 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###############################################################################
+--project=apache-beam-testing
+--region=us-central1
+--appName=load_tests_Java_Dataflow_streaming_CoGBK_2
+--tempLocation=gs://temp-storage-for-perf-tests/loadtests
+--influxMeasurement=java_streaming_cogbk_2
+--publishToInfluxDB=true
+--sourceOptions={"numRecords":20000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":5}
+--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000}
+--iterations=1
+--numWorkers=5
+--autoscalingAlgorithm=NONE
+--streaming=true
+--inputWindowDurationSec=1200
+--coInputWindowDurationSec=1200
+--influxDatabase=beam_test_metrics
+--influxHost=http://10.128.0.96:8086
+--runner=DataflowRunner
\ No newline at end of file
diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt
new file mode 100644
index 000000000000..2840fe75d5af
--- /dev/null
+++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_10KB.txt
@@ -0,0 +1,34 @@
+###############################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+############################################################################### +--project=apache-beam-testing +--region=us-central1 +--appName=load_tests_Java_Dataflow_streaming_CoGBK_3 +--tempLocation=gs://temp-storage-for-perf-tests/loadtests +--influxMeasurement=java_streaming_cogbk_3 +--publishToInfluxDB=true +--sourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":200000} +--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--iterations=4 +--numWorkers=5 +--autoscalingAlgorithm=NONE +--streaming=true +--inputWindowDurationSec=1200 +--coInputWindowDurationSec=1200 +--influxDatabase=beam_test_metrics +--influxHost=http://10.128.0.96:8086 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt new file mode 100644 index 000000000000..bcc8a36cf31f --- /dev/null +++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_Reiteration_2MB.txt @@ -0,0 +1,34 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +--project=apache-beam-testing +--region=us-central1 +--appName=load_tests_Java_Dataflow_streaming_CoGBK_4 +--tempLocation=gs://temp-storage-for-perf-tests/loadtests +--influxMeasurement=java_streaming_cogbk_4 +--publishToInfluxDB=true +--sourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--iterations=4 +--numWorkers=5 +--autoscalingAlgorithm=NONE +--streaming=true +--inputWindowDurationSec=1200 +--coInputWindowDurationSec=1200 +--influxDatabase=beam_test_metrics +--influxHost=http://10.128.0.96:8086 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt new file mode 100644 index 000000000000..afae1a1bd6bf --- /dev/null +++ b/.github/workflows/load-tests-job-configs/config_CoGBK_Java_Streaming_2GB_SingleKey.txt @@ -0,0 +1,34 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +--project=apache-beam-testing +--region=us-central1 +--appName=load_tests_Java_Dataflow_streaming_CoGBK_1 +--tempLocation=gs://temp-storage-for-perf-tests/loadtests +--influxMeasurement=java_streaming_cogbk_1 +--publishToInfluxDB=true +--sourceOptions={"numRecords":20000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1} +--coSourceOptions={"numRecords":2000000,"keySizeBytes":10,"valueSizeBytes":90,"numHotKeys":1000} +--iterations=1 +--numWorkers=5 +--autoscalingAlgorithm=NONE +--streaming=true +--inputWindowDurationSec=1200 +--coInputWindowDurationSec=1200 +--influxDatabase=beam_test_metrics +--influxHost=http://10.128.0.96:8086 +--runner=DataflowRunner \ No newline at end of file diff --git a/.github/workflows/local_env_tests.yml b/.github/workflows/local_env_tests.yml index a689959dac7e..32e2975c0712 100644 --- a/.github/workflows/local_env_tests.yml +++ b/.github/workflows/local_env_tests.yml @@ -45,7 +45,7 @@ jobs: name: "Ubuntu run local environment shell script" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-go@v4 with: go-version: '1.21' @@ -63,7 +63,7 @@ jobs: name: "Mac run local environment shell script" runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-go@v4 with: go-version: '1.21' diff --git a/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt new file mode 100644 index 000000000000..5e7e53821231 --- /dev/null +++ b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Avro.txt @@ -0,0 +1,38 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+############################################################################### +'["--tempLocation=gs://temp-storage-for-perf-tests/loadtests", +"--project=apache-beam-testing", +"--tempRoot=gs://temp-storage-for-perf-tests/loadtests", +"--writeMethod=FILE_LOADS", +"--writeFormat=AVRO", +"--testBigQueryDataset=beam_performance", +"--testBigQueryTable=bqio_write_10GB_java_avro_", +"--metricsBigQueryDataset=beam_performance", +"--metricsBigQueryTable=bqio_10GB_results_java_batch_avro", +"--influxMeasurement=bqio_10GB_results_java_batch_avro", +"--sourceOptions={ +\"numRecords\":\"10485760\", +\"keySizeBytes\":\"1\", +\"valueSizeBytes\":\"1024\" +}", +"--runner=DataflowRunner", +"--maxNumWorkers=5", +"--numWorkers=5", +"--autoscalingAlgorithm=NONE", +"--influxDatabase=beam_test_metrics", +"--influxHost=http://10.128.0.96:8086"]' \ No newline at end of file diff --git a/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt new file mode 100644 index 000000000000..7bd9c30ae738 --- /dev/null +++ b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Batch_Java_Json.txt @@ -0,0 +1,38 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +'["--tempLocation=gs://temp-storage-for-perf-tests/loadtests", +"--project=apache-beam-testing", +"--tempRoot=gs://temp-storage-for-perf-tests/loadtests", +"--writeMethod=FILE_LOADS", +"--writeFormat=JSON", +"--testBigQueryDataset=beam_performance", +"--testBigQueryTable=bqio_write_10GB_java_json_", +"--metricsBigQueryDataset=beam_performance", +"--metricsBigQueryTable=bqio_10GB_results_java_batch_json", +"--influxMeasurement=bqio_10GB_results_java_batch_json", +"--sourceOptions={ +\"numRecords\":\"10485760\", +\"keySizeBytes\":\"1\", +\"valueSizeBytes\":\"1024\" +}", +"--runner=DataflowRunner", +"--maxNumWorkers=5", +"--numWorkers=5", +"--autoscalingAlgorithm=NONE", +"--influxDatabase=beam_test_metrics", +"--influxHost=http://10.128.0.96:8086"]' \ No newline at end of file diff --git a/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt new file mode 100644 index 000000000000..8bddea5fcb8b --- /dev/null +++ b/.github/workflows/performance-tests-job-configs/config_BigQueryIO_Streaming_Java.txt @@ -0,0 +1,39 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +############################################################################### +'["--tempLocation=gs://temp-storage-for-perf-tests/loadtests", +"--project=apache-beam-testing", +"--tempRoot=gs://temp-storage-for-perf-tests/loadtests", +"--writeMethod=STREAMING_INSERTS", +"--writeFormat=JSON", +"--pipelineTimeout=1200", +"--testBigQueryDataset=beam_performance", +"--testBigQueryTable=bqio_write_10GB_java_stream_", +"--metricsBigQueryDataset=beam_performance", +"--metricsBigQueryTable=bqio_10GB_results_java_stream", +"--influxMeasurement=bqio_10GB_results_java_stream", +"--sourceOptions={ +\"numRecords\":\"10485760\", +\"keySizeBytes\":\"1\", +\"valueSizeBytes\":\"1024\" +}", +"--runner=DataflowRunner", +"--maxNumWorkers=5", +"--numWorkers=5", +"--autoscalingAlgorithm=NONE", +"--influxDatabase=beam_test_metrics", +"--influxHost=http://10.128.0.96:8086"]' \ No newline at end of file diff --git a/.github/workflows/playground_backend_precommit.yml b/.github/workflows/playground_backend_precommit.yml index dedac3db9299..114ca4aac1cb 100644 --- a/.github/workflows/playground_backend_precommit.yml +++ b/.github/workflows/playground_backend_precommit.yml @@ -37,20 +37,13 @@ jobs: JAVA_VERSION: '11' steps: - name: Check out the repo - uses: actions/checkout@v3 - - - uses: actions/setup-python@v4 - with: - python-version: '${{ env.PYTHON_VERSION }}' - - uses: actions/setup-java@v3.8.0 + uses: actions/checkout@v4 + + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: - distribution: 'zulu' java-version: '${{ env.JAVA_VERSION }}' - - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false + python-version: '${{ env.PYTHON_VERSION }}' - name: Add GOPATH/bin to PATH run: echo "PATH=$PATH:$(go env GOPATH)/bin" >> $GITHUB_ENV diff --git a/.github/workflows/playground_frontend_test.yml b/.github/workflows/playground_frontend_test.yml index 543d166f432d..6f6e02a9697c 100644 --- a/.github/workflows/playground_frontend_test.yml +++ b/.github/workflows/playground_frontend_test.yml @@ -45,7 +45,7 @@ jobs: FLUTTER_VERSION: '3.10.4' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: 'Cache Flutter Dependencies' uses: actions/cache@v3 diff --git a/.github/workflows/pr-bot-new-prs.yml b/.github/workflows/pr-bot-new-prs.yml index 8ba27fbec3dc..ef825e067b7d 100644 --- a/.github/workflows/pr-bot-new-prs.yml +++ b/.github/workflows/pr-bot-new-prs.yml @@ -33,7 +33,7 @@ jobs: if: github.repository == 'apache/beam' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/pr-bot-pr-updates.yml b/.github/workflows/pr-bot-pr-updates.yml index d96a11368cb8..c882c18feeba 100644 --- a/.github/workflows/pr-bot-pr-updates.yml +++ 
b/.github/workflows/pr-bot-pr-updates.yml @@ -35,7 +35,7 @@ jobs: steps: # Pin to master so users can't do anything malicious on their own branch and run it here. - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: ref: 'master' - name: Setup Node diff --git a/.github/workflows/pr-bot-prs-needing-attention.yml b/.github/workflows/pr-bot-prs-needing-attention.yml index e96d3983746b..9dff7c8565a4 100644 --- a/.github/workflows/pr-bot-prs-needing-attention.yml +++ b/.github/workflows/pr-bot-prs-needing-attention.yml @@ -33,7 +33,7 @@ jobs: if: github.repository == 'apache/beam' runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/pr-bot-update-reviewers.yml b/.github/workflows/pr-bot-update-reviewers.yml index f3d343b12fb9..b4c41b66f9d6 100644 --- a/.github/workflows/pr-bot-update-reviewers.yml +++ b/.github/workflows/pr-bot-update-reviewers.yml @@ -33,7 +33,7 @@ jobs: if: github.repository == 'apache/beam' runs-on: [self-hosted, ubuntu-20.04] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/publish_github_release_notes.yml b/.github/workflows/publish_github_release_notes.yml index 246ce690f8b1..473e0deef83d 100644 --- a/.github/workflows/publish_github_release_notes.yml +++ b/.github/workflows/publish_github_release_notes.yml @@ -36,7 +36,7 @@ jobs: properties: ${{ steps.test-properties.outputs.properties }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - id: test-properties uses: ./.github/actions/setup-default-test-properties @@ -49,7 +49,7 @@ jobs: name: Publish Github Release Notes steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Publish github release notes run: | POST_PATH="website/www/site/content/en/blog/beam-${{env.RELEASE_VERSION}}.md" diff --git a/.github/workflows/python_dependency_tests.yml b/.github/workflows/python_dependency_tests.yml index aea77330f0ae..0c91e64b0db9 100644 --- a/.github/workflows/python_dependency_tests.yml +++ b/.github/workflows/python_dependency_tests.yml @@ -33,7 +33,7 @@ jobs: ] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index ba7585c31bf5..406949eda96e 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -49,7 +49,7 @@ jobs: outputs: gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: "Check are GCP variables set" run: "./scripts/ci/ci_check_are_gcp_variables_set.sh" id: check_gcp_variables @@ -73,7 +73,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: @@ -108,7 +108,7 @@ jobs: ] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: @@ -143,7 +143,7 @@ jobs: python: ["3.8", "3.9", "3.10", "3.11"] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: @@ -171,7 +171,7 @@ jobs: python: ["3.8", "3.9", "3.10", "3.11"] 
steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/reportGenerator.yml b/.github/workflows/reportGenerator.yml index 44055cd56310..8f6bccddcfad 100644 --- a/.github/workflows/reportGenerator.yml +++ b/.github/workflows/reportGenerator.yml @@ -26,7 +26,7 @@ jobs: name: Generate issue report runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/run_perf_alert_tool.yml b/.github/workflows/run_perf_alert_tool.yml index 1cdcd858e61a..6946011f0617 100644 --- a/.github/workflows/run_perf_alert_tool.yml +++ b/.github/workflows/run_perf_alert_tool.yml @@ -35,7 +35,7 @@ jobs: issues: write steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Install python uses: actions/setup-python@v4 with: diff --git a/.github/workflows/run_rc_validation.yml b/.github/workflows/run_rc_validation.yml index 720150b57450..4902fee81016 100644 --- a/.github/workflows/run_rc_validation.yml +++ b/.github/workflows/run_rc_validation.yml @@ -78,7 +78,7 @@ jobs: WORKING_BRANCH: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}_validations" steps: - name: Check out code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{ env.RC_TAG }} - name: Setup GitHub CLI @@ -109,7 +109,7 @@ jobs: py_version: [3.8] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} @@ -174,7 +174,7 @@ jobs: py_version: [3.8] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Verify ENV values @@ -280,7 +280,7 @@ jobs: needs: generate_shared_pubsub steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Install Python @@ -355,7 +355,7 @@ jobs: needs: generate_shared_pubsub steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} @@ -403,7 +403,7 @@ jobs: needs: [generate_shared_pubsub] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} @@ -456,7 +456,7 @@ jobs: needs: [generate_shared_pubsub] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Install Python @@ -505,7 +505,7 @@ jobs: needs: [generate_shared_pubsub] steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: ${{env.RC_TAG}} - name: Install Python diff --git a/.github/workflows/tour_of_beam_backend.yml b/.github/workflows/tour_of_beam_backend.yml index 665cec4e42cd..5c67c9f54fe3 100644 --- a/.github/workflows/tour_of_beam_backend.yml +++ b/.github/workflows/tour_of_beam_backend.yml @@ -41,7 +41,7 @@ jobs: run: working-directory: ./learning/tour-of-beam/backend steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-go@v4 with: # pin to the biggest Go version supported by Cloud Functions runtime diff --git a/.github/workflows/tour_of_beam_backend_integration.yml b/.github/workflows/tour_of_beam_backend_integration.yml index ab644358c977..8f56d3f2e2fa 100644 --- a/.github/workflows/tour_of_beam_backend_integration.yml +++ b/.github/workflows/tour_of_beam_backend_integration.yml @@ -74,17 +74,14 @@ jobs: run: working-directory: 
./learning/tour-of-beam/backend steps: - - uses: actions/checkout@v3 - - uses: actions/setup-go@v4 + - uses: actions/checkout@v4 + + - name: Setup environment + uses: ./.github/actions/setup-environment-action with: # pin to the biggest Go version supported by Cloud Functions runtime go-version: '1.16' - - name: Setup Gradle - uses: gradle/gradle-build-action@v2 - with: - cache-read-only: false - - name: Build Playground router image run: ./gradlew -i playground:backend:containers:router:docker working-directory: ${{ env.GITHUB_WORKSPACE }} diff --git a/.github/workflows/tour_of_beam_frontend_test.yml b/.github/workflows/tour_of_beam_frontend_test.yml index 8880bc287266..5337bb7dd720 100644 --- a/.github/workflows/tour_of_beam_frontend_test.yml +++ b/.github/workflows/tour_of_beam_frontend_test.yml @@ -47,7 +47,7 @@ jobs: FLUTTER_VERSION: '3.10.4' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: 'Cache Flutter Dependencies' uses: actions/cache@v3 diff --git a/.github/workflows/typescript_tests.yml b/.github/workflows/typescript_tests.yml index 542673cbbca8..825b2808af5b 100644 --- a/.github/workflows/typescript_tests.yml +++ b/.github/workflows/typescript_tests.yml @@ -45,7 +45,7 @@ jobs: os: [[self-hosted, ubuntu-20.04], macos-latest, [self-hosted, windows-server-2019]] steps: - name: Check out code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive @@ -74,7 +74,7 @@ jobs: fail-fast: false steps: - name: Check out code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive @@ -109,7 +109,7 @@ jobs: outputs: gcp-variables-set: ${{ steps.check_gcp_variables.outputs.gcp-variables-set }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: "Check are GCP variables set" run: "./scripts/ci/ci_check_are_gcp_variables_set.sh" id: check_gcp_variables @@ -131,7 +131,7 @@ jobs: fail-fast: false steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: persist-credentials: false submodules: recursive diff --git a/.github/workflows/update_python_dependencies.yml b/.github/workflows/update_python_dependencies.yml index d2b8fccdb586..b4b839c3204c 100644 --- a/.github/workflows/update_python_dependencies.yml +++ b/.github/workflows/update_python_dependencies.yml @@ -38,7 +38,7 @@ jobs: properties: ${{ steps.test-properties.outputs.properties }} steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - id: test-properties uses: ./.github/actions/setup-default-test-properties @@ -48,9 +48,15 @@ jobs: name: Update Python Dependencies steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup environment - uses: ./.github/actions/setup-self-hosted-action + uses: ./.github/actions/setup-environment-action + with: + python-version: | + 3.8 + 3.9 + java-version: 8 + go-version: 1.21 - name: Update Python Dependencies uses: ./.github/actions/gradle-command-self-hosted-action with: diff --git a/.test-infra/jenkins/job_PreCommit_CommunityMetrics.groovy b/.test-infra/jenkins/job_PreCommit_CommunityMetrics.groovy deleted file mode 100644 index 2bf63df9cd5a..000000000000 --- a/.test-infra/jenkins/job_PreCommit_CommunityMetrics.groovy +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'CommunityMetrics', - gradleTask: ':communityMetricsPreCommit', - triggerPathPatterns: ['^.test-infra/metrics/.*$']) -builder.build() - diff --git a/.test-infra/jenkins/job_PreCommit_Go.groovy b/.test-infra/jenkins/job_PreCommit_Go.groovy deleted file mode 100644 index 73b6cf4c9384..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Go.groovy +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Go', - gradleTask: ':goPreCommit', - triggerPathPatterns: [ - '^model/.*$', - '^sdks/go.*$', - '^release/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_Go_Portable.groovy b/.test-infra/jenkins/job_PreCommit_Go_Portable.groovy deleted file mode 100644 index 12c762e5eb37..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Go_Portable.groovy +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'GoPortable', - gradleTask: ':goPortablePreCommit', - triggerPathPatterns: [ - '^model/.*$', - '^sdks/go.*$', - '^release/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_Kotlin_Examples.groovy b/.test-infra/jenkins/job_PreCommit_Kotlin_Examples.groovy deleted file mode 100644 index c3ce31f1f6d1..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Kotlin_Examples.groovy +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Kotlin_Examples', - gradleTask: ':examples:kotlin:preCommit', - triggerPathPatterns: [ - '^model/.*$', - '^sdks/java/.*$', - '^runners/flink/.*$', - '^runners/spark/.*$', - '^runners/direct-java/.*$', - '^examples/kotlin/.*$', - '^release/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_PythonAutoformatter.groovy b/.test-infra/jenkins/job_PreCommit_PythonAutoformatter.groovy deleted file mode 100644 index 90e037edf2af..000000000000 --- a/.test-infra/jenkins/job_PreCommit_PythonAutoformatter.groovy +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'PythonFormatter', - gradleTask: ':pythonFormatterPreCommit', - triggerPathPatterns: [ - '^sdks/python/apache_beam/.*$', - ] - ) -builder.build() diff --git a/.test-infra/jenkins/job_PreCommit_Typescript.groovy b/.test-infra/jenkins/job_PreCommit_Typescript.groovy deleted file mode 100644 index 29cb93ede8b6..000000000000 --- a/.test-infra/jenkins/job_PreCommit_Typescript.groovy +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import PrecommitJobBuilder - -PrecommitJobBuilder builder = new PrecommitJobBuilder( - scope: this, - nameBase: 'Typescript', - gradleTask: ':typescriptPreCommit', - triggerPathPatterns: [ - '^sdks/python/apache_beam/runners/interactive/extensions/.*$', - ] - ) -builder.build() diff --git a/CHANGES.md b/CHANGES.md index 40a9a1dc9490..a990a5fd7304 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -68,11 +68,14 @@ * In Python, [RunInference](https://beam.apache.org/documentation/sdks/python-machine-learning/#why-use-the-runinference-api) now supports loading many models in the same transform using a [KeyedModelHandler](https://beam.apache.org/documentation/sdks/python-machine-learning/#use-a-keyed-modelhandler) ([#27628](https://github.com/apache/beam/issues/27628)). * In Python, the [VertexAIModelHandlerJSON](https://beam.apache.org/releases/pydoc/current/apache_beam.ml.inference.vertex_ai_inference.html#apache_beam.ml.inference.vertex_ai_inference.VertexAIModelHandlerJSON) now supports passing in inference_args. These will be passed through to the Vertex endpoint as parameters. +* Added support to run `mypy` on user pipelines ([#27906](https://github.com/apache/beam/issues/27906)). ## Breaking Changes * Removed fastjson library dependency for Beam SQL. Table property is changed to be based on jackson ObjectNode (Java) ([#24154](https://github.com/apache/beam/issues/24154)). * Removed TensorFlow from Beam Python container images [PR](https://github.com/apache/beam/pull/28424). If you have been negatively affected by this change, please comment on [#20605](https://github.com/apache/beam/issues/20605). +* Removed the parameter `t reflect.Type` from `parquetio.Write`. The element type is derived from the input PCollection (Go) ([#28490](https://github.com/apache/beam/issues/28490)). +* Refactored `BeamSqlSeekableTable.setUp`, adding a `joinSubsetType` parameter (Java) ([#28283](https://github.com/apache/beam/issues/28283)). ## Deprecations @@ -80,7 +83,9 @@ ## Bugfixes -* Fixed X (Java/Python) ([#X](https://github.com/apache/beam/issues/X)). +* Fixed exception chaining issue in GCS connector (Python) ([#26769](https://github.com/apache/beam/issues/26769#issuecomment-1700422615)). +* Fixed streaming inserts exception handling: GoogleAPICallErrors are now retried according to the retry strategy and routed to failed rows where appropriate, rather than causing a pipeline error (Python) ([#21080](https://github.com/apache/beam/issues/21080)).
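For the `BeamSqlSeekableTable.setUp` entry above, here is a minimal sketch of what an implementer now overrides. The parameter name `joinSubsetType` comes straight from the changelog entry; its type (`Schema` here) and the rest of the interface shape are assumptions to verify against the Beam SDK source.

```java
import java.util.Collections;
import java.util.List;
import org.apache.beam.sdk.extensions.sql.BeamSqlSeekableTable;
import org.apache.beam.sdk.schemas.Schema;
import org.apache.beam.sdk.values.Row;

/** Hypothetical seekable table, only to illustrate the new setUp hook. */
public class StaticSeekableTable implements BeamSqlSeekableTable {
  private Schema joinSubsetType;

  @Override
  public void setUp(Schema joinSubsetType) {
    // New in this release (assumed signature): the join subset type is passed
    // in, so the table can prepare lookups keyed on the join columns.
    this.joinSubsetType = joinSubsetType;
  }

  @Override
  public List<Row> seekRow(Row lookupSubRow) {
    // A real implementation would return the rows matching lookupSubRow.
    return Collections.emptyList();
  }
}
```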
+ ## Security Fixes * Python containers updated, fixing [CVE-2021-30474](https://nvd.nist.gov/vuln/detail/CVE-2021-30474), [CVE-2021-30475](https://nvd.nist.gov/vuln/detail/CVE-2021-30475), [CVE-2021-30473](https://nvd.nist.gov/vuln/detail/CVE-2021-30473), [CVE-2020-36133](https://nvd.nist.gov/vuln/detail/CVE-2020-36133), [CVE-2020-36131](https://nvd.nist.gov/vuln/detail/CVE-2020-36131), [CVE-2020-36130](https://nvd.nist.gov/vuln/detail/CVE-2020-36130), and [CVE-2020-36135](https://nvd.nist.gov/vuln/detail/CVE-2020-36135) diff --git a/build.gradle.kts b/build.gradle.kts index 0d23861a495b..7bd847895293 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -443,6 +443,10 @@ tasks.register("goPortablePreCommit") { dependsOn(":sdks:go:test:ulrValidatesRunner") } +tasks.register("goPrismPreCommit") { + dependsOn(":sdks:go:test:prismValidatesRunner") +} + tasks.register("goPostCommitDataflowARM") { dependsOn(":sdks:go:test:dataflowValidatesRunnerARM64") } diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts index a8a760029069..0ca748e3eb04 100644 --- a/buildSrc/build.gradle.kts +++ b/buildSrc/build.gradle.kts @@ -57,7 +57,7 @@ dependencies { runtimeOnly("com.avast.gradle:gradle-docker-compose-plugin:0.16.12") // Enable docker compose tasks runtimeOnly("ca.cutterslade.gradle:gradle-dependency-analyze:1.8.3") // Enable dep analysis runtimeOnly("gradle.plugin.net.ossindex:ossindex-gradle-plugin:0.4.11") // Enable dep vulnerability analysis - runtimeOnly("org.checkerframework:checkerframework-gradle-plugin:0.6.30") // Enable enhanced static checking plugin + runtimeOnly("org.checkerframework:checkerframework-gradle-plugin:0.6.33") // Enable enhanced static checking plugin } // Because buildSrc is built and tested automatically _before_ gradle diff --git a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy index a3c5bcf226fc..9f341c5673fd 100644 --- a/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy +++ b/buildSrc/src/main/groovy/org/apache/beam/gradle/BeamModulePlugin.groovy @@ -724,7 +724,7 @@ class BeamModulePlugin implements Plugin<Project> { // Keep version consistent with the version in google_cloud_resourcemanager, managed by google_cloud_platform_libraries_bom google_api_services_cloudresourcemanager : "com.google.apis:google-api-services-cloudresourcemanager:v1-rev20230129-$google_clients_version", google_api_services_dataflow : "com.google.apis:google-api-services-dataflow:v1b3-rev20220920-$google_clients_version", - google_api_services_healthcare : "com.google.apis:google-api-services-healthcare:v1-rev20230817-$google_clients_version", + google_api_services_healthcare : "com.google.apis:google-api-services-healthcare:v1-rev20230830-$google_clients_version", google_api_services_pubsub : "com.google.apis:google-api-services-pubsub:v1-rev20220904-$google_clients_version", // Keep version consistent with the version in google_cloud_nio, managed by google_cloud_platform_libraries_bom google_api_services_storage : "com.google.apis:google-api-services-storage:v1-rev20230617-$google_clients_version", diff --git a/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java b/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java index 61f730bf3579..6aecc6609cfb 100644 --- a/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java +++
b/it/cassandra/src/main/java/org/apache/beam/it/cassandra/matchers/CassandraAsserts.java @@ -31,7 +31,7 @@ public class CassandraAsserts { /** - * Convert Cassandra {@link Row} list to a list of maps. + * Convert Cassandra {@link com.datastax.oss.driver.api.core.cql.Row} list to a list of maps. * * @param rows Rows to parse. * @return List of maps to use in {@link RecordsSubject}. diff --git a/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java b/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java index c696457bbdd9..d249d43d3789 100644 --- a/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java +++ b/it/common/src/main/java/org/apache/beam/it/common/utils/PipelineUtils.java @@ -27,6 +27,7 @@ import org.apache.beam.sdk.options.PipelineOptions; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.CaseFormat; +import org.apache.commons.lang3.RandomStringUtils; /** Utilities to make working with Dataflow easier. */ public class PipelineUtils { @@ -73,6 +74,15 @@ public static boolean waitUntil( } } + /** + * Creates a job name. Uses {@link #createJobName(String, int)} without a random suffix. + * + * @see #createJobName(String, int) + */ + public static String createJobName(String prefix) { + return createJobName(prefix, 0); + } + /** * Creates a job name. * @@ -83,17 +93,24 @@ public static boolean waitUntil( * same prefix are requested in a short period of time. * * @param prefix a prefix for the job + * @param randomChars the number of random characters to append to the name, to increase the + * likelihood of being unique. * @return the prefix plus some way of identifying it separate from other jobs with the same * prefix */ - public static String createJobName(String prefix) { + public static String createJobName(String prefix, int randomChars) { String convertedPrefix = CaseFormat.UPPER_CAMEL.converterTo(CaseFormat.LOWER_HYPHEN).convert(prefix); String formattedTimestamp = DateTimeFormatter.ofPattern("yyyyMMddHHmmssSSS") .withZone(ZoneId.of("UTC")) .format(Instant.now()); - return String.format("%s-%s", convertedPrefix, formattedTimestamp); + + String suffix = ""; + if (randomChars > 0) { + suffix = "-" + RandomStringUtils.randomAlphanumeric(randomChars).toLowerCase(); + } + return String.format("%s-%s%s", convertedPrefix, formattedTimestamp, suffix); } /** Get raw job name (without prefix) from a jobName generated by createJobName.
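A quick usage sketch of the two `createJobName` overloads above. The prefix and the 8-character suffix length are illustrative; the expected outputs follow the UTC `yyyyMMddHHmmssSSS` timestamp format shown in the diff.

```java
import org.apache.beam.it.common.utils.PipelineUtils;

public class JobNameExample {
  public static void main(String[] args) {
    // Timestamp-only name, e.g. "my-load-test-20230918120159123".
    String name = PipelineUtils.createJobName("myLoadTest");

    // Same name plus 8 random alphanumeric characters, e.g.
    // "my-load-test-20230918120159123-k3f9x2ab" -- useful when several jobs
    // with the same prefix are created in the same millisecond.
    String uniqueName = PipelineUtils.createJobName("myLoadTest", 8);

    System.out.println(name);
    System.out.println(uniqueName);
  }
}
```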
*/ diff --git a/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java b/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java index acf203b06e6e..316283cdf7d2 100644 --- a/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java +++ b/it/common/src/test/java/org/apache/beam/it/common/utils/PipelineUtilsTest.java @@ -37,7 +37,13 @@ public void testCreateJobName() { @Test public void testCreateJobNameWithUppercase() { - assertThat(createJobName("testWithUpperCase")).matches("test-with-upper-case" + "-\\d{17}"); + assertThat(createJobName("testWithUpperCase")).matches("test-with-upper-case-\\d{17}"); + } + + @Test + public void testCreateJobNameWithUppercaseSuffix() { + assertThat(createJobName("testWithUpperCase", 8)) + .matches("test-with-upper-case-\\d{17}-[a-z0-9]{8}"); } @Test diff --git a/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java b/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java index eb250a1c5f82..61d6b5d57c2c 100644 --- a/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java +++ b/it/elasticsearch/src/test/java/org/apache/beam/it/elasticsearch/ElasticsearchUtilsTest.java @@ -34,7 +34,7 @@ public class ElasticsearchUtilsTest { @Test public void testGenerateIndexNameShouldReplaceForwardSlash() { String testBaseString = "Test/DB/Name"; - String actual = generateIndexName(testBaseString); + String actual = ElasticsearchUtils.generateIndexName(testBaseString); assertThat(actual).matches("test-db-name-\\d{8}-\\d{6}-\\d{6}"); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java index 32f262f2eac1..6b728a6a60db 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/IOLoadTestBase.java @@ -62,7 +62,7 @@ public void tearDownBase() throws IOException { } @Override - PipelineLauncher launcher() { + public PipelineLauncher launcher() { return DefaultPipelineLauncher.builder(CREDENTIALS).build(); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java index f6e359fed963..14bb05394de2 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/LoadTestBase.java @@ -18,6 +18,7 @@ package org.apache.beam.it.gcp; import static org.apache.beam.it.common.logging.LogStrings.formatForLogging; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.RUNNER_V2; import com.google.api.gax.core.CredentialsProvider; import com.google.api.gax.core.FixedCredentialsProvider; @@ -49,6 +50,7 @@ import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.MoreObjects; import org.junit.After; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.rules.TestRule; import org.junit.rules.TestWatcher; @@ -107,11 +109,14 @@ protected void starting(Description description) { } }; - @Before - @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD") - public void setUp() throws IOException { + @BeforeClass + public static void setUpClass() { project = TestProperties.project(); region = 
TestProperties.region(); + } + + @Before + public void setUp() throws IOException { monitoringClient = MonitoringClient.builder(CREDENTIALS_PROVIDER).build(); pipelineLauncher = launcher(); pipelineOperator = new PipelineOperator(pipelineLauncher); @@ -123,7 +128,7 @@ public void tearDownLoadTestBase() throws IOException { monitoringClient.cleanupAll(); } - abstract PipelineLauncher launcher(); + public abstract PipelineLauncher launcher(); /** * Exports the metrics of given dataflow job to BigQuery. @@ -239,7 +244,7 @@ private void computeDataflowMetrics( metrics.put("EstimatedDataProcessedGB", dataProcessed / 1e9d); } metrics.putAll(getCpuUtilizationMetrics(launchInfo.jobId(), workerTimeInterval)); - metrics.putAll(getThroughputMetrics(launchInfo.jobId(), config, workerTimeInterval)); + metrics.putAll(getThroughputMetrics(launchInfo, config, workerTimeInterval)); } /** @@ -349,25 +354,30 @@ protected Map<String, Double> getCpuUtilizationMetrics(String jobId, TimeInterva /** * Computes throughput metrics of the given pcollection in dataflow job. * - * @param jobId dataflow job id + * @param jobInfo dataflow job LaunchInfo * @param config the {@link MetricsConfiguration} * @param timeInterval interval for the monitoring query * @return throughput metrics of the pcollection */ protected Map<String, Double> getThroughputMetrics( - String jobId, MetricsConfiguration config, TimeInterval timeInterval) { + LaunchInfo jobInfo, MetricsConfiguration config, TimeInterval timeInterval) { + String jobId = jobInfo.jobId(); + String iColl = + RUNNER_V2.equals(jobInfo.runner()) + ? config.inputPCollectionV2() + : config.inputPCollection(); + String oColl = + RUNNER_V2.equals(jobInfo.runner()) + ? config.outputPCollectionV2() + : config.outputPCollection(); List<Double> inputThroughputBytesPerSec = - monitoringClient.getThroughputBytesPerSecond( - project, jobId, config.inputPCollection(), timeInterval); + monitoringClient.getThroughputBytesPerSecond(project, jobId, iColl, timeInterval); List<Double> inputThroughputElementsPerSec = - monitoringClient.getThroughputElementsPerSecond( - project, jobId, config.inputPCollection(), timeInterval); + monitoringClient.getThroughputElementsPerSecond(project, jobId, iColl, timeInterval); List<Double> outputThroughputBytesPerSec = - monitoringClient.getThroughputBytesPerSecond( - project, jobId, config.outputPCollection(), timeInterval); + monitoringClient.getThroughputBytesPerSecond(project, jobId, oColl, timeInterval); List<Double> outputThroughputElementsPerSec = - monitoringClient.getThroughputElementsPerSecond( - project, jobId, config.outputPCollection(), timeInterval); + monitoringClient.getThroughputElementsPerSecond(project, jobId, oColl, timeInterval); return getThroughputMetrics( inputThroughputBytesPerSec, inputThroughputElementsPerSec, @@ -495,22 +505,31 @@ public abstract static class MetricsConfiguration { */ public abstract @Nullable String inputPCollection(); + /** Input PCollection name under Dataflow runner v2. */ + public abstract @Nullable String inputPCollectionV2(); + /** * Input PCollection of the Dataflow job to query additional metrics. If not provided, the * metrics for inputPCollection will not be calculated.
*/ public abstract @Nullable String outputPCollection(); - public static Builder builder() { + public abstract @Nullable String outputPCollectionV2(); + + public static MetricsConfiguration.Builder builder() { return new AutoValue_LoadTestBase_MetricsConfiguration.Builder(); } @AutoValue.Builder public abstract static class Builder { - public abstract Builder setInputPCollection(@Nullable String value); + public abstract MetricsConfiguration.Builder setInputPCollection(@Nullable String value); + + public abstract MetricsConfiguration.Builder setInputPCollectionV2(@Nullable String value); + + public abstract MetricsConfiguration.Builder setOutputPCollection(@Nullable String value); - public abstract Builder setOutputPCollection(@Nullable String value); + public abstract MetricsConfiguration.Builder setOutputPCollectionV2(@Nullable String value); public abstract MetricsConfiguration build(); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java index 80bf5cfd9382..d6d348f524b2 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigquery/BigQueryResourceManager.java @@ -461,6 +461,7 @@ public synchronized void cleanupAll() throws BigQueryResourceManagerException { projectId, dataset.getDatasetId().getDataset(), table.getTableId().getTable())); } bigQuery.delete(dataset.getDatasetId()); + dataset = null; } } catch (Exception e) { throw new BigQueryResourceManagerException("Failed to delete resources.", e); } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java index 1e6750cc81e4..713880229281 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManager.java @@ -33,6 +33,7 @@ import com.google.cloud.bigtable.admin.v2.models.AppProfile.MultiClusterRoutingPolicy; import com.google.cloud.bigtable.admin.v2.models.AppProfile.RoutingPolicy; import com.google.cloud.bigtable.admin.v2.models.AppProfile.SingleClusterRoutingPolicy; +import com.google.cloud.bigtable.admin.v2.models.Cluster; import com.google.cloud.bigtable.admin.v2.models.CreateAppProfileRequest; import com.google.cloud.bigtable.admin.v2.models.CreateInstanceRequest; import com.google.cloud.bigtable.admin.v2.models.CreateTableRequest; @@ -54,6 +55,8 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; import javax.annotation.Nullable; import org.apache.beam.it.common.ResourceManager; import org.apache.commons.lang3.StringUtils; @@ -93,6 +96,8 @@ public class BigtableResourceManager implements ResourceManager { private final Set<String> cdcEnabledTables; private boolean hasInstance; + private Iterable<BigtableResourceManagerCluster> clusters; + private final boolean usingStaticInstance; private BigtableResourceManager(Builder builder) throws IOException { @@ -111,6 +116,7 @@ private BigtableResourceManager(Builder builder) throws IOException { this.createdTables = new ArrayList<>(); this.createdAppProfiles = new ArrayList<>(); this.cdcEnabledTables = new HashSet<>(); + this.clusters = new
ArrayList<>(); // Check if RM was configured to use static Bigtable instance. if (builder.useStaticInstance) { @@ -223,6 +229,7 @@ public synchronized void createInstance(Iterable<BigtableResourceManagerCluster> "Failed to create instance " + instanceId + ".", e); } hasInstance = true; + this.clusters = clusters; LOG.info("Successfully created instance {}.", instanceId); } @@ -544,6 +551,32 @@ public synchronized ImmutableList<Row> readTable(String tableId, @Nullable Long return tableRows; } + /** Get all the cluster names of the current instance. */ + public List<String> getClusterNames() { + return StreamSupport.stream(getClusters().spliterator(), false) + .map(BigtableResourceManagerCluster::clusterId) + .collect(Collectors.toList()); + } + + private Iterable<BigtableResourceManagerCluster> getClusters() { + if (usingStaticInstance && this.clusters == null) { + try (BigtableInstanceAdminClient instanceAdminClient = + bigtableResourceManagerClientFactory.bigtableInstanceAdminClient()) { + List<BigtableResourceManagerCluster> managedClusters = new ArrayList<>(); + for (Cluster cluster : instanceAdminClient.listClusters(instanceId)) { + managedClusters.add( + BigtableResourceManagerCluster.create( + cluster.getId(), + cluster.getZone(), + cluster.getServeNodes(), + cluster.getStorageType())); + } + this.clusters = managedClusters; + } + } + return this.clusters; + } + /** * Deletes all created resources (instance and tables) and cleans up all Bigtable clients, making * the manager object unusable. diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java index eb2323e52974..a893493d766e 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerUtils.java @@ -34,7 +34,7 @@ public final class BigtableResourceManagerUtils { private static final Pattern ILLEGAL_INSTANCE_ID_CHARS = Pattern.compile("[^a-z0-9-]"); private static final String REPLACE_INSTANCE_ID_CHAR = "-"; private static final int MIN_TABLE_ID_LENGTH = 1; - private static final int MAX_TABLE_ID_LENGTH = 30; + private static final int MAX_TABLE_ID_LENGTH = 40; private static final Pattern ILLEGAL_TABLE_CHARS = Pattern.compile("[^a-zA-Z0-9-_.]"); private static final String REPLACE_TABLE_ID_CHAR = "-"; diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java index 08688d88b104..b5c9535953b7 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/AbstractPipelineLauncher.java @@ -58,6 +58,11 @@ public abstract class AbstractPipelineLauncher implements PipelineLauncher { private static final Logger LOG = LoggerFactory.getLogger(AbstractPipelineLauncher.class); private static final Pattern CURRENT_METRICS = Pattern.compile(".*Current.*"); + public static final String LEGACY_RUNNER = "Dataflow Legacy Runner"; + public static final String RUNNER_V2 = "Dataflow Runner V2"; + public static final String PARAM_RUNNER = "runner"; + public static final String PARAM_JOB_TYPE = "jobType"; + public static final String PARAM_JOB_ID = "jobId"; protected final List<String> launchedJobs = new ArrayList<>(); @@ -244,12 +249,12 @@ protected JobState
handleJobState(Job job) { */ protected LaunchInfo.Builder getJobInfoBuilder(LaunchConfig options, JobState state, Job job) { Map<String, String> labels = job.getLabels(); - String runner = "Dataflow Legacy Runner"; + String runner = LEGACY_RUNNER; Environment environment = job.getEnvironment(); if (environment != null && environment.getExperiments() != null && environment.getExperiments().contains("use_runner_v2")) { - runner = "Dataflow Runner V2"; + runner = RUNNER_V2; } LaunchInfo.Builder builder = LaunchInfo.builder() @@ -266,6 +271,10 @@ protected LaunchInfo.Builder getJobInfoBuilder(LaunchConfig options, JobState st // tests Map<String, String> parameters = new HashMap<>(options.parameters()); options.environment().forEach((key, val) -> parameters.put(key, val.toString())); + // attach basic job info to parameters so that these are exported for load tests + parameters.put(PARAM_RUNNER, runner); + parameters.put(PARAM_JOB_TYPE, job.getType()); + parameters.put(PARAM_JOB_ID, job.getId()); builder.setParameters(ImmutableMap.copyOf(parameters)); if (labels != null && !labels.isEmpty()) { // template job diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java index 7918dd6227d9..ad2dcafc007b 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DefaultPipelineLauncher.java @@ -99,7 +99,7 @@ public class DefaultPipelineLauncher extends AbstractPipelineLauncher { .put(PipelineResult.State.UNRECOGNIZED, JobState.UNKNOWN) .build(); - private DefaultPipelineLauncher(Builder builder) { + private DefaultPipelineLauncher(DefaultPipelineLauncher.Builder builder) { super( new Dataflow( Utils.getDefaultTransport(), @@ -109,8 +109,8 @@ private DefaultPipelineLauncher(Builder builder) { : new HttpCredentialsAdapter(builder.getCredentials()))); } - public static Builder builder(Credentials credentials) { - return new Builder(credentials); + public static DefaultPipelineLauncher.Builder builder(Credentials credentials) { + return new DefaultPipelineLauncher.Builder(credentials); } @Override diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java index 8017009ff378..57f8ad40c1b6 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dataflow/DirectRunnerClient.java @@ -53,8 +53,8 @@ public class DirectRunnerClient implements PipelineLauncher { this.mainClass = builder.getMainClass(); } - public static Builder builder(Class<?> mainClass) { - return new Builder(mainClass); + public static DirectRunnerClient.Builder builder(Class<?> mainClass) { + return new DirectRunnerClient.Builder(mainClass); } @Override @@ -172,7 +172,7 @@ public Class<?> getMainClass() { return mainClass; } - public Builder setCredentials(Credentials value) { + public DirectRunnerClient.Builder setCredentials(Credentials value) { credentials = value; return this; } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java index 99016b5dd3a4..832a75defd95 100644 ---
a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datagenerator/DataGenerator.java @@ -61,14 +61,16 @@ private DataGenerator(Builder builder) { .build(); } - public static Builder builderWithSchemaLocation(String testName, String schemaLocation) { - return new Builder(testName + "-data-generator") + public static DataGenerator.Builder builderWithSchemaLocation( + String testName, String schemaLocation) { + return new DataGenerator.Builder(testName + "-data-generator") .setSchemaLocation(schemaLocation) .setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED); } - public static Builder builderWithSchemaTemplate(String testName, String schemaTemplate) { - return new Builder(testName + "-data-generator") + public static DataGenerator.Builder builderWithSchemaTemplate( + String testName, String schemaTemplate) { + return new DataGenerator.Builder(testName + "-data-generator") .setSchemaTemplate(schemaTemplate) .setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED); } @@ -129,27 +131,27 @@ public Map getParameters() { return parameters; } - public Builder setSchemaTemplate(String value) { + public DataGenerator.Builder setSchemaTemplate(String value) { parameters.put("schemaTemplate", value); return this; } - public Builder setSchemaLocation(String value) { + public DataGenerator.Builder setSchemaLocation(String value) { parameters.put("schemaLocation", value); return this; } - public Builder setMessagesLimit(String value) { + public DataGenerator.Builder setMessagesLimit(String value) { parameters.put(MESSAGES_LIMIT, value); return this; } - public Builder setQPS(String value) { + public DataGenerator.Builder setQPS(String value) { parameters.put("qps", value); return this; } - public Builder setSinkType(String value) { + public DataGenerator.Builder setSinkType(String value) { parameters.put("sinkType", value); return this; } @@ -164,87 +166,87 @@ public Builder setNumWorkers(String value) { return this; } - public Builder setMaxNumWorkers(String value) { + public DataGenerator.Builder setMaxNumWorkers(String value) { parameters.put("maxNumWorkers", value); return this; } - public Builder setAutoscalingAlgorithm(AutoscalingAlgorithmType value) { + public DataGenerator.Builder setAutoscalingAlgorithm(AutoscalingAlgorithmType value) { parameters.put("autoscalingAlgorithm", value.toString()); return this; } - public Builder setOutputDirectory(String value) { + public DataGenerator.Builder setOutputDirectory(String value) { parameters.put("outputDirectory", value); return this; } - public Builder setOutputType(String value) { + public DataGenerator.Builder setOutputType(String value) { parameters.put("outputType", value); return this; } - public Builder setNumShards(String value) { + public DataGenerator.Builder setNumShards(String value) { parameters.put("numShards", value); return this; } - public Builder setAvroSchemaLocation(String value) { + public DataGenerator.Builder setAvroSchemaLocation(String value) { parameters.put("avroSchemaLocation", value); return this; } - public Builder setTopic(String value) { + public DataGenerator.Builder setTopic(String value) { parameters.put("topic", value); return this; } - public Builder setProjectId(String value) { + public DataGenerator.Builder setProjectId(String value) { parameters.put("projectId", value); return this; } - public Builder setSpannerInstanceName(String value) { + public DataGenerator.Builder 
setSpannerInstanceName(String value) { parameters.put("spannerInstanceName", value); return this; } - public Builder setSpannerDatabaseName(String value) { + public DataGenerator.Builder setSpannerDatabaseName(String value) { parameters.put("spannerDatabaseName", value); return this; } - public Builder setSpannerTableName(String value) { + public DataGenerator.Builder setSpannerTableName(String value) { parameters.put("spannerTableName", value); return this; } - public Builder setDriverClassName(String value) { + public DataGenerator.Builder setDriverClassName(String value) { parameters.put("driverClassName", value); return this; } - public Builder setConnectionUrl(String value) { + public DataGenerator.Builder setConnectionUrl(String value) { parameters.put("connectionUrl", value); return this; } - public Builder setUsername(String value) { + public DataGenerator.Builder setUsername(String value) { parameters.put("username", value); return this; } - public Builder setPassword(String value) { + public DataGenerator.Builder setPassword(String value) { parameters.put("password", value); return this; } - public Builder setConnectionProperties(String value) { + public DataGenerator.Builder setConnectionProperties(String value) { parameters.put("connectionProperties", value); return this; } - public Builder setStatement(String value) { + public DataGenerator.Builder setStatement(String value) { parameters.put("statement", value); return this; } diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java index ef67a5a5c4fb..78fa7543150f 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/datastore/matchers/DatastoreAsserts.java @@ -61,7 +61,8 @@ public static List> datastoreResultsToRecords(Collection results) { diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java index f59794af3e1f..de818a1bbff1 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/dlp/DlpResourceManager.java @@ -113,8 +113,9 @@ public void cleanupAll() { * @param project the GCP project ID * @return a new instance of Builder */ - public static Builder builder(String project, CredentialsProvider credentialsProvider) { - return new Builder(project, credentialsProvider); + public static DlpResourceManager.Builder builder( + String project, CredentialsProvider credentialsProvider) { + return new DlpResourceManager.Builder(project, credentialsProvider); } /** A builder class for creating instances of {@link DlpResourceManager}. 
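A hedged usage sketch of the fluent Builder above, illustrative only: the setter names come from the diff, while the schema-template name, sink-type value, and final build() call are assumptions.

// Hypothetical values; setters as defined in DataGenerator.Builder above.
DataGenerator generator =
    DataGenerator.builderWithSchemaTemplate("load-test", "GAME_EVENT")
        .setQPS("1000")
        .setMessagesLimit("100000")
        .setSinkType("PUBSUB")
        .setTopic("projects/my-project/topics/my-topic")
        .build();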
*/ diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java index 7e1a403c7352..2cad6d0b9fab 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/kms/KMSResourceManager.java @@ -72,8 +72,9 @@ private KMSResourceManager(Builder builder) { this.keyRing = null; } - public static Builder builder(String projectId, CredentialsProvider credentialsProvider) { - return new Builder(projectId, credentialsProvider); + public static KMSResourceManager.Builder builder( + String projectId, CredentialsProvider credentialsProvider) { + return new KMSResourceManager.Builder(projectId, credentialsProvider); } /** diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java index 0fc5614a3630..06591ea4fe0a 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/monitoring/MonitoringClient.java @@ -150,8 +150,8 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_MEAN) - .setCrossSeriesReducer(Reducer.REDUCE_MEAN) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN) + .setCrossSeriesReducer(Aggregation.Reducer.REDUCE_MEAN) .addGroupByFields("resource.instance_id") .build(); ListTimeSeriesRequest request = @@ -188,7 +188,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_MEAN) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN) .setCrossSeriesReducer(Reducer.REDUCE_MAX) .build(); ListTimeSeriesRequest request = @@ -225,7 +225,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_MEAN) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN) .setCrossSeriesReducer(Reducer.REDUCE_MAX) .build(); ListTimeSeriesRequest request = @@ -269,7 +269,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_RATE) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_RATE) .build(); ListTimeSeriesRequest request = ListTimeSeriesRequest.newBuilder() @@ -312,7 +312,7 @@ public List listTimeSeriesAsLong(ListTimeSeriesRequest request) { Aggregation aggregation = Aggregation.newBuilder() .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build()) - .setPerSeriesAligner(Aligner.ALIGN_RATE) + .setPerSeriesAligner(Aggregation.Aligner.ALIGN_RATE) .build(); ListTimeSeriesRequest request = ListTimeSeriesRequest.newBuilder() diff --git a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java index 
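The MonitoringClient hunks above switch Aligner and Reducer references to their nested form under Aggregation, so the nested enums no longer need individual imports. A sketch of the resulting call shape, assembled from the calls shown in the diff:

import com.google.monitoring.v3.Aggregation;
import com.google.protobuf.Duration;

// Mirrors the aggregation built in MonitoringClient, with the nested enums
// referenced through their enclosing Aggregation class.
Aggregation aggregation =
    Aggregation.newBuilder()
        .setAlignmentPeriod(Duration.newBuilder().setSeconds(60).build())
        .setPerSeriesAligner(Aggregation.Aligner.ALIGN_MEAN)
        .setCrossSeriesReducer(Aggregation.Reducer.REDUCE_MEAN)
        .addGroupByFields("resource.instance_id")
        .build();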
3a684d34c045..738620c15b7e 100644 --- a/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java +++ b/it/google-cloud-platform/src/main/java/org/apache/beam/it/gcp/pubsub/PubsubResourceManager.java @@ -20,6 +20,7 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; import com.google.api.gax.core.CredentialsProvider; +import com.google.api.gax.rpc.DeadlineExceededException; import com.google.cloud.pubsub.v1.Publisher; import com.google.cloud.pubsub.v1.SchemaServiceClient; import com.google.cloud.pubsub.v1.SchemaServiceSettings; @@ -42,12 +43,16 @@ import com.google.pubsub.v1.Topic; import com.google.pubsub.v1.TopicName; import com.google.pubsub.v1.UpdateTopicRequest; +import dev.failsafe.Failsafe; +import dev.failsafe.RetryPolicy; import java.io.IOException; +import java.time.Duration; import java.util.Collections; import java.util.HashSet; import java.util.Map; import java.util.Set; import org.apache.beam.it.common.ResourceManager; +import org.apache.beam.it.common.utils.ExceptionUtils; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; import org.slf4j.Logger; @@ -66,6 +71,12 @@ public final class PubsubResourceManager implements ResourceManager { private static final int DEFAULT_ACK_DEADLINE_SECONDS = 600; private static final String RESOURCE_NAME_SEPARATOR = "-"; + // Retry settings for client operations + private static final int FAILSAFE_MAX_RETRIES = 5; + private static final Duration FAILSAFE_RETRY_DELAY = Duration.ofSeconds(10); + private static final Duration FAILSAFE_RETRY_MAX_DELAY = Duration.ofSeconds(60); + private static final double FAILSAFE_RETRY_JITTER = 0.1; + private final String testId; private final String projectId; private final PubsubPublisherFactory publisherFactory; @@ -184,11 +195,14 @@ public SubscriptionName createSubscription(TopicName topicName, String subscript LOG.info("Creating subscription '{}' for topic '{}'", subscriptionName, topicName); Subscription subscription = - subscriptionAdminClient.createSubscription( - getSubscriptionName(subscriptionName), - topicName, - PushConfig.getDefaultInstance(), - DEFAULT_ACK_DEADLINE_SECONDS); + Failsafe.with(retryOnDeadlineExceeded()) + .get( + () -> + subscriptionAdminClient.createSubscription( + getSubscriptionName(subscriptionName), + topicName, + PushConfig.getDefaultInstance(), + DEFAULT_ACK_DEADLINE_SECONDS)); SubscriptionName reference = PubsubUtils.toSubscriptionName(subscription); createdSubscriptions.add(getSubscriptionName(subscriptionName)); @@ -299,17 +313,19 @@ public synchronized void cleanupAll() { try { for (SubscriptionName subscription : createdSubscriptions) { LOG.info("Deleting subscription '{}'", subscription); - subscriptionAdminClient.deleteSubscription(subscription); + Failsafe.with(retryOnDeadlineExceeded()) + .run(() -> subscriptionAdminClient.deleteSubscription(subscription)); } for (TopicName topic : createdTopics) { LOG.info("Deleting topic '{}'", topic); - topicAdminClient.deleteTopic(topic); + Failsafe.with(retryOnDeadlineExceeded()).run(() -> topicAdminClient.deleteTopic(topic)); } for (SchemaName schemaName : createdSchemas) { LOG.info("Deleting schema '{}'", schemaName); - schemaServiceClient.deleteSchema(schemaName); + Failsafe.with(retryOnDeadlineExceeded()) + .run(() -> schemaServiceClient.deleteSchema(schemaName)); } } finally { 
subscriptionAdminClient.close(); @@ -342,7 +358,8 @@ private void checkIsUsable() throws IllegalStateException { private TopicName createTopicInternal(TopicName topicName) { LOG.info("Creating topic '{}'...", topicName.toString()); - Topic topic = topicAdminClient.createTopic(topicName); + Topic topic = + Failsafe.with(retryOnDeadlineExceeded()).get(() -> topicAdminClient.createTopic(topicName)); TopicName reference = PubsubUtils.toTopicName(topic); createdTopics.add(reference); @@ -355,6 +372,16 @@ private boolean isNotUsable() { return topicAdminClient.isShutdown() || subscriptionAdminClient.isShutdown(); } + private static RetryPolicy retryOnDeadlineExceeded() { + return RetryPolicy.builder() + .handleIf( + exception -> ExceptionUtils.containsType(exception, DeadlineExceededException.class)) + .withMaxRetries(FAILSAFE_MAX_RETRIES) + .withBackoff(FAILSAFE_RETRY_DELAY, FAILSAFE_RETRY_MAX_DELAY) + .withJitter(FAILSAFE_RETRY_JITTER) + .build(); + } + /** Builder for {@link PubsubResourceManager}. */ public static final class Builder { diff --git a/it/google-cloud-platform/src/main/resources/test-artifact.json b/it/google-cloud-platform/src/main/resources/test-artifact.json new file mode 100644 index 000000000000..551c80d14a66 --- /dev/null +++ b/it/google-cloud-platform/src/main/resources/test-artifact.json @@ -0,0 +1 @@ +["This is a test artifact."] \ No newline at end of file diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java index 03f6e8abfd41..a9ae68142778 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigquery/BigQueryIOLT.java @@ -99,12 +99,8 @@ public final class BigQueryIOLT extends IOLoadTestBase { private static final String READ_ELEMENT_METRIC_NAME = "read_count"; private Configuration configuration; private String tempLocation; - private TableSchema schema; - private static final String READ_PCOLLECTION = "Counting element.out0"; - private static final String WRITE_PCOLLECTION = "Map records.out0"; - @Rule public TestPipeline writePipeline = TestPipeline.create(); @Rule public TestPipeline readPipeline = TestPipeline.create(); @@ -268,7 +264,7 @@ private void testWrite(BigQueryIO.Write writeIO) throws IOException { .withCustomGcsTempLocation(ValueProvider.StaticValueProvider.of(tempLocation))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigquery-write") + PipelineLauncher.LaunchConfig.builder("write-bigquery") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.runner) @@ -284,7 +280,10 @@ private void testWrite(BigQueryIO.Write writeIO) throws IOException { // export metrics MetricsConfiguration metricsConfig = - MetricsConfiguration.builder().setInputPCollection(WRITE_PCOLLECTION).build(); + MetricsConfiguration.builder() + .setInputPCollection("Map records.out0") + .setInputPCollectionV2("Map records/ParMultiDo(MapKVToV).out0") + .build(); try { exportMetricsToBigQuery(launchInfo, getMetrics(launchInfo, metricsConfig)); } catch (ParseException | InterruptedException e) { @@ -301,7 +300,7 @@ private void testRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigquery-read") + 
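The PubsubResourceManager changes above wrap each admin call in a Failsafe retry keyed on DeadlineExceededException. A self-contained sketch of the same policy follows; the predicate is simplified (the PR matches the exception anywhere in the cause chain via ExceptionUtils.containsType), the literal constants correspond to the new FAILSAFE_* fields, and a TopicAdminClient plus TopicName are assumed in scope.

import com.google.api.gax.rpc.DeadlineExceededException;
import dev.failsafe.Failsafe;
import dev.failsafe.RetryPolicy;
import java.time.Duration;

// Simplified predicate: checks only the top-level exception type.
RetryPolicy<Object> retryOnDeadlineExceeded =
    RetryPolicy.builder()
        .handleIf(e -> e instanceof DeadlineExceededException)
        .withMaxRetries(5)
        .withBackoff(Duration.ofSeconds(10), Duration.ofSeconds(60))
        .withJitter(0.1)
        .build();
// Wraps any admin call, e.g. topic deletion during cleanupAll():
Failsafe.with(retryOnDeadlineExceeded).run(() -> topicAdminClient.deleteTopic(topic));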
PipelineLauncher.LaunchConfig.builder("read-bigquery") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.runner) @@ -326,7 +325,10 @@ private void testRead() throws IOException { // export metrics MetricsConfiguration metricsConfig = - MetricsConfiguration.builder().setOutputPCollection(READ_PCOLLECTION).build(); + MetricsConfiguration.builder() + .setOutputPCollection("Counting element.out0") + .setOutputPCollectionV2("Counting element/ParMultiDo(Counting).out0") + .build(); try { exportMetricsToBigQuery(launchInfo, getMetrics(launchInfo, metricsConfig)); } catch (ParseException | InterruptedException e) { diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java index fc7bd87707fc..e232ed31cb5a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigTableIOLT.java @@ -115,8 +115,6 @@ public void teardown() { /** Run integration test with configurations specified by TestProperties. */ @Test public void testWriteAndRead() throws IOException { - final String readPCollection = "Counting element.out0"; - final String writePCollection = "Map records.out0"; tableId = generateTableId(testName); resourceManager.createTable( @@ -149,8 +147,10 @@ public void testWriteAndRead() throws IOException { // export metrics MetricsConfiguration metricsConfig = MetricsConfiguration.builder() - .setInputPCollection(writePCollection) - .setOutputPCollection(readPCollection) + .setInputPCollection("Map records.out0") + .setInputPCollectionV2("Map records/ParMultiDo(MapToBigTableFormat).out0") + .setOutputPCollection("Counting element.out0") + .setOutputPCollectionV2("Counting element/ParMultiDo(Counting).out0") .build(); try { exportMetricsToBigQuery(writeInfo, getMetrics(writeInfo, metricsConfig)); @@ -174,7 +174,7 @@ private PipelineLauncher.LaunchInfo testWrite() throws IOException { .apply("Write to BigTable", writeIO); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigtable-write") + PipelineLauncher.LaunchConfig.builder("write-bigtable") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.getRunner()) @@ -196,7 +196,7 @@ private PipelineLauncher.LaunchInfo testRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-bigtable-read") + PipelineLauncher.LaunchConfig.builder("read-bigtable") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.getRunner()) @@ -227,18 +227,18 @@ static Configuration of(long numRows, int pipelineTimeout, String runner, int va @AutoValue.Builder abstract static class Builder { - abstract Builder setNumRows(long numRows); + abstract Configuration.Builder setNumRows(long numRows); - abstract Builder setPipelineTimeout(int timeOutMinutes); + abstract Configuration.Builder setPipelineTimeout(int timeOutMinutes); - abstract Builder setRunner(String runner); + abstract Configuration.Builder setRunner(String runner); - abstract Builder setValueSizeBytes(int valueSizeBytes); + abstract Configuration.Builder setValueSizeBytes(int valueSizeBytes); abstract Configuration build(); } - abstract Builder toBuilder(); + 
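The load tests above now register both PCollection name spellings because Runner V2 reports ParDo outputs in the expanded ParMultiDo form. A sketch of the resulting configuration, assembled from the calls in the diff and assuming a LaunchInfo from a prior launch:

// Both name variants are set so metric lookup succeeds on either runner.
MetricsConfiguration metricsConfig =
    MetricsConfiguration.builder()
        .setInputPCollection("Map records.out0")
        .setInputPCollectionV2("Map records/ParMultiDo(MapKVToV).out0")
        .setOutputPCollection("Counting element.out0")
        .setOutputPCollectionV2("Counting element/ParMultiDo(Counting).out0")
        .build();
exportMetricsToBigQuery(launchInfo, getMetrics(launchInfo, metricsConfig));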
abstract Configuration.Builder toBuilder(); } /** Maps long number to the BigTable format record. */ diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java index 65745aea49be..f8673ed696cc 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/bigtable/BigtableResourceManagerTest.java @@ -442,6 +442,7 @@ public void testCleanupAllShouldWorkWhenBigtableDoesNotThrowAnyError() { setupReadyTable(); testManager.createTable(TABLE_ID, ImmutableList.of("cf1")); + when(bigtableResourceManagerClientFactory.bigtableTableAdminClient().exists(anyString())) .thenReturn(true); testManager.readTable(TABLE_ID); diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java index cfd56e596e52..88c35589f2be 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/ClassicTemplateClientTest.java @@ -18,6 +18,10 @@ package org.apache.beam.it.gcp.dataflow; import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.LEGACY_RUNNER; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_ID; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_TYPE; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_RUNNER; import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -153,8 +157,14 @@ public void testLaunchNewJob() throws IOException { .setSdk("Apache Beam Java") .setVersion("2.42.0") .setJobType("JOB_TYPE_BATCH") - .setRunner("Dataflow Legacy Runner") - .setParameters(ImmutableMap.of(PARAM_KEY, PARAM_VALUE)) + .setRunner(AbstractPipelineLauncher.LEGACY_RUNNER) + .setParameters( + ImmutableMap.builder() + .put(PARAM_KEY, PARAM_VALUE) + .put(PARAM_JOB_ID, JOB_ID) + .put(PARAM_RUNNER, LEGACY_RUNNER) + .put(PARAM_JOB_TYPE, "JOB_TYPE_BATCH") + .build()) .build(); assertThat(actual).isEqualTo(expected); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java index 4088efe67514..06f44437414a 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/dataflow/FlexTemplateClientTest.java @@ -18,6 +18,10 @@ package org.apache.beam.it.gcp.dataflow; import static com.google.common.truth.Truth.assertThat; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.LEGACY_RUNNER; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_ID; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_JOB_TYPE; +import static org.apache.beam.it.gcp.dataflow.AbstractPipelineLauncher.PARAM_RUNNER; import static org.junit.Assert.assertThrows; import static org.mockito.ArgumentMatchers.any; import static 
org.mockito.Mockito.mock; @@ -158,8 +162,14 @@ public void testLaunchNewJob() throws IOException { .setSdk("Apache Beam Java") .setVersion("2.42.0") .setJobType("JOB_TYPE_BATCH") - .setRunner("Dataflow Legacy Runner") - .setParameters(ImmutableMap.of(PARAM_KEY, PARAM_VALUE)) + .setRunner(LEGACY_RUNNER) + .setParameters( + ImmutableMap.builder() + .put(PARAM_KEY, PARAM_VALUE) + .put(PARAM_JOB_ID, JOB_ID) + .put(PARAM_RUNNER, LEGACY_RUNNER) + .put(PARAM_JOB_TYPE, "JOB_TYPE_BATCH") + .build()) .build(); assertThat(actual).isEqualTo(expected); } diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java index fd1bc1772f2d..704f8337c66f 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/FileBasedIOLT.java @@ -90,7 +90,7 @@ public class FileBasedIOLT extends IOLoadTestBase { @Rule public TestPipeline readPipeline = TestPipeline.create(); - private static final Map TEST_CONFIGS_PRESET; + private static final Map TEST_CONFIGS_PRESET; static { try { @@ -160,8 +160,6 @@ public void setup() { @Test public void testTextIOWriteThenRead() throws IOException { - final String readPCollection = "Counting element.out0"; - final String writePCollection = "Map records.out0"; TextIO.TypedWrite write = TextIO.write() @@ -182,7 +180,7 @@ public void testTextIOWriteThenRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig writeOptions = - PipelineLauncher.LaunchConfig.builder("test-textio-write") + PipelineLauncher.LaunchConfig.builder("write-textio") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.runner) @@ -196,7 +194,7 @@ public void testTextIOWriteThenRead() throws IOException { assertThatResult(writeResult).isLaunchFinished(); PipelineLauncher.LaunchConfig readOptions = - PipelineLauncher.LaunchConfig.builder("test-textio-read") + PipelineLauncher.LaunchConfig.builder("read-textio") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.runner) @@ -222,8 +220,10 @@ public void testTextIOWriteThenRead() throws IOException { // export metrics MetricsConfiguration metricsConfig = MetricsConfiguration.builder() - .setInputPCollection(writePCollection) - .setOutputPCollection(readPCollection) + .setInputPCollection("Map records.out0") + .setInputPCollectionV2("Map records/ParMultiDo(MapKVToString).out0") + .setOutputPCollection("Counting element.out0") + .setOutputPCollectionV2("Counting element/ParMultiDo(Counting).out0") .build(); try { exportMetricsToBigQuery(writeInfo, getMetrics(writeInfo, metricsConfig)); diff --git a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java index 3ec96da81007..0153573feaed 100644 --- a/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java +++ b/it/google-cloud-platform/src/test/java/org/apache/beam/it/gcp/storage/GcsResourceManagerTest.java @@ -71,7 +71,7 @@ public final class GcsResourceManagerTest { @Mock private Blob blob; private GcsResourceManager gcsClient; - private static final String ARTIFACT_NAME = "test-artifact.txt"; + private static final 
String ARTIFACT_NAME = "test-artifact.json"; private static final Path LOCAL_PATH; private static final byte[] TEST_ARTIFACT_CONTENTS; diff --git a/it/google-cloud-platform/src/test/resources/test-artifact.txt b/it/google-cloud-platform/src/test/resources/test-artifact.txt deleted file mode 100644 index 22c4e1d122a7..000000000000 --- a/it/google-cloud-platform/src/test/resources/test-artifact.txt +++ /dev/null @@ -1 +0,0 @@ -This is a test artifact. \ No newline at end of file diff --git a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java index 0bcb16c61095..c515b2c4844f 100644 --- a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java +++ b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MSSQLResourceManager.java @@ -61,13 +61,14 @@ private MSSQLResourceManager(Builder builder) { } @VisibleForTesting - > MSSQLResourceManager(T container, Builder builder) { + > MSSQLResourceManager( + T container, Builder builder) { super(container, builder); initialized = true; } - public static Builder builder(String testId) { - return new Builder(testId); + public static MSSQLResourceManager.Builder builder(String testId) { + return new MSSQLResourceManager.Builder(testId); } private synchronized void createDatabase(String databaseName) { diff --git a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java index e1bf3640b53d..688c26dfb56d 100644 --- a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java +++ b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/MySQLResourceManager.java @@ -49,8 +49,8 @@ private MySQLResourceManager(Builder builder) { super(container, builder); } - public static Builder builder(String testId) { - return new Builder(testId); + public static MySQLResourceManager.Builder builder(String testId) { + return new MySQLResourceManager.Builder(testId); } @Override diff --git a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java index f44e939936d2..8054d26c33f7 100644 --- a/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java +++ b/it/jdbc/src/main/java/org/apache/beam/it/jdbc/OracleResourceManager.java @@ -45,7 +45,7 @@ public class OracleResourceManager extends AbstractJDBCResourceManager( DockerImageName.parse(builder.containerImageName).withTag(builder.containerImageTag)), @@ -46,12 +46,13 @@ private PostgresResourceManager(Builder builder) { } @VisibleForTesting - PostgresResourceManager(PostgreSQLContainer container, Builder builder) { + PostgresResourceManager( + PostgreSQLContainer container, PostgresResourceManager.Builder builder) { super(container, builder); } - public static Builder builder(String testId) { - return new Builder(testId); + public static PostgresResourceManager.Builder builder(String testId) { + return new PostgresResourceManager.Builder(testId); } @Override diff --git a/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java b/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java index d9a647dbeebd..7f7fb5b69569 100644 --- a/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java +++ b/it/kafka/src/main/java/org/apache/beam/it/kafka/KafkaResourceManager.java @@ -71,13 +71,16 @@ public class KafkaResourceManager extends TestContainerResourceManager 0; @@ -102,8 +105,8 @@ 
private KafkaResourceManager(Builder builder) { : AdminClient.create(ImmutableMap.of("bootstrap.servers", this.connectionString)); } - public static Builder builder(String testId) { - return new Builder(testId); + public static KafkaResourceManager.Builder builder(String testId) { + return new KafkaResourceManager.Builder(testId); } /** Returns the kafka bootstrap server connection string. */ diff --git a/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java b/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java index a03030664de4..ce6ad877c375 100644 --- a/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java +++ b/it/kafka/src/test/java/org/apache/beam/it/kafka/KafkaIOLT.java @@ -175,7 +175,7 @@ private PipelineLauncher.LaunchInfo testWrite() throws IOException { .apply("Write to Kafka", writeIO.withTopic(kafkaTopic)); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-kafka-write") + PipelineLauncher.LaunchConfig.builder("write-kafka") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(writePipeline) .addParameter("runner", configuration.getRunner()) @@ -195,7 +195,7 @@ private PipelineLauncher.LaunchInfo testRead() throws IOException { .apply("Counting element", ParDo.of(new CountingFn<>(READ_ELEMENT_METRIC_NAME))); PipelineLauncher.LaunchConfig options = - PipelineLauncher.LaunchConfig.builder("test-kafka-read") + PipelineLauncher.LaunchConfig.builder("read-kafka") .setSdk(PipelineLauncher.Sdk.JAVA) .setPipeline(readPipeline) .addParameter("runner", configuration.getRunner()) diff --git a/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java b/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java index ed0e556bf0df..80216b14ac0e 100644 --- a/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java +++ b/it/mongodb/src/main/java/org/apache/beam/it/mongodb/MongoDBResourceManager.java @@ -69,7 +69,7 @@ public class MongoDBResourceManager extends TestContainerResourceManager( @@ -79,7 +79,10 @@ private Neo4jResourceManager(Builder builder) { @VisibleForTesting @SuppressWarnings("nullness") - Neo4jResourceManager(@Nullable Driver neo4jDriver, Neo4jContainer container, Builder builder) { + Neo4jResourceManager( + @Nullable Driver neo4jDriver, + Neo4jContainer container, + Neo4jResourceManager.Builder builder) { super(container, builder); this.adminPassword = builder.adminPassword; @@ -98,8 +101,8 @@ private Neo4jResourceManager(Builder builder) { } } - public static Builder builder(String testId) { - return new Builder(testId); + public static Neo4jResourceManager.Builder builder(String testId) { + return new Neo4jResourceManager.Builder(testId); } /** Returns the URI connection string to the Neo4j Database. */ diff --git a/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java b/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java index 0115a791eefe..1ef4726df43a 100644 --- a/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java +++ b/it/splunk/src/main/java/org/apache/beam/it/splunk/SplunkResourceManager.java @@ -85,7 +85,7 @@ public class SplunkResourceManager extends TestContainerResourceManagerOptionally, a static resource can be specified by calling the useStaticContainer() method in - * the {@link Builder} class. A static resource is a pre-configured database or other resource that - * is ready to be connected to by the resource manager. 
This could be a pre-existing TestContainer - * that has not been closed, a local database instance, a remote VM, or any other source that can be - * connected to. If a static container is used, the host and port must also be configured using the - * Builder's setHost() and setPort() methods, respectively. + * the {@link TestContainerResourceManager.Builder} class. A static resource is a pre-configured + * database or other resource that is ready to be connected to by the resource manager. This could + * be a pre-existing TestContainer that has not been closed, a local database instance, a remote VM, + * or any other source that can be connected to. If a static container is used, the host and port + * must also be configured using the Builder's setHost() and setPort() methods, respectively. */ public abstract class TestContainerResourceManager> implements ResourceManager { @@ -48,11 +48,12 @@ public abstract class TestContainerResourceManager private final String host; protected int port; - protected > TestContainerResourceManager(T container, B builder) { + protected > TestContainerResourceManager( + T container, B builder) { this(container, builder, null); } - protected > TestContainerResourceManager( + protected > TestContainerResourceManager( T container, B builder, @Nullable Callable setup) { this.container = container; this.usingStaticContainer = builder.useStaticContainer; diff --git a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java index a496ecce9448..30a27c9ad259 100644 --- a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java +++ b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/LaunchInfoSubject.java @@ -43,7 +43,7 @@ public static Factory launchInfo() { } /** - * Check if the subject reflects succeeded states. A successfully {@link LaunchInfo} does not mean + * Check if the subject reflects succeeded states. A successful {@link LaunchInfo} does not mean * that the pipeline finished and no errors happened, it just means that the job was able to get * itself into an active state (RUNNING, UPDATED). 
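For the static-resource mode described in the TestContainerResourceManager javadoc above, a hedged sketch using MongoDBResourceManager as an arbitrary subclass. useStaticContainer(), setHost(), and setPort() are the Builder methods the javadoc names; the host, port, and build() call are placeholders and assumptions.

// Hypothetical static-resource wiring; no container is started in this mode.
MongoDBResourceManager mongo =
    MongoDBResourceManager.builder("my-test")
        .useStaticContainer()
        .setHost("10.128.0.3")
        .setPort(27017)
        .build();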
*/ diff --git a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java index 75d5ce3a67cd..39a0c0cebedc 100644 --- a/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java +++ b/it/truthmatchers/src/main/java/org/apache/beam/it/truthmatchers/RecordsSubject.java @@ -81,7 +81,7 @@ public void hasRecordSubset(Map subset) { Map expected = convertMapToTreeMap(subset); for (Map candidate : actual) { boolean match = true; - for (Entry entry : subset.entrySet()) { + for (Map.Entry entry : subset.entrySet()) { if (!candidate.containsKey(entry.getKey()) || !candidate.get(entry.getKey()).equals(entry.getValue())) { match = false; diff --git a/learning/tour-of-beam/frontend/pubspec.lock b/learning/tour-of-beam/frontend/pubspec.lock index 5cde8a54211f..49bdc9ef95a8 100644 --- a/learning/tour-of-beam/frontend/pubspec.lock +++ b/learning/tour-of-beam/frontend/pubspec.lock @@ -45,10 +45,10 @@ packages: dependency: transitive description: name: archive - sha256: "0c8368c9b3f0abbc193b9d6133649a614204b528982bebc7026372d61677ce3a" + sha256: "49b1fad315e57ab0bbc15bcbb874e83116a1d78f77ebd500a4af6c9407d6b28e" url: "https://pub.dev" source: hosted - version: "3.3.7" + version: "3.3.8" args: dependency: transitive description: @@ -189,10 +189,10 @@ packages: dependency: "direct main" description: name: collection - sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c" + sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 url: "https://pub.dev" source: hosted - version: "1.17.1" + version: "1.17.2" color: dependency: transitive description: @@ -656,10 +656,10 @@ packages: dependency: transitive description: name: intl - sha256: a3715e3bc90294e971cb7dc063fbf3cd9ee0ebf8604ffeafabd9e6f16abbdbe6 + sha256: "3bc132a9dbce73a7e4a21a17d06e1878839ffbf975568bc875c60537824b0c4d" url: "https://pub.dev" source: hosted - version: "0.18.0" + version: "0.18.1" io: dependency: transitive description: @@ -728,18 +728,18 @@ packages: dependency: transitive description: name: matcher - sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb" + sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e" url: "https://pub.dev" source: hosted - version: "0.12.15" + version: "0.12.16" material_color_utilities: dependency: transitive description: name: material_color_utilities - sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724 + sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41" url: "https://pub.dev" source: hosted - version: "0.2.0" + version: "0.5.0" meta: dependency: transitive description: @@ -1115,10 +1115,10 @@ packages: dependency: transitive description: name: source_span - sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250 + sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c" url: "https://pub.dev" source: hosted - version: "1.9.1" + version: "1.10.0" stack_trace: dependency: transitive description: @@ -1171,26 +1171,26 @@ packages: dependency: transitive description: name: test - sha256: "3dac9aecf2c3991d09b9cdde4f98ded7b30804a88a0d7e4e7e1678e78d6b97f4" + sha256: "13b41f318e2a5751c3169137103b60c584297353d4b1761b66029bae6411fe46" url: "https://pub.dev" source: hosted - version: "1.24.1" + version: "1.24.3" test_api: dependency: transitive description: name: test_api - sha256: 
eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb + sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.6.0" test_core: dependency: transitive description: name: test_core - sha256: "5138dbffb77b2289ecb12b81c11ba46036590b72a64a7a90d6ffb880f1a29e93" + sha256: "99806e9e6d95c7b059b7a0fc08f07fc53fabe54a829497f0d9676299f1e8637e" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.5.3" time: dependency: transitive description: @@ -1347,10 +1347,10 @@ packages: dependency: transitive description: name: vm_service - sha256: f6deed8ed625c52864792459709183da231ebf66ff0cf09e69b573227c377efe + sha256: c620a6f783fa22436da68e42db7ebbf18b8c44b9a46ab911f666ff09ffd9153f url: "https://pub.dev" source: hosted - version: "11.3.0" + version: "11.7.1" watcher: dependency: transitive description: @@ -1359,6 +1359,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.0" + web: + dependency: transitive + description: + name: web + sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 + url: "https://pub.dev" + source: hosted + version: "0.1.4-beta" web_socket_channel: dependency: transitive description: diff --git a/playground/frontend/pubspec.lock b/playground/frontend/pubspec.lock index 425e33e4de7d..e49850e7a820 100644 --- a/playground/frontend/pubspec.lock +++ b/playground/frontend/pubspec.lock @@ -45,10 +45,10 @@ packages: dependency: transitive description: name: archive - sha256: "0c8368c9b3f0abbc193b9d6133649a614204b528982bebc7026372d61677ce3a" + sha256: "49b1fad315e57ab0bbc15bcbb874e83116a1d78f77ebd500a4af6c9407d6b28e" url: "https://pub.dev" source: hosted - version: "3.3.7" + version: "3.3.8" args: dependency: transitive description: @@ -189,10 +189,10 @@ packages: dependency: "direct main" description: name: collection - sha256: "4a07be6cb69c84d677a6c3096fcf960cc3285a8330b4603e0d463d15d9bd934c" + sha256: f092b211a4319e98e5ff58223576de6c2803db36221657b46c82574721240687 url: "https://pub.dev" source: hosted - version: "1.17.1" + version: "1.17.2" connectivity_plus: dependency: transitive description: @@ -600,10 +600,10 @@ packages: dependency: "direct main" description: name: intl - sha256: a3715e3bc90294e971cb7dc063fbf3cd9ee0ebf8604ffeafabd9e6f16abbdbe6 + sha256: "3bc132a9dbce73a7e4a21a17d06e1878839ffbf975568bc875c60537824b0c4d" url: "https://pub.dev" source: hosted - version: "0.18.0" + version: "0.18.1" io: dependency: transitive description: @@ -672,18 +672,18 @@ packages: dependency: transitive description: name: matcher - sha256: "6501fbd55da300384b768785b83e5ce66991266cec21af89ab9ae7f5ce1c4cbb" + sha256: "1803e76e6653768d64ed8ff2e1e67bea3ad4b923eb5c56a295c3e634bad5960e" url: "https://pub.dev" source: hosted - version: "0.12.15" + version: "0.12.16" material_color_utilities: dependency: transitive description: name: material_color_utilities - sha256: d92141dc6fe1dad30722f9aa826c7fbc896d021d792f80678280601aff8cf724 + sha256: "9528f2f296073ff54cb9fee677df673ace1218163c3bc7628093e7eed5203d41" url: "https://pub.dev" source: hosted - version: "0.2.0" + version: "0.5.0" meta: dependency: transitive description: @@ -1059,10 +1059,10 @@ packages: dependency: transitive description: name: source_span - sha256: dd904f795d4b4f3b870833847c461801f6750a9fa8e61ea5ac53f9422b31f250 + sha256: "53e943d4206a5e30df338fd4c6e7a077e02254531b138a15aec3bd143c1a8b3c" url: "https://pub.dev" source: hosted - version: "1.9.1" + version: "1.10.0" stack_trace: dependency: 
transitive description: @@ -1115,26 +1115,26 @@ packages: dependency: transitive description: name: test - sha256: "3dac9aecf2c3991d09b9cdde4f98ded7b30804a88a0d7e4e7e1678e78d6b97f4" + sha256: "13b41f318e2a5751c3169137103b60c584297353d4b1761b66029bae6411fe46" url: "https://pub.dev" source: hosted - version: "1.24.1" + version: "1.24.3" test_api: dependency: transitive description: name: test_api - sha256: eb6ac1540b26de412b3403a163d919ba86f6a973fe6cc50ae3541b80092fdcfb + sha256: "75760ffd7786fffdfb9597c35c5b27eaeec82be8edfb6d71d32651128ed7aab8" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.6.0" test_core: dependency: transitive description: name: test_core - sha256: "5138dbffb77b2289ecb12b81c11ba46036590b72a64a7a90d6ffb880f1a29e93" + sha256: "99806e9e6d95c7b059b7a0fc08f07fc53fabe54a829497f0d9676299f1e8637e" url: "https://pub.dev" source: hosted - version: "0.5.1" + version: "0.5.3" timing: dependency: transitive description: @@ -1299,10 +1299,10 @@ packages: dependency: transitive description: name: vm_service - sha256: f6deed8ed625c52864792459709183da231ebf66ff0cf09e69b573227c377efe + sha256: c620a6f783fa22436da68e42db7ebbf18b8c44b9a46ab911f666ff09ffd9153f url: "https://pub.dev" source: hosted - version: "11.3.0" + version: "11.7.1" watcher: dependency: transitive description: @@ -1311,6 +1311,14 @@ packages: url: "https://pub.dev" source: hosted version: "1.1.0" + web: + dependency: transitive + description: + name: web + sha256: dc8ccd225a2005c1be616fe02951e2e342092edf968cf0844220383757ef8f10 + url: "https://pub.dev" + source: hosted + version: "0.1.4-beta" web_browser_detect: dependency: transitive description: diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java index 02f6f9acd7a6..17aea34045ff 100644 --- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java +++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java @@ -1755,7 +1755,7 @@ void maybeRecordPCollectionWithAutoSharding(PCollection pcol) { options.isEnableStreamingEngine(), "Runner determined sharding not available in Dataflow for GroupIntoBatches for" + " non-Streaming-Engine jobs. 
In order to use runner determined sharding, please use" - + " --streaming --enable_streaming_engine"); + + " --streaming --experiments=enable_streaming_engine"); pCollectionsPreservedKeys.add(pcol); pcollectionsRequiringAutoSharding.add(pcol); } diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java index af6d890cf51b..a34303d92552 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/SamzaPipelineOptions.java @@ -173,4 +173,10 @@ public ExecutorService create(PipelineOptions options) { new ThreadFactoryBuilder().setNameFormat("Process Element Thread-%d").build()); } } + + @Description("Enable/disable late data dropping in GroupByKey/Combine transforms") + @Default.Boolean(false) + boolean getDropLateData(); + + void setDropLateData(boolean dropLateData); } diff --git a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java index 3ecd406da615..1b19275dd967 100644 --- a/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java +++ b/runners/samza/src/main/java/org/apache/beam/runners/samza/runtime/GroupByKeyOp.java @@ -180,11 +180,19 @@ public TimerInternals timerInternals() { DoFnSchemaInformation.create(), Collections.emptyMap()); + final DoFnRunner, KV> dropLateDataRunner = + pipelineOptions.getDropLateData() + ? DoFnRunners.lateDataDroppingRunner( + doFnRunner, keyedInternals.timerInternals(), windowingStrategy) + : doFnRunner; + final SamzaExecutionContext executionContext = (SamzaExecutionContext) context.getApplicationContainerContext(); - this.fnRunner = + final DoFnRunner, KV> doFnRunnerWithMetrics = DoFnRunnerWithMetrics.wrap( - doFnRunner, executionContext.getMetricsContainer(), transformFullName); + dropLateDataRunner, executionContext.getMetricsContainer(), transformFullName); + + this.fnRunner = new DoFnRunnerWithKeyedInternals<>(doFnRunnerWithMetrics, keyedInternals); } @Override diff --git a/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java new file mode 100644 index 000000000000..8670d9a46eac --- /dev/null +++ b/runners/samza/src/test/java/org/apache/beam/runners/samza/runtime/GroupByKeyOpTest.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.runners.samza.runtime; + +import java.io.Serializable; +import java.util.Arrays; +import org.apache.beam.sdk.coders.KvCoder; +import org.apache.beam.sdk.coders.StringUtf8Coder; +import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.options.PipelineOptionsFactory; +import org.apache.beam.sdk.testing.PAssert; +import org.apache.beam.sdk.testing.TestPipeline; +import org.apache.beam.sdk.testing.TestStream; +import org.apache.beam.sdk.transforms.Combine; +import org.apache.beam.sdk.transforms.Sum; +import org.apache.beam.sdk.transforms.windowing.FixedWindows; +import org.apache.beam.sdk.transforms.windowing.Window; +import org.apache.beam.sdk.values.KV; +import org.apache.beam.sdk.values.PCollection; +import org.apache.beam.sdk.values.TimestampedValue; +import org.joda.time.Duration; +import org.joda.time.Instant; +import org.junit.Rule; +import org.junit.Test; + +/** Tests for GroupByKeyOp. */ +public class GroupByKeyOpTest implements Serializable { + @Rule + public final transient TestPipeline pipeline = + TestPipeline.fromOptions( + PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner").create()); + + @Rule + public final transient TestPipeline dropLateDataPipeline = + TestPipeline.fromOptions( + PipelineOptionsFactory.fromArgs("--runner=TestSamzaRunner", "--dropLateData=true") + .create()); + + @Test + public void testDefaultGbk() { + TestStream.Builder<Integer> testStream = + TestStream.create(VarIntCoder.of()) + .addElements(TimestampedValue.of(1, new Instant(1000))) + .addElements(TimestampedValue.of(2, new Instant(2000))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(10, new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection<Integer> aggregated = + pipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + Window.into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); + + PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(3, 10)); + + pipeline.run().waitUntilFinish(); + } + + @Test + public void testDropLateDataNonKeyed() { + TestStream.Builder<Integer> testStream = + TestStream.create(VarIntCoder.of()) + .addElements(TimestampedValue.of(1, new Instant(1000))) + .addElements(TimestampedValue.of(2, new Instant(2000))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(10, new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection<Integer> aggregated = + dropLateDataPipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply( + Window.into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Combine.globally(Sum.ofIntegers()).withoutDefaults()); + + PAssert.that(aggregated).containsInAnyOrder(3); + + dropLateDataPipeline.run().waitUntilFinish(); + } + + @Test + public void testDropLateDataKeyed() { + TestStream.Builder<KV<String, Integer>> testStream = + TestStream.create(KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of())) + .addElements(TimestampedValue.of(KV.of("a", 1), new Instant(1000))) + .addElements(TimestampedValue.of(KV.of("b", 2), new Instant(2000))) + .addElements(TimestampedValue.of(KV.of("a", 3), new Instant(2500))) + .advanceWatermarkTo(new Instant(3000)) + .addElements(TimestampedValue.of(KV.of("a", 10), new Instant(1000))) + .advanceWatermarkTo(new Instant(10000)); + + PCollection<KV<String, Integer>> aggregated = + dropLateDataPipeline + .apply(testStream.advanceWatermarkToInfinity()) + .apply(
Window.<KV<String, Integer>>into(FixedWindows.of(Duration.standardSeconds(3))) + .accumulatingFiredPanes()) + .apply(Sum.integersPerKey()); + + PAssert.that(aggregated).containsInAnyOrder(Arrays.asList(KV.of("a", 4), KV.of("b", 2))); + + dropLateDataPipeline.run().waitUntilFinish(); + } +} diff --git a/sdks/go.mod b/sdks/go.mod index 5e91aea021f8..53596c5d207d 100644 --- a/sdks/go.mod +++ b/sdks/go.mod @@ -31,9 +31,9 @@ require ( cloud.google.com/go/spanner v1.49.0 cloud.google.com/go/storage v1.33.0 github.com/aws/aws-sdk-go-v2 v1.21.0 - github.com/aws/aws-sdk-go-v2/config v1.18.39 - github.com/aws/aws-sdk-go-v2/credentials v1.13.37 - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.83 + github.com/aws/aws-sdk-go-v2/config v1.18.40 + github.com/aws/aws-sdk-go-v2/credentials v1.13.38 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84 github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5 github.com/aws/smithy-go v1.14.2 github.com/docker/go-connections v0.4.0 @@ -47,7 +47,7 @@ require ( github.com/linkedin/goavro/v2 v2.12.0 github.com/proullon/ramsql v0.1.2 github.com/spf13/cobra v1.7.0 - github.com/testcontainers/testcontainers-go v0.23.0 + github.com/testcontainers/testcontainers-go v0.24.0 github.com/tetratelabs/wazero v1.5.0 github.com/xitongsys/parquet-go v1.6.2 github.com/xitongsys/parquet-go-source v0.0.0-20220315005136-aec0fe3e777c @@ -57,9 +57,9 @@ require ( golang.org/x/sync v0.3.0 golang.org/x/sys v0.12.0 golang.org/x/text v0.13.0 - google.golang.org/api v0.140.0 + google.golang.org/api v0.142.0 google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93 - google.golang.org/grpc v1.58.0 + google.golang.org/grpc v1.58.1 google.golang.org/protobuf v1.31.0 gopkg.in/retry.v1 v1.0.3 gopkg.in/yaml.v2 v2.4.0 @@ -71,7 +71,18 @@ require ( golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 ) -require dario.cat/mergo v1.0.0 // indirect +require ( + dario.cat/mergo v1.0.0 // indirect + github.com/Microsoft/hcsshim v0.11.0 // indirect + github.com/go-ole/go-ole v1.2.6 // indirect + github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect + github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect + github.com/shirou/gopsutil/v3 v3.23.7 // indirect + github.com/shoenig/go-m1cpu v0.1.6 // indirect + github.com/tklauser/go-sysconf v0.3.11 // indirect + github.com/tklauser/numcpus v0.6.0 // indirect + github.com/yusufpapurcu/wmi v1.2.3 // indirect +) require ( cloud.google.com/go v0.110.7 // indirect @@ -96,18 +107,18 @@ require ( github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.36 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.35 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.15.4 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.13.6 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.6 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.14.0 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.22.0 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/census-instrumentation/opencensus-proto v0.4.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe // indirect github.com/cncf/xds/go
v0.0.0-20230607035331-e9ce68804cb4 // indirect - github.com/containerd/containerd v1.7.3 // indirect + github.com/containerd/containerd v1.7.6 // indirect github.com/cpuguy83/dockercfg v0.3.1 // indirect github.com/docker/distribution v2.8.2+incompatible // indirect - github.com/docker/docker v24.0.5+incompatible // indirect; but required to resolve issue docker has with go1.20 + github.com/docker/docker v24.0.6+incompatible // but required to resolve issue docker has with go1.20 github.com/docker/go-units v0.5.0 // indirect github.com/envoyproxy/go-control-plane v0.11.1 // indirect github.com/envoyproxy/protoc-gen-validate v1.0.2 // indirect @@ -160,5 +171,5 @@ require ( golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect google.golang.org/appengine v1.6.7 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20230913181813-007df8e322eb // indirect ) diff --git a/sdks/go.sum b/sdks/go.sum index c30891294dbd..502fdf1e8892 100644 --- a/sdks/go.sum +++ b/sdks/go.sum @@ -48,7 +48,7 @@ cloud.google.com/go/storage v1.33.0/go.mod h1:Hhh/dogNRGca7IWv1RC2YqEn0c0G77ctA/ dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= -github.com/AdaLogics/go-fuzz-headers v0.0.0-20230106234847-43070de90fa1 h1:EKPd1INOIyr5hWOWhvpmQpY6tKjeG0hT1s3AMC/9fic= +github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 h1:bvDV9vkmnHYOMsOr4WLk+Vo07yKIzd94sVoIqshQ4bU= github.com/Azure/azure-pipeline-go v0.2.3/go.mod h1:x841ezTBIMG6O3lAcl8ATHnsOPVl2bqk7S3ta6S6u4k= github.com/Azure/azure-storage-blob-go v0.14.0/go.mod h1:SMqIBi+SuiQH32bvyjngEewEeXoPfKMgWlBDaYf6fck= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= @@ -64,7 +64,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migciow= github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= -github.com/Microsoft/hcsshim v0.10.0-rc.8 h1:YSZVvlIIDD1UxQpJp0h+dnpLUw+TrY0cx8obKsp3bek= +github.com/Microsoft/hcsshim v0.11.0 h1:7EFNIY4igHEXUdj1zXgAyU3fLc7QfOKHbkldRVTBdiM= +github.com/Microsoft/hcsshim v0.11.0/go.mod h1:OEthFdQv/AD2RAdzR6Mm1N1KPCztGKDurW1Z8b8VGMM= github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/apache/arrow/go/arrow v0.0.0-20200730104253-651201b0f516 h1:byKBBF2CKWBjjA4J1ZL2JXttJULvWSl50LegTyRZ728= @@ -85,17 +86,17 @@ github.com/aws/aws-sdk-go-v2 v1.21.0/go.mod h1:/RfNgGmRxI+iFOB1OeJUyxiU+9s88k3pf github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.13 h1:OPLEkmhXf6xFPiz0bLeDArZIDx1NNS4oJyG4nv3Gct0= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.13/go.mod h1:gpAbvyDGQFozTEmlTFO8XcQKHzubdq0LzRyJpG6MiXM= github.com/aws/aws-sdk-go-v2/config v1.5.0/go.mod 
h1:RWlPOAW3E3tbtNAqTwvSW54Of/yP3oiZXMI0xfUdjyA= -github.com/aws/aws-sdk-go-v2/config v1.18.39 h1:oPVyh6fuu/u4OiW4qcuQyEtk7U7uuNBmHmJSLg1AJsQ= -github.com/aws/aws-sdk-go-v2/config v1.18.39/go.mod h1:+NH/ZigdPckFpgB1TRcRuWCB/Kbbvkxc/iNAKTq5RhE= +github.com/aws/aws-sdk-go-v2/config v1.18.40 h1:dbu1llI/nTIL+r6sYHMeVLl99DM8J8/o1I4EPurnhLg= +github.com/aws/aws-sdk-go-v2/config v1.18.40/go.mod h1:JjrCZQwSPGCoZRQzKHyZNNueaKO+kFaEy2sR6mCzd90= github.com/aws/aws-sdk-go-v2/credentials v1.3.1/go.mod h1:r0n73xwsIVagq8RsxmZbGSRQFj9As3je72C2WzUIToc= -github.com/aws/aws-sdk-go-v2/credentials v1.13.37 h1:BvEdm09+ZEh2XtN+PVHPcYwKY3wIeB6pw7vPRM4M9/U= -github.com/aws/aws-sdk-go-v2/credentials v1.13.37/go.mod h1:ACLrdkd4CLZyXOghZ8IYumQbcooAcp2jo/s2xsFH8IM= +github.com/aws/aws-sdk-go-v2/credentials v1.13.38 h1:gDAuCdVlA4lmmgQhvpZlscwicloCqH44vkxLklGkQLA= +github.com/aws/aws-sdk-go-v2/credentials v1.13.38/go.mod h1:sD4G/Ybgp6s89mWIES3Xn97CsRLpxvz9uVSdv0UxY8I= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.3.0/go.mod h1:2LAuqPx1I6jNfaGDucWfA2zqQCYCOMCDHiCOciALyNw= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.11 h1:uDZJF1hu0EVT/4bogChk8DyjSF6fof6uL/0Y26Ma7Fg= github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.11/go.mod h1:TEPP4tENqBGO99KwVpV9MlOX4NSrSLP8u3KRy2CDwA8= github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.3.2/go.mod h1:qaqQiHSrOUVOfKe6fhgQ6UzhxjwqVW8aHNegd6Ws4w4= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.83 h1:wcluDLIQ0uYaxv0fCWQRimbXkPdTgWHUD21j1CzXEwc= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.83/go.mod h1:nGCBuon134gW67yAtxHKV73x+tAcY/xG4ZPNPDB1h/I= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84 h1:LENrVcqnWTyI8fbIUCvxAMe+fXbREIaXzcR8WPwco1U= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.84/go.mod h1:LHxCiYAStsgps4srke7HujyADd504MSkNXjLpOtICTc= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41 h1:22dGT7PneFMx4+b3pz7lMTRyN8ZKH7M2cW4GP9yUS2g= github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.41/go.mod h1:CrObHAuPneJBlfEJ5T3szXOUkLEThaGfvnhTf33buas= github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.35 h1:SijA0mgjV8E+8G45ltVHs0fvKpTj8xmZJ3VwhGKtUSI= @@ -120,13 +121,13 @@ github.com/aws/aws-sdk-go-v2/service/s3 v1.11.1/go.mod h1:XLAGFrEjbvMCLvAtWLLP32 github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5 h1:A42xdtStObqy7NGvzZKpnyNXvoOmm+FENobZ0/ssHWk= github.com/aws/aws-sdk-go-v2/service/s3 v1.38.5/go.mod h1:rDGMZA7f4pbmTtPOk5v5UM2lmX6UAbRnMDJeDvnH7AM= github.com/aws/aws-sdk-go-v2/service/sso v1.3.1/go.mod h1:J3A3RGUvuCZjvSuZEcOpHDnzZP/sKbhDWV2T1EOzFIM= -github.com/aws/aws-sdk-go-v2/service/sso v1.13.6 h1:2PylFCfKCEDv6PeSN09pC/VUiRd10wi1VfHG5FrW0/g= -github.com/aws/aws-sdk-go-v2/service/sso v1.13.6/go.mod h1:fIAwKQKBFu90pBxx07BFOMJLpRUGu8VOzLJakeY+0K4= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.6 h1:pSB560BbVj9ZlJZF4WYj5zsytWHWKxg+NgyGV4B2L58= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.15.6/go.mod h1:yygr8ACQRY2PrEcy3xsUI357stq2AxnFM6DIsR9lij4= +github.com/aws/aws-sdk-go-v2/service/sso v1.14.0 h1:AR/hlTsCyk1CwlyKnPFvIMvnONydRjDDRT9OGb0i+/g= +github.com/aws/aws-sdk-go-v2/service/sso v1.14.0/go.mod h1:fIAwKQKBFu90pBxx07BFOMJLpRUGu8VOzLJakeY+0K4= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0 h1:vbgiXuhtn49+erlPrgIvQ+J32rg1HseaPf8lEpKbkxQ= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.16.0/go.mod h1:yygr8ACQRY2PrEcy3xsUI357stq2AxnFM6DIsR9lij4= 
github.com/aws/aws-sdk-go-v2/service/sts v1.6.0/go.mod h1:q7o0j7d7HrJk/vr9uUt3BVRASvcU7gYZB9PUgPiByXg= -github.com/aws/aws-sdk-go-v2/service/sts v1.21.5 h1:CQBFElb0LS8RojMJlxRSo/HXipvTZW2S44Lt9Mk2aYQ= -github.com/aws/aws-sdk-go-v2/service/sts v1.21.5/go.mod h1:VC7JDqsqiwXukYEDjoHh9U0fOJtNWh04FPQz4ct4GGU= +github.com/aws/aws-sdk-go-v2/service/sts v1.22.0 h1:s4bioTgjSFRwOoyEFzAVCmFmoowBgjTR8gkrF/sQ4wk= +github.com/aws/aws-sdk-go-v2/service/sts v1.22.0/go.mod h1:VC7JDqsqiwXukYEDjoHh9U0fOJtNWh04FPQz4ct4GGU= github.com/aws/smithy-go v1.6.0/go.mod h1:SObp3lf9smib00L/v3U2eAKG8FyQ7iLrJnQiAmR5n+E= github.com/aws/smithy-go v1.14.2 h1:MJU9hqBGbvWZdApzpvoF2WAIJDbtjK2NDJSiJP7HblQ= github.com/aws/smithy-go v1.14.2/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA= @@ -151,8 +152,8 @@ github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+g github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/colinmarc/hdfs/v2 v2.1.1/go.mod h1:M3x+k8UKKmxtFu++uAZ0OtDU8jR3jnaZIAc6yK4Ue0c= github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= -github.com/containerd/containerd v1.7.3 h1:cKwYKkP1eTj54bP3wCdXXBymmKRQMrWjkLSWZZJDa8o= -github.com/containerd/containerd v1.7.3/go.mod h1:32FOM4/O0RkNg7AjQj3hDzN9cUGtu+HMvaKUNiqCZB8= +github.com/containerd/containerd v1.7.6 h1:oNAVsnhPoy4BTPQivLgTzI9Oleml9l/+eYIDYXRCYo8= +github.com/containerd/containerd v1.7.6/go.mod h1:SY6lrkkuJT40BVNO37tlYTSnKJnP5AXBc0fhx0q+TJ4= github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/cpuguy83/dockercfg v0.3.1 h1:/FpZ+JaygUR/lZP2NlFI2DVfrOEMAIKP5wWEJdoYe9E= github.com/cpuguy83/dockercfg v0.3.1/go.mod h1:sugsbF4//dDlL/i+S+rtpIWp+5h0BHJHfjj5/jFyUJc= @@ -160,15 +161,14 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:ma github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= -github.com/cyphar/filepath-securejoin v0.2.3 h1:YX6ebbZCZP7VkM3scTTokDgBL2TY741X51MTk3ycuNI= github.com/cyphar/filepath-securejoin v0.2.3/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/docker/distribution v2.8.2+incompatible h1:T3de5rq0dB1j30rp0sA2rER+m322EBzniBPB6ZIzuh8= github.com/docker/distribution v2.8.2+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v24.0.5+incompatible h1:WmgcE4fxyI6EEXxBRxsHnZXrO1pQ3smi0k/jho4HLeY= -github.com/docker/docker v24.0.5+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/docker v24.0.6+incompatible h1:hceabKCtUgDqPu+qm0NgsaXf28Ljf4/pWFL7xjWWDgE= +github.com/docker/docker v24.0.6+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= 
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= @@ -197,6 +197,8 @@ github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gorp/gorp v2.2.0+incompatible h1:xAUh4QgEeqPPhK3vxZN+bzrim1z5Av6q837gtjUlshc= +github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= +github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= @@ -332,6 +334,8 @@ github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/linkedin/goavro/v2 v2.12.0 h1:rIQQSj8jdAUlKQh6DttK8wCRv4t4QO09g1C4aBWXslg= github.com/linkedin/goavro/v2 v2.12.0/go.mod h1:KXx+erlq+RPlGSPmLF7xGo6SAbh8sCQ53x064+ioxhk= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 h1:6E+4a0GO5zZEnZ81pIr0yLvtUWk2if982qA3F3QD6H4= +github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0/go.mod h1:zJYVVT2jmtg6P3p1VtQj7WsuWi/y4VnjVBn7F8KPB3I= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mattn/go-ieproxy v0.0.1/go.mod h1:pYabZ6IHcRpFh7vIaLfK7rdcWgFEb3SFJ6/gNWuh88E= @@ -375,6 +379,8 @@ github.com/pkg/xattr v0.4.9 h1:5883YPCtkSd8LFbs13nXplj9g9tlrwoJRjgpgMu1/fE= github.com/pkg/xattr v0.4.9/go.mod h1:di8WF84zAKk8jzR1UBTEWh9AUlIZZ7M/JNt8e9B6ktU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c h1:ncq/mPwQF4JjgDlrVEn3C11VoGHZN7m8qihwgMEtzYw= +github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c/go.mod h1:OmDBASR4679mdNQnz2pUhc2G8CO2JrUAVFDRBDP/hJE= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/proullon/ramsql v0.1.2 h1:PTtsy2iml/CW3Lsopyr86dlIs7JyYEmfLrfYvQVXD2U= github.com/proullon/ramsql v0.1.2/go.mod h1:CFGqeQHQpdRfWqYmWD3yXqPTEaHkF4zgXy1C6qDWc9E= @@ -390,6 +396,12 @@ github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46/go.mod h1:uAQ5P github.com/seccomp/libseccomp-golang v0.9.2-0.20220502022130-f33da4d89646/go.mod h1:JA8cRccbGaA1s33RQf7Y1+q9gHmZX1yB/z9WDN1C6fg= github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63 h1:J6qvD6rbmOil46orKqJaRPG+zTpoGlBTUdyv8ki63L0= github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63/go.mod h1:n+VKSARF5y/tS9XFSP7vWDfS+GUC5vs/YT7M5XDTUEM= +github.com/shirou/gopsutil/v3 v3.23.7 h1:C+fHO8hfIppoJ1WdsVm1RoI0RwXoNdfTK7yWXV0wVj4= +github.com/shirou/gopsutil/v3 v3.23.7/go.mod h1:c4gnmoRC0hQuaLqvxnx1//VXQ0Ms/X9UnJF8pddY5z4= +github.com/shoenig/go-m1cpu v0.1.6 
h1:nxdKQNcEB6vzgA2E2bvzKIYRuNj7XNJ4S/aRSwKzFtM= +github.com/shoenig/go-m1cpu v0.1.6/go.mod h1:1JJMcUBvfNwpq05QDQVAnx3gUHr9IYF7GNg9SUEw2VQ= +github.com/shoenig/test v0.6.4 h1:kVTaSd7WLz5WZ2IaoM0RSzRsUD+m8wRR+5qvntpn4LU= +github.com/shoenig/test v0.6.4/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= @@ -414,11 +426,16 @@ github.com/stretchr/testify v1.7.5/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= -github.com/testcontainers/testcontainers-go v0.23.0 h1:ERYTSikX01QczBLPZpqsETTBO7lInqEP349phDOVJVs= -github.com/testcontainers/testcontainers-go v0.23.0/go.mod h1:3gzuZfb7T9qfcH2pHpV4RLlWrPjeWNQah6XlYQ32c4I= +github.com/testcontainers/testcontainers-go v0.24.0 h1:eqkq6nNIPVrqpXNyn/s5jDBqPGuWtND2hOMEBrUULIw= +github.com/testcontainers/testcontainers-go v0.24.0/go.mod h1:MGBiAkCm86yXQoCiipmQCqZLVdk1uFqtMqaU1Or0MRk= github.com/tetratelabs/wazero v1.5.0 h1:Yz3fZHivfDiZFUXnWMPUoiW7s8tC1sjdBtlJn08qYa0= github.com/tetratelabs/wazero v1.5.0/go.mod h1:0U0G41+ochRKoPKCJlh0jMg1CHkyfK8kDqiirMmKY8A= +github.com/tklauser/go-sysconf v0.3.11 h1:89WgdJhk5SNwJfu+GKyYveZ4IaJ7xAkecBo+KdJV0CM= +github.com/tklauser/go-sysconf v0.3.11/go.mod h1:GqXfhXY3kiPa0nAXPDIQIWzJbMCB7AmcWpGR8lSZfqI= +github.com/tklauser/numcpus v0.6.0 h1:kebhY2Qt+3U6RNK7UqpYNA+tJ23IBEGKkB7JQBfDYms= +github.com/tklauser/numcpus v0.6.0/go.mod h1:FEZLMke0lhOUG6w2JadTzp0a+Nl8PF/GFkQ5UVIcaL4= github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= @@ -440,6 +457,8 @@ github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7Jul github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/yusufpapurcu/wmi v1.2.3 h1:E1ctvB7uKFMOJw3fdOW32DwGE9I7t++CRUEMKvFoFiw= +github.com/yusufpapurcu/wmi v1.2.3/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= @@ -551,6 +570,7 @@ golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys 
v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191112214154-59a1497f0cea/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -565,6 +585,7 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200828194041-157a740278f4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -576,7 +597,9 @@ golang.org/x/sys v0.0.0-20220408201424-a24fb2fb8a0f/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -647,8 +670,8 @@ google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsb google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= -google.golang.org/api v0.140.0 h1:CaXNdYOH5oQQI7l6iKTHHiMTdxZca4/02hRg2U8c2hM= -google.golang.org/api v0.140.0/go.mod h1:aGbCiFgtwb2P6badchFbSBUurV6oR5d50Af4iNJtDdI= +google.golang.org/api v0.142.0 h1:mf+7EJ94fi5ZcnpPy+m0Yv2dkz8bKm+UL0snTCuwXlY= +google.golang.org/api v0.142.0/go.mod h1:zJAN5o6HRqR7O+9qJUFOWrZkYE66RH+efPBdTLA4xBA= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -678,8 +701,8 @@ google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93 h1:zv6ieVm8jNcN33A google.golang.org/genproto v0.0.0-20230821184602-ccc8af3d0e93/go.mod 
h1:yZTlhN0tQnXo3h00fuXNCxJdLdIdnVFVBaRJ5LWBbw4=
 google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5 h1:nIgk/EEq3/YlnmVVXVnm14rC2oxgs1o0ong4sD/rd44=
 google.golang.org/genproto/googleapis/api v0.0.0-20230803162519-f966b187b2e5/go.mod h1:5DZzOUPCLYL3mNkQ0ms0F3EuUNZ7py1Bqeq6sxzI7/Q=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832 h1:o4LtQxebKIJ4vkzyhtD2rfUNZ20Zf0ik5YVP5E7G7VE=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20230911183012-2d3300fd4832/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20230913181813-007df8e322eb h1:Isk1sSH7bovx8Rti2wZK0UZF6oraBDK74uoyLEEVFN0=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20230913181813-007df8e322eb/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
 google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
@@ -689,8 +712,8 @@ google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8
 google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
 google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
 google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
-google.golang.org/grpc v1.58.0 h1:32JY8YpPMSR45K+c3o6b8VL73V+rR8k+DeMIr4vRH8o=
-google.golang.org/grpc v1.58.0/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
+google.golang.org/grpc v1.58.1 h1:OL+Vz23DTtrrldqHK49FUOPHyY75rvFqJfXC84NYW58=
+google.golang.org/grpc v1.58.1/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0=
 google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
 google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
 google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
diff --git a/sdks/go/README.md b/sdks/go/README.md
index a3b03c2e6184..7734d58d9eb9 100644
--- a/sdks/go/README.md
+++ b/sdks/go/README.md
@@ -131,6 +131,7 @@ Executing all unit tests for the SDK is possible from the `\sdks\go`
 To test your change as Jenkins would execute it from a PR, from the beam root directory, run:
  * `./gradlew :sdks:go:goTest` executes the unit tests.
+ * `./gradlew :sdks:go:test:prismValidatesRunner` validates the SDK against the Go Prism runner as a standalone binary, with containers.
  * `./gradlew :sdks:go:test:ulrValidatesRunner` validates the SDK against the Portable Python runner.
  * `./gradlew :sdks:go:test:flinkValidatesRunner` validates the SDK against the Flink runner.
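The prism.go diff that follows replaces the previously hard-coded job management and web UI ports with flags. A minimal sketch of the same wiring, using only the identifiers visible in that diff (`prism.Options`, `prism.CreateJobServer`, `prism.CreateWebServer`); the import path is assumed from the surrounding file paths and is not confirmed by this change set:

```go
package main

import (
	"context"
	"flag"
	"log"

	"github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism"
)

var (
	jobPort = flag.Int("job_port", 8073, "job management service port")
	webPort = flag.Int("web_port", 8074, "web ui port")
)

func main() {
	flag.Parse()
	ctx := context.Background()
	// Stand up the job service on the configurable port...
	cli, err := prism.CreateJobServer(ctx, prism.Options{Port: *jobPort})
	if err != nil {
		log.Fatalf("error creating job server: %v", err)
	}
	// ...and the web UI on its own configurable port.
	if err := prism.CreateWebServer(ctx, cli, prism.Options{Port: *webPort}); err != nil {
		log.Fatalf("error creating web server: %v", err)
	}
}
```

With both ports behind flags, invocations along the lines of `prism -job_port=9073 -web_port=9074` can avoid port collisions when running multiple instances side by side.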
diff --git a/sdks/go/cmd/prism/prism.go b/sdks/go/cmd/prism/prism.go
index f00a16c9b2d0..804ae0c2ab2d 100644
--- a/sdks/go/cmd/prism/prism.go
+++ b/sdks/go/cmd/prism/prism.go
@@ -30,6 +30,8 @@ import (
 )

 var (
+	jobPort            = flag.Int("job_port", 8073, "specify the job management service port")
+	webPort            = flag.Int("web_port", 8074, "specify the web ui port")
 	jobManagerEndpoint = flag.String("jm_override", "", "set to only stand up a web ui that refers to a separate JobManagement endpoint")
 	serveHTTP          = flag.Bool("serve_http", true, "enable or disable the web ui")
 )
@@ -37,12 +39,12 @@ var (
 func main() {
 	flag.Parse()
 	ctx := context.Background()
-	cli, err := makeJobClient(ctx, *jobManagerEndpoint)
+	cli, err := makeJobClient(ctx, prism.Options{Port: *jobPort}, *jobManagerEndpoint)
 	if err != nil {
 		log.Fatalf("error creating job server: %v", err)
 	}
 	if *serveHTTP {
-		if err := prism.CreateWebServer(ctx, cli, prism.Options{Port: 8074}); err != nil {
+		if err := prism.CreateWebServer(ctx, cli, prism.Options{Port: *webPort}); err != nil {
 			log.Fatalf("error creating web server: %v", err)
 		}
 	} else {
@@ -51,7 +53,7 @@ func main() {
 	}
 }

-func makeJobClient(ctx context.Context, endpoint string) (jobpb.JobServiceClient, error) {
+func makeJobClient(ctx context.Context, opts prism.Options, endpoint string) (jobpb.JobServiceClient, error) {
 	if endpoint != "" {
 		clientConn, err := grpc.DialContext(ctx, endpoint, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
 		if err != nil {
@@ -59,7 +61,7 @@ func makeJobClient(ctx context.Context, endpoint string) (jobpb.JobServiceClient
 		}
 		return jobpb.NewJobServiceClient(clientConn), nil
 	}
-	cli, err := prism.CreateJobServer(ctx, prism.Options{Port: 8073})
+	cli, err := prism.CreateJobServer(ctx, opts)
 	if err != nil {
 		return nil, fmt.Errorf("error creating local job server: %v", err)
 	}
diff --git a/sdks/go/pkg/beam/beam.shims.go b/sdks/go/pkg/beam/beam.shims.go
index 6653fb0129f7..29ebaf2ca681 100644
--- a/sdks/go/pkg/beam/beam.shims.go
+++ b/sdks/go/pkg/beam/beam.shims.go
@@ -25,7 +25,6 @@ import (
 	// Library imports
 	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime"
 	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec"
-	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/graphx/schema"
 	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/sdf"
 	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/typex"
 	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/util/reflectx"
@@ -44,13 +43,6 @@ func init() {
 	runtime.RegisterFunction(schemaDec)
 	runtime.RegisterFunction(schemaEnc)
 	runtime.RegisterFunction(swapKVFn)
-	runtime.RegisterType(reflect.TypeOf((*createFn)(nil)).Elem())
-	schema.RegisterType(reflect.TypeOf((*createFn)(nil)).Elem())
-	runtime.RegisterType(reflect.TypeOf((*reflect.Type)(nil)).Elem())
-	schema.RegisterType(reflect.TypeOf((*reflect.Type)(nil)).Elem())
-	runtime.RegisterType(reflect.TypeOf((*reflectx.Func)(nil)).Elem())
-	schema.RegisterType(reflect.TypeOf((*reflectx.Func)(nil)).Elem())
-	reflectx.RegisterStructWrapper(reflect.TypeOf((*createFn)(nil)).Elem(), wrapMakerCreateFn)
 	reflectx.RegisterFunc(reflect.TypeOf((*func(reflect.Type, []byte) (typex.T, error))(nil)).Elem(), funcMakerReflect۰TypeSliceOfByteГTypex۰TError)
 	reflectx.RegisterFunc(reflect.TypeOf((*func(reflect.Type, typex.T) ([]byte, error))(nil)).Elem(), funcMakerReflect۰TypeTypex۰TГSliceOfByteError)
 	reflectx.RegisterFunc(reflect.TypeOf((*func([]byte, func(typex.T)) error)(nil)).Elem(), funcMakerSliceOfByteEmitTypex۰TГError)
@@ -64,13
+56,6 @@ func init() { exec.RegisterEmitter(reflect.TypeOf((*func(typex.T))(nil)).Elem(), emitMakerTypex۰T) } -func wrapMakerCreateFn(fn any) map[string]reflectx.Func { - dfn := fn.(*createFn) - return map[string]reflectx.Func{ - "ProcessElement": reflectx.MakeFunc(func(a0 []byte, a1 func(typex.T)) error { return dfn.ProcessElement(a0, a1) }), - } -} - type callerReflect۰TypeSliceOfByteГTypex۰TError struct { fn func(reflect.Type, []byte) (typex.T, error) } diff --git a/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go b/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go index aaa049510f52..0a9343199a1c 100644 --- a/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go +++ b/sdks/go/pkg/beam/core/runtime/exec/fullvalue.go @@ -251,6 +251,9 @@ func (s *decodeStream) Read() (*FullValue, error) { } err := s.d.DecodeTo(s.r, &s.ret) if err != nil { + if err == io.EOF { + return nil, io.EOF + } return nil, errors.Wrap(err, "decodeStream value decode failed") } s.next++ @@ -342,6 +345,9 @@ func (s *decodeMultiChunkStream) Read() (*FullValue, error) { if s.chunk == 0 && s.next == 0 { chunk, err := coder.DecodeVarInt(s.r.reader) if err != nil { + if err == io.EOF { + return nil, io.EOF + } return nil, errors.Wrap(err, "decodeMultiChunkStream chunk size decoding failed") } s.chunk = chunk diff --git a/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go b/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go index 4a872e291c6a..c71ead208364 100644 --- a/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go +++ b/sdks/go/pkg/beam/core/runtime/metricsx/metricsx.go @@ -24,6 +24,7 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/coder" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics" pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "golang.org/x/exp/slog" ) // FromMonitoringInfos extracts metrics from monitored states and @@ -139,7 +140,7 @@ func groupByType(p *pipepb.Pipeline, minfos []*pipepb.MonitoringInfo) ( } } if len(errs) > 0 { - log.Printf("Warning: %v errors during metrics processing: %v\n", len(errs), errs) + slog.Debug("errors during metrics processing", "count", len(errs), "errors", errs) } return counters, distributions, gauges, msecs, pcols } diff --git a/sdks/go/pkg/beam/create.go b/sdks/go/pkg/beam/create.go index 91e9f335ef87..d2bd554963ee 100644 --- a/sdks/go/pkg/beam/create.go +++ b/sdks/go/pkg/beam/create.go @@ -112,11 +112,6 @@ func createList(s Scope, values []any, t reflect.Type) (PCollection, error) { // TODO(herohde) 6/26/2017: make 'create' a SDF once supported. See BEAM-2421. 
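An aside on the fullvalue.go hunks above: wrapping an error with `errors.Wrap` hides the `io.EOF` sentinel, so callers that end their read loop on `err == io.EOF` would instead see a spurious decode failure; the fix passes `io.EOF` through untouched. A self-contained sketch of the pattern, with a toy one-byte decoder standing in for Beam's element decoder (none of these names are Beam APIs):

```go
package main

import (
	"errors"
	"fmt"
	"io"
	"strings"
)

// decodeOne mirrors the fix: io.EOF is returned unwrapped so callers can
// detect end-of-stream, while real errors gain context via wrapping.
func decodeOne(r io.Reader) (byte, error) {
	var b [1]byte
	if _, err := io.ReadFull(r, b[:]); err != nil {
		if err == io.EOF {
			return 0, io.EOF // callers rely on err == io.EOF
		}
		return 0, fmt.Errorf("value decode failed: %w", err)
	}
	return b[0], nil
}

func main() {
	r := strings.NewReader("ok")
	for {
		c, err := decodeOne(r)
		if errors.Is(err, io.EOF) {
			break // clean end of stream, not an error
		}
		if err != nil {
			panic(err)
		}
		fmt.Printf("%c\n", c)
	}
}
```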
-func init() {
-	register.DoFn2x1[[]byte, func(T), error]((*createFn)(nil))
-	register.Emitter1[T]()
-}
-
 type createFn struct {
 	Values [][]byte    `json:"values"`
 	Type   EncodedType `json:"type"`
diff --git a/sdks/go/pkg/beam/io/databaseio/writer.go b/sdks/go/pkg/beam/io/databaseio/writer.go
index 4719831581ff..ebd805b080d7 100644
--- a/sdks/go/pkg/beam/io/databaseio/writer.go
+++ b/sdks/go/pkg/beam/io/databaseio/writer.go
@@ -108,7 +108,7 @@ type valueTemplateGenerator struct {

 func (v *valueTemplateGenerator) generate(rowCount int, columnCount int) string {
 	switch v.driver {
-	case "postgres":
+	case "postgres", "pgx":
 		// the point is to generate ($1,$2),($3,$4)
 		valueTemplates := make([]string, rowCount)
 		for i := 0; i < rowCount; i++ {
diff --git a/sdks/go/pkg/beam/io/databaseio/writer_test.go b/sdks/go/pkg/beam/io/databaseio/writer_test.go
index 24f617a9b857..91d7fb9246dd 100644
--- a/sdks/go/pkg/beam/io/databaseio/writer_test.go
+++ b/sdks/go/pkg/beam/io/databaseio/writer_test.go
@@ -39,6 +39,12 @@ func TestValueTemplateGenerator_generate(t *testing.T) {
 			columnCount: 10,
 			expected:    "",
 		},
+		{
+			generator:   &valueTemplateGenerator{"pgx"},
+			rowCount:    4,
+			columnCount: 3,
+			expected:    "($1,$2,$3),($4,$5,$6),($7,$8,$9),($10,$11,$12)",
+		},
 		{
 			generator: &valueTemplateGenerator{"mysql"},
 			rowCount:  4,
diff --git a/sdks/go/pkg/beam/io/parquetio/parquetio.go b/sdks/go/pkg/beam/io/parquetio/parquetio.go
index 9c48d134014b..eb2a611f6836 100644
--- a/sdks/go/pkg/beam/io/parquetio/parquetio.go
+++ b/sdks/go/pkg/beam/io/parquetio/parquetio.go
@@ -96,7 +96,7 @@ func (a *parquetReadFn) ProcessElement(ctx context.Context, file fileio.Readable
 }

 // Write writes a PCollection to a .parquet file.
-// Write expects a type t of struct with parquet tags
+// Write expects elements of a struct type with parquet tags
 // For example:
 //
 //	type Student struct {
@@ -108,7 +108,8 @@ func (a *parquetReadFn) ProcessElement(ctx context.Context, file fileio.Readable
 //		Day     int32 `parquet:"name=day, type=INT32, convertedtype=DATE"`
 //		Ignored int32 //without parquet tag and won't write
 //	}
-func Write(s beam.Scope, filename string, t reflect.Type, col beam.PCollection) {
+func Write(s beam.Scope, filename string, col beam.PCollection) {
+	t := col.Type().Type()
 	s = s.Scope("parquetio.Write")
 	filesystem.ValidateScheme(filename)
 	pre := beam.AddFixedKey(s, col)
diff --git a/sdks/go/pkg/beam/io/parquetio/parquetio_test.go b/sdks/go/pkg/beam/io/parquetio/parquetio_test.go
index 1cceefcef46b..f3c901395609 100644
--- a/sdks/go/pkg/beam/io/parquetio/parquetio_test.go
+++ b/sdks/go/pkg/beam/io/parquetio/parquetio_test.go
@@ -95,7 +95,7 @@ func TestWrite(t *testing.T) {
 	}
 	p, s, sequence := ptest.CreateList(studentList)
 	parquetFile := "./write_student.parquet"
-	Write(s, parquetFile, reflect.TypeOf(Student{}), sequence)
+	Write(s, parquetFile, sequence)
 	t.Cleanup(func() {
 		os.Remove(parquetFile)
 	})
diff --git a/sdks/go/pkg/beam/runners/prism/internal/environments.go b/sdks/go/pkg/beam/runners/prism/internal/environments.go
new file mode 100644
index 000000000000..3a429920fb28
--- /dev/null
+++ b/sdks/go/pkg/beam/runners/prism/internal/environments.go
@@ -0,0 +1,204 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package internal + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + + fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" + pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/urns" + "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker" + "golang.org/x/exp/slog" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + "google.golang.org/protobuf/proto" + + dtyp "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/mount" + dcli "github.com/docker/docker/client" + "github.com/docker/docker/pkg/stdcopy" +) + +// TODO move environment handling to the worker package. + +func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *worker.W) error { + logger := slog.With(slog.String("envID", wk.Env)) + // TODO fix broken abstraction. + // We're starting a worker pool here, because that's the loopback environment. + // It's sort of a mess, largely because of loopback, which has + // a different flow from a provisioned docker container. 
+	e := j.Pipeline.GetComponents().GetEnvironments()[env]
+	switch e.GetUrn() {
+	case urns.EnvExternal:
+		ep := &pipepb.ExternalPayload{}
+		if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), ep); err != nil {
+			logger.Error("unmarshaling external environment payload", "error", err)
+		}
+		go func() {
+			externalEnvironment(ctx, ep, wk)
+			slog.Debug("environment stopped", slog.String("job", j.String()))
+		}()
+		return nil
+	case urns.EnvDocker:
+		dp := &pipepb.DockerPayload{}
+		if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), dp); err != nil {
+			logger.Error("unmarshaling docker environment payload", "error", err)
+		}
+		return dockerEnvironment(ctx, logger, dp, wk, j.ArtifactEndpoint())
+	default:
+		return fmt.Errorf("environment %v with urn %v unimplemented", env, e.GetUrn())
+	}
+}
+
+func externalEnvironment(ctx context.Context, ep *pipepb.ExternalPayload, wk *worker.W) {
+	conn, err := grpc.Dial(ep.GetEndpoint().GetUrl(), grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		panic(fmt.Sprintf("unable to dial sdk worker %v: %v", ep.GetEndpoint().GetUrl(), err))
+	}
+	defer conn.Close()
+	pool := fnpb.NewBeamFnExternalWorkerPoolClient(conn)
+
+	endpoint := &pipepb.ApiServiceDescriptor{
+		Url: wk.Endpoint(),
+	}
+	pool.StartWorker(ctx, &fnpb.StartWorkerRequest{
+		WorkerId:          wk.ID,
+		ControlEndpoint:   endpoint,
+		LoggingEndpoint:   endpoint,
+		ArtifactEndpoint:  endpoint,
+		ProvisionEndpoint: endpoint,
+		Params:            ep.GetParams(),
+	})
+	// Job processing happens here, but orchestrated by other goroutines
+	// This goroutine blocks until the context is cancelled, signalling
+	// that the pool runner should stop the worker.
+	<-ctx.Done()
+
+	// Previous context cancelled so we need a new one
+	// for this request.
+	pool.StopWorker(context.Background(), &fnpb.StopWorkerRequest{
+		WorkerId: wk.ID,
+	})
+	wk.Stop()
+}
+
+func dockerEnvironment(ctx context.Context, logger *slog.Logger, dp *pipepb.DockerPayload, wk *worker.W, artifactEndpoint string) error {
+	logger = logger.With("worker_id", wk.ID, "image", dp.GetContainerImage())
+
+	// TODO consider preserving client?
+	cli, err := dcli.NewClientWithOpts(dcli.FromEnv, dcli.WithAPIVersionNegotiation())
+	if err != nil {
+		return fmt.Errorf("couldn't connect to docker: %w", err)
+	}
+
+	// TODO abstract mounting cloud specific auths better.
+	const gcloudCredsEnv = "GOOGLE_APPLICATION_CREDENTIALS"
+	gcloudCredsFile, ok := os.LookupEnv(gcloudCredsEnv)
+	var mounts []mount.Mount
+	var envs []string
+	if ok {
+		_, err := os.Stat(gcloudCredsFile)
+		// File exists
+		if err == nil {
+			dockerGcloudCredsFile := "/docker_cred_file.json"
+			mounts = append(mounts, mount.Mount{
+				Type:   "bind",
+				Source: gcloudCredsFile,
+				Target: dockerGcloudCredsFile,
+			})
+			credEnv := fmt.Sprintf("%v=%v", gcloudCredsEnv, dockerGcloudCredsFile)
+			envs = append(envs, credEnv)
+		}
+	}
+	if _, _, err := cli.ImageInspectWithRaw(ctx, dp.GetContainerImage()); err != nil {
+		// We don't have a local image, so we should pull it.
+		if rc, err := cli.ImagePull(ctx, dp.GetContainerImage(), dtyp.ImagePullOptions{}); err == nil {
+			// Copy the output, but discard it so we can wait until the image pull is finished.
+ io.Copy(io.Discard, rc) + rc.Close() + } else { + logger.Warn("unable to pull image and it's not local", "error", err) + } + } + + ccr, err := cli.ContainerCreate(ctx, &container.Config{ + Image: dp.GetContainerImage(), + Cmd: []string{ + fmt.Sprintf("--id=%v-%v", wk.JobKey, wk.Env), + fmt.Sprintf("--control_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--artifact_endpoint=%v", artifactEndpoint), + fmt.Sprintf("--provision_endpoint=%v", wk.Endpoint()), + fmt.Sprintf("--logging_endpoint=%v", wk.Endpoint()), + }, + Env: envs, + Tty: false, + }, &container.HostConfig{ + NetworkMode: "host", + Mounts: mounts, + AutoRemove: true, + }, nil, nil, "") + if err != nil { + cli.Close() + return fmt.Errorf("unable to create container image %v with docker for env %v, err: %w", dp.GetContainerImage(), wk.Env, err) + } + containerID := ccr.ID + logger = logger.With("container", containerID) + + if err := cli.ContainerStart(ctx, containerID, dtyp.ContainerStartOptions{}); err != nil { + cli.Close() + return fmt.Errorf("unable to start container image %v with docker for env %v, err: %w", dp.GetContainerImage(), wk.Env, err) + } + + // Start goroutine to wait on container state. + go func() { + defer cli.Close() + defer wk.Stop() + + statusCh, errCh := cli.ContainerWait(ctx, containerID, container.WaitConditionNotRunning) + select { + case <-ctx.Done(): + // Can't use command context, since it's already canceled here. + err := cli.ContainerKill(context.Background(), containerID, "") + if err != nil { + logger.Error("docker container kill error", "error", err) + } + case err := <-errCh: + if err != nil { + logger.Error("docker container wait error", "error", err) + } + case resp := <-statusCh: + logger.Info("docker container has self terminated", "status_code", resp.StatusCode) + + rc, err := cli.ContainerLogs(ctx, containerID, dtyp.ContainerLogsOptions{Details: true, ShowStdout: true, ShowStderr: true}) + if err != nil { + logger.Error("docker container logs error", "error", err) + } + defer rc.Close() + var buf bytes.Buffer + stdcopy.StdCopy(&buf, &buf, rc) + logger.Error("container self terminated", "log", buf.String()) + } + }() + + return nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/execute.go b/sdks/go/pkg/beam/runners/prism/internal/execute.go index b2f9d866603a..c1ac6ea4488c 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/execute.go +++ b/sdks/go/pkg/beam/runners/prism/internal/execute.go @@ -20,11 +20,11 @@ import ( "fmt" "io" "sort" + "sync/atomic" "time" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/graph/mtime" "github.com/apache/beam/sdks/v2/go/pkg/beam/core/runtime/exec" - fnpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/fnexecution_v1" pipepb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/pipeline_v1" "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/engine" "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/jobservices" @@ -32,8 +32,6 @@ import ( "github.com/apache/beam/sdks/v2/go/pkg/beam/runners/prism/internal/worker" "golang.org/x/exp/maps" "golang.org/x/exp/slog" - "google.golang.org/grpc" - "google.golang.org/grpc/credentials/insecure" "google.golang.org/protobuf/proto" ) @@ -49,98 +47,59 @@ func RunPipeline(j *jobservices.Job) { // here, we only want and need the go one, operating // in loopback mode. 
envs := j.Pipeline.GetComponents().GetEnvironments() - if len(envs) != 1 { - j.Failed(fmt.Errorf("unable to execute multi-environment pipelines;\npipeline has environments: %+v", envs)) - return - } - env, _ := getOnlyPair(envs) - wk := worker.New(j.String()+"_"+env, env) // Cheating by having the worker id match the environment id. - go wk.Serve() - timeout := time.Minute - time.AfterFunc(timeout, func() { - if wk.Connected() { + wks := map[string]*worker.W{} + for envID := range envs { + wk, err := makeWorker(envID, j) + if err != nil { + j.Failed(err) return } - err := fmt.Errorf("prism %v didn't get control connection after %v", wk, timeout) - j.Failed(err) - j.CancelFn(err) - }) - + wks[envID] = wk + } // When this function exits, we cancel the context to clear // any related job resources. defer func() { j.CancelFn(fmt.Errorf("runPipeline returned, cleaning up")) }() - go runEnvironment(j.RootCtx, j, env, wk) j.SendMsg("running " + j.String()) j.Running() - err := executePipeline(j.RootCtx, wk, j) - if err != nil { + if err := executePipeline(j.RootCtx, wks, j); err != nil { j.Failed(err) return } j.SendMsg("pipeline completed " + j.String()) - // Stop the worker. - wk.Stop() - j.SendMsg("terminating " + j.String()) j.Done() } -// TODO move environment handling to the worker package. - -func runEnvironment(ctx context.Context, j *jobservices.Job, env string, wk *worker.W) { - // TODO fix broken abstraction. - // We're starting a worker pool here, because that's the loopback environment. - // It's sort of a mess, largely because of loopback, which has - // a different flow from a provisioned docker container. - e := j.Pipeline.GetComponents().GetEnvironments()[env] - switch e.GetUrn() { - case urns.EnvExternal: - ep := &pipepb.ExternalPayload{} - if err := (proto.UnmarshalOptions{}).Unmarshal(e.GetPayload(), ep); err != nil { - slog.Error("unmarshing environment payload", err, slog.String("envID", wk.Env)) - } - externalEnvironment(ctx, ep, wk) - slog.Debug("environment stopped", slog.String("envID", wk.String()), slog.String("job", j.String())) - default: - panic(fmt.Sprintf("environment %v with urn %v unimplemented", env, e.GetUrn())) - } -} - -func externalEnvironment(ctx context.Context, ep *pipepb.ExternalPayload, wk *worker.W) { - conn, err := grpc.Dial(ep.GetEndpoint().GetUrl(), grpc.WithTransportCredentials(insecure.NewCredentials())) - if err != nil { - panic(fmt.Sprintf("unable to dial sdk worker %v: %v", ep.GetEndpoint().GetUrl(), err)) - } - defer conn.Close() - pool := fnpb.NewBeamFnExternalWorkerPoolClient(conn) +// makeWorker creates a worker for that environment. +func makeWorker(env string, j *jobservices.Job) (*worker.W, error) { + wk := worker.New(j.String()+"_"+env, env) - endpoint := &pipepb.ApiServiceDescriptor{ - Url: wk.Endpoint(), - } - pool.StartWorker(ctx, &fnpb.StartWorkerRequest{ - WorkerId: wk.ID, - ControlEndpoint: endpoint, - LoggingEndpoint: endpoint, - ArtifactEndpoint: endpoint, - ProvisionEndpoint: endpoint, - Params: nil, - }) + wk.EnvPb = j.Pipeline.GetComponents().GetEnvironments()[env] + wk.PipelineOptions = j.PipelineOptions() + wk.JobKey = j.JobKey() + wk.ArtifactEndpoint = j.ArtifactEndpoint() - // Job processing happens here, but orchestrated by other goroutines - // This goroutine blocks until the context is cancelled, signalling - // that the pool runner should stop the worker. - <-ctx.Done() + go wk.Serve() - // Previous context cancelled so we need a new one - // for this request. 
-	pool.StopWorker(context.Background(), &fnpb.StopWorkerRequest{
-		WorkerId: wk.ID,
+	if err := runEnvironment(j.RootCtx, j, env, wk); err != nil {
+		return nil, fmt.Errorf("failed to start environment %v for job %v: %w", env, j, err)
+	}
+	// Check for connection succeeding after we've created the environment successfully.
+	timeout := 1 * time.Minute
+	time.AfterFunc(timeout, func() {
+		if wk.Connected() || wk.Stopped() {
+			return
+		}
+		err := fmt.Errorf("prism %v didn't get control connection to %v after %v", wk, wk.Endpoint(), timeout)
+		j.Failed(err)
+		j.CancelFn(err)
 	})
+	return wk, nil
 }

 type transformExecuter interface {
@@ -153,7 +112,7 @@ type processor struct {
 	transformExecuters map[string]transformExecuter
 }

-func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) error {
+func executePipeline(ctx context.Context, wks map[string]*worker.W, j *jobservices.Job) error {
 	pipeline := j.Pipeline
 	comps := proto.Clone(pipeline.GetComponents()).(*pipepb.Components)
@@ -196,7 +155,12 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro
 	// TODO move this loop and code into the preprocessor instead.
 	stages := map[string]*stage{}
 	var impulses []string
-	for _, stage := range topo {
+
+	// Initialize the "dataservice cache" to support side inputs.
+	// TODO(https://github.com/apache/beam/issues/28543), remove this concept.
+	ds := &worker.DataService{}
+
+	for i, stage := range topo {
 		tid := stage.transforms[0]
 		t := ts[tid]
 		urn := t.GetSpec().GetUrn()
@@ -207,11 +171,11 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro
 		if stage.exe != nil {
 			stage.envID = stage.exe.ExecuteWith(t)
 		}
-		stage.ID = wk.NextStage()
+		stage.ID = fmt.Sprintf("stage-%03d", i)
+		wk := wks[stage.envID]

 		switch stage.envID {
 		case "": // Runner Transforms
-
 			var onlyOut string
 			for _, out := range t.GetOutputs() {
 				onlyOut = out
@@ -270,10 +234,8 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro
 				em.AddStage(stage.ID, inputs, nil, []string{getOnlyValue(t.GetOutputs())})
 			}
 			stages[stage.ID] = stage
-			wk.Descriptors[stage.ID] = stage.desc
 		case wk.Env:
-			// Great! this is for this environment.
 			// Broken abstraction.
- if err := buildDescriptor(stage, comps, wk); err != nil { + if err := buildDescriptor(stage, comps, wk, ds); err != nil { return fmt.Errorf("prism error building stage %v: \n%w", stage.ID, err) } stages[stage.ID] = stage @@ -297,7 +259,12 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro maxParallelism := make(chan struct{}, 8) // Execute stages here bundleFailed := make(chan error) - bundles := em.Bundles(ctx, wk.NextInst) + + var instID uint64 + bundles := em.Bundles(ctx, func() string { + return fmt.Sprintf("inst%03d", atomic.AddUint64(&instID, 1)) + }) + for { select { case <-ctx.Done(): @@ -311,7 +278,8 @@ func executePipeline(ctx context.Context, wk *worker.W, j *jobservices.Job) erro go func(rb engine.RunBundle) { defer func() { <-maxParallelism }() s := stages[rb.StageID] - if err := s.Execute(ctx, j, wk, comps, em, rb); err != nil { + wk := wks[s.envID] + if err := s.Execute(ctx, j, wk, ds, comps, em, rb); err != nil { // Ensure we clean up on bundle failure em.FailBundle(rb) bundleFailed <- err diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go index e66def5b0fe8..99b786d45980 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/artifact.go @@ -16,11 +16,14 @@ package jobservices import ( + "bytes" + "context" "fmt" "io" jobpb "github.com/apache/beam/sdks/v2/go/pkg/beam/model/jobmanagement_v1" "golang.org/x/exp/slog" + "google.golang.org/protobuf/encoding/prototext" ) func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingService_ReverseArtifactRetrievalServiceServer) error { @@ -47,7 +50,7 @@ func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingSer }, }, }) - var count int + var buf bytes.Buffer for { in, err := stream.Recv() if err == io.EOF { @@ -56,26 +59,61 @@ func (s *Server) ReverseArtifactRetrievalService(stream jobpb.ArtifactStagingSer if err != nil { return err } - if in.IsLast { - slog.Debug("GetArtifact finish", + if in.GetIsLast() { + slog.Debug("GetArtifact finished", slog.Group("dep", slog.String("urn", dep.GetTypeUrn()), slog.String("payload", string(dep.GetTypePayload()))), - slog.Int("bytesReceived", count)) + slog.Int("bytesReceived", buf.Len()), + slog.String("rtype", fmt.Sprintf("%T", in.GetResponse())), + ) break } // Here's where we go through each environment's artifacts. // We do nothing with them. 
switch req := in.GetResponse().(type) { case *jobpb.ArtifactResponseWrapper_GetArtifactResponse: - count += len(req.GetArtifactResponse.GetData()) + buf.Write(req.GetArtifactResponse.GetData()) + case *jobpb.ArtifactResponseWrapper_ResolveArtifactResponse: err := fmt.Errorf("unexpected ResolveArtifactResponse to GetArtifact: %v", in.GetResponse()) slog.Error("GetArtifact failure", err) return err } } + if len(s.artifacts) == 0 { + s.artifacts = map[string][]byte{} + } + s.artifacts[string(dep.GetTypePayload())] = buf.Bytes() } } return nil } + +func (s *Server) ResolveArtifacts(_ context.Context, req *jobpb.ResolveArtifactsRequest) (*jobpb.ResolveArtifactsResponse, error) { + return &jobpb.ResolveArtifactsResponse{ + Replacements: req.GetArtifacts(), + }, nil +} + +func (s *Server) GetArtifact(req *jobpb.GetArtifactRequest, stream jobpb.ArtifactRetrievalService_GetArtifactServer) error { + info := req.GetArtifact() + buf, ok := s.artifacts[string(info.GetTypePayload())] + if !ok { + pt := prototext.Format(info) + slog.Warn("unable to provide artifact to worker", "artifact_info", pt) + return fmt.Errorf("unable to provide %v to worker", pt) + } + chunk := 128 * 1024 * 1024 // 128 MB + var i int + for i+chunk < len(buf) { + stream.Send(&jobpb.GetArtifactResponse{ + Data: buf[i : i+chunk], + }) + i += chunk + } + stream.Send(&jobpb.GetArtifactResponse{ + Data: buf[i:], + }) + return nil +} diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go index 10d36066391f..cd302a70fcc0 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/job.go @@ -68,6 +68,8 @@ type Job struct { key string jobName string + artifactEndpoint string + Pipeline *pipepb.Pipeline options *structpb.Struct @@ -88,6 +90,14 @@ type Job struct { metrics metricsStore } +func (j *Job) ArtifactEndpoint() string { + return j.artifactEndpoint +} + +func (j *Job) PipelineOptions() *structpb.Struct { + return j.options +} + // ContributeTentativeMetrics returns the datachannel read index, and any unknown monitoring short ids. func (j *Job) ContributeTentativeMetrics(payloads *fnpb.ProcessBundleProgressResponse) (int64, []string) { return j.metrics.ContributeTentativeMetrics(payloads) @@ -113,6 +123,10 @@ func (j *Job) LogValue() slog.Value { slog.String("name", j.jobName)) } +func (j *Job) JobKey() string { + return j.key +} + func (j *Job) SendMsg(msg string) { j.streamCond.L.Lock() defer j.streamCond.L.Unlock() diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go index e626a05b51e1..0fd7381e17f4 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/management.go @@ -79,6 +79,8 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo streamCond: sync.NewCond(&sync.Mutex{}), RootCtx: rootCtx, CancelFn: cancelFn, + + artifactEndpoint: s.Endpoint(), } // Queue initial state of the job. 
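A note on the `GetArtifact` chunking above: the loop condition `i+chunk < len(buf)` guarantees the final `stream.Send` always fires with the remainder, so even a zero-length artifact produces exactly one response message. The same pattern in isolation, as a runnable sketch (`sendChunks` and the 4-byte chunk size are illustrative only, not Beam APIs; the diff itself uses a 128 MB chunk):

```go
package main

import "fmt"

// sendChunks streams buf in fixed-size chunks, always emitting a final
// (possibly shorter) remainder so an empty buffer still yields one message.
func sendChunks(buf []byte, chunk int, send func([]byte) error) error {
	var i int
	for i+chunk < len(buf) {
		if err := send(buf[i : i+chunk]); err != nil {
			return err
		}
		i += chunk
	}
	return send(buf[i:])
}

func main() {
	data := []byte("0123456789")
	_ = sendChunks(data, 4, func(p []byte) error {
		fmt.Printf("chunk: %q\n", p) // prints "0123", "4567", then "89"
		return nil
	})
}
```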
@@ -91,14 +93,18 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo return nil, err } var errs []error - check := func(feature string, got, want any) { - if got != want { - err := unimplementedError{ - feature: feature, - value: got, + check := func(feature string, got any, wants ...any) { + for _, want := range wants { + if got == want { + return } - errs = append(errs, err) } + + err := unimplementedError{ + feature: feature, + value: got, + } + errs = append(errs, err) } // Inspect Transforms for unsupported features. @@ -112,6 +118,8 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo urns.TransformGBK, urns.TransformFlatten, urns.TransformCombinePerKey, + urns.TransformCombineGlobally, // Used by Java SDK + urns.TransformCombineGroupedValues, // Used by Java SDK urns.TransformAssignWindows: // Very few expected transforms types for submitted pipelines. // Most URNs are for the runner to communicate back to the SDK for execution. @@ -152,7 +160,7 @@ func (s *Server) Prepare(ctx context.Context, req *jobpb.PrepareJobRequest) (*jo check("WindowingStrategy.MergeStatus", ws.GetMergeStatus(), pipepb.MergeStatus_NON_MERGING) } if !bypassedWindowingStrategies[wsID] { - check("WindowingStrategy.OnTimeBehavior", ws.GetOnTimeBehavior(), pipepb.OnTimeBehavior_FIRE_IF_NONEMPTY) + check("WindowingStrategy.OnTimeBehavior", ws.GetOnTimeBehavior(), pipepb.OnTimeBehavior_FIRE_IF_NONEMPTY, pipepb.OnTimeBehavior_FIRE_ALWAYS) check("WindowingStrategy.OutputTime", ws.GetOutputTime(), pipepb.OutputTime_END_OF_WINDOW) // Non nil triggers should fail. if ws.GetTrigger().GetDefault() == nil { diff --git a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go index e3fb7766b519..bf2db814813c 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go +++ b/sdks/go/pkg/beam/runners/prism/internal/jobservices/server.go @@ -29,6 +29,7 @@ import ( type Server struct { jobpb.UnimplementedJobServiceServer jobpb.UnimplementedArtifactStagingServiceServer + jobpb.UnimplementedArtifactRetrievalServiceServer fnpb.UnimplementedProvisionServiceServer // Server management @@ -42,6 +43,9 @@ type Server struct { // execute defines how a job is executed. execute func(*Job) + + // Artifact hack + artifacts map[string][]byte } // NewServer acquires the indicated port. @@ -60,6 +64,7 @@ func NewServer(port int, execute func(*Job)) *Server { s.server = grpc.NewServer(opts...) 
jobpb.RegisterJobServiceServer(s.server, s) jobpb.RegisterArtifactStagingServiceServer(s.server, s) + jobpb.RegisterArtifactRetrievalServiceServer(s.server, s) return s } diff --git a/sdks/go/pkg/beam/runners/prism/internal/stage.go b/sdks/go/pkg/beam/runners/prism/internal/stage.go index 4d8d4621168d..4ce3ce7ffeb6 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/stage.go +++ b/sdks/go/pkg/beam/runners/prism/internal/stage.go @@ -75,7 +75,7 @@ type stage struct { OutputsToCoders map[string]engine.PColInfo } -func (s *stage) Execute(ctx context.Context, j *jobservices.Job, wk *worker.W, comps *pipepb.Components, em *engine.ElementManager, rb engine.RunBundle) error { +func (s *stage) Execute(ctx context.Context, j *jobservices.Job, wk *worker.W, ds *worker.DataService, comps *pipepb.Components, em *engine.ElementManager, rb engine.RunBundle) error { slog.Debug("Execute: starting bundle", "bundle", rb) var b *worker.B @@ -204,8 +204,8 @@ progress: md := wk.MonitoringMetadata(ctx, unknownIDs) j.AddMetricShortIDs(md) } - // TODO handle side input data properly. - wk.D.Commit(b.OutputData) + // TODO(https://github.com/apache/beam/issues/28543) handle side input data properly. + ds.Commit(b.OutputData) var residualData [][]byte var minOutputWatermark map[string]mtime.Time for _, rr := range resp.GetResidualRoots() { @@ -270,7 +270,7 @@ func portFor(wInCid string, wk *worker.W) []byte { // It assumes that the side inputs are not sourced from PCollections generated by any transform in this stage. // // Because we need the local ids for routing the sources/sinks information. -func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W) error { +func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W, ds *worker.DataService) error { // Assume stage has an indicated primary input coders := map[string]*pipepb.Coder{} @@ -327,7 +327,7 @@ func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W) error { // Update side inputs to point to new PCollection with any replaced coders. transforms[si.transform].GetInputs()[si.local] = newGlobal } - prepSide, err := handleSideInput(si.transform, si.local, si.global, comps, coders, wk) + prepSide, err := handleSideInput(si.transform, si.local, si.global, comps, coders, ds) if err != nil { slog.Error("buildDescriptor: handleSideInputs", err, slog.String("transformID", si.transform)) return err @@ -392,7 +392,7 @@ func buildDescriptor(stg *stage, comps *pipepb.Components, wk *worker.W) error { } // handleSideInput returns a closure that will look up the data for a side input appropriate for the given watermark. 
-func handleSideInput(tid, local, global string, comps *pipepb.Components, coders map[string]*pipepb.Coder, wk *worker.W) (func(b *worker.B, watermark mtime.Time), error) { +func handleSideInput(tid, local, global string, comps *pipepb.Components, coders map[string]*pipepb.Coder, ds *worker.DataService) (func(b *worker.B, watermark mtime.Time), error) { t := comps.GetTransforms()[tid] sis, err := getSideInputs(t) if err != nil { @@ -412,7 +412,7 @@ func handleSideInput(tid, local, global string, comps *pipepb.Components, coders global, local := global, local return func(b *worker.B, watermark mtime.Time) { - data := wk.D.GetAllData(global) + data := ds.GetAllData(global) if b.IterableSideInputData == nil { b.IterableSideInputData = map[string]map[string]map[typex.Window][][]byte{} @@ -447,7 +447,7 @@ func handleSideInput(tid, local, global string, comps *pipepb.Components, coders global, local := global, local return func(b *worker.B, watermark mtime.Time) { // May be of zero length, but that's OK. Side inputs can be empty. - data := wk.D.GetAllData(global) + data := ds.GetAllData(global) if b.MultiMapSideInputData == nil { b.MultiMapSideInputData = map[string]map[string]map[typex.Window]map[string][][]byte{} } diff --git a/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go b/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go index 9fc2c1a923c5..bf1e36656661 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go +++ b/sdks/go/pkg/beam/runners/prism/internal/urns/urns.go @@ -57,7 +57,9 @@ var ( // SDK transforms. TransformParDo = ptUrn(pipepb.StandardPTransforms_PAR_DO) TransformCombinePerKey = ctUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY) + TransformCombineGlobally = ctUrn(pipepb.StandardPTransforms_COMBINE_GLOBALLY) TransformReshuffle = ctUrn(pipepb.StandardPTransforms_RESHUFFLE) + TransformCombineGroupedValues = cmbtUrn(pipepb.StandardPTransforms_COMBINE_GROUPED_VALUES) TransformPreCombine = cmbtUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY_PRECOMBINE) TransformMerge = cmbtUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY_MERGE_ACCUMULATORS) TransformExtract = cmbtUrn(pipepb.StandardPTransforms_COMBINE_PER_KEY_EXTRACT_OUTPUTS) diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css b/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css index 74f4a6958d29..d252dc020e63 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css +++ b/sdks/go/pkg/beam/runners/prism/internal/web/assets/style.css @@ -101,10 +101,19 @@ footer { color: var(--beam-white); } +#page-container { + position: relative; + min-height: 100vh; +} + +#content-wrap { + padding-bottom: 2.5rem; /* Footer height */ +} + .container { width: 100%; margin: 0 auto; - padding: 80px 20px 40px; + padding: 40px 20px 0px; } .child { @@ -132,6 +141,53 @@ footer { padding: 12px 15px; } +/* Tooltip container */ +.tooltip { + display: inline-block; + border-bottom: 1px dotted var(--beam-black); +} + +/* Tooltip text */ +.tooltip .tooltiptext { + visibility: hidden; + width: max-content; + max-width: 400px; + background-color: var(--dark-grey); + color: var(--beam-white); + text-align: left; + padding: 5px 10px; + border-radius: 6px; + + /* Position the tooltip text */ + position: absolute; + z-index: 1; + bottom: 125%; + left: 50%; + margin-left: -60px; + + /* Fade in tooltip */ + opacity: 0; + transition: opacity 0.3s; +} + +/* Tooltip arrow */ +.tooltip .tooltiptext::after { + content: ""; + position: absolute; + top: 100%; + left: 18%; + margin-left: -5px; + border-width: 5px; + 
border-style: solid; + border-color: var(--dark-grey) transparent transparent transparent; +} + +/* Show the tooltip text when you mouse over the tooltip container */ +.tooltip:hover .tooltiptext { + visibility: visible; + opacity: 1; +} + @media screen and (max-width: 550px) { header { flex-direction: column; diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go index b34547e92752..015a9103134a 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go +++ b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.go @@ -21,6 +21,9 @@ import ( "runtime/metrics" "runtime/pprof" "strings" + "time" + + "github.com/dustin/go-humanize" ) type debugzData struct { @@ -54,16 +57,24 @@ func dumpMetrics() debugzData { name, value := sample.Name, sample.Value m := goRuntimeMetric{ - Name: name, + Name: strings.TrimSpace(name), Description: descs[i].Description, } // Handle each sample. switch value.Kind() { case metrics.KindUint64: - m.Value = fmt.Sprintf("%d", value.Uint64()) + if strings.HasSuffix(name, "bytes") { + m.Value = humanize.Bytes(value.Uint64()) + } else { + m.Value = humanize.FormatInteger("", int(value.Uint64())) + } case metrics.KindFloat64: - m.Value = fmt.Sprintf("%f", value.Float64()) + if strings.HasSuffix(name, "seconds") { + m.Value = time.Duration(float64(time.Second) * value.Float64()).String() + } else { + m.Value = humanize.FormatFloat("", value.Float64()) + } case metrics.KindFloat64Histogram: m.Value = fmt.Sprintf("%f", medianBucket(value.Float64Histogram())) // The histogram may be quite large, so let's just pull out @@ -88,16 +99,16 @@ func dumpMetrics() debugzData { data.Metrics = append(data.Metrics, goRuntimeMetric{ Name: "BUILD INFO", - Value: "n/a", - Description: b.String(), + Value: b.String(), + Description: "result from runtime/debug.ReadBuildInfo()", }) b.Reset() goroutineDump(&b) data.Metrics = append(data.Metrics, goRuntimeMetric{ Name: "GOROUTINES", - Value: "n/a", - Description: b.String(), + Value: b.String(), + Description: "consolidated active goroutines", }) b.Reset() diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html index ebf37f129ae3..175f44da7447 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html +++ b/sdks/go/pkg/beam/runners/prism/internal/web/debugz.html @@ -30,14 +30,16 @@ <table> <tr> <th>Name</th> <th>Value</th> - <th>Description</th> </tr> {{ range .Metrics }} <tr> - <td>{{ .Name }}</td> + <td> + <div class="tooltip">{{ .Name }} + <span class="tooltiptext">{{ .Description }}</span> + </div> + </td> <td>{{ .Value }}</td> - <td>{{ .Description }}</td> </tr> {{ else }} diff --git a/sdks/go/pkg/beam/runners/prism/internal/web/index.html b/sdks/go/pkg/beam/runners/prism/internal/web/index.html index fe9bb056e51c..1aa0ed719d87 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/web/index.html +++ b/sdks/go/pkg/beam/runners/prism/internal/web/index.html @@ -22,31 +22,33 @@ </head> <body> - <div class="container"> - {{ if .Error}}<div class="error">{{.Error}}</div>{{end}} - <table> - <tr> - <th>ID</th> - <th>Name</th> - <th>State</th> - </tr> - {{ range .Jobs }} - <tr> - <td>{{ .JobId }}</td> - <td>{{ .JobName }}</td> - <td>{{ .State }}</td> - </tr> - {{ else }} - <tr> - <td>No jobs have been run.</td> - </tr> - {{ end }} - </table> - </div> + <div id="page-container"> + <div id="content-wrap"> + <div class="container"> + {{ if .Error}}<div class="error">{{.Error}}</div>{{end}} + <table> + <tr> + <th>ID</th> + <th>Name</th> + <th>State</th> + </tr> + {{ range .Jobs }} + <tr> + <td>{{ .JobId }}</td> + <td>{{ .JobName }}</td> + <td>{{ .State }}</td> + </tr> + {{ else }} + <tr> + <td>No jobs have been run.</td> + </tr> + {{ end }} + </table> + </div> + </div> + </div>
diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go index 98479e3db071..573bdf4aeb9d 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/bundle.go @@ -93,7 +93,7 @@ func (b *B) Respond(resp *fnpb.InstructionResponse) { } b.responded = true if resp.GetError() != "" { - b.BundleErr = fmt.Errorf("bundle %v failed:%v", resp.GetInstructionId(), resp.GetError()) + b.BundleErr = fmt.Errorf("bundle %v %v failed:%v", resp.GetInstructionId(), b.PBDID, resp.GetError()) close(b.Resp) return } diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go index 0ad7ccb37032..4968c9eb433e 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker.go @@ -43,6 +43,7 @@ import ( "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/encoding/prototext" + "google.golang.org/protobuf/types/known/structpb" ) // A W manages worker environments, sending them work @@ -57,12 +58,16 @@ type W struct { ID, Env string + JobKey, ArtifactEndpoint string + EnvPb *pipepb.Environment + PipelineOptions *structpb.Struct + // Server management lis net.Listener server *grpc.Server // These are the ID sources - inst, bund uint64 + inst uint64 connected, stopped atomic.Bool InstReqs chan *fnpb.InstructionRequest @@ -71,8 +76,6 @@ type W struct { mu sync.Mutex activeInstructions map[string]controlResponder // Active instructions keyed by InstructionID Descriptors map[string]*fnpb.ProcessBundleDescriptor // Stages keyed by PBDID - - D *DataService } type controlResponder interface { @@ -99,14 +102,13 @@ func New(id, env string) *W { activeInstructions: make(map[string]controlResponder), Descriptors: make(map[string]*fnpb.ProcessBundleDescriptor), - - D: &DataService{}, } slog.Debug("Serving Worker components", slog.String("endpoint", wk.Endpoint())) fnpb.RegisterBeamFnControlServer(wk.server, wk) fnpb.RegisterBeamFnDataServer(wk.server, wk) fnpb.RegisterBeamFnLoggingServer(wk.server, wk) fnpb.RegisterBeamFnStateServer(wk.server, wk) + fnpb.RegisterProvisionServiceServer(wk.server, wk) return wk } @@ -143,11 +145,7 @@ func (wk *W) Stop() { } func (wk *W) NextInst() string { - return fmt.Sprintf("inst%03d", atomic.AddUint64(&wk.inst, 1)) -} - -func (wk *W) NextStage() string { - return fmt.Sprintf("stage%03d", atomic.AddUint64(&wk.bund, 1)) + return fmt.Sprintf("inst-%v-%03d", wk.Env, atomic.AddUint64(&wk.inst, 1)) } // TODO set logging level. @@ -159,20 +157,24 @@ func (wk *W) GetProvisionInfo(_ context.Context, _ *fnpb.GetProvisionInfoRequest } resp := &fnpb.GetProvisionInfoResponse{ Info: &fnpb.ProvisionInfo{ - // TODO: Add the job's Pipeline options // TODO: Include runner capabilities with the per job configuration. 
RunnerCapabilities: []string{ urns.CapabilityMonitoringInfoShortIDs, }, - LoggingEndpoint: endpoint, - ControlEndpoint: endpoint, - ArtifactEndpoint: endpoint, - // TODO add this job's RetrievalToken - // TODO add this job's artifact Dependencies + LoggingEndpoint: endpoint, + ControlEndpoint: endpoint, + ArtifactEndpoint: &pipepb.ApiServiceDescriptor{ + Url: wk.ArtifactEndpoint, + }, + + RetrievalToken: wk.JobKey, + Dependencies: wk.EnvPb.GetDependencies(), + PipelineOptions: wk.PipelineOptions, Metadata: map[string]string{ "runner": "prism", "runner_version": core.SdkVersion, + "variant": "test", }, }, } @@ -253,6 +255,11 @@ func (wk *W) Connected() bool { return wk.connected.Load() } +// Stopped indicates that the worker has stopped. +func (wk *W) Stopped() bool { + return wk.stopped.Load() +} + // Control relays instructions to SDKs and back again, coordinated via unique instructionIDs. // // Requests come from the runner, and are sent to the client in the SDK. @@ -302,10 +309,12 @@ func (wk *W) Control(ctrl fnpb.BeamFnControl_ControlServer) error { wk.mu.Lock() // Fail extant instructions slog.Debug("SDK Disconnected", "worker", wk, "ctx_error", ctrl.Context().Err(), "outstanding_instructions", len(wk.activeInstructions)) + + msg := fmt.Sprintf("SDK worker disconnected: %v, %v active instructions", wk.String(), len(wk.activeInstructions)) for instID, b := range wk.activeInstructions { b.Respond(&fnpb.InstructionResponse{ InstructionId: instID, - Error: "SDK Disconnected", + Error: msg, }) } wk.mu.Unlock() @@ -526,7 +535,7 @@ func (wk *W) sendInstruction(ctx context.Context, req *fnpb.InstructionRequest) req.InstructionId = progInst - if wk.stopped.Load() { + if wk.Stopped() { return nil } wk.InstReqs <- req @@ -556,6 +565,7 @@ func (wk *W) MonitoringMetadata(ctx context.Context, unknownIDs []string) *fnpb. // DataService is slated to be deleted in favour of stage based state // management for side inputs. +// TODO(https://github.com/apache/beam/issues/28543), remove this concept. type DataService struct { mu sync.Mutex // TODO actually quick process the data to windows here as well. 
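The DataService threaded through Execute, buildDescriptor, and handleSideInput above is essentially a keyed, append-only byte store: each finished bundle commits its output per PCollection id, and side-input preparation later reads back everything collected for that id. A minimal sketch of that contract, transliterated to Java for illustration (names invented; the real type is the Go DataService above, which is slated for removal once stage-based side-input state lands):

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DataStore {
  private final Map<String, List<byte[]>> raw = new HashMap<>();

  // Append a finished bundle's output, keyed by PCollection id.
  public synchronized void commit(Map<String, List<byte[]>> bundleOutput) {
    bundleOutput.forEach(
        (pcol, data) -> raw.computeIfAbsent(pcol, k -> new ArrayList<>()).addAll(data));
  }

  // Return a copy of everything committed so far for one PCollection.
  public synchronized List<byte[]> getAllData(String pcol) {
    return new ArrayList<>(raw.getOrDefault(pcol, new ArrayList<>()));
  }
}
```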
diff --git a/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go index ed61f484481c..6a90b463c45d 100644 --- a/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go +++ b/sdks/go/pkg/beam/runners/prism/internal/worker/worker_test.go @@ -50,18 +50,6 @@ func TestWorker_NextInst(t *testing.T) { } } -func TestWorker_NextStage(t *testing.T) { - w := New("test", "testEnv") - - stageIDs := map[string]struct{}{} - for i := 0; i < 100; i++ { - stageIDs[w.NextStage()] = struct{}{} - } - if got, want := len(stageIDs), 100; got != want { - t.Errorf("calling w.NextStage() got %v unique ids, want %v", got, want) - } -} - func TestWorker_GetProcessBundleDescriptor(t *testing.T) { w := New("test", "testEnv") @@ -189,7 +177,7 @@ func TestWorker_Data_HappyPath(t *testing.T) { b := &B{ InstID: instID, - PBDID: wk.NextStage(), + PBDID: "teststageID", InputData: [][]byte{ {1, 1, 1, 1, 1, 1}, }, diff --git a/sdks/go/pkg/beam/runners/prism/prism.go b/sdks/go/pkg/beam/runners/prism/prism.go index 0be35ad5cc33..bcb7a3fb689f 100644 --- a/sdks/go/pkg/beam/runners/prism/prism.go +++ b/sdks/go/pkg/beam/runners/prism/prism.go @@ -49,9 +49,9 @@ func Execute(ctx context.Context, p *beam.Pipeline) (beam.PipelineResult, error) s := jobservices.NewServer(0, internal.RunPipeline) *jobopts.Endpoint = s.Endpoint() go s.Serve() - } - if !jobopts.IsLoopback() { - *jobopts.EnvironmentType = "loopback" + if !jobopts.IsLoopback() { + *jobopts.EnvironmentType = "loopback" + } } return universal.Execute(ctx, p) } diff --git a/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go b/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go index 732f4382ab5d..d5cc6aa7327a 100644 --- a/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go +++ b/sdks/go/pkg/beam/runners/universal/runnerlib/stage.go @@ -44,7 +44,7 @@ func Stage(ctx context.Context, id, endpoint, binary, st string) (retrievalToken defer cc.Close() if err := StageViaPortableAPI(ctx, cc, binary, st); err == nil { - return "", nil + return st, nil } log.Warnf(ctx, "unable to stage with PortableAPI: %v; falling back to legacy", err) diff --git a/sdks/go/pkg/beam/util.go b/sdks/go/pkg/beam/util.go index d591dedd7624..4b24af831134 100644 --- a/sdks/go/pkg/beam/util.go +++ b/sdks/go/pkg/beam/util.go @@ -16,7 +16,7 @@ package beam //go:generate go install github.com/apache/beam/sdks/v2/go/cmd/starcgen -//go:generate starcgen --package=beam --identifiers=addFixedKeyFn,dropKeyFn,dropValueFn,swapKVFn,explodeFn,jsonDec,jsonEnc,protoEnc,protoDec,schemaEnc,schemaDec,makePartitionFn,createFn +//go:generate starcgen --package=beam --identifiers=addFixedKeyFn,dropKeyFn,dropValueFn,swapKVFn,explodeFn,jsonDec,jsonEnc,protoEnc,protoDec,schemaEnc,schemaDec,makePartitionFn //go:generate go fmt // We have some freedom to create various utilities, users can use depending on diff --git a/sdks/go/test/build.gradle b/sdks/go/test/build.gradle index d53491194753..5b39cf81400f 100644 --- a/sdks/go/test/build.gradle +++ b/sdks/go/test/build.gradle @@ -173,6 +173,30 @@ tasks.register("ulrValidatesRunner") { } } +// ValidatesRunner tests for Prism. Runs tests in the integration directory +// with prism in Docker mode to validate that the runner behaves as expected.
+task prismValidatesRunner { + group = "Verification" + + dependsOn ":sdks:go:test:goBuild" + dependsOn ":sdks:go:container:docker" + dependsOn ":sdks:java:container:java8:docker" + dependsOn ":sdks:java:testing:expansion-service:buildTestExpansionServiceJar" + doLast { + def pipelineOptions = [ // Pipeline options piped directly to Go SDK flags. + "--expansion_jar=test:${project(":sdks:java:testing:expansion-service").buildTestExpansionServiceJar.archivePath}", + ] + def options = [ + "--runner prism", + "--pipeline_opts \"${pipelineOptions.join(' ')}\"", + ] + exec { + executable "sh" + args "-c", "./run_validatesrunner_tests.sh ${options.join(' ')}" + } + } +} + // A method for configuring a cross-language validates runner test task, // intended to be used in calls to createCrossLanguageValidatesRunnerTask. ext.goIoValidatesRunnerTask = { proj, name, scriptOpts, pipelineOpts -> diff --git a/sdks/go/test/integration/integration.go b/sdks/go/test/integration/integration.go index dee161dcb2af..f3cffd176110 100644 --- a/sdks/go/test/integration/integration.go +++ b/sdks/go/test/integration/integration.go @@ -38,11 +38,11 @@ package integration import ( "fmt" "math/rand" + "os" "regexp" "strings" "testing" "time" - "os" // common runner flag. "github.com/apache/beam/sdks/v2/go/pkg/beam/options/jobopts" @@ -140,6 +140,8 @@ var portableFilters = []string{ } var prismFilters = []string{ + // The prism runner does not yet support Java's CoGBK. + "TestXLang_CoGroupBy", // The prism runner does not support the TestStream primitive "TestTestStream.*", // The trigger and pane tests uses TestStream diff --git a/sdks/go/test/run_validatesrunner_tests.sh b/sdks/go/test/run_validatesrunner_tests.sh index c25d59bd57b0..60dd0cd97f11 100755 --- a/sdks/go/test/run_validatesrunner_tests.sh +++ b/sdks/go/test/run_validatesrunner_tests.sh @@ -257,8 +257,10 @@ print(s.getsockname()[1]) s.close() " +TMPDIR=$(mktemp -d) + # Set up environment based on runner. -if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" ]]; then +if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$RUNNER" == "portable" || "$RUNNER" == "prism" ]]; then if [[ -z "$ENDPOINT" ]]; then JOB_PORT=$(python3 -c "$SOCKET_SCRIPT") ENDPOINT="localhost:$JOB_PORT" @@ -288,6 +290,14 @@ if [[ "$RUNNER" == "flink" || "$RUNNER" == "spark" || "$RUNNER" == "samza" || "$ python3 \ -m apache_beam.runners.portability.local_job_service_main \ --port $JOB_PORT & + elif [[ "$RUNNER" == "prism" ]]; then + PRISMBIN=$TMPDIR/prismbin + cd sdks + ./go/run_with_go_version.sh build -o $PRISMBIN go/cmd/prism/*.go + $PRISMBIN \ + --serve_http=false \ + --job_port $JOB_PORT & + cd .. else echo "Unknown runner: $RUNNER" exit 1; @@ -340,7 +350,6 @@ if [[ "$RUNNER" == "dataflow" ]]; then gcloud --version # ensure gcloud is version 186 or above - TMPDIR=$(mktemp -d) gcloud_ver=$(gcloud -v | head -1 | awk '{print $4}') if [[ "$gcloud_ver" < "186" ]] then @@ -402,6 +411,7 @@ fi ARGS="$ARGS -p $SIMULTANEOUS" # Assemble test arguments and pipeline options. 
+ARGS="$ARGS -v" ARGS="$ARGS -timeout $TIMEOUT" ARGS="$ARGS --runner=$RUNNER" ARGS="$ARGS --project=$DATAFLOW_PROJECT" @@ -449,9 +459,9 @@ if [[ "$RUNNER" == "dataflow" ]]; then docker rmi $JAVA_CONTAINER:$JAVA_TAG || echo "Failed to remove container" gcloud --quiet container images delete $JAVA_CONTAINER:$JAVA_TAG || echo "Failed to delete container" fi - - # Clean up tempdir - rm -rf $TMPDIR fi +# Clean up tempdir +rm -rf $TMPDIR + exit $TEST_EXIT_CODE diff --git a/sdks/java/container/boot.go b/sdks/java/container/boot.go index 0e39d907f075..f7fd7437c88a 100644 --- a/sdks/java/container/boot.go +++ b/sdks/java/container/boot.go @@ -159,8 +159,9 @@ func main() { cp = append(cp, filepath.Join(dir, filepath.FromSlash(name))) } + var setRecommendedMaxXmx = strings.Contains(options, "set_recommended_max_xmx") args := []string{ - "-Xmx" + strconv.FormatUint(heapSizeLimit(info), 10), + "-Xmx" + strconv.FormatUint(heapSizeLimit(info, setRecommendedMaxXmx), 10), // ParallelGC the most adequate for high throughput and lower CPU utilization // It is the default GC in Java 8, but not on newer versions "-XX:+UseParallelGC", @@ -266,9 +267,14 @@ func makePipelineOptionsFile(options string) error { // it returns 70% of the physical memory on the machine. If it cannot determine // that value, it returns 1GB. This is an imperfect heuristic. It aims to // ensure there is memory for non-heap use and other overhead, while also not -// underutilizing the machine. -func heapSizeLimit(info *fnpb.ProvisionInfo) uint64 { - if size, err := syscallx.PhysicalMemorySize(); err == nil { +// underutilizing the machine. If the set_recommended_max_xmx experiment is enabled, +// it sets Xmx to 32G. Under 32G, the JVM enables CompressedOops. CompressedOops +// utilizes memory more efficiently and has a positive impact on GC performance +// and cache hit rate.
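Stated as code, the heuristic reads as the following sketch (a Java transliteration of the Go function that follows; OptionalLong stands in for the physical-memory syscall probe and is my substitution):

```java
import java.util.OptionalLong;

public class HeapLimit {
  // Pin to 32 GiB when the set_recommended_max_xmx experiment is on,
  // else 70% of physical memory, else a 1 GiB fallback.
  static long heapSizeLimit(OptionalLong physicalMemoryBytes, boolean setRecommendedMaxXmx) {
    if (setRecommendedMaxXmx) {
      return 32L << 30; // the largest heap that still gets CompressedOops
    }
    if (physicalMemoryBytes.isPresent()) {
      return physicalMemoryBytes.getAsLong() * 70 / 100;
    }
    return 1L << 30;
  }

  public static void main(String[] args) {
    long machine = 64L << 30; // pretend the machine has 64 GiB
    System.out.println(heapSizeLimit(OptionalLong.of(machine), false)); // 48103633715
    System.out.println(heapSizeLimit(OptionalLong.of(machine), true));  // 34359738368
    System.out.println(heapSizeLimit(OptionalLong.empty(), false));     // 1073741824
  }
}
```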
+func heapSizeLimit(info *fnpb.ProvisionInfo, setRecommendedMaxXmx bool) uint64 { + if setRecommendedMaxXmx { + return 32 << 30 + } else if size, err := syscallx.PhysicalMemorySize(); err == nil { return (size * 70) / 100 } return 1 << 30 diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java index fb6746b9cdd1..c0683ef44616 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/schemas/SchemaTranslation.java @@ -56,6 +56,7 @@ import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps; @@ -74,7 +75,23 @@ public class SchemaTranslation { private static final Logger LOG = LoggerFactory.getLogger(SchemaTranslation.class); private static final String URN_BEAM_LOGICAL_DECIMAL = FixedPrecisionNumeric.BASE_IDENTIFIER; - private static final String URN_BEAM_LOGICAL_JAVASDK = "beam:logical_type:javasdk:v1"; + + private static String getLogicalTypeUrn(String identifier) { + if (identifier.startsWith("beam:logical_type:")) { + return identifier; + } else { + String filtered = identifier.replaceAll("[^0-9A-Za-z_]", "").toLowerCase(); + if (!Strings.isNullOrEmpty(filtered)) { + // URNs for non-standard Java SDK logical types are assigned the javasdk_ prefix + return String.format("beam:logical_type:javasdk_%s:v1", filtered); + } else { + // The raw "javasdk" name should only be a last resort. Types defined in Beam should have + // their own URN. + return "beam:logical_type:javasdk:v1"; + } + } + } + private static final String URN_BEAM_LOGICAL_MILLIS_INSTANT = SchemaApi.LogicalTypes.Enum.MILLIS_INSTANT .getValueDescriptor() @@ -84,18 +101,18 @@ public class SchemaTranslation { // TODO(https://github.com/apache/beam/issues/19715): Populate this with a LogicalTypeRegistrar, // which includes a way to construct // the LogicalType given an argument.
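For concreteness, here is what the new getLogicalTypeUrn yields for a few identifiers. This is a self-contained restatement of the logic above, with Guava's Strings.isNullOrEmpty swapped for a plain emptiness check; the sample identifiers are invented:

```java
public class UrnDemo {
  static String getLogicalTypeUrn(String identifier) {
    if (identifier.startsWith("beam:logical_type:")) {
      return identifier; // already a portable urn: pass through unchanged
    }
    String filtered = identifier.replaceAll("[^0-9A-Za-z_]", "").toLowerCase();
    if (!filtered.isEmpty()) {
      return String.format("beam:logical_type:javasdk_%s:v1", filtered);
    }
    return "beam:logical_type:javasdk:v1"; // last resort
  }

  public static void main(String[] args) {
    System.out.println(getLogicalTypeUrn("beam:logical_type:fixed_bytes:v1"));
    // -> beam:logical_type:fixed_bytes:v1
    System.out.println(getLogicalTypeUrn("SqlCharType"));
    // -> beam:logical_type:javasdk_sqlchartype:v1
    System.out.println(getLogicalTypeUrn("@@@"));
    // -> beam:logical_type:javasdk:v1
  }
}
```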
- private static final ImmutableMap<String, Class<? extends LogicalType<?, ?>>> - STANDARD_LOGICAL_TYPES = - ImmutableMap.<String, Class<? extends LogicalType<?, ?>>>builder() - .put(FixedPrecisionNumeric.IDENTIFIER, FixedPrecisionNumeric.class) - .put(MicrosInstant.IDENTIFIER, MicrosInstant.class) - .put(SchemaLogicalType.IDENTIFIER, SchemaLogicalType.class) - .put(PythonCallable.IDENTIFIER, PythonCallable.class) - .put(FixedBytes.IDENTIFIER, FixedBytes.class) - .put(VariableBytes.IDENTIFIER, VariableBytes.class) - .put(FixedString.IDENTIFIER, FixedString.class) - .put(VariableString.IDENTIFIER, VariableString.class) - .build(); + @VisibleForTesting + static final ImmutableMap<String, Class<? extends LogicalType<?, ?>>> STANDARD_LOGICAL_TYPES = + ImmutableMap.<String, Class<? extends LogicalType<?, ?>>>builder() + .put(FixedPrecisionNumeric.IDENTIFIER, FixedPrecisionNumeric.class) + .put(MicrosInstant.IDENTIFIER, MicrosInstant.class) + .put(SchemaLogicalType.IDENTIFIER, SchemaLogicalType.class) + .put(PythonCallable.IDENTIFIER, PythonCallable.class) + .put(FixedBytes.IDENTIFIER, FixedBytes.class) + .put(VariableBytes.IDENTIFIER, VariableBytes.class) + .put(FixedString.IDENTIFIER, FixedString.class) + .put(VariableString.IDENTIFIER, VariableString.class) + .build(); public static SchemaApi.Schema schemaToProto(Schema schema, boolean serializeLogicalType) { String uuid = schema.getUUID() != null ? schema.getUUID().toString() : ""; @@ -179,11 +196,7 @@ static SchemaApi.FieldType fieldTypeToProto(FieldType fieldType, boolean seriali fieldValueToProto(logicalType.getArgumentType(), logicalType.getArgument())); } } else { - // TODO(https://github.com/apache/beam/issues/19715): "javasdk" types should only - // be a last resort. Types defined in Beam should have their own URN, and there - // should be a mechanism for users to register their own types by URN. - String urn = - identifier.startsWith("beam:logical_type:") ? identifier : URN_BEAM_LOGICAL_JAVASDK; + String urn = getLogicalTypeUrn(identifier); logicalTypeBuilder = SchemaApi.LogicalType.newBuilder() .setRepresentation( @@ -429,15 +442,22 @@ private static FieldType fieldTypeFromProtoWithoutNullable(SchemaApi.FieldType p } else if (urn.equals(URN_BEAM_LOGICAL_DECIMAL)) { return FieldType.DECIMAL; } else if (urn.startsWith("beam:logical_type:")) { - try { - return FieldType.logicalType( - (LogicalType) - SerializableUtils.deserializeFromByteArray( - logicalType.getPayload().toByteArray(), "logicalType")); - } catch (IllegalArgumentException e) { - LOG.warn( - "Unable to deserialize the logical type {} from proto. Mark as UnknownLogicalType.", - urn); + if (!logicalType.getPayload().isEmpty()) { + // logical type has a payload, try to recover the instance by deserialization + try { + return FieldType.logicalType( + (LogicalType) + SerializableUtils.deserializeFromByteArray( + logicalType.getPayload().toByteArray(), "logicalType")); + } catch (IllegalArgumentException e) { + LOG.warn( + "Unable to deserialize the logical type {} from proto. Mark as UnknownLogicalType.", + urn); + } + } else { + // logical type does not have a payload. This happens when it is passed cross-language.
+ // TODO(yathu) it appears this path is called heavily, consider caching the instance + LOG.debug("Constructing non-standard logical type {} as UnknownLogicalType", urn); } } // assemble an UnknownLogicalType diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java index 3679c3eb10f5..db4f141ee624 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicImpulse.java @@ -17,11 +17,15 @@ */ package org.apache.beam.sdk.transforms; +import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; + +import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.transforms.windowing.BoundedWindow; import org.apache.beam.sdk.transforms.windowing.FixedWindows; import org.apache.beam.sdk.transforms.windowing.Window; import org.apache.beam.sdk.values.PBegin; import org.apache.beam.sdk.values.PCollection; +import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.Duration; import org.joda.time.Instant; @@ -34,28 +38,58 @@ */ public class PeriodicImpulse extends PTransform<PBegin, PCollection<Instant>> { - Instant startTimestamp = Instant.now(); - Instant stopTimestamp = BoundedWindow.TIMESTAMP_MAX_VALUE; - Duration fireInterval = Duration.standardMinutes(1); + Instant startTimestamp; + Instant stopTimestamp; + @Nullable Duration stopDuration; + Duration fireInterval; boolean applyWindowing = false; boolean catchUpToNow = true; - private PeriodicImpulse() {} + private PeriodicImpulse() { + this.startTimestamp = Instant.now(); + this.stopTimestamp = BoundedWindow.TIMESTAMP_MAX_VALUE; + this.fireInterval = Duration.standardMinutes(1); + } public static PeriodicImpulse create() { return new PeriodicImpulse(); } + /** + * Assign a timestamp when the pipeline starts to produce data. + * + * <p>Cannot be used along with {@link #stopAfter}. + */ public PeriodicImpulse startAt(Instant startTime) { + checkArgument(stopDuration == null, "startAt and stopAfter cannot be set at the same time"); this.startTimestamp = startTime; return this; } + /** + * Assign a timestamp when the pipeline stops producing data. + * + * <p>Cannot be used along with {@link #stopAfter}. + */ public PeriodicImpulse stopAt(Instant stopTime) { + checkArgument(stopDuration == null, "stopAt and stopAfter cannot be set at the same time"); this.stopTimestamp = stopTime; return this; } + /** + * For internal use only; no backwards-compatibility guarantees. + * + * <p>Assign a time interval for which the pipeline produces data. This is different from setting + * {@link #startAt} and {@link #stopAt}, as the first timestamp is determined at run time (when + * the pipeline starts processing). + */ + @Internal + public PeriodicImpulse stopAfter(Duration duration) { + this.stopDuration = duration; + return this; + } + public PeriodicImpulse withInterval(Duration interval) { this.fireInterval = interval; return this; @@ -67,10 +101,13 @@ public PeriodicImpulse applyWindowing() { } /** - * The default behavior is that PeriodicImpulse emits all instants until Instant.now(), then + * For internal use only; no backwards-compatibility guarantees. + * + * <p>The default behavior is that PeriodicImpulse emits all instants until Instant.now(), then * starts firing at the specified interval. If this is set to false, the PeriodicImpulse will * perform the interval wait before firing each instant. */ + @Internal public PeriodicImpulse catchUpToNow(boolean catchUpToNow) { this.catchUpToNow = catchUpToNow; return this; @@ -78,20 +115,51 @@ public PeriodicImpulse catchUpToNow(boolean catchUpToNow) { @Override public PCollection<Instant> expand(PBegin input) { - PCollection<Instant> result = - input - .apply( - Create.of( - new PeriodicSequence.SequenceDefinition( - startTimestamp, stopTimestamp, fireInterval, catchUpToNow))) - .apply(PeriodicSequence.create()); + PCollection<PeriodicSequence.SequenceDefinition> seqDef; + if (stopDuration != null) { + // nonnull guaranteed + Duration d = stopDuration; + seqDef = + input + .apply(Impulse.create()) + .apply(ParDo.of(new RuntimeSequenceFn(d, fireInterval, catchUpToNow))); + } else { + seqDef = + input.apply( + Create.of( + new PeriodicSequence.SequenceDefinition( + startTimestamp, stopTimestamp, fireInterval, catchUpToNow))); + } + PCollection<Instant> result = seqDef.apply(PeriodicSequence.create()); if (this.applyWindowing) { result = - result.apply( - Window.into(FixedWindows.of(Duration.millis(fireInterval.getMillis())))); + result.apply(Window.into(FixedWindows.of(Duration.millis(fireInterval.getMillis())))); } - return result; } + + /** + * A DoFn that generates a SequenceDefinition at run time. This enables setting the first + * element's timestamp when the pipeline starts processing data. + */ + private static class RuntimeSequenceFn extends DoFn<byte[], PeriodicSequence.SequenceDefinition> { + Duration stopDuration; + Duration fireInterval; + boolean catchUpToNow; + + RuntimeSequenceFn(Duration stopDuration, Duration fireInterval, boolean catchUpToNow) { + this.stopDuration = stopDuration; + this.fireInterval = fireInterval; + this.catchUpToNow = catchUpToNow; + } + + @ProcessElement + public void process(ProcessContext c) { + Instant now = Instant.now(); + c.output( + new PeriodicSequence.SequenceDefinition( + now, now.plus(stopDuration), fireInterval, catchUpToNow)); + } + } } diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java index b3cd2afde697..12cbecd04b02 100644 --- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java +++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/PeriodicSequence.java @@ -22,6 +22,7 @@ import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; import java.util.Objects; +import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.io.range.OffsetRange; import org.apache.beam.sdk.schemas.JavaFieldSchema; import org.apache.beam.sdk.schemas.annotations.DefaultSchema; @@ -67,6 +68,8 @@ public SequenceDefinition(Instant first, Instant last, Duration duration) { this.catchUpToNow = true; } + /** catchUpToNow is experimental; no backwards-compatibility guarantees.
*/ + @Internal public SequenceDefinition( Instant first, Instant last, Duration duration, boolean catchUpToNow) { this.first = first; diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java index bdce452192a4..3020d7e42d05 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/schemas/SchemaTranslationTest.java @@ -17,6 +17,7 @@ */ package org.apache.beam.sdk.schemas; +import static org.apache.beam.sdk.schemas.SchemaTranslation.STANDARD_LOGICAL_TYPES; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -48,6 +49,7 @@ import org.apache.beam.sdk.schemas.logicaltypes.PythonCallable; import org.apache.beam.sdk.schemas.logicaltypes.SchemaLogicalType; import org.apache.beam.sdk.schemas.logicaltypes.SqlTypes; +import org.apache.beam.sdk.schemas.logicaltypes.UnknownLogicalType; import org.apache.beam.sdk.schemas.logicaltypes.VariableBytes; import org.apache.beam.sdk.schemas.logicaltypes.VariableString; import org.apache.beam.sdk.values.Row; @@ -186,7 +188,8 @@ public static Iterable data() { .withOptions(optionsBuilder)) .add( Schema.of( - Field.of("null_argument", FieldType.logicalType(new NullArgumentLogicalType())))) + Field.of( + "null_argument", FieldType.logicalType(new PortableNullArgLogicalType())))) .add(Schema.of(Field.of("logical_argument", FieldType.logicalType(new DateTime())))) .add( Schema.of(Field.of("single_arg_argument", FieldType.logicalType(FixedBytes.of(100))))) @@ -348,14 +351,14 @@ public static Iterable data() { .add(simpleRow(FieldType.row(row.getSchema()), row)) .add(simpleRow(FieldType.DATETIME, new Instant(23L))) .add(simpleRow(FieldType.DECIMAL, BigDecimal.valueOf(100000))) - .add(simpleRow(FieldType.logicalType(new NullArgumentLogicalType()), "str")) + .add(simpleRow(FieldType.logicalType(new PortableNullArgLogicalType()), "str")) .add(simpleRow(FieldType.logicalType(new DateTime()), LocalDateTime.of(2000, 1, 3, 3, 1))) .add(simpleNullRow(FieldType.STRING)) .add(simpleNullRow(FieldType.INT32)) .add(simpleNullRow(FieldType.map(FieldType.STRING, FieldType.INT32))) .add(simpleNullRow(FieldType.array(FieldType.STRING))) .add(simpleNullRow(FieldType.row(row.getSchema()))) - .add(simpleNullRow(FieldType.logicalType(new NullArgumentLogicalType()))) + .add(simpleNullRow(FieldType.logicalType(new PortableNullArgLogicalType()))) .add(simpleNullRow(FieldType.logicalType(new DateTime()))) .add(simpleNullRow(FieldType.DECIMAL)) .add(simpleNullRow(FieldType.DATETIME)) @@ -419,6 +422,8 @@ public static Iterable data() { .add(FieldType.logicalType(FixedString.of(10))) .add(FieldType.logicalType(VariableString.of(10))) .add(FieldType.logicalType(FixedPrecisionNumeric.of(10))) + .add(FieldType.logicalType(new PortableNullArgLogicalType())) + .add(FieldType.logicalType(new NullArgumentLogicalType())) .build(); } @@ -426,7 +431,7 @@ public static Iterable data() { public Schema.FieldType fieldType; @Test - public void testPortableLogicalTypeSerializeDeserilizeCorrectly() { + public void testLogicalTypeSerializeDeserilizeCorrectly() { SchemaApi.FieldType proto = SchemaTranslation.fieldTypeToProto(fieldType, true); Schema.FieldType translated = SchemaTranslation.fieldTypeFromProto(proto); @@ -438,14 +443,64 @@ public void 
testPortableLogicalTypeSerializeDeserilizeCorrectly() { assertThat( translated.getLogicalType().getArgument(), equalTo(fieldType.getLogicalType().getArgument())); + assertThat( + translated.getLogicalType().getIdentifier(), + equalTo(fieldType.getLogicalType().getIdentifier())); + } + + @Test + public void testLogicalTypeFromToProtoCorrectly() { + SchemaApi.FieldType proto = SchemaTranslation.fieldTypeToProto(fieldType, false); + Schema.FieldType translated = SchemaTranslation.fieldTypeFromProto(proto); + + if (STANDARD_LOGICAL_TYPES.containsKey(translated.getLogicalType().getIdentifier())) { + // standard logical type should be able to fully recover the original type + assertThat( + translated.getLogicalType().getClass(), equalTo(fieldType.getLogicalType().getClass())); + } else { + // non-standard type will get assembled to UnknownLogicalType + assertThat(translated.getLogicalType().getClass(), equalTo(UnknownLogicalType.class)); + } + assertThat( + translated.getLogicalType().getArgumentType(), + equalTo(fieldType.getLogicalType().getArgumentType())); + assertThat( + translated.getLogicalType().getArgument(), + equalTo(fieldType.getLogicalType().getArgument())); + if (fieldType.getLogicalType().getIdentifier().startsWith("beam:logical_type:")) { + // portable logical type should fully recover the urn + assertThat( + translated.getLogicalType().getIdentifier(), + equalTo(fieldType.getLogicalType().getIdentifier())); + } else { + // non-portable logical type would have "javasdk_" urn + assertThat( + translated.getLogicalType().getIdentifier(), + equalTo( + String.format( + "beam:logical_type:javasdk_%s:v1", + fieldType + .getLogicalType() + .getIdentifier() + .toLowerCase() + .replaceAll("[^0-9A-Za-z_]", "")))); + } } } - /** A simple logical type that has no argument. */ - private static class NullArgumentLogicalType implements Schema.LogicalType<String, String> { + /** A portable logical type that has no argument. */ + private static class PortableNullArgLogicalType extends NullArgumentLogicalType { public static final String IDENTIFIER = "beam:logical_type:null_argument:v1"; - public NullArgumentLogicalType() {} + @Override + public String getIdentifier() { + return IDENTIFIER; + } + } + + /** A non-portable (Java SDK) logical type that has no argument. */ + private static class NullArgumentLogicalType implements Schema.LogicalType<String, String> { + public static final String IDENTIFIER = "NULL_ARGUMENT"; @Override public String toBaseType(String input) { diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java index 7b924cf6b6da..4dc9bd5777ff 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/BeamSqlSeekableTable.java @@ -20,6 +20,7 @@ import java.io.Serializable; import java.util.List; import org.apache.beam.sdk.options.PipelineOptions; +import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.transforms.DoFn; import org.apache.beam.sdk.values.Row; @@ -28,8 +29,12 @@ * FROM FACT_TABLE JOIN LOOKUP_TABLE ON ...}. */ public interface BeamSqlSeekableTable extends Serializable { - /** prepare the instance. */ - default void setUp() {} + /** + * prepare the instance.
+ * + * @param joinSubsetType the schema of the join subset + */ + default void setUp(Schema joinSubsetType) {} default void startBundle( DoFn<Row, Row>.StartBundleContext context, PipelineOptions pipelineOptions) {} diff --git a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java index e4d62c2b5de7..d25f98729bd4 100644 --- a/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java +++ b/sdks/java/extensions/sql/src/main/java/org/apache/beam/sdk/extensions/sql/impl/transform/BeamJoinTransforms.java @@ -153,7 +153,7 @@ public PCollection<Row> expand(PCollection<Row> input) { new DoFn<Row, Row>() { @Setup public void setup() { - seekableTable.setUp(); + seekableTable.setUp(joinSubsetType); } @StartBundle diff --git a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java index 2e2971ebd6e9..b5fd03045cbc 100644 --- a/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java +++ b/sdks/java/extensions/sql/src/test/java/org/apache/beam/sdk/extensions/sql/impl/rel/BeamSideInputLookupJoinRelTest.java @@ -34,6 +34,7 @@ import org.apache.beam.sdk.values.POutput; import org.apache.beam.sdk.values.Row; import org.hamcrest.core.StringContains; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; @@ -47,11 +48,18 @@ public class BeamSideInputLookupJoinRelTest extends BaseRelTest { /** Test table for JOIN-AS-LOOKUP. */ public static class SiteLookupTable extends SchemaBaseBeamTable implements BeamSqlSeekableTable { + private Schema joinSubsetType; public SiteLookupTable(Schema schema) { super(schema); } + @Override + public void setUp(Schema joinSubsetType) { + this.joinSubsetType = joinSubsetType; + Assert.assertNotNull(joinSubsetType); + } + @Override public PCollection.IsBounded isBounded() { return PCollection.IsBounded.BOUNDED; } @@ -69,6 +77,7 @@ public POutput buildIOWriter(PCollection<Row> input) { @Override public List<Row> seekRow(Row lookupSubRow) { + Assert.assertEquals(joinSubsetType, lookupSubRow.getSchema()); if (lookupSubRow.getInt32("site_id") == 2) { return Arrays.asList(Row.withSchema(getSchema()).addValues(2, "SITE1").build()); } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java index 3f018c376f08..e103da4d6007 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnHarness.java @@ -97,7 +97,6 @@ public class FnHarness { private static final String PIPELINE_OPTIONS_FILE = "PIPELINE_OPTIONS_FILE"; private static final String PIPELINE_OPTIONS = "PIPELINE_OPTIONS"; private static final String RUNNER_CAPABILITIES = "RUNNER_CAPABILITIES"; - private static final String ENABLE_DATA_SAMPLING_EXPERIMENT = "enable_data_sampling"; private static final Logger LOG = LoggerFactory.getLogger(FnHarness.class); private static Endpoints.ApiServiceDescriptor getApiServiceDescriptor(String descriptor) @@ -248,7 +247,8 @@ public static void main( options.as(ExecutorOptions.class).getScheduledExecutorService(); ExecutionStateSampler executionStateSampler = new ExecutionStateSampler(options, System::currentTimeMillis); - final DataSampler dataSampler = new DataSampler(); + + final @Nullable DataSampler dataSampler = DataSampler.create(options); // The logging client variable is not used per se, but during its lifetime (until close()) it // intercepts logging and sends it to the logging service. @@ -276,10 +276,6 @@ public static void main( FinalizeBundleHandler finalizeBundleHandler = new FinalizeBundleHandler(executorService); - // Create the sampler, if the experiment is enabled. - boolean shouldSample = - ExperimentalOptions.hasExperiment(options, ENABLE_DATA_SAMPLING_EXPERIMENT); - // Retrieves the ProcessBundleDescriptor from cache. Requests the PBD from the Runner if it // doesn't exist. Additionally, runs any graph modifications. Function<String, BeamFnApi.ProcessBundleDescriptor> getProcessBundleDescriptor = @@ -314,8 +310,7 @@ private BeamFnApi.ProcessBundleDescriptor loadDescriptor(String id) { metricsShortIds, executionStateSampler, processWideCache, - shouldSample ? dataSampler : null); - logging.setProcessBundleHandler(processBundleHandler); + dataSampler); BeamFnStatusClient beamFnStatusClient = null; if (statusApiServiceDescriptor != null) { @@ -363,7 +358,12 @@ private BeamFnApi.ProcessBundleDescriptor loadDescriptor(String id) { InstructionRequest.RequestCase.HARNESS_MONITORING_INFOS, processWideHandler::harnessMonitoringInfos); handlers.put( - InstructionRequest.RequestCase.SAMPLE_DATA, dataSampler::handleDataSampleRequest); + InstructionRequest.RequestCase.SAMPLE_DATA, + request -> + dataSampler == null + ?
BeamFnApi.InstructionResponse.newBuilder() + .setSampleData(BeamFnApi.SampleDataResponse.newBuilder()) + : dataSampler.handleDataSampleRequest(request)); JvmInitializers.runBeforeProcessing(options); diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java index 0509a26c76bb..876a838f1662 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/BeamFnControlClient.java @@ -118,11 +118,11 @@ public void onNext(BeamFnApi.InstructionRequest request) { sendErrorResponse(e); throw e; } finally { - BeamFnLoggingMDC.setInstructionId(null); + BeamFnLoggingMDC.reset(); } }); } finally { - BeamFnLoggingMDC.setInstructionId(null); + BeamFnLoggingMDC.reset(); } } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java index 313bbf5b4fa2..a82ce9276820 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ExecutionStateSampler.java @@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicReference; import javax.annotation.concurrent.GuardedBy; import org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor; +import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC; import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo; import org.apache.beam.runners.core.metrics.MetricsContainerStepMap; import org.apache.beam.runners.core.metrics.MonitoringInfoEncodings; @@ -120,6 +121,14 @@ public interface ExecutionState { * <p>Must only be invoked by the bundle processing thread. */ void deactivate(); + + /** + * Sets the error state to the currently executing state. Returns true if this was the first + * time the error was set. Returns false otherwise. + * + * <p>This can only be set once. + */ + boolean error(); } /** Stops the execution of the state sampler. */ @@ -250,6 +259,8 @@ public class ExecutionStateTracker implements BundleProgressReporter { private @Nullable ExecutionStateImpl currentState; // Read by multiple threads, written by the bundle processing thread lazily. private final AtomicReference<@Nullable ExecutionStateImpl> currentStateLazy; + // If an exception occurs, this will be set to the state at the time of the exception. + private boolean inErrorState = false; // Read and written by the ExecutionStateSampler thread private long transitionsAtLastSample; @@ -465,6 +476,15 @@ public void deactivate() { numTransitions += 1; numTransitionsLazy.lazySet(numTransitions); } + + @Override + public boolean error() { + if (!inErrorState) { + inErrorState = true; + return true; + } + return false; + } } /** @@ -473,6 +493,7 @@ public void deactivate() { * <p>Only invoked by the bundle processing thread. */ public void start(String processBundleId) { + BeamFnLoggingMDC.setStateTracker(this); this.processBundleId.lazySet(processBundleId); this.lastTransitionTime.lazySet(clock.getMillis()); this.trackedThread.lazySet(Thread.currentThread()); @@ -514,6 +535,8 @@ public void reset() { this.numTransitionsLazy.lazySet(0); this.lastTransitionTime.lazySet(0); this.metricsContainerRegistry.reset(); + this.inErrorState = false; + BeamFnLoggingMDC.setStateTracker(null); } } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java index a7a8766ffc7b..e27df577779c 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistry.java @@ -53,6 +53,8 @@ import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder; import org.apache.beam.sdk.util.common.ElementByteSizeObserver; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * The {@code PCollectionConsumerRegistry} is used to maintain a collection of consuming @@ -97,6 +99,7 @@ public static ConsumerAndMetadata forConsumer( private final ProcessBundleDescriptor processBundleDescriptor; private final RehydratedComponents rehydratedComponents; private final @Nullable DataSampler dataSampler; + private static final Logger LOG = LoggerFactory.getLogger(PCollectionConsumerRegistry.class); public PCollectionConsumerRegistry( ExecutionStateTracker stateTracker, @@ -242,6 +245,26 @@ public FnDataReceiver<WindowedValue<?>> getMultiplexingConsumer(String pCollecti }); } + private static void logAndRethrow( + Exception e, + ExecutionState executionState, + ExecutionStateTracker executionStateTracker, + String ptransformId, + @Nullable OutputSampler<?> outputSampler, + @Nullable ElementSample<?> elementSample) + throws Exception { + ExecutionStateSampler.ExecutionStateTrackerStatus status = executionStateTracker.getStatus(); + String processBundleId = status == null ? null : status.getProcessBundleId(); + if (outputSampler != null) { + outputSampler.exception(elementSample, e, ptransformId, processBundleId); + } + + if (executionState.error()) { + LOG.error("Failed to process element for bundle \"{}\"", processBundleId, e); + } + throw e; + } + /** * A wrapping {@code FnDataReceiver<WindowedValue<T>>} which counts the number of elements * consumed by the original {@code FnDataReceiver<WindowedValue<T>> consumer} and sets up metrics @@ -324,13 +347,8 @@ public void accept(WindowedValue<T> input) throws Exception { try { this.delegate.accept(input); } catch (Exception e) { - if (outputSampler != null) { - ExecutionStateSampler.ExecutionStateTrackerStatus status = - executionStateTracker.getStatus(); - String processBundleId = status == null ?
null : status.getProcessBundleId(); - outputSampler.exception(elementSample, e, ptransformId, processBundleId); - } - throw e; + logAndRethrow( + e, executionState, executionStateTracker, ptransformId, outputSampler, elementSample); } finally { executionState.deactivate(); } @@ -419,14 +437,13 @@ public void accept(WindowedValue<T> input) throws Exception { try { consumerAndMetadata.getConsumer().accept(input); } catch (Exception e) { - if (outputSampler != null) { - ExecutionStateSampler.ExecutionStateTrackerStatus status = - consumerAndMetadata.getExecutionStateTracker().getStatus(); - String processBundleId = status == null ? null : status.getProcessBundleId(); - outputSampler.exception( - elementSample, e, consumerAndMetadata.getPTransformId(), processBundleId); - } - throw e; + logAndRethrow( + e, + state, + consumerAndMetadata.getExecutionStateTracker(), + consumerAndMetadata.getPTransformId(), + outputSampler, + elementSample); } finally { state.deactivate(); } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java index b03c453475bd..29011b82a4dc 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/DataSampler.java @@ -23,9 +23,12 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import javax.annotation.Nullable; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.SampleDataResponse.ElementList; import org.apache.beam.sdk.coders.Coder; +import org.apache.beam.sdk.options.ExperimentalOptions; +import org.apache.beam.sdk.options.PipelineOptions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,19 +40,66 @@ */ public class DataSampler { private static final Logger LOG = LoggerFactory.getLogger(DataSampler.class); + private static final String ENABLE_DATA_SAMPLING_EXPERIMENT = "enable_data_sampling"; + private static final String ENABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT = + "enable_always_on_exception_sampling"; + private static final String DISABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT = + "disable_always_on_exception_sampling"; + + /** + * Optionally returns a DataSampler if the "enable_data_sampling" experiment is present or the + * "enable_always_on_exception_sampling" experiment is present. Returns null if data sampling is + * not enabled or the "disable_always_on_exception_sampling" experiment is given. + * + * @param options the pipeline options given to this SDK Harness. + * @return the DataSampler if enabled, or null otherwise. + */ + public static @Nullable DataSampler create(PipelineOptions options) { + boolean disableAlwaysOnExceptionSampling = + ExperimentalOptions.hasExperiment(options, DISABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT); + boolean enableAlwaysOnExceptionSampling = + ExperimentalOptions.hasExperiment(options, ENABLE_ALWAYS_ON_EXCEPTION_SAMPLING_EXPERIMENT); + boolean enableDataSampling = + ExperimentalOptions.hasExperiment(options, ENABLE_DATA_SAMPLING_EXPERIMENT); + // Enable exception sampling, unless the user specifies for it to be disabled. + enableAlwaysOnExceptionSampling = + enableAlwaysOnExceptionSampling && !disableAlwaysOnExceptionSampling; + + // If no sampling is enabled, don't create the DataSampler.
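Taken together, the experiments resolve to a three-way sampling mode, sketched below as a standalone function (the enum and method names are illustrative, not Beam API; the logic mirrors DataSampler.create, which continues just below):

```java
public class SamplingMode {
  enum Mode { DISABLED, EXCEPTIONS_ONLY, ALL_ELEMENTS }

  static Mode resolve(boolean enableDataSampling,
                      boolean enableAlwaysOnExceptionSampling,
                      boolean disableAlwaysOnExceptionSampling) {
    boolean exceptionSampling =
        enableAlwaysOnExceptionSampling && !disableAlwaysOnExceptionSampling;
    if (enableDataSampling) {
      return Mode.ALL_ELEMENTS; // user explicitly asked to sample everything
    }
    if (exceptionSampling) {
      return Mode.EXCEPTIONS_ONLY; // cheap: only capture elements that threw
    }
    return Mode.DISABLED; // no DataSampler is created at all
  }

  public static void main(String[] args) {
    System.out.println(resolve(false, true, false)); // EXCEPTIONS_ONLY
    System.out.println(resolve(true, true, false));  // ALL_ELEMENTS
    System.out.println(resolve(false, true, true));  // DISABLED
  }
}
```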
+ if (enableDataSampling || enableAlwaysOnExceptionSampling) { + // For performance reasons, sampling all elements should only be done when the user requests + // it. But exception sampling doesn't need to worry about performance implications, since the + // SDK is already in a bad state. Thus, enable only exception sampling when the user does not + // request the sampling of all elements. + boolean onlySampleExceptions = enableAlwaysOnExceptionSampling && !enableDataSampling; + return new DataSampler(onlySampleExceptions); + } else { + return null; + } + } /** * Creates a DataSampler to sample every 1000 elements while keeping a maximum of 10 in memory. */ public DataSampler() { - this(10, 1000); + this(10, 1000, false); + } + + /** + * Creates a DataSampler to sample every 1000 elements while keeping a maximum of 10 in memory. + * + * @param onlySampleExceptions If true, only samples elements from exceptions. + */ + public DataSampler(Boolean onlySampleExceptions) { + this(10, 1000, onlySampleExceptions); } /** * @param maxSamples Sets the maximum number of samples held in memory at once. * @param sampleEveryN Sets how often to sample. */ - public DataSampler(int maxSamples, int sampleEveryN) { + public DataSampler(int maxSamples, int sampleEveryN, Boolean onlySampleExceptions) { checkArgument( maxSamples > 0, "Expected positive number of samples, did you mean to disable data sampling?"); @@ -58,6 +108,7 @@ public DataSampler(int maxSamples, int sampleEveryN) { "Expected positive number for sampling period, did you mean to disable data sampling?"); this.maxSamples = maxSamples; this.sampleEveryN = sampleEveryN; + this.onlySampleExceptions = onlySampleExceptions; } // Maximum number of elements in buffer. @@ -66,6 +117,9 @@ public DataSampler(int maxSamples, int sampleEveryN) { // Sampling rate. private final int sampleEveryN; + // If true, only takes samples when exceptions in UDFs occur. + private final Boolean onlySampleExceptions; + // The fully-qualified type is: Map[PCollectionId, OutputSampler]. In order to sample // on a PCollection-basis and not per-bundle, this keeps track of shared samples between states. private final Map<String, OutputSampler<?>> outputSamplers = new ConcurrentHashMap<>(); @@ -86,7 +140,10 @@ public DataSampler(int maxSamples, int sampleEveryN) { public <T> OutputSampler<T> sampleOutput(String pcollectionId, Coder<T> coder) { return (OutputSampler<T>) outputSamplers.computeIfAbsent( - pcollectionId, k -> new OutputSampler<>(coder, this.maxSamples, this.sampleEveryN)); + pcollectionId, + k -> + new OutputSampler<>( + coder, this.maxSamples, this.sampleEveryN, this.onlySampleExceptions)); } /** diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java index f1e710d3ec7e..f7fabae0cc21 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/debug/OutputSampler.java @@ -60,14 +60,19 @@ public class OutputSampler { // Index into the buffer of where to overwrite samples. private int resampleIndex = 0; + // If true, only takes samples when exceptions in UDFs occur. + private final Boolean onlySampleExceptions; + @Nullable private final Coder<T> valueCoder; @Nullable private final Coder<WindowedValue<T>> windowedValueCoder; - public OutputSampler(Coder<T> coder, int maxElements, int sampleEveryN) { + public OutputSampler( + Coder<T> coder, int maxElements, int sampleEveryN, boolean onlySampleExceptions) { this.maxElements = maxElements; this.sampleEveryN = sampleEveryN; this.buffer = new ArrayList<>(this.maxElements); + this.onlySampleExceptions = onlySampleExceptions; // The samples taken and encoded should match exactly to the specification from the // ProcessBundleDescriptor. The coder given can either be a WindowedValueCoder, in which the @@ -103,7 +108,7 @@ public ElementSample<T> sample(WindowedValue<T> element) { ElementSample<T> elementSample = new ElementSample<>(ThreadLocalRandom.current().nextInt(), element); - if (samples > 10 && samples % sampleEveryN != 0) { + if (onlySampleExceptions || (samples > 10 && samples % sampleEveryN != 0)) { return elementSample; } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java index 2d3e168eab50..8fa074b04768 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClient.java @@ -40,8 +40,7 @@ import java.util.logging.LogRecord; import java.util.logging.Logger; import java.util.logging.SimpleFormatter; -import org.apache.beam.fn.harness.control.ProcessBundleHandler; -import org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor; +import org.apache.beam.fn.harness.control.ExecutionStateSampler; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.LogEntry; import org.apache.beam.model.fnexecution.v1.BeamFnLoggingGrpc; @@ -105,8 +104,6 @@ public class BeamFnLoggingClient implements AutoCloseable { * so if they are garbage collected, our hierarchical configuration will be lost. */ private final Collection<Logger> configuredLoggers = new ArrayList<>(); - private @Nullable ProcessBundleHandler processBundleHandler; - private final BlockingQueue<LogEntry> bufferedLogEntries = new ArrayBlockingQueue<>(MAX_BUFFERED_LOG_ENTRY_COUNT); @@ -347,10 +344,6 @@ public void close() throws Exception { } } - public void setProcessBundleHandler(ProcessBundleHandler processBundleHandler) { - this.processBundleHandler = processBundleHandler; - } - // Reset the logging configuration to what it is at startup.
@RequiresNonNull("configuredLoggers") @RequiresNonNull("logRecordHandler") @@ -440,14 +433,12 @@ public void publish(LogRecord record) { if (loggerName != null) { builder.setLogLocation(loggerName); } - if (instructionId != null && processBundleHandler != null) { - BundleProcessor bundleProcessor = - processBundleHandler.getBundleProcessorCache().find(instructionId); - if (bundleProcessor != null) { - String transformId = bundleProcessor.getStateTracker().getCurrentThreadsPTransformId(); - if (transformId != null) { - builder.setTransformId(transformId); - } + + ExecutionStateSampler.ExecutionStateTracker stateTracker = BeamFnLoggingMDC.getStateTracker(); + if (stateTracker != null) { + String transformId = stateTracker.getCurrentThreadsPTransformId(); + if (transformId != null) { + builder.setTransformId(transformId); } } diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java index bcfcd4b34ea5..68b03a484904 100644 --- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java +++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/logging/BeamFnLoggingMDC.java @@ -17,12 +17,16 @@ */ package org.apache.beam.fn.harness.logging; +import org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionStateTracker; import org.checkerframework.checker.nullness.qual.Nullable; /** Mapped diagnostic context to be consumed and set on LogEntry protos in BeamFnLoggingClient. */ public class BeamFnLoggingMDC { private static final ThreadLocal<@Nullable String> instructionId = new ThreadLocal<>(); + private static final ThreadLocal<@Nullable ExecutionStateTracker> stateTracker = + new ThreadLocal<>(); + /** Sets the Instruction ID of the current thread, which will be inherited by child threads. */ public static void setInstructionId(@Nullable String newInstructionId) { instructionId.set(newInstructionId); @@ -32,4 +36,20 @@ public static void setInstructionId(@Nullable String newInstructionId) { public static @Nullable String getInstructionId() { return instructionId.get(); } + + /** Sets the State Tracker of the current thread, which will be inherited by child threads. */ + public static void setStateTracker(@Nullable ExecutionStateTracker newStateTracker) { + stateTracker.set(newStateTracker); + } + + /** Gets the State Tracker of the current thread. */ + public static @Nullable ExecutionStateTracker getStateTracker() { + return stateTracker.get(); + } + + /** Resets to a default state. 
*/ + public static void reset() { + instructionId.set(null); + stateTracker.set(null); + } } diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java index 9d79de0fa153..47866adc892b 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ExecutionStateSamplerTest.java @@ -22,6 +22,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; @@ -648,4 +649,28 @@ public Long answer(InvocationOnMock invocation) throws Throwable { sampler.stop(); expectedLogs.verifyWarn("Operation ongoing in bundle bundleId for PTransform"); } + + @Test + public void testErrorState() throws Exception { + MillisProvider clock = mock(MillisProvider.class); + ExecutionStateSampler sampler = + new ExecutionStateSampler( + PipelineOptionsFactory.fromArgs("--experiments=state_sampling_period_millis=10") + .create(), + clock); + ExecutionStateTracker tracker = sampler.create(); + ExecutionState state1 = + tracker.create("shortId1", "ptransformId1", "ptransformIdName1", "process"); + ExecutionState state2 = + tracker.create("shortId2", "ptransformId2", "ptransformIdName2", "process"); + + state1.activate(); + state2.activate(); + assertTrue(state2.error()); + assertFalse(state2.error()); + state2.deactivate(); + assertFalse(state2.error()); + tracker.reset(); + assertTrue(state1.error()); + } } diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java index b3e2788e378d..f75b84e76ad5 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/data/PCollectionConsumerRegistryTest.java @@ -33,17 +33,26 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import org.apache.beam.fn.harness.HandlesSplits; import org.apache.beam.fn.harness.control.BundleProgressReporter; import org.apache.beam.fn.harness.control.ExecutionStateSampler; import org.apache.beam.fn.harness.control.ExecutionStateSampler.ExecutionStateTracker; import org.apache.beam.fn.harness.debug.DataSampler; +import org.apache.beam.fn.harness.logging.BeamFnLoggingClient; +import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor; +import org.apache.beam.model.fnexecution.v1.BeamFnLoggingGrpc; +import org.apache.beam.model.pipeline.v1.Endpoints; import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo; import org.apache.beam.model.pipeline.v1.RunnerApi.PCollection; import 
org.apache.beam.runners.core.construction.SdkComponents; @@ -56,6 +65,7 @@ import org.apache.beam.sdk.coders.IterableCoder; import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.fn.data.FnDataReceiver; +import org.apache.beam.sdk.fn.test.TestStreams; import org.apache.beam.sdk.metrics.Counter; import org.apache.beam.sdk.metrics.Metrics; import org.apache.beam.sdk.metrics.MetricsEnvironment; @@ -65,6 +75,12 @@ import org.apache.beam.sdk.util.common.ElementByteSizeObservableIterable; import org.apache.beam.sdk.util.common.ElementByteSizeObservableIterator; import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.ManagedChannel; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.Server; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.inprocess.InProcessChannelBuilder; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.inprocess.InProcessServerBuilder; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.CallStreamObserver; +import org.apache.beam.vendor.grpc.v1p54p0.io.grpc.stub.StreamObserver; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; import org.junit.After; import org.junit.Before; @@ -567,6 +583,124 @@ public void dataSampling() throws Exception { assertTrue(elementList.getElementsList().containsAll(expectedSamples)); } + @Test + public void logsExceptionWithTransformId() throws Exception { + final String pTransformId = "pTransformId"; + final String message = "testException"; + final String instructionId = "instruction"; + final Exception thrownException = new Exception(message); + + // The following is a bunch of boiler-plate to set up a local FnApiLoggingService to catch any + // logs for later test + // expectations. + AtomicBoolean clientClosedStream = new AtomicBoolean(); + Collection values = new ConcurrentLinkedQueue<>(); + AtomicReference> outboundServerObserver = + new AtomicReference<>(); + CallStreamObserver inboundServerObserver = + TestStreams.withOnNext( + (BeamFnApi.LogEntry.List logEntries) -> + values.addAll(logEntries.getLogEntriesList())) + .withOnCompleted( + () -> { + // Remember that the client told us that this stream completed + clientClosedStream.set(true); + outboundServerObserver.get().onCompleted(); + }) + .build(); + + Endpoints.ApiServiceDescriptor apiServiceDescriptor = + Endpoints.ApiServiceDescriptor.newBuilder() + .setUrl(this.getClass().getName() + "-" + UUID.randomUUID().toString()) + .build(); + Server server = + InProcessServerBuilder.forName(apiServiceDescriptor.getUrl()) + .addService( + new BeamFnLoggingGrpc.BeamFnLoggingImplBase() { + @Override + public StreamObserver logging( + StreamObserver outboundObserver) { + outboundServerObserver.set(outboundObserver); + return inboundServerObserver; + } + }) + .build(); + server.start(); + ManagedChannel channel = InProcessChannelBuilder.forName(apiServiceDescriptor.getUrl()).build(); + // End logging boiler-plate... + + // This section is to set up the StateSampler with the expected metadata. + ExecutionStateSampler sampler = + new ExecutionStateSampler(PipelineOptionsFactory.create(), System::currentTimeMillis); + ExecutionStateSampler.ExecutionStateTracker stateTracker = sampler.create(); + stateTracker.start("process-bundle"); + ExecutionStateSampler.ExecutionState state = + stateTracker.create("shortId", pTransformId, pTransformId, "process"); + state.activate(); + + // Track the instruction and state in the logging system. 
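The test body continues below; the mechanism it exercises is worth spelling out. Instead of handing the logging client a `ProcessBundleHandler`, the harness now stashes the instruction ID and state tracker in static per-thread slots (`BeamFnLoggingMDC`) that the log handler reads back when it builds a `LogEntry`. A minimal sketch of that pattern, assuming nothing from the harness; `Context` and `decorate` are illustrative names, and `InheritableThreadLocal` is used here to match the "inherited by child threads" behavior the javadoc above describes:

```java
public class MdcSketch {
  /** Illustrative stand-in for the instruction ID and state tracker pair. */
  static final class Context {
    final String instructionId;
    final String transformId;

    Context(String instructionId, String transformId) {
      this.instructionId = instructionId;
      this.transformId = transformId;
    }
  }

  // Per-thread slot; InheritableThreadLocal so child threads see the parent's context.
  private static final InheritableThreadLocal<Context> MDC = new InheritableThreadLocal<>();

  static void set(Context context) {
    MDC.set(context);
  }

  static void reset() {
    MDC.set(null);
  }

  /** What a log handler does: enrich the entry without being handed the context explicitly. */
  static String decorate(String message) {
    Context c = MDC.get();
    return c == null ? message : "[" + c.instructionId + "/" + c.transformId + "] " + message;
  }

  public static void main(String[] args) {
    set(new Context("instruction", "pTransformId"));
    System.out.println(decorate("Failed to process element")); // [instruction/pTransformId] ...
    reset();
    System.out.println(decorate("after reset")); // plain message
  }
}
```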
In a real run, this is set when a + // ProcessBundleHandler + // starts processing. + BeamFnLoggingMDC.setInstructionId(instructionId); + BeamFnLoggingMDC.setStateTracker(stateTracker); + + // Start the test within the logging context. This reroutes logging through to the boiler-plate + // that was set up + // earlier. + try (BeamFnLoggingClient ignored = + BeamFnLoggingClient.createAndStart( + PipelineOptionsFactory.create(), + apiServiceDescriptor, + (Endpoints.ApiServiceDescriptor descriptor) -> channel)) { + + // Set up the component under test, the FnDataReceiver, to emit an exception when it starts. + ShortIdMap shortIds = new ShortIdMap(); + BundleProgressReporter.InMemory reporterAndRegistrar = new BundleProgressReporter.InMemory(); + PCollectionConsumerRegistry consumers = + new PCollectionConsumerRegistry( + stateTracker, shortIds, reporterAndRegistrar, TEST_DESCRIPTOR); + FnDataReceiver<WindowedValue<String>> consumer = mock(FnDataReceiver.class); + + consumers.register(P_COLLECTION_A, pTransformId, pTransformId + "Name", consumer); + + FnDataReceiver<WindowedValue<String>> wrapperConsumer = + (FnDataReceiver<WindowedValue<String>>) + (FnDataReceiver) consumers.getMultiplexingConsumer(P_COLLECTION_A); + + doThrow(thrownException).when(consumer).accept(any()); + expectedException.expectMessage(message); + expectedException.expect(Exception.class); + + // Run the test. + wrapperConsumer.accept(valueInGlobalWindow("elem")); + + } finally { + // The actual log entry has a lot of metadata that can't easily be controlled. So set the + // entries that are needed + // for this test and cull everything else. + final BeamFnApi.LogEntry expectedEntry = + BeamFnApi.LogEntry.newBuilder() + .setInstructionId(instructionId) + .setTransformId(pTransformId) + .setMessage("Failed to process element for bundle \"process-bundle\"") + .build(); + + List<BeamFnApi.LogEntry> entries = new ArrayList<>(values); + assertEquals(1, entries.size()); + BeamFnApi.LogEntry actualEntry = entries.get(0); + BeamFnApi.LogEntry actualEntryCulled = + BeamFnApi.LogEntry.newBuilder() + .setInstructionId(actualEntry.getInstructionId()) + .setTransformId(actualEntry.getTransformId()) + .setMessage(actualEntry.getMessage()) + .build(); + + assertEquals(expectedEntry, actualEntryCulled); + + server.shutdownNow(); + } + } + private static class TestElementByteSizeObservableIterable extends ElementByteSizeObservableIterable> { private List elements; diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java index a05efca120ab..1cad5210380b 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/DataSamplerTest.java @@ -21,6 +21,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; @@ -32,10 +33,13 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import org.apache.beam.model.fnexecution.v1.BeamFnApi; +import org.apache.beam.model.fnexecution.v1.BeamFnApi.SampledElement; import org.apache.beam.sdk.coders.ByteArrayCoder; import org.apache.beam.sdk.coders.Coder; import org.apache.beam.sdk.coders.StringUtf8Coder; import org.apache.beam.sdk.coders.VarIntCoder; +import org.apache.beam.sdk.options.ExperimentalOptions; +import
org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.util.WindowedValue; import org.apache.beam.vendor.grpc.v1p54p0.com.google.protobuf.ByteString; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; @@ -117,6 +121,21 @@ void assertHasSamples( assertTrue(elementList.getElementsList().containsAll(expectedSamples)); } + void assertHasSamples( + BeamFnApi.InstructionResponse response, + String pcollection, + List elements) { + Map elementSamplesMap = + response.getSampleData().getElementSamplesMap(); + + assertFalse(elementSamplesMap.isEmpty()); + + BeamFnApi.SampleDataResponse.ElementList elementList = elementSamplesMap.get(pcollection); + assertNotNull(elementList); + + assertTrue(elementList.getElementsList().containsAll(elements)); + } + /** * Smoke test that a samples show in the output map. * @@ -203,7 +222,7 @@ void generateStringSamples(DataSampler sampler) { */ @Test public void testFiltersSinglePCollectionId() throws Exception { - DataSampler sampler = new DataSampler(10, 10); + DataSampler sampler = new DataSampler(10, 10, false); generateStringSamples(sampler); BeamFnApi.InstructionResponse samples = getSamplesForPCollection(sampler, "a"); @@ -219,7 +238,7 @@ public void testFiltersSinglePCollectionId() throws Exception { public void testFiltersMultiplePCollectionIds() throws Exception { List pcollectionIds = ImmutableList.of("a", "c"); - DataSampler sampler = new DataSampler(10, 10); + DataSampler sampler = new DataSampler(10, 10, false); generateStringSamples(sampler); BeamFnApi.InstructionResponse samples = getSamplesForPCollections(sampler, pcollectionIds); @@ -275,4 +294,87 @@ public void testConcurrentNewSampler() throws Exception { sampleThread.join(); } } + + /** + * Tests that including the "enable_always_on_exception_sampling" can sample. + * + * @throws Exception + */ + @Test + public void testEnableAlwaysOnExceptionSampling() throws Exception { + ExperimentalOptions experimentalOptions = PipelineOptionsFactory.as(ExperimentalOptions.class); + experimentalOptions.setExperiments( + Collections.singletonList("enable_always_on_exception_sampling")); + DataSampler sampler = DataSampler.create(experimentalOptions); + assertNotNull(sampler); + + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = sampler.sampleOutput("pcollection-id", coder); + ElementSample elementSample = outputSampler.sample(globalWindowedValue(1)); + outputSampler.exception(elementSample, new RuntimeException(), "", ""); + + outputSampler.sample(globalWindowedValue(2)); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + List expectedSamples = + ImmutableList.of( + SampledElement.newBuilder() + .setElement(ByteString.copyFrom(encodeInt(1))) + .setException( + SampledElement.Exception.newBuilder() + .setError(new RuntimeException().toString())) + .build()); + assertHasSamples(samples, "pcollection-id", expectedSamples); + } + + /** + * Tests that "disable_always_on_exception_sampling" overrides the always on experiment. 
+ * + * @throws Exception + */ + @Test + public void testDisableAlwaysOnExceptionSampling() throws Exception { + ExperimentalOptions experimentalOptions = PipelineOptionsFactory.as(ExperimentalOptions.class); + experimentalOptions.setExperiments( + ImmutableList.of( + "enable_always_on_exception_sampling", "disable_always_on_exception_sampling")); + DataSampler sampler = DataSampler.create(experimentalOptions); + assertNull(sampler); + } + + /** + * Tests that the "enable_data_sampling" experiment overrides + * "disable_always_on_exception_sampling". + * + * @throws Exception + */ + @Test + public void testDisableAlwaysOnExceptionSamplingWithEnableDataSampling() throws Exception { + ExperimentalOptions experimentalOptions = PipelineOptionsFactory.as(ExperimentalOptions.class); + experimentalOptions.setExperiments( + ImmutableList.of( + "enable_data_sampling", + "enable_always_on_exception_sampling", + "disable_always_on_exception_sampling")); + DataSampler sampler = DataSampler.create(experimentalOptions); + assertNotNull(sampler); + + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = sampler.sampleOutput("pcollection-id", coder); + ElementSample elementSample = outputSampler.sample(globalWindowedValue(1)); + outputSampler.exception(elementSample, new RuntimeException(), "", ""); + + outputSampler.sample(globalWindowedValue(2)); + + BeamFnApi.InstructionResponse samples = getAllSamples(sampler); + List expectedSamples = + ImmutableList.of( + SampledElement.newBuilder() + .setElement(ByteString.copyFrom(encodeInt(1))) + .setException( + SampledElement.Exception.newBuilder() + .setError(new RuntimeException().toString())) + .build()); + assertHasSamples(samples, "pcollection-id", expectedSamples); + } } diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java index 5ca562e1c241..26285205bd34 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/debug/OutputSamplerTest.java @@ -90,7 +90,7 @@ public BeamFnApi.SampledElement encodeException( @Test public void testSamplesFirstN() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10, false); // Purposely go over maxSamples and sampleEveryN. This helps to increase confidence. for (int i = 0; i < 15; ++i) { @@ -112,7 +112,7 @@ public void testWindowedValueSample() throws IOException { WindowedValue.WindowedValueCoder coder = WindowedValue.FullWindowedValueCoder.of(VarIntCoder.of(), GlobalWindow.Coder.INSTANCE); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10, false); outputSampler.sample(WindowedValue.valueInGlobalWindow(0)); // The expected list is only 0..9 inclusive. @@ -125,7 +125,7 @@ public void testWindowedValueSample() throws IOException { public void testNonWindowedValueSample() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 10, false); outputSampler.sample(WindowedValue.valueInGlobalWindow(0)); // The expected list is only 0..9 inclusive. 
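Before the remaining `OutputSampler` tests, it may help to see the new flag end to end. A condensed usage sketch against the API as introduced in this diff (`OutputSampler`, `ElementSample`); the literal values are arbitrary:

```java
import org.apache.beam.fn.harness.debug.ElementSample;
import org.apache.beam.fn.harness.debug.OutputSampler;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.util.WindowedValue;

public class ExceptionOnlySamplingSketch {
  public static void main(String[] args) throws Exception {
    // onlySampleExceptions = true: sample() buffers nothing up front.
    OutputSampler<Integer> sampler = new OutputSampler<>(VarIntCoder.of(), 5, 20, true);

    sampler.sample(WindowedValue.valueInGlobalWindow(2)); // never surfaces: no exception follows
    ElementSample<Integer> failing = sampler.sample(WindowedValue.valueInGlobalWindow(1));

    // Retroactively records element 1 together with the exception metadata.
    sampler.exception(failing, new RuntimeException("boom"), "ptransformId", "processBundleId");

    // Only the element tied to the exception is reported.
    System.out.println(sampler.samples());
  }
}
```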
@@ -142,7 +142,7 @@ public void testNonWindowedValueSample() throws IOException { @Test public void testActsLikeCircularBuffer() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20, false); for (int i = 0; i < 100; ++i) { outputSampler.sample(WindowedValue.valueInGlobalWindow(i)); @@ -171,7 +171,7 @@ public void testActsLikeCircularBuffer() throws IOException { @Test public void testCanSampleExceptions() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20, false); WindowedValue<Integer> windowedValue = WindowedValue.valueInGlobalWindow(1); ElementSample<Integer> elementSample = outputSampler.sample(windowedValue); @@ -197,7 +197,7 @@ public void testCanSampleExceptions() throws IOException { @Test public void testNoDuplicateExceptions() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20, false); ElementSample<Integer> elementSampleA = outputSampler.sample(WindowedValue.valueInGlobalWindow(1)); @@ -227,7 +227,7 @@ public void testNoDuplicateExceptions() throws IOException { @Test public void testExceptionOnlySampledIfNonNullProcessBundle() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20, false); WindowedValue<Integer> windowedValue = WindowedValue.valueInGlobalWindow(1); ElementSample<Integer> elementSample = outputSampler.sample(windowedValue); @@ -244,15 +244,14 @@ public void testExceptionOnlySampledIfNonNullProcessBundle() throws IOException } /** - * Tests that multiple samples don't push out exception samples. TODO: test that the exception - * metadata is set. + * Tests that multiple samples don't push out exception samples. * * @throws IOException when encoding fails (shouldn't happen). */ @Test public void testExceptionSamplesAreNotRemoved() throws IOException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20); + OutputSampler<Integer> outputSampler = new OutputSampler<>(coder, 5, 20, false); WindowedValue<Integer> windowedValue = WindowedValue.valueInGlobalWindow(0); ElementSample<Integer> elementSample = outputSampler.sample(windowedValue); @@ -281,6 +280,32 @@ public void testExceptionSamplesAreNotRemoved() throws IOException { assertThat(samples, containsInAnyOrder(expected.toArray())); } + /** + * Test that the onlySampleExceptions flag works. + * + * @throws IOException when encoding fails (shouldn't happen).
+ */ + @Test + public void testOnlySampleExceptions() throws IOException { + VarIntCoder coder = VarIntCoder.of(); + OutputSampler outputSampler = new OutputSampler<>(coder, 5, 20, true); + + WindowedValue windowedValue = WindowedValue.valueInGlobalWindow(1); + outputSampler.sample(WindowedValue.valueInGlobalWindow(2)); + ElementSample elementSample = outputSampler.sample(windowedValue); + + Exception exception = new RuntimeException("Test exception"); + String ptransformId = "ptransform"; + String processBundleId = "processBundle"; + outputSampler.exception(elementSample, exception, ptransformId, processBundleId); + + List expected = new ArrayList<>(); + expected.add(encodeException(1, exception.toString(), ptransformId, processBundleId)); + + List samples = outputSampler.samples(); + assertThat(samples, containsInAnyOrder(expected.toArray())); + } + /** * Test that sampling a PCollection while retrieving samples from multiple threads is ok. * @@ -289,7 +314,7 @@ public void testExceptionSamplesAreNotRemoved() throws IOException { @Test public void testConcurrentSamples() throws IOException, InterruptedException { VarIntCoder coder = VarIntCoder.of(); - OutputSampler outputSampler = new OutputSampler<>(coder, 10, 2); + OutputSampler outputSampler = new OutputSampler<>(coder, 10, 2, false); CountDownLatch startSignal = new CountDownLatch(1); CountDownLatch doneSignal = new CountDownLatch(2); diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java index 47b59bc68b07..8c7a40f8db90 100644 --- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java +++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/logging/BeamFnLoggingClientTest.java @@ -38,6 +38,7 @@ import java.util.logging.LogRecord; import java.util.logging.Logger; import java.util.logging.SimpleFormatter; +import org.apache.beam.fn.harness.control.ExecutionStateSampler; import org.apache.beam.model.fnexecution.v1.BeamFnApi; import org.apache.beam.model.fnexecution.v1.BeamFnLoggingGrpc; import org.apache.beam.model.pipeline.v1.Endpoints; @@ -95,6 +96,7 @@ public class BeamFnLoggingClientTest { .setInstructionId("instruction-1") .setSeverity(BeamFnApi.LogEntry.Severity.Enum.DEBUG) .setMessage("Message") + .setTransformId("ptransformId") .setThread("12345") .setTimestamp(Timestamp.newBuilder().setSeconds(1234567).setNanos(890000000).build()) .setLogLocation("LoggerName") @@ -104,6 +106,7 @@ public class BeamFnLoggingClientTest { .setInstructionId("instruction-1") .setSeverity(BeamFnApi.LogEntry.Severity.Enum.DEBUG) .setMessage("testMdcValue:Message") + .setTransformId("ptransformId") .setCustomData( Struct.newBuilder() .putFields( @@ -117,6 +120,7 @@ public class BeamFnLoggingClientTest { .setInstructionId("instruction-1") .setSeverity(BeamFnApi.LogEntry.Severity.Enum.WARN) .setMessage("MessageWithException") + .setTransformId("errorPtransformId") .setTrace(getStackTraceAsString(TEST_RECORD_WITH_EXCEPTION.getThrown())) .setThread("12345") .setTimestamp(Timestamp.newBuilder().setSeconds(1234567).setNanos(890000000).build()) @@ -126,7 +130,16 @@ public class BeamFnLoggingClientTest { @Test public void testLogging() throws Exception { + ExecutionStateSampler sampler = + new ExecutionStateSampler(PipelineOptionsFactory.create(), null); + ExecutionStateSampler.ExecutionStateTracker stateTracker = sampler.create(); + 
ExecutionStateSampler.ExecutionState state = + stateTracker.create("shortId", "ptransformId", "ptransformIdName", "process"); + state.activate(); + BeamFnLoggingMDC.setInstructionId("instruction-1"); + BeamFnLoggingMDC.setStateTracker(stateTracker); + AtomicBoolean clientClosedStream = new AtomicBoolean(); Collection values = new ConcurrentLinkedQueue<>(); AtomicReference> outboundServerObserver = @@ -188,7 +201,14 @@ public StreamObserver logging( rootLogger.log(FILTERED_RECORD); // Should not be filtered because the default log level override for ConfiguredLogger is DEBUG configuredLogger.log(TEST_RECORD); + + // Simulate an exception. This sets an internal error state that determines which PTransform + // the log entry is attributed to. + ExecutionStateSampler.ExecutionState errorState = + stateTracker.create("shortId", "errorPtransformId", "errorPtransformIdName", "process"); + errorState.activate(); configuredLogger.log(TEST_RECORD_WITH_EXCEPTION); + errorState.deactivate(); // Ensure that configuring a custom formatter on the logging handler will be honored. for (Handler handler : rootLogger.getHandlers()) { diff --git a/sdks/java/io/google-ads/build.gradle b/sdks/java/io/google-ads/build.gradle index e874c0b3cb1b..f3dbb19e04f4 100644 --- a/sdks/java/io/google-ads/build.gradle +++ b/sdks/java/io/google-ads/build.gradle @@ -27,7 +27,6 @@ dependencies { implementation project(path: ":sdks:java:extensions:google-cloud-platform-core") implementation library.java.jackson_annotations implementation library.java.gax - implementation library.java.google_ads implementation library.java.google_auth_library_credentials implementation library.java.google_auth_library_oauth2_http implementation library.java.protobuf_java diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java index 018e538f552c..15230c8adef9 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableConfig.java @@ -25,6 +25,7 @@ import org.apache.beam.sdk.annotations.Internal; import org.apache.beam.sdk.extensions.gcp.auth.CredentialFactory; import org.apache.beam.sdk.extensions.gcp.options.GcpOptions; +import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableClientOverride; import org.apache.beam.sdk.options.ValueProvider; import org.apache.beam.sdk.transforms.SerializableFunction; import org.apache.beam.sdk.transforms.display.DisplayData; @@ -49,6 +50,9 @@ public abstract class BigtableConfig implements Serializable { /** Returns the app profile being read from. */ public abstract @Nullable ValueProvider getAppProfileId(); + /** Returns the Bigtable client override. */ + public abstract @Nullable BigtableClientOverride getBigtableClientOverride(); + /** * Returns the Google Cloud Bigtable instance being written to, and other parameters.
* @@ -113,6 +117,8 @@ abstract Builder setBigtableOptionsConfigurator( abstract Builder setChannelCount(int count); + abstract Builder setBigtableClientOverride(BigtableClientOverride clientOverride); + abstract BigtableConfig build(); } @@ -156,6 +162,12 @@ public BigtableConfig withEmulator(String emulatorHost) { return toBuilder().setEmulatorHost(emulatorHost).build(); } + @VisibleForTesting + BigtableConfig withBigtableClientOverride(BigtableClientOverride clientOverride) { + checkArgument(clientOverride != null, "clientOverride can not be null"); + return toBuilder().setBigtableClientOverride(clientOverride).build(); + } + void validate() { checkArgument( (getProjectId() != null diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java index a655a29e92b2..9f3c627a89ef 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/BigtableIO.java @@ -49,6 +49,7 @@ import org.apache.beam.sdk.io.gcp.bigtable.changestreams.UniqueIdGenerator; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.action.ActionFactory; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableChangeStreamAccessor; +import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableClientOverride; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.DaoFactory; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.MetadataTableAdminDao; import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dofn.DetectNewPartitionsDoFn; @@ -1988,6 +1989,24 @@ public ReadChangeStream withMetadataTableAppProfileId(String appProfileId) { .build(); } + /** + * Returns a new {@link BigtableIO.ReadChangeStream} that overrides the config of data and/or + * admin client for streaming changes and for managing the metadata. For testing purposes only. + * Not intended for use. + * + *
<p>
Does not modify this object. + */ + @VisibleForTesting + ReadChangeStream withBigtableClientOverride(BigtableClientOverride clientOverride) { + BigtableConfig config = getBigtableConfig(); + BigtableConfig metadataTableConfig = getMetadataTableBigtableConfig(); + return toBuilder() + .setBigtableConfig(config.withBigtableClientOverride(clientOverride)) + .setMetadataTableBigtableConfig( + metadataTableConfig.withBigtableClientOverride(clientOverride)) + .build(); + } + /** * Returns a new {@link BigtableIO.ReadChangeStream} that, if set to true, will create or update * metadata table before launching pipeline. Otherwise, it is expected that a metadata table diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java index ecf9a7039598..cb296aef6c28 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableChangeStreamAccessor.java @@ -210,6 +210,13 @@ private static BigtableChangeStreamAccessor createAccessor(@NonNull BigtableConf .setMaxAttempts(10) .build()); + final BigtableClientOverride clientOverride = bigtableConfig.getBigtableClientOverride(); + if (clientOverride != null) { + clientOverride.updateTableAdminClientSettings(tableAdminSettingsBuilder); + clientOverride.updateInstanceAdminClientSettings(instanceAdminSettingsBuilder); + clientOverride.updateDataClientSettings(dataSettingsBuilder); + } + BigtableDataClient dataClient = BigtableDataClient.create(dataSettingsBuilder.build()); BigtableTableAdminClient tableAdminClient = BigtableTableAdminClient.create(tableAdminSettingsBuilder.build()); diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java new file mode 100644 index 000000000000..72b3e39871ef --- /dev/null +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigtable/changestreams/dao/BigtableClientOverride.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao; + +import com.google.cloud.bigtable.admin.v2.BigtableInstanceAdminSettings; +import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings; +import com.google.cloud.bigtable.data.v2.BigtableDataSettings; +import java.io.IOException; +import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; + +/** Override the configuration of Cloud Bigtable data and admin client. */ +@VisibleForTesting +public interface BigtableClientOverride { + /** + * Update {@link BigtableInstanceAdminSettings.Builder} with custom configurations. + * + *
<p>
For example, to update the admin api endpoint. + * + * @param builder builds the instance admin client + * @throws IOException when dependency initialization fails + */ + void updateInstanceAdminClientSettings(BigtableInstanceAdminSettings.Builder builder) + throws IOException; + + /** + * Update {@link BigtableTableAdminSettings.Builder} with custom configurations. + * + *
<p>
For example, to update the admin api endpoint. + * + * @param builder builds the table admin client + * @throws IOException when dependency initialization fails + */ + void updateTableAdminClientSettings(BigtableTableAdminSettings.Builder builder) + throws IOException; + + /** + * Update {@link BigtableDataSettings.Builder} with custom configurations. + * + * @param builder builds the data client + * @throws IOException when dependency initialization fails + */ + void updateDataClientSettings(BigtableDataSettings.Builder builder) throws IOException; +} diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java index a72b55bd7209..39c30b949425 100644 --- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java +++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/healthcare/HealthcareApiClient.java @@ -36,60 +36,68 @@ import org.checkerframework.checker.nullness.qual.Nullable; import org.joda.time.Instant; -/** Defines a client that talks to the Cloud Healthcare API (version v1). */ +/** Defines a client to communicate with the GCP HCLS API (version v1). */ public interface HealthcareApiClient { /** - * Fetches an Hl7v2 message by its name from a Hl7v2 store. + * Gets a Hl7v2 message by its name from a Hl7v2 store. * - * @param msgName the msg name - * @return HL7v2 message - * @throws IOException the io exception - * @throws ParseException the parse exception + * @param msgName The message name to be retrieved. + * @return The HL7v2 message. + * @throws IOException The IO Exception. + * @throws ParseException The Parse Exception. */ Message getHL7v2Message(String msgName) throws IOException, ParseException; /** - * Delete hl 7 v 2 message empty. + * Deletes an HL7v2 message. * - * @param msgName the msg name - * @return the empty - * @throws IOException the io exception + * @param msgName The message name to be deleted. + * @return Empty. + * @throws IOException The IO Exception. */ Empty deleteHL7v2Message(String msgName) throws IOException; /** - * Gets HL7v2 store. + * Gets an HL7v2 store. * - * @param storeName the store name - * @return the HL7v2 store - * @throws IOException the io exception + * @param storeName The store name to be retrieved. + * @return The HL7v2 store. + * @throws IOException The IO Exception. */ Hl7V2Store getHL7v2Store(String storeName) throws IOException; /** - * Gets earliest hl 7 v 2 send time. + * Gets the earliest HL7v2 send time. * - * @param hl7v2Store the hl 7 v 2 store - * @param filter the filter - * @return the earliest hl 7 v 2 send time - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store. + * @param filter the filter to be matched on. + * @return The earliest HL7v2 send time. + * @throws IOException The IO Exception. */ Instant getEarliestHL7v2SendTime(String hl7v2Store, @Nullable String filter) throws IOException; + /** + * Gets the latest HL7v2 send time. + * + * @param hl7v2Store The HL7v2 store. + * @param filter The filter to be matched on. + * @return The latest HL7v2 send time. + * @throws IOException The IO Exception. + */ Instant getLatestHL7v2SendTime(String hl7v2Store, @Nullable String filter) throws IOException; /** - * Make send time bound hl 7 v 2 list request. + * Time Bound HL7v2 list request. 
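Back to the `BigtableClientOverride` hooks that close above: a test would implement all three methods, typically to point the clients at a non-default endpoint before `BigtableChangeStreamAccessor` builds them. A hypothetical implementation sketch; the endpoint value is made up, and the `stubSettings().setEndpoint(...)` calls reflect my reading of the Bigtable client builders rather than anything in this PR:

```java
import com.google.cloud.bigtable.admin.v2.BigtableInstanceAdminSettings;
import com.google.cloud.bigtable.admin.v2.BigtableTableAdminSettings;
import com.google.cloud.bigtable.data.v2.BigtableDataSettings;
import java.io.IOException;
import org.apache.beam.sdk.io.gcp.bigtable.changestreams.dao.BigtableClientOverride;

class EndpointOverride implements BigtableClientOverride {
  private final String endpoint; // e.g. "test-bigtable.example.com:443" (illustrative)

  EndpointOverride(String endpoint) {
    this.endpoint = endpoint;
  }

  @Override
  public void updateInstanceAdminClientSettings(BigtableInstanceAdminSettings.Builder builder)
      throws IOException {
    builder.stubSettings().setEndpoint(endpoint);
  }

  @Override
  public void updateTableAdminClientSettings(BigtableTableAdminSettings.Builder builder)
      throws IOException {
    builder.stubSettings().setEndpoint(endpoint);
  }

  @Override
  public void updateDataClientSettings(BigtableDataSettings.Builder builder) throws IOException {
    builder.stubSettings().setEndpoint(endpoint);
  }
}
```

`withBigtableClientOverride(new EndpointOverride(...))` then installs it on both the change-stream config and the metadata-table config, per the `BigtableIO.ReadChangeStream` change earlier.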
* - * @param hl7v2Store the hl 7 v 2 store - * @param start the start - * @param end the end - * @param otherFilter the other filter - * @param orderBy the order by - * @param pageToken the page token - * @return the list messages response - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store. + * @param start Start time. + * @param end End time. + * @param otherFilter The filter outside of the sendTime. + * @param orderBy Order by. + * @param pageToken The page token. + * @return HTTP List response. + * @throws IOException The IO Exception. */ ListMessagesResponse makeSendTimeBoundHL7v2ListRequest( String hl7v2Store, @@ -103,12 +111,12 @@ ListMessagesResponse makeSendTimeBoundHL7v2ListRequest( /** * Make hl 7 v 2 list request list messages response. * - * @param hl7v2Store the hl 7 v 2 store - * @param filter the filter - * @param orderBy the order by - * @param pageToken the page token - * @return the list messages response - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 Store. + * @param filter The Filter. + * @param orderBy Order by. + * @param pageToken The Page Token. + * @return HTTP List response. + * @throws IOException The IO Exception. */ ListMessagesResponse makeHL7v2ListRequest( String hl7v2Store, @@ -118,38 +126,89 @@ ListMessagesResponse makeHL7v2ListRequest( throws IOException; /** - * Ingest hl 7 v 2 message ingest message response. + * Ingest an HL7v2 message. * - * @param hl7v2Store the hl 7 v 2 store - * @param msg the msg - * @return the ingest message response - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store of the message. + * @param msg The message. + * @return The ingest message response. + * @throws IOException The IO Exception. */ IngestMessageResponse ingestHL7v2Message(String hl7v2Store, Message msg) throws IOException; /** - * Create hl 7 v 2 message message. + * Creates an HL7v2 message. * - * @param hl7v2Store the hl 7 v 2 store - * @param msg the msg - * @return the message - * @throws IOException the io exception + * @param hl7v2Store The HL7v2 store to create a message in. + * @param msg The message to create. + * @return The created message. + * @throws IOException The IO Exception. */ Message createHL7v2Message(String hl7v2Store, Message msg) throws IOException; + /** + * Deletes an HL7v2 store. + * + * @param store The HL7v2 store to be deleted. + * @return Empty. + * @throws IOException The IO Exception. + */ + Empty deleteHL7v2Store(String store) throws IOException; + + /** + * Imports a FHIR resource from GCS. + * + * @param fhirStore the FhirStore to import into. + * @param gcsSourcePath the GCS Path of resource. + * @param contentStructure The content structure. + * @return The import operation. + * @throws IOException The IO Exception. + */ Operation importFhirResource( String fhirStore, String gcsSourcePath, @Nullable String contentStructure) throws IOException; + /** + * Export a FHIR Resource to GCS. + * + * @param fhirStore the FhirStore of the resource. + * @param gcsDestinationPrefix GCS Destination Prefix to export to. + * @return The export operation. + * @throws IOException The IO Exception. + */ Operation exportFhirResourceToGcs(String fhirStore, String gcsDestinationPrefix) throws IOException; + /** + * Export a FHIR Resource to BigQuery. + * + * @param fhirStore the FhirStore of the resource. + * @param bigQueryDatasetUri The BQ Dataset URI to export to. + * @return The export operation. + * @throws IOException The IO Exception. + */ Operation exportFhirResourceToBigQuery(String fhirStore, String bigQueryDatasetUri) throws IOException; + /** + * Deidentify a GCP FHIR Store and write the result into a new FHIR Store. + * + * @param sourceFhirStore the FhirStore to be deidentified. + * @param destinationFhirStore the FhirStore that the deidentified data will be written to. + * @param deidConfig the DeidentifyConfig specifying the form of deidentification. + * @return The deidentify operation. + * @throws IOException The IO Exception. + */ Operation deidentifyFhirStore( String sourceFhirStore, String destinationFhirStore, DeidentifyConfig deidConfig) throws IOException; + /** + * Poll operation. + * + * @param operation The operation to be polled. + * @param sleepMs length of time to wait between requests. + * @return The operation, updated with its latest status. + * @throws IOException The IO Exception. + */ Operation pollOperation(Operation operation, Long sleepMs) throws InterruptedException, IOException; @@ -159,7 +218,7 @@ Operation pollOperation(Operation operation, Long sleepMs) * @param fhirStore the fhir store * @param bundle the bundle * @return the http body - * @throws IOException the io exception + * @throws IOException The IO Exception. */ HttpBody executeFhirBundle(String fhirStore, String bundle) throws IOException, HealthcareHttpException; @@ -170,7 +229,7 @@ HttpBody executeFhirBundle(String fhirStore, String bundle) * @param resourceName the resource name, in format * projects/{p}/locations/{l}/datasets/{d}/fhirStores/{f}/fhir/{resourceType}/{id} * @return the http body - * @throws IOException the io exception + * @throws IOException The IO Exception. */ HttpBody readFhirResource(String resourceName) throws IOException; @@ -179,9 +238,9 @@ HttpBody executeFhirBundle(String fhirStore, String bundle) * * @param fhirStore the fhir store * @param resourceType the resource type - * @param parameters the parameters + * @param parameters The parameters (in the form of key-value pairs). * @return the http body - * @throws IOException + * @throws IOException The IO Exception. */ HttpBody searchFhirResource( String fhirStore, @@ -195,9 +254,9 @@ HttpBody searchFhirResource( * * @param resourceName the resource name, in format * projects/{p}/locations/{l}/datasets/{d}/fhirStores/{f}/fhir/{resourceType}/{id} - * @param filters optional request filters + * @param filters optional request filters (in key value pairs). * @return the http body - * @throws IOException + * @throws IOException The IO Exception. */ HttpBody getPatientEverything( String resourceName, @Nullable Map filters, String pageToken) @@ -206,46 +265,101 @@ HttpBody getPatientEverything( /** * Create hl 7 v 2 store hl 7 v 2 store. * - * @param dataset the dataset - * @param name the name - * @return the hl 7 v 2 store - * @throws IOException the io exception + * @param dataset The dataset to create the HL7v2 store in. + * @param name The name of the store to be created. + * @return The created HL7v2 store. + * @throws IOException The IO Exception. */ Hl7V2Store createHL7v2Store(String dataset, String name) throws IOException; + /** + * Create FHIR Store with a PubSub topic listener. + * + * @param dataset The name of Dataset for the FHIR store to be created in. + * @param name The name of the FHIR store. + * @param version The version of the FHIR store (DSTU2, STU3, R4). + * @param pubsubTopic The pubsub topic listening to the FHIR store. + * @throws IOException The IO Exception. + */ FhirStore createFhirStore(String dataset, String name, String version, String pubsubTopic) throws IOException; - + /** + * Create FHIR Store. + * + * @param dataset The name of the Dataset for the FHIR store to be created in. + * @param name The name of the FHIR store. + * @param version The version of the FHIR store (DSTU2, STU3, R4). + * @throws IOException The IO Exception. + */ FhirStore createFhirStore(String dataset, String name, String version) throws IOException; /** * List all FHIR stores in a dataset. * - * @param dataset the dataset, in the format: + * @param dataset The dataset, in the format: * projects/project_id/locations/location_id/datasets/dataset_id - * @return a list of FhirStore - * @throws IOException + * @return A list of all FHIR stores in the dataset. + * @throws IOException The IO Exception. */ List listAllFhirStores(String dataset) throws IOException; /** - * Delete hl 7 v 2 store empty. + * Deletes a FHIR store. * - * @param store the store - * @return the empty - * @throws IOException the io exception + * @param store The FHIR store to be deleted. + * @return Empty. + * @throws IOException The IO Exception. */ - Empty deleteHL7v2Store(String store) throws IOException; - Empty deleteFhirStore(String store) throws IOException; + /** + * Retrieve DicomStudyMetadata. + * + * @param dicomWebPath The Dicom Web Path to retrieve the metadata from. + * @return The study metadata. + * @throws IOException The IO Exception. + */ String retrieveDicomStudyMetadata(String dicomWebPath) throws IOException; + /** + * Create a DicomStore. + * + * @param dataset The dataset that the Dicom Store should be in, in the format: + * projects/project_id/locations/location_id/datasets/dataset_id. + * @param name The name of the Dicom Store to be created. + * @return The created Dicom Store. + * @throws IOException The IO Exception. + */ DicomStore createDicomStore(String dataset, String name) throws IOException; + /** + * Create a DicomStore with a PubSub listener. + * + * @param dataset The dataset that the Dicom Store should be in, in the format: + * projects/project_id/locations/location_id/datasets/dataset_id + * @param name The name of the Dicom Store to be created. + * @param pubsubTopic Name of PubSub topic connected with the Dicom store. + * @return The created Dicom Store. + * @throws IOException The IO Exception. + */ DicomStore createDicomStore(String dataset, String name, String pubsubTopic) throws IOException; + /** + * Delete a Dicom Store. + * + * @param name The name of the Dicom Store to be deleted. + * @return Empty. + * @throws IOException The IO Exception.
+ */ Empty uploadToDicomStore(String webPath, String filePath) throws IOException, URISyntaxException; } diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java index 81de67f38502..fc3ce0be4b69 100644 --- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java +++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOStorageWriteIT.java @@ -33,9 +33,16 @@ import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.testing.TestPipeline; import org.apache.beam.sdk.transforms.DoFn; +import org.apache.beam.sdk.transforms.MapElements; +import org.apache.beam.sdk.transforms.PTransform; import org.apache.beam.sdk.transforms.ParDo; +import org.apache.beam.sdk.transforms.PeriodicImpulse; +import org.apache.beam.sdk.transforms.SimpleFunction; +import org.apache.beam.sdk.values.PBegin; +import org.apache.beam.sdk.values.PCollection; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; import org.joda.time.Duration; +import org.joda.time.Instant; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; @@ -81,11 +88,32 @@ public void processElement(ProcessContext c) { } } - private GenerateSequence stream(int rowCount) { - int timestampIntervalInMilliseconds = 10; - return GenerateSequence.from(0) - .to(rowCount) - .withRate(1, Duration.millis(timestampIntervalInMilliseconds)); + static class UnboundedStream extends PTransform> { + + private final int rowCount; + + public UnboundedStream(int rowCount) { + this.rowCount = rowCount; + } + + @Override + public PCollection expand(PBegin input) { + int timestampIntervalInMillis = 10; + PeriodicImpulse impulse = + PeriodicImpulse.create() + .stopAfter(Duration.millis((long) timestampIntervalInMillis * rowCount - 1)) + .withInterval(Duration.millis(timestampIntervalInMillis)); + return input + .apply(impulse) + .apply( + MapElements.via( + new SimpleFunction() { + @Override + public Long apply(Instant input) { + return input.getMillis(); + } + })); + } } private void runBigQueryIOStorageWritePipeline( @@ -102,7 +130,9 @@ private void runBigQueryIOStorageWritePipeline( new TableFieldSchema().setName("str").setType("STRING"))); Pipeline p = Pipeline.create(bqOptions); - p.apply("Input", isStreaming ? stream(rowCount) : GenerateSequence.from(0).to(rowCount)) + p.apply( + "Input", + isStreaming ? 
new UnboundedStream(rowCount) : GenerateSequence.from(0).to(rowCount)) .apply("GenerateMessage", ParDo.of(new FillRowFn())) .apply( "WriteToBQ", diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java index e49ef05974fe..6e7ad865cc35 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/JdbcIO.java @@ -786,7 +786,7 @@ public PCollection expand(PBegin input) { // Spotbugs seems to not understand the multi-statement try-with-resources @SuppressFBWarnings("OBL_UNSATISFIED_OBLIGATION") - private static Schema inferBeamSchema(DataSource ds, String query) { + public static Schema inferBeamSchema(DataSource ds, String query) { try (Connection conn = ds.getConnection(); PreparedStatement statement = conn.prepareStatement( diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java index 674b274f907b..6e8e46b7afa2 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/LogicalTypes.java @@ -19,7 +19,6 @@ import java.sql.JDBCType; import java.time.Instant; -import java.util.Objects; import org.apache.beam.sdk.schemas.Schema; import org.apache.beam.sdk.schemas.Schema.FieldType; import org.apache.beam.sdk.schemas.logicaltypes.FixedBytes; @@ -30,11 +29,11 @@ import org.apache.beam.sdk.schemas.logicaltypes.VariableBytes; import org.apache.beam.sdk.schemas.logicaltypes.VariableString; import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.checkerframework.checker.nullness.qual.NonNull; -import org.checkerframework.checker.nullness.qual.Nullable; /** Beam {@link org.apache.beam.sdk.schemas.Schema.LogicalType} implementations of JDBC types. */ class LogicalTypes { + // Logical types of the following static members are not portable and are preserved for + // compatibility reason. Consider using portable logical types when adding new ones. static final Schema.FieldType JDBC_BIT_TYPE = Schema.FieldType.logicalType( new PassThroughLogicalType( @@ -110,69 +109,4 @@ static Schema.LogicalType fixedOrVariableBytes(String name, int return FixedBytes.of(name, length); } } - - /** Base class for JDBC logical types. 
*/ - abstract static class JdbcLogicalType - implements Schema.LogicalType { - protected final String identifier; - protected final Schema.FieldType argumentType; - protected final Schema.FieldType baseType; - protected final Object argument; - - protected JdbcLogicalType( - String identifier, - Schema.FieldType argumentType, - Schema.FieldType baseType, - Object argument) { - this.identifier = identifier; - this.argumentType = argumentType; - this.baseType = baseType; - this.argument = argument; - } - - @Override - public String getIdentifier() { - return identifier; - } - - @Override - public FieldType getArgumentType() { - return argumentType; - } - - @Override - @SuppressWarnings("TypeParameterUnusedInFormals") - public ArgumentT getArgument() { - return (ArgumentT) argument; - } - - @Override - public Schema.FieldType getBaseType() { - return baseType; - } - - @Override - public T toBaseType(T input) { - return input; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (!(o instanceof JdbcLogicalType)) { - return false; - } - JdbcLogicalType that = (JdbcLogicalType) o; - return Objects.equals(identifier, that.identifier) - && Objects.equals(baseType, that.baseType) - && Objects.equals(argument, that.argument); - } - - @Override - public int hashCode() { - return Objects.hash(identifier, baseType, argument); - } - } } diff --git a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java index 234b60cd3879..65f21308ea32 100644 --- a/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java +++ b/sdks/java/io/jdbc/src/main/java/org/apache/beam/sdk/io/jdbc/SchemaUtil.java @@ -354,7 +354,7 @@ private static ResultSetFieldExtractor createObjectExtractor() { * A {@link org.apache.beam.sdk.io.jdbc.JdbcIO.RowMapper} implementation that converts JDBC * results into Beam {@link Row} objects. */ - static final class BeamRowMapper implements JdbcIO.RowMapper<Row> { + public static final class BeamRowMapper implements JdbcIO.RowMapper<Row> { private final Schema schema; private final List fieldExtractors; diff --git a/sdks/python/apache_beam/dataframe/frame_base.py b/sdks/python/apache_beam/dataframe/frame_base.py index 1da12ececff6..4e89e473b730 100644 --- a/sdks/python/apache_beam/dataframe/frame_base.py +++ b/sdks/python/apache_beam/dataframe/frame_base.py @@ -500,6 +500,8 @@ def wrap(func): removed_arg_names = removed_args if removed_args is not None else [] + # We would need to add positional-only arguments if they ever become a thing + # in Pandas (as of 2.1 they aren't). base_arg_spec = getfullargspec(unwrap(getattr(base_type, func.__name__))) base_arg_names = base_arg_spec.args # Some arguments are keyword only and we still want to check against those. @@ -514,6 +516,9 @@ def wrap(func): @functools.wraps(func) def wrapper(*args, **kwargs): + if len(args) > len(base_arg_names): + raise TypeError(f"{func.__name__} got too many positional arguments.") + for name, value in zip(base_arg_names, args): if name in kwargs: raise TypeError( @@ -523,7 +528,7 @@ def wrapper(*args, **kwargs): # Still have to populate these for the Beam function signature. if removed_args: + for name in removed_args: - if not name in kwargs: + if name not in kwargs: + kwargs[name] = None return func(**kwargs) @@ -646,13 +651,18 @@ def wrap(func): return func base_argspec = getfullargspec(unwrap(getattr(base_type, func.__name__))) - if not base_argspec.defaults: + if not base_argspec.defaults and not base_argspec.kwonlydefaults: return func - arg_to_default = dict( - zip( - base_argspec.args[-len(base_argspec.defaults):], - base_argspec.defaults)) + arg_to_default = {} + if base_argspec.defaults: + arg_to_default.update( + zip( + base_argspec.args[-len(base_argspec.defaults):], + base_argspec.defaults)) + + if base_argspec.kwonlydefaults: + arg_to_default.update(base_argspec.kwonlydefaults) unwrapped_func = unwrap(func) # args that do not have defaults in func, but do have defaults in base @@ -664,11 +674,19 @@ def wrap(func): if removed_args: defaults_to_populate -= set(removed_args) + # In pandas 2, many methods rely on the default copy=None + # to mean that copy is the value of copy_on_write. Since + # copy_on_write will always be true for Beam, just fill it + # in here. In pandas 1, the default was True anyway. + if 'copy' in arg_to_default and arg_to_default['copy'] is None: + arg_to_default['copy'] = True + @functools.wraps(func) def wrapper(**kwargs): for name in defaults_to_populate: if name not in kwargs: kwargs[name] = arg_to_default[name] + return func(**kwargs) return wrapper diff --git a/sdks/python/apache_beam/dataframe/frame_base_test.py b/sdks/python/apache_beam/dataframe/frame_base_test.py index 2d16d02ba1ea..0a73905339fd 100644 --- a/sdks/python/apache_beam/dataframe/frame_base_test.py +++ b/sdks/python/apache_beam/dataframe/frame_base_test.py @@ -72,7 +72,7 @@ def add_one(frame): def test_args_to_kwargs(self): class Base(object): - def func(self, a=1, b=2, c=3): + def func(self, a=1, b=2, c=3, *, kw_only=4): pass class Proxy(object): @@ -87,6 +87,9 @@ def func(self, **kwargs): self.assertEqual(proxy.func(2, 4, 6), {'a': 2, 'b': 4, 'c': 6}) self.assertEqual(proxy.func(2, c=6), {'a': 2, 'c': 6}) self.assertEqual(proxy.func(c=6, a=2), {'a': 2, 'c': 6}) + self.assertEqual(proxy.func(2, kw_only=20), {'a': 2, 'kw_only': 20}) + with self.assertRaises(TypeError): # got too many positional arguments + proxy.func(2, 4, 6, 8) def test_args_to_kwargs_populates_defaults(self): class Base(object): @@ -129,6 +132,63 @@ def func_removed_args(self, a, c, **kwargs): proxy.func_removed_args() self.assertEqual(proxy.func_removed_args(12, d=100), {'a': 12, 'd': 100}) + def test_args_to_kwargs_populates_default_handles_kw_only(self): + class Base(object): + def func(self, a, b=2, c=3, *, kw_only=4): + pass + + class ProxyUsesKwOnly(object): + @frame_base.args_to_kwargs(Base) + @frame_base.populate_defaults(Base) + def func(self, a, kw_only, **kwargs): + return dict(kwargs, a=a, kw_only=kw_only) + + proxy = ProxyUsesKwOnly() + + # pylint: disable=too-many-function-args,no-value-for-parameter + with self.assertRaises(TypeError): # missing 1 required positional argument + proxy.func() + + self.assertEqual(proxy.func(100), {'a': 100, 'kw_only': 4}) + self.assertEqual( + proxy.func(2, 4, 6, kw_only=8), { + 'a': 2, 'b': 4, 'c': 6, 'kw_only': 8 + }) + with self.assertRaises(TypeError): + proxy.func(2, 4, 6, 8) # got too many positional arguments + + class ProxyDoesntUseKwOnly(object): + @frame_base.args_to_kwargs(Base) + @frame_base.populate_defaults(Base) + def func(self, a, **kwargs): + return dict(kwargs, a=a) + + proxy = ProxyDoesntUseKwOnly() + + # pylint:
disable=too-many-function-args,no-value-for-parameter + with self.assertRaises(TypeError): # missing 1 required positional argument + proxy.func() + self.assertEqual(proxy.func(100), {'a': 100}) + self.assertEqual( + proxy.func(2, 4, 6, kw_only=8), { + 'a': 2, 'b': 4, 'c': 6, 'kw_only': 8 + }) + + def test_populate_defaults_overwrites_copy(self): + class Base(object): + def func(self, a=1, b=2, c=3, *, copy=None): + pass + + class Proxy(object): + @frame_base.args_to_kwargs(Base) + @frame_base.populate_defaults(Base) + def func(self, a, copy, **kwargs): + return dict(kwargs, a=a, copy=copy) + + proxy = Proxy() + self.assertEqual(proxy.func(), {'a': 1, 'copy': True}) + self.assertEqual(proxy.func(copy=False), {'a': 1, 'copy': False}) + if __name__ == '__main__': unittest.main() diff --git a/sdks/python/apache_beam/dataframe/frames.py b/sdks/python/apache_beam/dataframe/frames.py index e2390bda28be..a74ccbba041a 100644 --- a/sdks/python/apache_beam/dataframe/frames.py +++ b/sdks/python/apache_beam/dataframe/frames.py @@ -907,7 +907,7 @@ def sort_index(self, axis, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( 'sort_index', - lambda df: df.sort_index(axis, **kwargs), + lambda df: df.sort_index(axis=axis, **kwargs), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary(), @@ -1471,8 +1471,10 @@ def compute_idx(s): index = pd.Index([], dtype=index_dtype) proxy = self._expr.proxy().copy() proxy.index = index - proxy = proxy.append( - pd.Series([1], index=np.asarray(['0']).astype(proxy.index.dtype))) + proxy = pd.concat([ + proxy, + pd.Series([1], index=np.asarray(['0']).astype(proxy.index.dtype)) + ]) idx_func = expressions.ComputedExpression( 'idx_func', @@ -1899,7 +1901,8 @@ def dropna(self, **kwargs): @frame_base.with_docs_from(pd.Series) @frame_base.args_to_kwargs(pd.Series) - @frame_base.populate_defaults(pd.Series) + @frame_base.populate_defaults( + pd.Series, removed_args=['inplace'] if PD_VERSION >= (2, 0) else None) @frame_base.maybe_inplace def set_axis(self, labels, **kwargs): # TODO: assigning the index is generally order-sensitive, but we could @@ -2345,8 +2348,13 @@ def value_counts( result = column.groupby(column, dropna=dropna).size() - # groupby.size() names the index, which we don't need - result.index.name = None + # Pandas 2 introduces new naming for the results. 
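The deferred `value_counts` just below reproduces plain pandas 2 naming. For reference, this is how pandas itself behaves (assuming pandas >= 2.0): the index takes the Series name, and the result is named `count`, or `proportion` when normalized.

```python
import pandas as pd

s = pd.Series(['a', 'b', 'a'], name='letters')

counts = s.value_counts()
print(counts.name)        # 'count'
print(counts.index.name)  # 'letters'

proportions = s.value_counts(normalize=True)
print(proportions.name)   # 'proportion'
```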
+ if PD_VERSION >= (2, 0): + result.index.name = getattr(self, "name", None) + result.name = "proportion" if normalize else "count" + else: + # groupby.size() names the index, which we don't need + result.index.name = None if normalize: return result / column.length() @@ -2673,7 +2681,9 @@ def set_index(self, keys, **kwargs): @frame_base.with_docs_from(pd.DataFrame) @frame_base.args_to_kwargs(pd.DataFrame) - @frame_base.populate_defaults(pd.DataFrame) + @frame_base.populate_defaults( + pd.DataFrame, + removed_args=['inplace'] if PD_VERSION >= (2, 0) else None) @frame_base.maybe_inplace def set_axis(self, labels, axis, **kwargs): if axis in ('index', 0): @@ -2687,7 +2697,7 @@ def set_axis(self, labels, axis, **kwargs): return frame_base.DeferredFrame.wrap( expressions.ComputedExpression( 'set_axis', - lambda df: df.set_axis(labels, axis, **kwargs), + lambda df: df.set_axis(labels, axis=axis, **kwargs), [self._expr], requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) @@ -4002,12 +4012,18 @@ def value_counts(self, subset=None, sort=False, normalize=False, columns = subset or list(self.columns) if dropna: - dropped = self.dropna() + # Must include subset here because otherwise we spuriously drop NAs due + # to columns outside our subset. + dropped = self.dropna(subset=subset) else: dropped = self result = dropped.groupby(columns, dropna=dropna).size() + # Pandas 2 introduces new naming for the results. + if PD_VERSION >= (2,0): + result.name = "proportion" if normalize else "count" + if normalize: return result/dropped.length() else: @@ -4915,9 +4931,9 @@ def __setitem__(self, index, value): class _DeferredStringMethods(frame_base.DeferredBase): - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def cat(self, others, join, **kwargs): """If defined, ``others`` must be a :class:`DeferredSeries` or a ``list`` of ``DeferredSeries``.""" @@ -4957,8 +4973,8 @@ def func(*args): requires_partition_by=requires, preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def repeat(self, repeats): """``repeats`` must be an ``int`` or a :class:`DeferredSeries`. Lists are not supported because they make this operation order-sensitive.""" @@ -4995,8 +5011,8 @@ def repeat(self, repeats): raise TypeError("str.repeat(repeats=) value must be an int or a " f"DeferredSeries (encountered {type(repeats)}).") - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) def get_dummies(self, **kwargs): """ Series must be categorical dtype. 
Please cast to ``CategoricalDtype`` @@ -5078,9 +5094,9 @@ def func(s): requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Arbitrary())) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def split(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. @@ -5089,9 +5105,9 @@ def split(self, **kwargs): """ return self._split_helper(rsplit=False, **kwargs) - @frame_base.with_docs_from(pd.core.strings.StringMethods) - @frame_base.args_to_kwargs(pd.core.strings.StringMethods) - @frame_base.populate_defaults(pd.core.strings.StringMethods) + @frame_base.with_docs_from(pd.Series.str) + @frame_base.args_to_kwargs(pd.Series.str) + @frame_base.populate_defaults(pd.Series.str) def rsplit(self, **kwargs): """ Like other non-deferred methods, dtype must be CategoricalDtype. @@ -5169,17 +5185,17 @@ def func(df, *args, **kwargs): return func for method in ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, method, frame_base._elementwise_method(make_str_func(method), name=method, - base=pd.core.strings.StringMethods)) + base=pd.Series.str)) for method in NON_ELEMENTWISE_STRING_METHODS: - if not hasattr(pd.core.strings.StringMethods, method): + if not hasattr(pd.Series.str, method): # older versions (1.0.x) don't support some of these methods continue setattr(_DeferredStringMethods, @@ -5187,7 +5203,7 @@ def func(df, *args, **kwargs): frame_base._proxy_method( make_str_func(method), name=method, - base=pd.core.strings.StringMethods, + base=pd.Series.str, requires_partition_by=partitionings.Arbitrary(), preserves_partition_by=partitionings.Singleton())) @@ -5361,11 +5377,12 @@ def func(df, *args, **kwargs): 'second', 'time', 'timetz', - 'week', 'weekday', - 'weekofyear', 'year', ] +# Pandas 2 removed these. +if PD_VERSION < (2, 0): + ELEMENTWISE_DATETIME_PROPERTIES += ['week', 'weekofyear'] for method in ELEMENTWISE_DATETIME_PROPERTIES: setattr(_DeferredDatetimeMethods, diff --git a/sdks/python/apache_beam/dataframe/frames_test.py b/sdks/python/apache_beam/dataframe/frames_test.py index 4e59d1da5de4..e3555b50187b 100644 --- a/sdks/python/apache_beam/dataframe/frames_test.py +++ b/sdks/python/apache_beam/dataframe/frames_test.py @@ -17,9 +17,11 @@ import re import unittest import warnings +from typing import Dict import numpy as np import pandas as pd +import pytest from parameterized import parameterized import apache_beam as beam @@ -44,6 +46,10 @@ 'str': [str(i) for i in range(100)], }) +if PD_VERSION < (2, 0): + # All these are things that are fixed in the Pandas 2 transition. 
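The module-level gate added just below relies on pytest's `pytestmark` convention: a marker assigned at module scope applies to every test in the file. A self-contained sketch (the `PD_VERSION` derivation here is an assumption for illustration; Beam computes it centrally in `apache_beam.dataframe`):

```python
import pandas as pd
import pytest

# Assumed here so the example runs standalone.
PD_VERSION = tuple(int(x) for x in pd.__version__.split('.')[:2])

if PD_VERSION < (2, 0):
  # pytest applies a module-level `pytestmark` to every test in the file,
  # silencing the FutureWarnings that pandas 2 later resolved.
  pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning")
```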
+ pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning") + def _get_deferred_args(*args): return [ @@ -187,6 +193,9 @@ def _run_test( if expected.index.is_unique: expected = expected.sort_index() actual = actual.sort_index() + elif isinstance(expected, pd.Series): + expected = expected.sort_values() + actual = actual.sort_values() else: expected = expected.sort_values(list(expected.columns)) actual = actual.sort_values(list(actual.columns)) @@ -688,6 +697,8 @@ def test_value_counts_with_nans(self): self._run_test(lambda df: df.value_counts(), df) self._run_test(lambda df: df.value_counts(normalize=True), df) + # Ensure we don't drop rows due to nan values in unused columns. + self._run_test(lambda df: df.value_counts('num_wings'), df) if PD_VERSION >= (1, 3): # dropna=False is new in pandas 1.3 @@ -1634,6 +1645,30 @@ def test_pivot_no_index_provided_on_multiindex(self): # https://github.com/pandas-dev/pandas/issues/40139 ALL_GROUPING_AGGREGATIONS = sorted( set(frames.ALL_AGGREGATIONS) - set(('kurt', 'kurtosis'))) +AGGREGATIONS_WHERE_NUMERIC_ONLY_DEFAULTS_TO_TRUE_IN_PANDAS_1 = set( + frames.ALL_AGGREGATIONS) - set(( + 'nunique', + 'size', + 'count', + 'idxmin', + 'idxmax', + 'mode', + 'rank', + 'all', + 'any', + 'describe')) + + +def numeric_only_kwargs_for_pandas_2(agg_type: str) -> Dict[str, bool]: + """Get proper arguments for numeric_only. + + Behavior for numeric_only in these methods changed in Pandas 2 to default + to False instead of True, so explicitly make it True in Pandas 2.""" + if PD_VERSION >= (2, 0) and ( + agg_type in AGGREGATIONS_WHERE_NUMERIC_ONLY_DEFAULTS_TO_TRUE_IN_PANDAS_1): + return {'numeric_only': True} + else: + return {} class GroupByTest(_AbstractFrameTest): @@ -1650,8 +1685,9 @@ def test_groupby_agg(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: df.groupby('group').agg(agg_type), + lambda df: df.groupby('group').agg(agg_type, **kwargs), GROUPBY_DF, check_proxy=False) @@ -1661,8 +1697,10 @@ def test_groupby_with_filter(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df[df.foo > 30].groupby('group'), agg_type)(), + lambda df: getattr(df[df.foo > 30].groupby('group'), agg_type) + (**kwargs), GROUPBY_DF, check_proxy=False) @@ -1673,8 +1711,9 @@ def test_groupby(self, agg_type): "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df.groupby('group'), agg_type)(), + lambda df: getattr(df.groupby('group'), agg_type)(**kwargs), GROUPBY_DF, check_proxy=False) @@ -1685,8 +1724,10 @@ def test_groupby_series(self, agg_type): "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df[df.foo > 40].groupby(df.group), agg_type)(), + lambda df: getattr(df[df.foo > 40].groupby(df.group), agg_type) + (**kwargs), GROUPBY_DF, check_proxy=False) @@ -1717,12 +1758,15 @@ def test_groupby_project_series(self, agg_type): "https://github.com/apache/beam/issues/20895: " 
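The `numeric_only_kwargs_for_pandas_2` helper above exists because pandas 2 flipped the `numeric_only` default from True to False for many groupby aggregations. A plain-pandas illustration (assuming pandas >= 2.0):

```python
import pandas as pd

df = pd.DataFrame({
    'group': ['x', 'x', 'y'],
    'foo': [1, 2, 3],
    'str': ['a', 'b', 'c'],
})

# With numeric_only=True only the numeric column is aggregated, matching the
# pandas 1 default the tests were originally written against.
print(df.groupby('group').median(numeric_only=True))

try:
  # In pandas 2 the default is numeric_only=False, so aggregating a frame
  # with a string column raises instead of silently dropping the column.
  df.groupby('group').median()
except TypeError as e:
  print(f"raised: {e}")
```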
"SeriesGroupBy.{corr, cov} do not raise the expected error.") - self._run_test(lambda df: getattr(df.groupby('group').foo, agg_type)(), df) - self._run_test(lambda df: getattr(df.groupby('group').bar, agg_type)(), df) + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) + self._run_test( + lambda df: getattr(df.groupby('group').foo, agg_type)(**kwargs), df) + self._run_test( + lambda df: getattr(df.groupby('group').bar, agg_type)(**kwargs), df) self._run_test( - lambda df: getattr(df.groupby('group')['foo'], agg_type)(), df) + lambda df: getattr(df.groupby('group')['foo'], agg_type)(**kwargs), df) self._run_test( - lambda df: getattr(df.groupby('group')['bar'], agg_type)(), df) + lambda df: getattr(df.groupby('group')['bar'], agg_type)(**kwargs), df) @parameterized.expand(ALL_GROUPING_AGGREGATIONS) def test_groupby_project_dataframe(self, agg_type): @@ -1730,8 +1774,10 @@ def test_groupby_project_dataframe(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) self._run_test( - lambda df: getattr(df.groupby('group')[['bar', 'baz']], agg_type)(), + lambda df: getattr(df.groupby('group')[['bar', 'baz']], agg_type) + (**kwargs), GROUPBY_DF, check_proxy=False) @@ -1760,9 +1806,10 @@ def test_groupby_errors_non_existent_label(self): def test_groupby_callable(self): df = GROUPBY_DF - - self._run_test(lambda df: df.groupby(lambda x: x % 2).foo.sum(), df) - self._run_test(lambda df: df.groupby(lambda x: x % 5).median(), df) + kwargs = numeric_only_kwargs_for_pandas_2('sum') + self._run_test(lambda df: df.groupby(lambda x: x % 2).foo.sum(**kwargs), df) + kwargs = numeric_only_kwargs_for_pandas_2('median') + self._run_test(lambda df: df.groupby(lambda x: x % 5).median(**kwargs), df) def test_groupby_apply(self): df = GROUPBY_DF @@ -1790,8 +1837,8 @@ def test_groupby_apply_preserves_column_order(self): df = GROUPBY_DF self._run_test( - lambda df: df[['foo', 'group', 'bar']].groupby('group').apply( - lambda x: x), + lambda df: df[['foo', 'group', 'bar']].groupby( + 'group', group_keys=False).apply(lambda x: x), df) def test_groupby_transform(self): @@ -1817,8 +1864,9 @@ def test_groupby_transform(self): def test_groupby_pipe(self): df = GROUPBY_DF - - self._run_test(lambda df: df.groupby('group').pipe(lambda x: x.sum()), df) + kwargs = numeric_only_kwargs_for_pandas_2('sum') + self._run_test( + lambda df: df.groupby('group').pipe(lambda x: x.sum(**kwargs)), df) self._run_test( lambda df: df.groupby('group')['bool'].pipe(lambda x: x.any()), df) self._run_test( @@ -1900,14 +1948,14 @@ def test_dataframe_groupby_series(self, agg_type): self.skipTest( "https://github.com/apache/beam/issues/20967: proxy generation of " "DataFrameGroupBy.describe fails in pandas < 1.2") + + def agg(df, group_by): + kwargs = numeric_only_kwargs_for_pandas_2(agg_type) + return df[df.foo > 40].groupby(group_by).agg(agg_type, **kwargs) + + self._run_test(lambda df: agg(df, df.group), GROUPBY_DF, check_proxy=False) self._run_test( - lambda df: df[df.foo > 40].groupby(df.group).agg(agg_type), - GROUPBY_DF, - check_proxy=False) - self._run_test( - lambda df: df[df.foo > 40].groupby(df.foo % 3).agg(agg_type), - GROUPBY_DF, - check_proxy=False) + lambda df: agg(df, df.foo % 3), GROUPBY_DF, check_proxy=False) @parameterized.expand(ALL_GROUPING_AGGREGATIONS) def test_series_groupby_series(self, agg_type): @@ -2941,7 +2989,7 @@ class DocstringTest(unittest.TestCase): 
(frames.DeferredDataFrame, pd.DataFrame), (frames.DeferredSeries, pd.Series), #(frames._DeferredIndex, pd.Index), - (frames._DeferredStringMethods, pd.core.strings.StringMethods), + (frames._DeferredStringMethods, pd.Series.str), ( frames._DeferredCategoricalMethods, pd.core.arrays.categorical.CategoricalAccessor), diff --git a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py index ed8745ec2ac1..54a473d1b52b 100644 --- a/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py +++ b/sdks/python/apache_beam/io/external/xlang_jdbcio_it_test.py @@ -17,6 +17,7 @@ # pytype: skip-file +import datetime import logging import time import typing @@ -60,7 +61,8 @@ "JdbcTestRow", [("f_id", int), ("f_float", float), ("f_char", str), ("f_varchar", str), ("f_bytes", bytes), ("f_varbytes", bytes), ("f_timestamp", Timestamp), - ("f_decimal", Decimal)], + ("f_decimal", Decimal), ("f_date", datetime.date), + ("f_time", datetime.time)], ) coders.registry.register_coder(JdbcTestRow, coders.RowCoder) @@ -132,7 +134,7 @@ def test_xlang_jdbc_write_read(self, database): "f_float DOUBLE PRECISION, " + "f_char CHAR(10), " + "f_varchar VARCHAR(10), " + f"f_bytes {binary_type[0]}, " + f"f_varbytes {binary_type[1]}, " + "f_timestamp TIMESTAMP(3), " + - "f_decimal DECIMAL(10, 2))") + "f_decimal DECIMAL(10, 2), " + "f_date DATE, " + "f_time TIME(3))") inserted_rows = [ JdbcTestRow( i, @@ -144,7 +146,11 @@ def test_xlang_jdbc_write_read(self, database): # In alignment with Java Instant which supports milli precision. Timestamp.of(seconds=round(time.time(), 3)), # Test both positive and negative numbers. - Decimal(f'{i-1}.23')) for i in range(ROW_COUNT) + Decimal(f'{i-1}.23'), + # Test both date before or after EPOCH + datetime.date(1969 + i, i % 12 + 1, i % 31 + 1), + datetime.time(i % 24, i % 60, i % 60, (i * 1000) % 1_000_000)) + for i in range(ROW_COUNT) ] expected_row = [] for row in inserted_rows: @@ -163,7 +169,9 @@ def test_xlang_jdbc_write_read(self, database): f_bytes, row.f_bytes, row.f_timestamp, - row.f_decimal)) + row.f_decimal, + row.f_date, + row.f_time)) with TestPipeline() as p: p.not_use_test_runner_api = True diff --git a/sdks/python/apache_beam/io/gcp/bigquery_test.py b/sdks/python/apache_beam/io/gcp/bigquery_test.py index 3a3033dfcaf4..7e9c1e634748 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_test.py @@ -77,7 +77,6 @@ from apache_beam.testing.util import equal_to from apache_beam.transforms.display import DisplayData from apache_beam.transforms.display_test import DisplayDataItemMatcher -from apache_beam.utils import retry # Protect against environments where bigquery library is not available. # pylint: disable=wrong-import-order, wrong-import-position @@ -931,77 +930,269 @@ def test_copy_load_job_exception(self, exception_type, error_message): 'GCP dependencies are not installed') class BigQueryStreamingInsertsErrorHandling(unittest.TestCase): - # Using https://cloud.google.com/bigquery/docs/error-messages and - # https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. + # Running tests with a variety of exceptions from https://googleapis.dev + # /python/google-api-core/latest/_modules/google/api_core/exceptions.html. 
+ # Choosing some exceptions that produce reasons included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not @parameterized.expand([ + # reason not in _NON_TRANSIENT_ERRORS for row 1 on first attempt + # transient error retried and succeeds on second attempt, 0 rows sent to + # failed rows param( - exception_type=exceptions.Forbidden if exceptions else None, - error_reason='rateLimitExceeded'), + insert_response=[ + exceptions.TooManyRequests if exceptions else None, + None], + error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS + failed_rows=[]), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on both attempts, sent to + # failed rows after hitting max_retries + param( + insert_response=[ + exceptions.InternalServerError if exceptions else None, + exceptions.InternalServerError if exceptions else None], + error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS + failed_rows=['value1', 'value3', 'value5']), + # reason in _NON_TRANSIENT_ERRORS for row 1 on both attempts, sent to + # failed_rows after hitting max_retries + param( + insert_response=[ + exceptions.Forbidden if exceptions else None, + exceptions.Forbidden if exceptions else None], + error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS + failed_rows=['value1', 'value3', 'value5']), + ]) + def test_insert_rows_json_exception_retry_always( + self, insert_response, error_reason, failed_rows): + # In this test, a pipeline will always retry all caught exception types + # since RetryStrategy is not set and defaults to RETRY_ALWAYS + with mock.patch('time.sleep'): + call_counter = 0 + mock_response = mock.Mock() + mock_response.reason = error_reason + + def store_callback(table, **kwargs): + nonlocal call_counter + # raise exception if insert_response element is an exception + if insert_response[call_counter]: + exception_type = insert_response[call_counter] + call_counter += 1 + raise exception_type('some exception', response=mock_response) + # return empty list if not insert_response element, indicating + # successful call to insert_rows_json + else: + call_counter += 1 + return [] + + client = mock.Mock() + client.insert_rows_json.side_effect = store_callback + + # Using the bundle based direct runner to avoid pickling problems + # with mocks. + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1', 'columnB': 'value2' + }, { + 'columnA': 'value3', 'columnB': 'value4' + }, { + 'columnA': 'value5', 'columnB': 'value6' + }]) + # Using _StreamToBigQuery in order to be able to pass max_retries + # in order to limit run time of test with RETRY_ALWAYS + | _StreamToBigQuery( + table_reference='project:dataset.table', + table_side_inputs=[], + schema_side_inputs=[], + schema='anyschema', + batch_size=None, + triggering_frequency=None, + create_disposition='CREATE_NEVER', + write_disposition=None, + kms_key=None, + retry_strategy=RetryStrategy.RETRY_ALWAYS, + additional_bq_parameters=[], + ignore_insert_ids=False, + ignore_unknown_columns=False, + with_auto_sharding=False, + test_client=client, + max_retries=len(insert_response) - 1, + num_streaming_keys=500)) + + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(failed_rows)) + + # Running tests with a variety of exceptions from https://googleapis.dev + # /python/google-api-core/latest/_modules/google/api_core/exceptions.html. 
+ # Choosing some exceptions that produce reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ + param( + # not in _NON_TRANSIENT_ERRORS + exception_type=exceptions.BadGateway if exceptions else None, + error_reason='Bad Gateway'), + param( + # in _NON_TRANSIENT_ERRORS + exception_type=exceptions.Unauthorized if exceptions else None, + error_reason='Unauthorized'), + ]) + @mock.patch('time.sleep') + @mock.patch('google.cloud.bigquery.Client.insert_rows_json') + def test_insert_rows_json_exception_retry_never( + self, mock_send, unused_mock_sleep, exception_type, error_reason): + # In this test, a pipeline will never retry caught exception types + # since RetryStrategy is set to RETRY_NEVER + mock_response = mock.Mock() + mock_response.reason = error_reason + mock_send.side_effect = [ + exception_type('some exception', response=mock_response) + ] + + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_NEVER)) + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS_WITH_ERRORS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(['value1', 'value2'])) + + self.assertEqual(1, mock_send.call_count) + + # Running tests with a variety of exceptions from https://googleapis.dev + # /python/google-api-core/latest/_modules/google/api_core/exceptions.html. + # Choosing some exceptions that produce reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ param( exception_type=exceptions.DeadlineExceeded if exceptions else None, - error_reason='somereason'), + error_reason='Deadline Exceeded', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), param( - exception_type=exceptions.ServiceUnavailable if exceptions else None, - error_reason='backendError'), + exception_type=exceptions.Conflict if exceptions else None, + error_reason='Conflict', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), param( - exception_type=exceptions.InternalServerError if exceptions else None, - error_reason='internalError'), + exception_type=exceptions.TooManyRequests if exceptions else None, + error_reason='Too Many Requests', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), param( exception_type=exceptions.InternalServerError if exceptions else None, - error_reason='backendError'), + error_reason='Internal Server Error', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.BadGateway if exceptions else None, + error_reason='Bad Gateway', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.ServiceUnavailable if exceptions else None, + error_reason='Service Unavailable', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.GatewayTimeout if exceptions else None, + error_reason='Gateway Timeout', # not in _NON_TRANSIENT_ERRORS + failed_values=[], + expected_call_count=2), + param( + exception_type=exceptions.BadRequest if exceptions else None, 
+ error_reason='Bad Request', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.Unauthorized if exceptions else None, + error_reason='Unauthorized', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.Forbidden if exceptions else None, + error_reason='Forbidden', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.NotFound if exceptions else None, + error_reason='Not Found', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), + param( + exception_type=exceptions.MethodNotImplemented + if exceptions else None, + error_reason='Not Implemented', # in _NON_TRANSIENT_ERRORS + failed_values=['value1', 'value2'], + expected_call_count=1), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_retries_if_structured_retriable( + def test_insert_rows_json_exception_retry_on_transient_error( self, mock_send, unused_mock_sleep, - exception_type=None, - error_reason=None): - # In this test, a BATCH pipeline will retry the known RETRIABLE errors. + exception_type, + error_reason, + failed_values, + expected_call_count): + # In this test, a pipeline will only retry caught exception types + # with reasons that are not in _NON_TRANSIENT_ERRORS since RetryStrategy is + # set to RETRY_ON_TRANSIENT_ERROR + mock_response = mock.Mock() + mock_response.reason = error_reason mock_send.side_effect = [ - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), - exception_type( - 'some retriable exception', errors=[{ - 'reason': error_reason - }]), + exception_type('some exception', response=mock_response), + # Return no exception and no errors on 2nd call, if there is a 2nd call + [] ] - with self.assertRaises(Exception) as exc: - with beam.Pipeline() as p: - _ = ( - p - | beam.Create([{ - 'columnA': 'value1' - }]) - | WriteToBigQuery( - table='project:dataset.table', - schema={ - 'fields': [{ - 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' - }] - }, - create_disposition='CREATE_NEVER', - method='STREAMING_INSERTS')) - self.assertEqual(4, mock_send.call_count) - self.assertIn('some retriable exception', exc.exception.args[0]) + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR)) + failed_values_out = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values_out, equal_to(failed_values)) + self.assertEqual(expected_call_count, mock_send.call_count) - # Using https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. 
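The exception tests above differ only in strategy, and the decision they exercise reduces to reason matching. A condensed sketch of the `RetryStrategy.should_retry` semantics (not the exact `bigquery_tools` source), using reasons from the `_NON_TRANSIENT_ERRORS` set this change extends:

```python
_NON_TRANSIENT_ERRORS = {
    'invalid', 'invalidQuery', 'notImplemented', 'Bad Request',
    'Unauthorized', 'Forbidden', 'Not Found', 'Not Implemented',
}


def should_retry(strategy, error_reason):
  # RETRY_ALWAYS and RETRY_NEVER ignore the reason entirely; only
  # RETRY_ON_TRANSIENT_ERROR inspects it.
  if strategy == 'RETRY_ALWAYS':
    return True
  if strategy == 'RETRY_NEVER':
    return False
  return error_reason not in _NON_TRANSIENT_ERRORS


assert should_retry('RETRY_ON_TRANSIENT_ERROR', 'Internal Server Error')
assert not should_retry('RETRY_ON_TRANSIENT_ERROR', 'Forbidden')
assert should_retry('RETRY_ALWAYS', 'Forbidden')
```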
+ # Running tests with persistent exceptions with exception types not + # caught in BigQueryWrapper._insert_all_rows but retriable by + # retry.with_exponential_backoff @parameterized.expand([ param( exception_type=requests.exceptions.ConnectionError, @@ -1009,28 +1200,18 @@ def test_insert_all_retries_if_structured_retriable( param( exception_type=requests.exceptions.Timeout, error_message='some timeout error'), - param( - exception_type=ConnectionError, - error_message='some py connection error'), - param( - exception_type=exceptions.BadGateway if exceptions else None, - error_message='some badgateway error'), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_retries_if_unstructured_retriable( - self, - mock_send, - unused_mock_sleep, - exception_type=None, - error_message=None): - # In this test, a BATCH pipeline will retry the unknown RETRIABLE errors. - mock_send.side_effect = [ - exception_type(error_message), - exception_type(error_message), - exception_type(error_message), - exception_type(error_message), - ] + def test_insert_rows_json_persistent_retriable_exception( + self, mock_send, unused_mock_sleep, exception_type, error_message): + # In this test, each insert_rows_json call will result in an exception + # and be retried with retry.with_exponential_backoff until MAX_RETRIES is + # reached + mock_send.side_effect = exception_type(error_message) + + # Expecting 1 initial call plus maximum number of retries + expected_call_count = 1 + bigquery_tools.MAX_RETRIES with self.assertRaises(Exception) as exc: with beam.Pipeline() as p: @@ -1038,6 +1219,8 @@ def test_insert_all_retries_if_unstructured_retriable( p | beam.Create([{ 'columnA': 'value1' + }, { + 'columnA': 'value2' }]) | WriteToBigQuery( table='project:dataset.table', @@ -1048,138 +1231,390 @@ def test_insert_all_retries_if_unstructured_retriable( }, create_disposition='CREATE_NEVER', method='STREAMING_INSERTS')) - self.assertEqual(4, mock_send.call_count) + + self.assertEqual(expected_call_count, mock_send.call_count) self.assertIn(error_message, exc.exception.args[0]) - # Using https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. 
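The persistent-exception test above expects `1 + bigquery_tools.MAX_RETRIES` calls because connection and timeout errors escape the insert call and are retried with exponential backoff; the intermittent test below stops as soon as a call succeeds. A hedged sketch of the loop both rely on (the `MAX_RETRIES` value and delays here are illustrative, not Beam's `retry.with_exponential_backoff`):

```python
import time

MAX_RETRIES = 4  # assumed for illustration; see bigquery_tools.MAX_RETRIES


def call_with_backoff(fn, initial_delay_s=1.0):
  delay = initial_delay_s
  for attempt in range(1 + MAX_RETRIES):  # one initial call plus retries
    try:
      return fn()
    except (ConnectionError, TimeoutError):
      if attempt == MAX_RETRIES:
        raise  # retries exhausted; the last exception propagates
      time.sleep(delay)
      delay *= 2


calls = {'n': 0}


def flaky():
  calls['n'] += 1
  if calls['n'] < 3:
    raise ConnectionError('some connection error')
  return 'ok'


print(call_with_backoff(flaky, initial_delay_s=0.01), calls['n'])  # ok 3
```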
+ # Running tests with intermittent exceptions with exception types not + # caught in BigQueryWrapper._insert_all_rows but retriable by + # retry.with_exponential_backoff @parameterized.expand([ param( - exception_type=retry.PermanentException, - error_args=('nonretriable', )), - param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=( - 'forbidden morbidden', [{ - 'reason': 'nonretriablereason' - }])), - param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=('BAD REQUEST!', [{ - 'reason': 'nonretriablereason' - }])), - param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=( - 'method not allowed!', [{ - 'reason': 'nonretriablereason' - }])), - param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=('method not allowed!', 'args')), - param( - exception_type=exceptions.Unknown if exceptions else None, - error_args=('unknown!', 'args')), + exception_type=requests.exceptions.ConnectionError, + error_message='some connection error'), param( - exception_type=exceptions.Aborted if exceptions else None, - error_args=('abortet!', 'abort')), + exception_type=requests.exceptions.Timeout, + error_message='some timeout error'), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_unretriable_errors( - self, mock_send, unused_mock_sleep, exception_type=None, error_args=None): - # In this test, a BATCH pipeline will retry the unknown RETRIABLE errors. + def test_insert_rows_json_intermittent_retriable_exception( + self, mock_send, unused_mock_sleep, exception_type, error_message): + # In this test, the first 2 insert_rows_json calls will result in an + # exception and be retried with retry.with_exponential_backoff. The last + # call will not raise an exception and will succeed. mock_send.side_effect = [ - exception_type(*error_args), - exception_type(*error_args), - exception_type(*error_args), - exception_type(*error_args), + exception_type(error_message), exception_type(error_message), [] ] - with self.assertRaises(Exception): - with beam.Pipeline() as p: - _ = ( + with beam.Pipeline() as p: + _ = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS')) + + self.assertEqual(3, mock_send.call_count) + + # Running tests with a variety of error reasons from + # https://cloud.google.com/bigquery/docs/error-messages + # This covers the scenario when + # the google.cloud.bigquery.Client.insert_rows_json call returns an error list + # rather than raising an exception. 
+ # Choosing some error reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows + param( + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1']), + # reason in _NON_TRANSIENT_ERRORS for row 1 + # reason not in _NON_TRANSIENT_ERRORS for row 2 on 1st run + # row 1 sent to failed_rows + param( + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, { + 'index': 1, 'errors': [{ + 'reason': 'internalError' + }] + }], + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1']), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on first attempt + # transient error succeeds on second attempt, 0 rows sent to failed rows + param( + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + [], + ], + failed_rows=[]), + ]) + def test_insert_rows_json_errors_retry_always( + self, insert_response, failed_rows, unused_sleep_mock=None): + # In this test, a pipeline will always retry all errors + # since RetryStrategy is not set and defaults to RETRY_ALWAYS + with mock.patch('time.sleep'): + call_counter = 0 + + def store_callback(table, **kwargs): + nonlocal call_counter + response = insert_response[call_counter] + call_counter += 1 + return response + + client = mock.Mock() + client.insert_rows_json = mock.Mock(side_effect=store_callback) + + # Using the bundle based direct runner to avoid pickling problems + # with mocks. + with beam.Pipeline(runner='BundleBasedDirectRunner') as p: + bq_write_out = ( p | beam.Create([{ - 'columnA': 'value1' + 'columnA': 'value1', 'columnB': 'value2' + }, { + 'columnA': 'value3', 'columnB': 'value4' + }, { + 'columnA': 'value5', 'columnB': 'value6' }]) - | WriteToBigQuery( - table='project:dataset.table', - schema={ - 'fields': [{ - 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' - }] - }, + # Using _StreamToBigQuery in order to be able to pass max_retries + # in order to limit run time of test with RETRY_ALWAYS + | _StreamToBigQuery( + table_reference='project:dataset.table', + table_side_inputs=[], + schema_side_inputs=[], + schema='anyschema', + batch_size=None, + triggering_frequency=None, create_disposition='CREATE_NEVER', - method='STREAMING_INSERTS')) - self.assertEqual(1, mock_send.call_count) + write_disposition=None, + kms_key=None, + retry_strategy=RetryStrategy.RETRY_ALWAYS, + additional_bq_parameters=[], + ignore_insert_ids=False, + ignore_unknown_columns=False, + with_auto_sharding=False, + test_client=client, + max_retries=len(insert_response) - 1, + num_streaming_keys=500)) - # Using https://googleapis.dev/python/google-api-core/latest/_modules/google - # /api_core/exceptions.html - # to determine error types and messages to try for retriables. + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(failed_rows)) + + # Running tests with a variety of error reasons from + # https://cloud.google.com/bigquery/docs/error-messages + # This covers the scenario when + # the google.cloud.bigquery.Client.insert_rows_json call returns an error list + # rather than raising an exception. 
+ # Choosing some error reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not @parameterized.expand([ + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=retry.PermanentException, - error_args=('nonretriable', )), - param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=( - 'forbidden morbidden', [{ - 'reason': 'nonretriablereason' - }])), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalidQuery' + }] + }], + ], + streaming=False), + # reason not in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=exceptions.BadRequest if exceptions else None, - error_args=('BAD REQUEST!', [{ - 'reason': 'nonretriablereason' - }])), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + ], + streaming=False), param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=( - 'method not allowed!', [{ - 'reason': 'nonretriablereason' - }])), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + streaming=True), + # reason not in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=exceptions.MethodNotAllowed if exceptions else None, - error_args=('method not allowed!', 'args')), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + ], + streaming=True), + ]) + @mock.patch('time.sleep') + @mock.patch('google.cloud.bigquery.Client.insert_rows_json') + def test_insert_rows_json_errors_retry_never( + self, mock_send, unused_mock_sleep, insert_response, streaming): + # In this test, a pipeline will never retry errors since RetryStrategy is + # set to RETRY_NEVER + mock_send.side_effect = insert_response + opt = StandardOptions() + opt.streaming = streaming + with beam.Pipeline(runner='BundleBasedDirectRunner', options=opt) as p: + bq_write_out = ( + p + | beam.Create([{ + 'columnA': 'value1' + }, { + 'columnA': 'value2' + }]) + | WriteToBigQuery( + table='project:dataset.table', + schema={ + 'fields': [{ + 'name': 'columnA', 'type': 'STRING', 'mode': 'NULLABLE' + }] + }, + create_disposition='CREATE_NEVER', + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_NEVER)) + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS_WITH_ERRORS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(['value1'])) + + self.assertEqual(1, mock_send.call_count) + + # Running tests with a variety of error reasons from + # https://cloud.google.com/bigquery/docs/error-messages + # This covers the scenario when + # the google.cloud.bigquery.Client.insert_rows_json call returns an error list + # rather than raising an exception. 
+ # Choosing some error reasons that are included in + # bigquery_tools._NON_TRANSIENT_ERRORS and some that are not + @parameterized.expand([ + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=exceptions.Unknown if exceptions else None, - error_args=('unknown!', 'args')), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1'], + streaming=False), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on 1st attempt + # transient error succeeds on 2nd attempt, 0 rows sent to failed rows param( - exception_type=exceptions.Aborted if exceptions else None, - error_args=('abortet!', 'abort')), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + [], + ], + failed_rows=[], + streaming=False), + # reason in _NON_TRANSIENT_ERRORS for row 1 + # reason not in _NON_TRANSIENT_ERRORS for row 2 on 1st and 2nd attempt + # all rows with errors are retried when any row has a retriable error + # row 1 sent to failed_rows after final attempt param( - exception_type=requests.exceptions.ConnectionError, - error_args=('some connection error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, { + 'index': 1, 'errors': [{ + 'reason': 'internalError' + }] + }], + [ + { + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, + ], + ], + failed_rows=['value1'], + streaming=False), + # reason in _NON_TRANSIENT_ERRORS for row 1, sent to failed_rows param( - exception_type=requests.exceptions.Timeout, - error_args=('some timeout error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }], + ], + failed_rows=['value1'], + streaming=True), + # reason not in _NON_TRANSIENT_ERRORS for row 1 on 1st attempt + # transient error succeeds on 2nd attempt, 0 rows sent to failed rows param( - exception_type=ConnectionError, - error_args=('some py connection error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'internalError' + }] + }], + [], + ], + failed_rows=[], + streaming=True), + # reason in _NON_TRANSIENT_ERRORS for row 1 + # reason not in _NON_TRANSIENT_ERRORS for row 2 on 1st and 2nd attempt + # all rows with errors are retried when any row has a retriable error + # row 1 sent to failed_rows after final attempt param( - exception_type=exceptions.BadGateway if exceptions else None, - error_args=('some badgateway error', )), + insert_response=[ + [{ + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, { + 'index': 1, 'errors': [{ + 'reason': 'internalError' + }] + }], + [ + { + 'index': 0, 'errors': [{ + 'reason': 'invalid' + }] + }, + ], + ], + failed_rows=['value1'], + streaming=True), ]) @mock.patch('time.sleep') @mock.patch('google.cloud.bigquery.Client.insert_rows_json') - def test_insert_all_unretriable_errors_streaming( - self, mock_send, unused_mock_sleep, exception_type=None, error_args=None): - # In this test, a STREAMING pipeline will retry ALL errors, and never throw - # an exception. 
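The error-list cases above all encode one routing rule: if any errored row failed for a transient reason, every errored row is retried together, and only once no transient reasons remain (or retries are exhausted) do the remaining rows land in FAILED_ROWS. A condensed sketch of that rule (not the `BigQueryWriteFn` source):

```python
_NON_TRANSIENT_ERRORS = {'invalid', 'invalidQuery', 'notImplemented'}


def rows_to_retry(errors):
  # Retry every errored row if at least one row failed for a transient
  # reason; otherwise no row is retried and all go to failed rows.
  any_transient = any(
      err['reason'] not in _NON_TRANSIENT_ERRORS
      for entry in errors
      for err in entry['errors'])
  indices = [entry['index'] for entry in errors]
  return indices if any_transient else []


print(rows_to_retry([
    {'index': 0, 'errors': [{'reason': 'invalid'}]},
    {'index': 1, 'errors': [{'reason': 'internalError'}]},
]))  # [0, 1] -- the 'invalid' row rides along with the retriable one
```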
- mock_send.side_effect = [ - exception_type(*error_args), - exception_type(*error_args), - [] # Errors thrown twice, and then succeeded - ] + def test_insert_rows_json_errors_retry_on_transient_error( + self, + mock_send, + unused_mock_sleep, + insert_response, + failed_rows, + streaming=False): + # In this test, a pipeline will only retry errors with reasons that are not + # in _NON_TRANSIENT_ERRORS since RetryStrategy is set to + # RETRY_ON_TRANSIENT_ERROR + call_counter = 0 + + def store_callback(table, **kwargs): + nonlocal call_counter + response = insert_response[call_counter] + call_counter += 1 + return response + + mock_send.side_effect = store_callback opt = StandardOptions() - opt.streaming = True + opt.streaming = streaming + + # Using the bundle based direct runner to avoid pickling problems + # with mocks. with beam.Pipeline(runner='BundleBasedDirectRunner', options=opt) as p: - _ = ( + bq_write_out = ( p | beam.Create([{ 'columnA': 'value1' + }, { + 'columnA': 'value2' + }, { + 'columnA': 'value3' }]) | WriteToBigQuery( table='project:dataset.table', @@ -1189,8 +1624,14 @@ def test_insert_all_unretriable_errors_streaming( }] }, create_disposition='CREATE_NEVER', - method='STREAMING_INSERTS')) - self.assertEqual(3, mock_send.call_count) + method='STREAMING_INSERTS', + insert_retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR)) + + failed_values = ( + bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] + | beam.Map(lambda x: x[1]['columnA'])) + + assert_that(failed_values, equal_to(failed_rows)) @unittest.skipIf(HttpError is None, 'GCP dependencies are not installed') @@ -1499,76 +1940,6 @@ def store_callback(table, **kwargs): result) self.assertEqual(len(data1['colA_values']), 1) - @parameterized.expand([ - param(retry_strategy=RetryStrategy.RETRY_ALWAYS), - param(retry_strategy=RetryStrategy.RETRY_NEVER), - param(retry_strategy=RetryStrategy.RETRY_ON_TRANSIENT_ERROR), - ]) - def test_permanent_failure_in_some_rows_does_not_duplicate( - self, unused_sleep_mock=None, retry_strategy=None): - with mock.patch('time.sleep'): - - def store_callback(table, **kwargs): - return [ - { - 'index': 0, - 'errors': [{ - 'reason': 'invalid' - }, { - 'reason': 'its bad' - }] - }, - ] - - client = mock.Mock() - client.insert_rows_json = mock.Mock(side_effect=store_callback) - - # The expected rows to be inserted according to the insert strategy - if retry_strategy == RetryStrategy.RETRY_NEVER: - inserted_rows = ['value3', 'value5'] - else: # RETRY_ALWAYS and RETRY_ON_TRANSIENT_ERRORS should insert all rows - inserted_rows = ['value3', 'value5'] - - # Using the bundle based direct runner to avoid pickling problems - # with mocks. 
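Raised exceptions are funneled into this same per-row path: the `bigquery_tools.py` hunk further below converts a caught `GoogleAPICallError` into one error entry per row instead of re-raising. A minimal sketch of that packaging step:

```python
def package_exception_as_row_errors(exc_message, exc_reason, rows):
  # Mirrors the new handling in BigQueryWrapper._insert_all_rows: every row
  # in the failed request is tagged with the same message and reason, so the
  # normal per-row retry/failed-rows routing applies.
  error = {'message': exc_message, 'reason': exc_reason}
  return [{'index': i, 'errors': [error]} for i, _ in enumerate(rows)]


print(package_exception_as_row_errors(
    'table not found', 'Not Found', rows=[{'a': 1}, {'a': 2}]))
# [{'index': 0, 'errors': [...]}, {'index': 1, 'errors': [...]}]
```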
- with beam.Pipeline(runner='BundleBasedDirectRunner') as p: - bq_write_out = ( - p - | beam.Create([{ - 'columnA': 'value1', 'columnB': 'value2' - }, { - 'columnA': 'value3', 'columnB': 'value4' - }, { - 'columnA': 'value5', 'columnB': 'value6' - }]) - | _StreamToBigQuery( - table_reference='project:dataset.table', - table_side_inputs=[], - schema_side_inputs=[], - schema='anyschema', - batch_size=None, - triggering_frequency=None, - create_disposition='CREATE_NEVER', - write_disposition=None, - kms_key=None, - retry_strategy=retry_strategy, - additional_bq_parameters=[], - ignore_insert_ids=False, - ignore_unknown_columns=False, - with_auto_sharding=False, - test_client=client, - max_retries=10, - num_streaming_keys=500)) - - failed_values = ( - bq_write_out[beam_bq.BigQueryWriteFn.FAILED_ROWS] - | beam.Map(lambda x: x[1]['columnA'])) - - assert_that( - failed_values, - equal_to( - list({'value1', 'value3', 'value5'}.difference(inserted_rows)))) - @parameterized.expand([ param(with_auto_sharding=False), param(with_auto_sharding=True), diff --git a/sdks/python/apache_beam/io/gcp/bigquery_tools.py b/sdks/python/apache_beam/io/gcp/bigquery_tools.py index 07d711f8fc92..2f9420795288 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_tools.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_tools.py @@ -732,11 +732,13 @@ def _insert_all_rows( except (ClientError, GoogleAPICallError) as e: # e.code contains the numeric http status code. service_call_metric.call(e.code) - # Re-reise the exception so that we re-try appropriately. - raise + # Package exception with required fields + error = {'message': e.message, 'reason': e.response.reason} + # Add all rows to the errors list along with the error + errors = [{"index": i, "errors": [error]} for i, _ in enumerate(rows)] except HttpError as e: service_call_metric.call(e) - # Re-reise the exception so that we re-try appropriately. + # Re-raise the exception so that we re-try appropriately. raise finally: self._latency_histogram_metric.update( @@ -1491,7 +1493,19 @@ class RetryStrategy(object): RETRY_NEVER = 'RETRY_NEVER' RETRY_ON_TRANSIENT_ERROR = 'RETRY_ON_TRANSIENT_ERROR' - _NON_TRANSIENT_ERRORS = {'invalid', 'invalidQuery', 'notImplemented'} + # Values below may be found in reasons provided either in an + # error returned by a client method or by an http response as + # defined in google.api_core.exceptions + _NON_TRANSIENT_ERRORS = { + 'invalid', + 'invalidQuery', + 'notImplemented', + 'Bad Request', + 'Unauthorized', + 'Forbidden', + 'Not Found', + 'Not Implemented', + } @staticmethod def should_retry(strategy, error_message): diff --git a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py index c73d3ff7e53e..4b728fe7ec1f 100644 --- a/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py +++ b/sdks/python/apache_beam/io/gcp/bigquery_write_it_test.py @@ -379,7 +379,7 @@ def test_big_query_write_without_schema(self): def test_big_query_write_insert_errors_reporting(self): """ Test that errors returned by beam.io.WriteToBigQuery - contain both the failed rows amd the reason for it failing. + contain both the failed rows and the reason for it failing. 
""" table_name = 'python_write_table' table_id = '{}.{}'.format(self.dataset_id, table_name) @@ -454,6 +454,55 @@ def test_big_query_write_insert_errors_reporting(self): | 'ParseErrors' >> beam.Map(lambda err: (err[1], err[2])), equal_to(bq_result_errors)) + @pytest.mark.it_postcommit + def test_big_query_write_insert_non_transient_api_call_error(self): + """ + Test that non-transient GoogleAPICallError errors returned + by beam.io.WriteToBigQuery are not retried and result in + FAILED_ROWS containing both the failed rows and the reason + for failure. + """ + table_name = 'this_table_does_not_exist' + table_id = '{}.{}'.format(self.dataset_id, table_name) + + input_data = [{ + 'number': 1, + 'str': 'some_string', + }] + + table_schema = { + "fields": [{ + "name": "number", "type": "INTEGER", 'mode': 'NULLABLE' + }, { + "name": "str", "type": "STRING", 'mode': 'NULLABLE' + }] + } + + bq_result_errors = [({ + 'number': 1, + 'str': 'some_string', + }, "Not Found")] + + args = self.test_pipeline.get_full_options_as_args() + + with beam.Pipeline(argv=args) as p: + # pylint: disable=expression-not-assigned + errors = ( + p | 'create' >> beam.Create(input_data) + | 'write' >> beam.io.WriteToBigQuery( + table_id, + schema=table_schema, + method='STREAMING_INSERTS', + insert_retry_strategy='RETRY_ON_TRANSIENT_ERROR', + create_disposition=beam.io.BigQueryDisposition.CREATE_NEVER, + write_disposition=beam.io.BigQueryDisposition.WRITE_APPEND)) + + assert_that( + errors[BigQueryWriteFn.FAILED_ROWS_WITH_ERRORS] + | + 'ParseErrors' >> beam.Map(lambda err: (err[1], err[2][0]["reason"])), + equal_to(bq_result_errors)) + @pytest.mark.it_postcommit @parameterized.expand([ param(file_format=FileFormat.AVRO), diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py index 2fdbce73170a..d75af4fe6ac1 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio.py +++ b/sdks/python/apache_beam/io/gcp/gcsio.py @@ -825,5 +825,4 @@ def finish(self): # Check for exception since the last put() call. if self._upload_thread.last_error is not None: e = self._upload_thread.last_error - raise type(self._upload_thread.last_error)( - "Error while uploading file %s" % self._path) from e # pylint: disable=raising-bad-type + raise RuntimeError("Error while uploading file %s" % self._path) from e diff --git a/sdks/python/apache_beam/io/jdbc.py b/sdks/python/apache_beam/io/jdbc.py index f8f24ddeb8d2..903b0d1b0fef 100644 --- a/sdks/python/apache_beam/io/jdbc.py +++ b/sdks/python/apache_beam/io/jdbc.py @@ -86,6 +86,7 @@ # pytype: skip-file +import datetime import typing import numpy as np @@ -94,7 +95,10 @@ from apache_beam.transforms.external import BeamJarExpansionService from apache_beam.transforms.external import ExternalTransform from apache_beam.transforms.external import NamedTupleBasedPayloadBuilder +from apache_beam.typehints.schemas import LogicalType +from apache_beam.typehints.schemas import MillisInstant from apache_beam.typehints.schemas import typing_to_runner_api +from apache_beam.utils.timestamp import Timestamp __all__ = [ 'WriteToJdbc', @@ -355,3 +359,99 @@ def __init__( ), expansion_service or default_io_expansion_service(classpath), ) + + +@LogicalType.register_logical_type +class JdbcDateType(LogicalType[datetime.date, MillisInstant, str]): + """ + For internal use only; no backwards-compatibility guarantees. + + Support of Legacy JdbcIO DATE logical type. Deemed to change when Java JDBCIO + has been migrated to Beam portable logical types. 
+  """
+  def __init__(self, argument=""):
+    pass
+
+  @classmethod
+  def representation_type(cls):
+    # type: () -> type
+    return Timestamp
+
+  @classmethod
+  def urn(cls):
+    return "beam:logical_type:javasdk_date:v1"
+
+  @classmethod
+  def language_type(cls):
+    return datetime.date
+
+  def to_representation_type(self, value):
+    # type: (datetime.date) -> Timestamp
+    return Timestamp.from_utc_datetime(
+        datetime.datetime.combine(
+            value, datetime.datetime.min.time(), tzinfo=datetime.timezone.utc))
+
+  def to_language_type(self, value):
+    # type: (Timestamp) -> datetime.date
+
+    return value.to_utc_datetime().date()
+
+  @classmethod
+  def argument_type(cls):
+    return str
+
+  def argument(self):
+    return ""
+
+  @classmethod
+  def _from_typing(cls, typ):
+    return cls()
+
+
+@LogicalType.register_logical_type
+class JdbcTimeType(LogicalType[datetime.time, MillisInstant, str]):
+  """
+  For internal use only; no backwards-compatibility guarantees.
+
+  Support of Legacy JdbcIO TIME logical type. Deemed to change when Java
+  JDBCIO has been migrated to Beam portable logical types.
+  """
+  def __init__(self, argument=""):
+    pass
+
+  @classmethod
+  def representation_type(cls):
+    # type: () -> type
+    return Timestamp
+
+  @classmethod
+  def urn(cls):
+    return "beam:logical_type:javasdk_time:v1"
+
+  @classmethod
+  def language_type(cls):
+    return datetime.time
+
+  def to_representation_type(self, value):
+    # type: (datetime.time) -> Timestamp
+    return Timestamp.from_utc_datetime(
+        datetime.datetime.combine(
+            datetime.datetime.utcfromtimestamp(0),
+            value,
+            tzinfo=datetime.timezone.utc))
+
+  def to_language_type(self, value):
+    # type: (Timestamp) -> datetime.time
+
+    return value.to_utc_datetime().time()
+
+  @classmethod
+  def argument_type(cls):
+    return str
+
+  def argument(self):
+    return ""
+
+  @classmethod
+  def _from_typing(cls, typ):
+    return cls()
diff --git a/sdks/python/apache_beam/ml/inference/huggingface_inference.py b/sdks/python/apache_beam/ml/inference/huggingface_inference.py
index aee613363781..3ec063808ae3 100644
--- a/sdks/python/apache_beam/ml/inference/huggingface_inference.py
+++ b/sdks/python/apache_beam/ml/inference/huggingface_inference.py
@@ -98,6 +98,7 @@ class PipelineTask(str, Enum):
   TextClassification = 'text-classification'
   TextGeneration = 'text-generation'
   Text2TextGeneration = 'text2text-generation'
+  TextToAudio = 'text-to-audio'
   TokenClassification = 'token-classification'
   Translation = 'translation'
   VideoClassification = 'video-classification'
@@ -570,7 +571,7 @@ class HuggingFacePipelineModelHandler(ModelHandler[str,
   def __init__(
       self,
       task: Union[str, PipelineTask] = "",
-      model=None,
+      model: str = "",
       *,
       inference_fn: PipelineInferenceFn = _default_pipeline_inference_fn,
       load_pipeline_args: Optional[Dict[str, Any]] = None,
@@ -593,9 +594,18 @@ def __init__(
     Args:
       task (str or enum.Enum): task supported by HuggingFace Pipelines.
         Accepts a string task or an enum.Enum from PipelineTask.
-      model : path to pretrained model on Hugging Face Models Hub to use custom
-        model for the chosen task. If the model already defines the task then
-        no need to specify the task parameter.
+      model (str): the *model-id* of a pretrained model on the Hugging Face
+        Models Hub, used to run a custom model for the chosen task. If the
+        `model` already defines the task, the `task` parameter need not be
+        specified. Pass the *model-id* string, not an actual model object.
+ Model-specific kwargs for `from_pretrained(..., **model_kwargs)` can be + specified with `model_kwargs` using `load_pipeline_args`. + + Example Usage:: + model_handler = HuggingFacePipelineModelHandler( + task="text-generation", model="meta-llama/Llama-2-7b-hf", + load_pipeline_args={'model_kwargs':{'quantization_map':config}}) + inference_fn: the inference function to use during RunInference. Default is _default_pipeline_inference_fn. load_pipeline_args (Dict[str, Any]): keyword arguments to provide load diff --git a/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py b/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py index 4ef42fb10a70..8ba315ba00ca 100644 --- a/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py +++ b/sdks/python/apache_beam/ml/inference/vertex_ai_inference_it_test.py @@ -26,6 +26,7 @@ from apache_beam.io.filesystems import FileSystems from apache_beam.testing.test_pipeline import TestPipeline +# pylint: disable=ungrouped-imports try: from apache_beam.examples.inference import vertex_ai_image_classification from apache_beam.examples.inference import vertex_ai_llm_text_classification diff --git a/sdks/python/apache_beam/py.typed b/sdks/python/apache_beam/py.typed new file mode 100644 index 000000000000..1aea47b9e7dd --- /dev/null +++ b/sdks/python/apache_beam/py.typed @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Marker file for PEP 561. +# The apache-beam package uses inline types. 
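The py.typed file added above is the PEP 561 marker: once it ships in the distribution, type checkers consume apache-beam's inline annotations instead of treating the package as untyped. A minimal sketch of the downstream effect, where the module name and the mypy invocation are illustrative and not part of this change:

```
# user_pipeline.py -- hypothetical downstream module. With the py.typed
# marker installed alongside the package, `mypy user_pipeline.py` resolves
# Beam's inline annotations rather than reporting missing stubs.
import apache_beam as beam

with beam.Pipeline() as p:
  squares = (
      p
      | beam.Create([1, 2, 3])
      | beam.Map(lambda x: x * x))  # checked against Beam's inline types
```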
diff --git a/sdks/python/apache_beam/runners/common.py b/sdks/python/apache_beam/runners/common.py index c3de9c190434..99cd26cc4098 100644 --- a/sdks/python/apache_beam/runners/common.py +++ b/sdks/python/apache_beam/runners/common.py @@ -24,6 +24,7 @@ # pytype: skip-file +import logging import sys import threading import traceback @@ -81,6 +82,8 @@ ENCODED_IMPULSE_VALUE = IMPULSE_VALUE_CODER_IMPL.encode_nested( GlobalWindows.windowed_value(b'')) +_LOGGER = logging.getLogger(__name__) + class NameContext(object): """Holds the name information for a step.""" @@ -1538,6 +1541,7 @@ def _reraise_augmented(self, exn, windowed_value=None): new_exn = new_exn.with_traceback(tb) self._maybe_sample_exception(exc_info, windowed_value) + _LOGGER.exception(new_exn) raise new_exn diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py index 11e2e1218659..3d612bd6ec0f 100644 --- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py +++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py @@ -359,6 +359,8 @@ def run_pipeline(self, pipeline, options, pipeline_proto=None): 'Google Cloud Dataflow runner not available, ' 'please install apache_beam[gcp]') + _check_and_add_missing_options(options) + # Convert all side inputs into a form acceptable to Dataflow. if pipeline: pipeline.visit(self.combinefn_visitor()) diff --git a/sdks/python/apache_beam/runners/dataflow/internal/names.py b/sdks/python/apache_beam/runners/dataflow/internal/names.py index 8bb39940e484..57c0bcdff201 100644 --- a/sdks/python/apache_beam/runners/dataflow/internal/names.py +++ b/sdks/python/apache_beam/runners/dataflow/internal/names.py @@ -34,6 +34,6 @@ # Unreleased sdks use container image tag specified below. # Update this tag whenever there is a change that # requires changes to SDK harness container or SDK harness launcher. 
-BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20230912' +BEAM_DEV_SDK_CONTAINER_TAG = 'beam-master-20230915' DATAFLOW_CONTAINER_IMAGE_REPOSITORY = 'gcr.io/cloud-dataflow/v1beta3' diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py index 13f1dc635965..cc1494fc7ae2 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations.py @@ -1346,6 +1346,7 @@ def lifted_stages(stage): payload=transform.spec.payload), inputs=transform.inputs, outputs={'out': precombined_pcoll_id}, + annotations=transform.annotations, environment_id=transform.environment_id)) yield make_stage( @@ -1355,6 +1356,7 @@ def lifted_stages(stage): spec=beam_runner_api_pb2.FunctionSpec( urn=common_urns.primitives.GROUP_BY_KEY.urn), inputs={'in': precombined_pcoll_id}, + annotations=transform.annotations, outputs={'out': grouped_pcoll_id})) yield make_stage( @@ -1367,6 +1369,7 @@ def lifted_stages(stage): payload=transform.spec.payload), inputs={'in': grouped_pcoll_id}, outputs={'out': merged_pcoll_id}, + annotations=transform.annotations, environment_id=transform.environment_id)) yield make_stage( @@ -1379,6 +1382,7 @@ def lifted_stages(stage): payload=transform.spec.payload), inputs={'in': merged_pcoll_id}, outputs=transform.outputs, + annotations=transform.annotations, environment_id=transform.environment_id)) def unlifted_stages(stage): diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py index 144f067900f3..3ff2421e6265 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner/translations_test.py @@ -384,6 +384,36 @@ def expand(self, pcoll): 'multiple-small-combines/min-4-globally/CombinePerKey', optimized_stage_names) + def test_combineperkey_annotation_propagation(self): + """ + Test that the CPK component transforms inherit annotations from the + source CPK + """ + class MyCombinePerKey(beam.CombinePerKey): + def annotations(self): + return {"my_annotation": b""} + + with TestPipeline() as pipeline: + _ = pipeline | beam.Create([(1, 2)]) | MyCombinePerKey(min) + + # Verify the annotations are propagated to the split up + # CPK transforms + proto = pipeline.to_runner_api( + default_environment=environments.EmbeddedPythonEnvironment( + capabilities=environments.python_sdk_capabilities())) + optimized = translations.optimize_pipeline( + proto, + phases=[translations.lift_combiners], + known_runner_urns=frozenset(), + partial=True) + for transform_id in ['MyCombinePerKey(min)/Precombine', + 'MyCombinePerKey(min)/Group', + 'MyCombinePerKey(min)/Merge', + 'MyCombinePerKey(min)/ExtractOutputs']: + assert ( + "my_annotation" in + optimized.components.transforms[transform_id].annotations) + def test_conditionally_packed_combiners(self): class RecursiveCombine(beam.PTransform): def __init__(self, labels): diff --git a/sdks/python/apache_beam/runners/render.py b/sdks/python/apache_beam/runners/render.py index 306bf8c2090b..da153d25a4bd 100644 --- a/sdks/python/apache_beam/runners/render.py +++ b/sdks/python/apache_beam/runners/render.py @@ -78,6 +78,8 @@ except ImportError: gcsio = None # type: ignore +_LOGGER = logging.getLogger(__name__) + # From the Beam site, circa November 2022. 
DEFAULT_EDGE_STYLE = 'color="#ff570b"' DEFAULT_TRANSFORM_STYLE = ( @@ -129,12 +131,6 @@ def _add_argparse_args(cls, parser): help='Set to also log input pipeline proto to stdout.') return parser - def __init__(self, *args, render_testing=False, **kwargs): - super().__init__(*args, **kwargs) - if self.render_port < 0 and not self.render_output and not render_testing: - raise ValueError( - 'At least one of --render_port or --render_output must be provided.') - class PipelineRenderer: def __init__(self, pipeline, options): @@ -342,7 +338,7 @@ def page_callback_data(self, layout): } def render_data(self): - logging.info("Re-rendering pipeline...") + _LOGGER.info("Re-rendering pipeline...") layout = self.layout_dot() if self.options.render_output: for path in self.options.render_output: @@ -352,10 +348,10 @@ def render_data(self): input=layout, check=False) if result.returncode: - logging.error( + _LOGGER.error( "Failed render pipeline as %r: exit %s", path, result.returncode) else: - logging.info("Rendered pipeline as %r", path) + _LOGGER.info("Rendered pipeline as %r", path) return self.page_callback_data(layout) def render_json(self): @@ -404,15 +400,19 @@ class RenderRunner(runner.PipelineRunner): # (such as counters, stage completion status, or possibly even PCollection # samples) queryable and/or displayed. This could evolve into a full Beam # UI. - def run_pipeline(self, pipeline_object, options, pipeline_proto=None): - if not pipeline_proto: - pipeline_proto = pipeline_object.to_runner_api() + def run_pipeline(self, pipeline_object, options): + return self.run_portable_pipeline(pipeline_object.to_runner_api(), options) + + def run_portable_pipeline(self, pipeline_proto, options): render_options = options.view_as(RenderOptions) + if render_options.render_port < 0 and not render_options.render_output: + raise ValueError( + 'At least one of --render_port or --render_output must be provided.') if render_options.log_proto: - logging.info(pipeline_proto) + _LOGGER.info(pipeline_proto) renderer = PipelineRenderer(pipeline_proto, render_options) try: - subprocess.run(['dotX', '-V'], capture_output=True, check=True) + subprocess.run(['dot', '-V'], capture_output=True, check=True) except FileNotFoundError as exn: # If dot is not available, we can at least output the raw .dot files. 
dot_files = [ @@ -422,7 +422,7 @@ def run_pipeline(self, pipeline_object, options, pipeline_proto=None): for output in dot_files: with open(output, 'w') as fout: fout.write(renderer.to_dot()) - logging.info("Wrote pipeline as %s", output) + _LOGGER.info("Wrote pipeline as %s", output) non_dot_files = set(render_options.render_output) - set(dot_files) if non_dot_files: @@ -543,17 +543,16 @@ def render_one(options): pipeline_proto = beam_runner_api_pb2.Pipeline() pipeline_proto.ParseFromString(content) - RenderRunner().run_pipeline( - None, pipeline_options.PipelineOptions(**vars(options)), pipeline_proto) + RenderRunner().run_portable_pipeline( + pipeline_proto, pipeline_options.PipelineOptions(**vars(options))) def run_server(options): class RenderBeamJob(local_job_service.BeamJob): def _invoke_runner(self): - return RenderRunner().run_pipeline( - None, - pipeline_options.PipelineOptions(**vars(options)), - self._pipeline_proto) + return RenderRunner().run_portable_pipeline( + self._pipeline_proto, + pipeline_options.PipelineOptions(**vars(options))) with tempfile.TemporaryDirectory() as staging_dir: job_servicer = local_job_service.LocalJobServicer( diff --git a/sdks/python/apache_beam/runners/render_test.py b/sdks/python/apache_beam/runners/render_test.py index 4dca2b8b5221..67e7afc1c7b9 100644 --- a/sdks/python/apache_beam/runners/render_test.py +++ b/sdks/python/apache_beam/runners/render_test.py @@ -16,10 +16,13 @@ # # pytype: skip-file +import os import argparse import logging import subprocess import unittest +import tempfile +import pytest import apache_beam as beam from apache_beam.runners import render @@ -39,6 +42,16 @@ def test_basic_graph(self): self.assertIn('CustomName', dot) self.assertEqual(dot.count('->'), 2) + def test_render_config_validation(self): + p = beam.Pipeline() + _ = ( + p | beam.Impulse() | beam.Map(lambda _: 2) + | 'CustomName' >> beam.Map(lambda x: x * x)) + pipeline_proto = p.to_runner_api() + with pytest.raises(ValueError): + render.RenderRunner().run_portable_pipeline( + pipeline_proto, render.RenderOptions()) + def test_side_input(self): p = beam.Pipeline() pcoll = p | beam.Impulse() | beam.FlatMap(lambda x: [1, 2, 3]) @@ -65,11 +78,35 @@ def test_composite_collapse(self): renderer.update(toggle=[create_transform_id]) self.assertEqual(renderer.to_dot().count('->'), 1) - def test_dot_well_formed(self): + +class DotRequiringRenderingTest(unittest.TestCase): + @classmethod + def setUpClass(cls): try: subprocess.run(['dot', '-V'], capture_output=True, check=True) except FileNotFoundError: + cls._dot_installed = False + else: + cls._dot_installed = True + + def setUp(self) -> None: + if not self._dot_installed: # type: ignore[attr-defined] self.skipTest('dot executable not installed') + + def test_run_portable_pipeline(self): + p = beam.Pipeline() + _ = ( + p | beam.Impulse() | beam.Map(lambda _: 2) + | 'CustomName' >> beam.Map(lambda x: x * x)) + pipeline_proto = p.to_runner_api() + + with tempfile.TemporaryDirectory() as tmpdir: + svg_path = os.path.join(tmpdir, "my_output.svg") + render.RenderRunner().run_portable_pipeline( + pipeline_proto, render.RenderOptions(render_output=[svg_path])) + assert os.path.exists(svg_path) + + def test_dot_well_formed(self): p = beam.Pipeline() _ = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x) pipeline_proto = p.to_runner_api() @@ -84,16 +121,12 @@ def test_dot_well_formed(self): renderer.render_data() def test_leaf_composite_filter(self): - try: - subprocess.run(['dot', '-V'], capture_output=True, check=True) - 
except FileNotFoundError: - self.skipTest('dot executable not installed') p = beam.Pipeline() _ = p | beam.Create([1, 2, 3]) | beam.Map(lambda x: x * x) dot = render.PipelineRenderer( p.to_runner_api(), - render.RenderOptions(['--render_leaf_composite_nodes=Create'], - render_testing=True)).to_dot() + render.RenderOptions(['--render_leaf_composite_nodes=Create' + ])).to_dot() self.assertEqual(dot.count('->'), 1) diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 4adae90edceb..9eb9299cac39 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -23,17 +23,97 @@ import grpc +import apache_beam as beam +from apache_beam.coders.coders import FastPrimitivesCoder +from apache_beam.portability import common_urns from apache_beam.portability.api import beam_fn_api_pb2 from apache_beam.portability.api import beam_fn_api_pb2_grpc from apache_beam.portability.api import endpoints_pb2 +from apache_beam.runners import common from apache_beam.runners.common import NameContext +from apache_beam.runners.worker import bundle_processor from apache_beam.runners.worker import log_handler +from apache_beam.runners.worker import operations from apache_beam.runners.worker import statesampler +from apache_beam.runners.worker.bundle_processor import BeamTransformFactory +from apache_beam.runners.worker.bundle_processor import BundleProcessor +from apache_beam.transforms.window import GlobalWindow from apache_beam.utils import thread_pool_executor +from apache_beam.utils.windowed_value import WindowedValue _LOGGER = logging.getLogger(__name__) +@BeamTransformFactory.register_urn('beam:internal:testexn:v1', bytes) +def create_exception_dofn( + factory, transform_id, transform_proto, payload, consumers): + """Returns a test DoFn that raises the given exception.""" + class RaiseException(beam.DoFn): + def __init__(self, msg): + self.msg = msg.decode() + + def process(self, _): + raise RuntimeError(self.msg) + + return bundle_processor._create_simple_pardo_operation( + factory, + transform_id, + transform_proto, + consumers, + RaiseException(payload)) + + +class TestOperation(operations.Operation): + """Test operation that forwards its payload to consumers.""" + class Spec: + def __init__(self, transform_proto): + self.output_coders = [ + FastPrimitivesCoder() for _ in transform_proto.outputs + ] + + def __init__( + self, + transform_proto, + name_context, + counter_factory, + state_sampler, + consumers, + payload, + ): + super().__init__( + name_context, + self.Spec(transform_proto), + counter_factory, + state_sampler) + self.payload = payload + + for _, consumer_ops in consumers.items(): + for consumer in consumer_ops: + self.add_receiver(consumer, 0) + + def start(self): + super().start() + + # Not using windowing logic, so just using simple defaults here. 
+ if self.payload: + self.process( + WindowedValue(self.payload, timestamp=0, windows=[GlobalWindow()])) + + def process(self, windowed_value): + self.output(windowed_value) + + +@BeamTransformFactory.register_urn('beam:internal:testop:v1', bytes) +def create_test_op(factory, transform_id, transform_proto, payload, consumers): + return TestOperation( + transform_proto, + common.NameContext(transform_proto.unique_name, transform_id), + factory.counter_factory, + factory.state_sampler, + consumers, + payload) + + class BeamFnLoggingServicer(beam_fn_api_pb2_grpc.BeamFnLoggingServicer): def __init__(self): self.log_records_received = [] @@ -153,6 +233,77 @@ def test_context(self): finally: statesampler.set_current_tracker(None) + def test_extracts_transform_id_during_exceptions(self): + """Tests that transform ids are captured during user code exceptions.""" + descriptor = beam_fn_api_pb2.ProcessBundleDescriptor() + + # Boiler plate for the DoFn. + WINDOWING_ID = 'window' + WINDOW_CODER_ID = 'cw' + window = descriptor.windowing_strategies[WINDOWING_ID] + window.window_fn.urn = common_urns.global_windows.urn + window.window_coder_id = WINDOW_CODER_ID + window.trigger.default.SetInParent() + window_coder = descriptor.coders[WINDOW_CODER_ID] + window_coder.spec.urn = common_urns.StandardCoders.Enum.GLOBAL_WINDOW.urn + + # Input collection to the exception raising DoFn. + INPUT_PCOLLECTION_ID = 'pc-in' + INPUT_CODER_ID = 'c-in' + descriptor.pcollections[ + INPUT_PCOLLECTION_ID].unique_name = INPUT_PCOLLECTION_ID + descriptor.pcollections[INPUT_PCOLLECTION_ID].coder_id = INPUT_CODER_ID + descriptor.pcollections[ + INPUT_PCOLLECTION_ID].windowing_strategy_id = WINDOWING_ID + descriptor.coders[ + INPUT_CODER_ID].spec.urn = common_urns.StandardCoders.Enum.BYTES.urn + + # Output collection to the exception raising DoFn. + OUTPUT_PCOLLECTION_ID = 'pc-out' + OUTPUT_CODER_ID = 'c-out' + descriptor.pcollections[ + OUTPUT_PCOLLECTION_ID].unique_name = OUTPUT_PCOLLECTION_ID + descriptor.pcollections[OUTPUT_PCOLLECTION_ID].coder_id = OUTPUT_CODER_ID + descriptor.pcollections[ + OUTPUT_PCOLLECTION_ID].windowing_strategy_id = WINDOWING_ID + descriptor.coders[ + OUTPUT_CODER_ID].spec.urn = common_urns.StandardCoders.Enum.BYTES.urn + + # Add a simple transform to inject an element into the fake pipeline. + TEST_OP_TRANSFORM_ID = 'test_op' + test_transform = descriptor.transforms[TEST_OP_TRANSFORM_ID] + test_transform.outputs['None'] = INPUT_PCOLLECTION_ID + test_transform.spec.urn = 'beam:internal:testop:v1' + test_transform.spec.payload = b'hello, world!' + + # Add the DoFn to create an exception. + TEST_EXCEPTION_TRANSFORM_ID = 'test_transform' + test_transform = descriptor.transforms[TEST_EXCEPTION_TRANSFORM_ID] + test_transform.inputs['0'] = INPUT_PCOLLECTION_ID + test_transform.outputs['None'] = OUTPUT_PCOLLECTION_ID + test_transform.spec.urn = 'beam:internal:testexn:v1' + test_transform.spec.payload = b'expected exception' + + # Create and process a fake bundle. The instruction id doesn't matter + # here. 
+ processor = BundleProcessor(descriptor, None, None) + + with self.assertRaisesRegex(RuntimeError, 'expected exception'): + processor.process_bundle('instruction_id') + + self.fn_log_handler.close() + logs = [ + log for logs in self.test_logging_service.log_records_received + for log in logs.log_entries + ] + + actual_log = logs[0] + + self.assertEqual( + actual_log.severity, beam_fn_api_pb2.LogEntry.Severity.ERROR) + self.assertTrue('expected exception' in actual_log.message) + self.assertEqual(actual_log.transform_id, 'test_transform') + # Test cases. data = { diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py index 66ac8fbad967..671af54e47be 100644 --- a/sdks/python/apache_beam/transforms/core.py +++ b/sdks/python/apache_beam/transforms/core.py @@ -2258,6 +2258,10 @@ def __init__(self, pcoll, exception_handling_args, upstream_errors=()): self._exception_handling_args = exception_handling_args self._upstream_errors = upstream_errors + @property + def element_type(self): + return self._pcoll.element_type + def main_output_tag(self): return self._exception_handling_args.get('main_tag', 'good') @@ -2309,6 +2313,10 @@ def __init__(self, pvalue, exception_handling_args=None): else: self._pvalue = _PValueWithErrors(pvalue, exception_handling_args) + @property + def element_type(self): + return self._pvalue.element_type + def __or__(self, transform): return self.apply(transform) diff --git a/sdks/python/apache_beam/transforms/external.py b/sdks/python/apache_beam/transforms/external.py index 4b8e708bfc5c..44bf2398a6dd 100644 --- a/sdks/python/apache_beam/transforms/external.py +++ b/sdks/python/apache_beam/transforms/external.py @@ -185,6 +185,14 @@ def __init__(self, identifier, **kwargs): self._identifier = identifier self._kwargs = kwargs + def identifier(self): + """ + The URN referencing this SchemaTransform + + :return: str + """ + return self._identifier + def build(self): schema_proto, payload = self._get_schema_proto_and_payload(**self._kwargs) payload = external_transforms_pb2.SchemaTransformPayload( @@ -194,7 +202,7 @@ def build(self): return payload -class ExplicitSchemaTransformPayloadBuilder(PayloadBuilder): +class ExplicitSchemaTransformPayloadBuilder(SchemaTransformPayloadBuilder): def __init__(self, identifier, schema_proto, **kwargs): self._identifier = identifier self._schema_proto = schema_proto @@ -414,7 +422,7 @@ def __init__( def expand(self, pcolls): # Expand the transform using the expansion service. 
- return pcolls | ExternalTransform( + return pcolls | self._payload_builder.identifier() >> ExternalTransform( common_urns.schematransform_based_expand.urn, self._payload_builder, self._expansion_service) diff --git a/sdks/python/apache_beam/yaml/readme_test.py b/sdks/python/apache_beam/yaml/readme_test.py index 958d9cb5783a..d918d18e11dd 100644 --- a/sdks/python/apache_beam/yaml/readme_test.py +++ b/sdks/python/apache_beam/yaml/readme_test.py @@ -32,6 +32,7 @@ import apache_beam as beam from apache_beam.options.pipeline_options import PipelineOptions from apache_beam.typehints import trivial_inference +from apache_beam.yaml import yaml_mapping from apache_beam.yaml import yaml_provider from apache_beam.yaml import yaml_transform @@ -85,13 +86,16 @@ def guess_name_and_type(expr): typ, = [t for t in typ.__args__ if t is not type(None)] return name, typ - output_schema = [ - guess_name_and_type(expr) for expr in m.group(1).split(',') - ] - output_element = beam.Row(**{name: typ() for name, typ in output_schema}) - return next(iter(inputs.values())) | beam.Map( - lambda _: output_element).with_output_types( - trivial_inference.instance_to_type(output_element)) + if m.group(1) == '*': + return inputs['PCOLLECTION'] | beam.Filter(lambda _: True) + else: + output_schema = [ + guess_name_and_type(expr) for expr in m.group(1).split(',') + ] + output_element = beam.Row(**{name: typ() for name, typ in output_schema}) + return next(iter(inputs.values())) | beam.Map( + lambda _: output_element).with_output_types( + trivial_inference.instance_to_type(output_element)) class FakeReadFromPubSub(beam.PTransform): @@ -204,12 +208,13 @@ def test(self): ] options['render_leaf_composite_nodes'] = ['.*'] test_provider = TestProvider(TEST_TRANSFORMS) + test_sql_mapping_provider = yaml_mapping.SqlMappingProvider(test_provider) p = beam.Pipeline(options=PipelineOptions(**options)) yaml_transform.expand_pipeline( p, modified_yaml, - {t: test_provider - for t in test_provider.provided_transforms()}) + yaml_provider.merge_providers( + [test_provider, test_sql_mapping_provider])) if test_type == 'BUILD': return p.run().wait_until_finish() diff --git a/sdks/python/apache_beam/yaml/standard_io.yaml b/sdks/python/apache_beam/yaml/standard_io.yaml index e60f0026fd25..1738110539ce 100644 --- a/sdks/python/apache_beam/yaml/standard_io.yaml +++ b/sdks/python/apache_beam/yaml/standard_io.yaml @@ -51,6 +51,8 @@ 'ReadFromBigQuery': 'apache_beam.yaml.yaml_io.read_from_bigquery' # Disable until https://github.com/apache/beam/issues/28162 is resolved. 
# 'WriteToBigQuery': 'apache_beam.yaml.yaml_io.write_to_bigquery' + 'ReadFromText': 'apache_beam.yaml.yaml_io.read_from_text' + 'WriteToText': 'apache_beam.yaml.yaml_io.write_to_text' # Declared as a renaming transform to avoid exposing all # (implementation-specific) pandas arguments and aligning with possible Java diff --git a/sdks/python/apache_beam/yaml/yaml_io.py b/sdks/python/apache_beam/yaml/yaml_io.py index 646d5e1fbff1..297c07e9abb5 100644 --- a/sdks/python/apache_beam/yaml/yaml_io.py +++ b/sdks/python/apache_beam/yaml/yaml_io.py @@ -28,12 +28,38 @@ import yaml import apache_beam as beam +import apache_beam.io as beam_io from apache_beam.io import ReadFromBigQuery from apache_beam.io import WriteToBigQuery from apache_beam.io.gcp.bigquery import BigQueryDisposition +from apache_beam.typehints.schemas import named_fields_from_element_type from apache_beam.yaml import yaml_provider +def read_from_text(path: str): + # TODO(yaml): Consider passing the filename and offset, possibly even + # by default. + return beam_io.ReadFromText(path) | beam.Map(lambda s: beam.Row(line=s)) + + +@beam.ptransform_fn +def write_to_text(pcoll, path: str): + try: + field_names = [ + name for name, _ in named_fields_from_element_type(pcoll.element_type) + ] + except Exception as exn: + raise ValueError( + "WriteToText requires an input schema with exactly one field.") from exn + if len(field_names) != 1: + raise ValueError( + "WriteToText requires an input schema with exactly one field, got %s" % + field_names) + sole_field_name, = field_names + return pcoll | beam.Map( + lambda x: str(getattr(x, sole_field_name))) | beam.io.WriteToText(path) + + def read_from_bigquery( query=None, table=None, row_restriction=None, fields=None): if query is None: @@ -99,18 +125,4 @@ def raise_exception(failed_row_with_error): def io_providers(): with open(os.path.join(os.path.dirname(__file__), 'standard_io.yaml')) as fin: - explicit_ios = yaml_provider.parse_providers( - yaml.load(fin, Loader=yaml.SafeLoader)) - - # TOOD(yaml): We should make all top-level IOs explicit. - # This will be a chance to clean up the APIs and align them with their - # Java implementations. - # PythonTransform can be used to get the "raw" transforms for any others. - implicit_ios = yaml_provider.InlineProvider({ - key: getattr(beam.io, key) - for key in dir(beam.io) - if (key.startswith('ReadFrom') or key.startswith('WriteTo')) and - key not in explicit_ios - }) - - return yaml_provider.merge_providers(explicit_ios, implicit_ios) + return yaml_provider.parse_providers(yaml.load(fin, Loader=yaml.SafeLoader)) diff --git a/sdks/python/apache_beam/yaml/yaml_mapping.md b/sdks/python/apache_beam/yaml/yaml_mapping.md index b5e84e1a8054..653b4abe8b89 100644 --- a/sdks/python/apache_beam/yaml/yaml_mapping.md +++ b/sdks/python/apache_beam/yaml/yaml_mapping.md @@ -131,7 +131,7 @@ Currently, in addition to Python, SQL expressions are supported as well Sometimes it may be desirable to emit more (or less) than one record for each input record. This can be accomplished by mapping to an iterable type and -noting that the specific field should be exploded, e.g. +following the mapping with an Explode operation, e.g. ``` - type: MapToFields @@ -140,7 +140,9 @@ noting that the specific field should be exploded, e.g. fields: new_col: "[col1.upper(), col1.lower(), col1.title()]" another_col: "col2 + col3" - explode: new_col +- type: Explode + config: + fields: new_col ``` will result in three output records for every input record. 
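The two documented steps compose directly as a chain. Below is a minimal sketch of driving the same MapToFields + Explode pair through YamlTransform from the Python SDK, mirroring the pattern in the updated yaml_mapping_test.py later in this diff; the input row values and the cloudpickle option are illustrative:

```
# Hypothetical driver for the MapToFields + Explode chain documented
# above. Each input row yields three output rows, one per element of
# the exploded new_col list.
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.yaml.yaml_transform import YamlTransform

with beam.Pipeline(options=PipelineOptions(
    pickle_library='cloudpickle')) as p:
  _ = (
      p
      | beam.Create([beam.Row(col1='a', col2=1, col3=2)])
      | YamlTransform(
          '''
          type: chain
          input: input
          transforms:
            - type: MapToFields
              config:
                language: python
                fields:
                  new_col: "[col1.upper(), col1.lower(), col1.title()]"
                  another_col: "col2 + col3"
            - type: Explode
              config:
                fields: new_col
          '''))
```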
@@ -155,7 +157,9 @@ product over all fields should be taken. For example fields: new_col: "[col1.upper(), col1.lower(), col1.title()]" another_col: "[col2 - 1, col2, col2 + 1]" - explode: [new_col, another_col] +- type: Explode + config: + fields: [new_col, another_col] cross_product: true ``` @@ -168,38 +172,27 @@ will emit nine records whereas fields: new_col: "[col1.upper(), col1.lower(), col1.title()]" another_col: "[col2 - 1, col2, col2 + 1]" - explode: [new_col, another_col] +- type: Explode + config: + fields: [new_col, another_col] cross_product: false ``` will only emit three. -If one is only exploding existing fields, a simpler `Explode` transform may be -used instead +The `Explode` operation can be used on its own if the field in question is +already an iterable type. ``` - type: Explode config: - explode: [col1] + fields: [col1] ``` ## Filtering Sometimes it can be desirable to only keep records that satisfy a certain -criteria. This can be accomplished by specifying a keep parameter, e.g. - -``` -- type: MapToFields - config: - language: python - fields: - new_col: "col1.upper()" - another_col: "col2 + col3" - keep: "col2 > 0" -``` - -Like explode, there is a simpler `Filter` transform useful when no mapping is -being done +criteria. This can be accomplished with a `Filter` transform, e.g. ``` - type: Filter diff --git a/sdks/python/apache_beam/yaml/yaml_mapping.py b/sdks/python/apache_beam/yaml/yaml_mapping.py index b6dea894b3e9..221c6f018d67 100644 --- a/sdks/python/apache_beam/yaml/yaml_mapping.py +++ b/sdks/python/apache_beam/yaml/yaml_mapping.py @@ -17,6 +17,14 @@ """This module defines the basic MapToFields operation.""" import itertools +from typing import Any +from typing import Callable +from typing import Collection +from typing import Dict +from typing import Iterable +from typing import Mapping +from typing import Optional +from typing import Union import js2py @@ -139,18 +147,73 @@ def _as_callable(original_fields, expr, transform_name, language): 'Supported languages are "javascript" and "python."') +def exception_handling_args(error_handling_spec): + if error_handling_spec: + return { + 'dead_letter_tag' if k == 'output' else k: v + for (k, v) in error_handling_spec.items() + } + else: + return None + + +def _map_errors_to_standard_format(): + # TODO(https://github.com/apache/beam/issues/24755): Switch to MapTuple. + return beam.Map( + lambda x: beam.Row(element=x[0], msg=str(x[1][1]), stack=str(x[1][2]))) + + +def maybe_with_exception_handling(inner_expand): + def expand(self, pcoll): + wrapped_pcoll = beam.core._MaybePValueWithErrors( + pcoll, self._exception_handling_args) + return inner_expand(self, wrapped_pcoll).as_result( + _map_errors_to_standard_format()) + + return expand + + +def maybe_with_exception_handling_transform_fn(transform_fn): + def expand(pcoll, error_handling=None, **kwargs): + wrapped_pcoll = beam.core._MaybePValueWithErrors( + pcoll, exception_handling_args(error_handling)) + return transform_fn(wrapped_pcoll, + **kwargs).as_result(_map_errors_to_standard_format()) + + return expand + + # TODO(yaml): This should be available in all environments, in which case # we choose the one that matches best. 
class _Explode(beam.PTransform): - def __init__(self, fields, cross_product): + def __init__( + self, + fields: Union[str, Collection[str]], + cross_product: Optional[bool] = None, + error_handling: Optional[Mapping[str, Any]] = None): + if isinstance(fields, str): + fields = [fields] + if cross_product is None: + if len(fields) > 1: + raise ValueError( + 'cross_product must be specified true or false ' + 'when exploding multiple fields') + else: + # Doesn't matter. + cross_product = True self._fields = fields self._cross_product = cross_product - self._exception_handling_args = None + # TODO(yaml): Support standard error handling argument. + self._exception_handling_args = exception_handling_args(error_handling) + @maybe_with_exception_handling def expand(self, pcoll): all_fields = [ x for x, _ in named_fields_from_element_type(pcoll.element_type) ] + for field in self._fields: + if field not in all_fields: + raise ValueError(f'Exploding unknown field "{field}"') to_explode = self._fields def explode_cross_product(base, fields): @@ -171,12 +234,12 @@ def explode_zip(base, fields): yield beam.Row(**copy) return ( - beam.core._MaybePValueWithErrors(pcoll, self._exception_handling_args) + pcoll | beam.FlatMap( lambda row: (explode_cross_product if self._cross_product else explode_zip) ({name: getattr(row, name) - for name in all_fields}, to_explode))).as_result() + for name in all_fields}, to_explode))) def infer_output_type(self, input_type): return row_type.RowTypeConstraint.from_fields([( @@ -190,189 +253,171 @@ def with_exception_handling(self, **kwargs): return self -# TODO(yaml): Should Filter and Explode be distinct operations from Project? -# We'll want these per-language. @beam.ptransform.ptransform_fn -def _PythonProjectionTransform( - pcoll, - *, - fields, - transform_name, - language, - keep=None, - explode=(), - cross_product=True, - error_handling=None): - original_fields = [ - name for (name, _) in named_fields_from_element_type(pcoll.element_type) - ] +@maybe_with_exception_handling_transform_fn +def _PyJsFilter( + pcoll, keep: Union[str, Dict[str, str]], language: Optional[str] = None): - if error_handling is None: - error_handling_args = None + input_schema = dict(named_fields_from_element_type(pcoll.element_type)) + if isinstance(keep, str) and keep in input_schema: + keep_fn = lambda row: getattr(row, keep) else: - error_handling_args = { - 'dead_letter_tag' if k == 'output' else k: v - for (k, v) in error_handling.items() - } + keep_fn = _as_callable(list(input_schema.keys()), keep, "keep", language) + return pcoll | beam.Filter(keep_fn) - pcoll = beam.core._MaybePValueWithErrors(pcoll, error_handling_args) - if keep: - if isinstance(keep, str) and keep in original_fields: - keep_fn = lambda row: getattr(row, keep) - else: - keep_fn = _as_callable(original_fields, keep, transform_name, language) - filtered = pcoll | beam.Filter(keep_fn) - else: - filtered = pcoll +def is_expr(v): + return isinstance(v, str) or (isinstance(v, dict) and 'expression' in v) - projected = filtered | beam.Select( - **{ - name: _as_callable(original_fields, expr, transform_name, language) - for (name, expr) in fields.items() - }) - if explode: - result = projected | _Explode(explode, cross_product=cross_product) - else: - result = projected - - return result.as_result( - # TODO(https://github.com/apache/beam/issues/24755): Switch to MapTuple. 
- beam.Map( - lambda x: beam.Row( - element=x[0], msg=str(x[1][1]), stack=str(x[1][2])))) - - -@beam.ptransform.ptransform_fn -def MapToFields( - pcoll, - yaml_create_transform, - *, - fields, - keep=None, - explode=(), - cross_product=None, - append=False, - drop=(), - language=None, - error_handling=None, - transform_name="MapToFields", - **language_keywords): - if isinstance(explode, str): - explode = [explode] - if cross_product is None: - if len(explode) > 1: - # TODO(robertwb): Consider if true is an OK default. - raise ValueError( - 'cross_product must be specified true or false ' - 'when exploding multiple fields') - else: - # Doesn't matter. - cross_product = True +def normalize_fields(pcoll, fields, drop=(), append=False, language='generic'): + try: + input_schema = dict(named_fields_from_element_type(pcoll.element_type)) + except ValueError as exn: + if drop: + raise ValueError("Can only drop fields on a schema'd input.") from exn + if append: + raise ValueError("Can only append fields on a schema'd input.") from exn + elif any(is_expr(x) for x in fields.values()): + raise ValueError("Can only use expressions on a schema'd input.") from exn + input_schema = {} - input_schema = dict(named_fields_from_element_type(pcoll.element_type)) + if isinstance(drop, str): + drop = [drop] if drop and not append: raise ValueError("Can only drop fields if append is true.") for name in drop: if name not in input_schema: raise ValueError(f'Dropping unknown field "{name}"') - for name in explode: - if not (name in fields or (append and name in input_schema)): - raise ValueError(f'Exploding unknown field "{name}"') if append: for name in fields: if name in input_schema and name not in drop: - raise ValueError(f'Redefinition of field "{name}"') + raise ValueError( + f'Redefinition of field "{name}". ' + 'Cannot append a field that already exists in original input.') + + if language == 'generic': + for expr in fields.values(): + if not isinstance(expr, str): + raise ValueError( + "Missing language specification. " + "Must specify a language when using a map with custom logic.") + missing = set(fields.values()) - set(input_schema.keys()) + if missing: + raise ValueError( + f"Missing language specification or unknown input fields: {missing}") if append: - fields = { + return input_schema, { **{name: name for name in input_schema.keys() if name not in drop}, **fields } + else: + return input_schema, fields - if language is None: - for name, expr in fields.items(): - if not isinstance(expr, str) or expr not in input_schema: - # TODO(robertw): Could consider defaulting to SQL, or another - # lowest-common-denominator expression language. - raise ValueError("Missing language specification.") - - # We should support this for all languages. - language = "python" - - if language in ("sql", "calcite"): - if error_handling: - raise ValueError('Error handling unsupported for sql.') - selects = [f'{expr} AS {name}' for (name, expr) in fields.items()] - query = "SELECT " + ", ".join(selects) + " FROM PCOLLECTION" - if keep: - query += " WHERE " + keep - - result = pcoll | yaml_create_transform({ - 'type': 'Sql', - 'config': { - 'query': query, **language_keywords - }, - }, [pcoll]) - if explode: - # TODO(yaml): Implement via unnest. 
- result = result | _Explode(explode, cross_product) - - return result - - elif language == 'python' or language == 'javascript': - return pcoll | yaml_create_transform({ - 'type': 'PyTransform', - 'config': { - 'constructor': __name__ + '._PythonProjectionTransform', - 'kwargs': { - 'fields': fields, - 'transform_name': transform_name, - 'language': language, - 'keep': keep, - 'explode': explode, - 'cross_product': cross_product, - 'error_handling': error_handling, - }, - **language_keywords - }, - }, [pcoll]) - else: - # TODO(yaml): Support javascript expressions and UDFs. - # TODO(yaml): Support java by fully qualified name. - # TODO(yaml): Maybe support java lambdas? - raise ValueError( - f'Unknown language: {language}. ' - 'Supported languages are "sql" (alias calcite) and "python."') +@beam.ptransform.ptransform_fn +@maybe_with_exception_handling_transform_fn +def _PyJsMapToFields(pcoll, language='generic', **mapping_args): + input_schema, fields = normalize_fields( + pcoll, language=language, **mapping_args) + original_fields = list(input_schema.keys()) + + return pcoll | beam.Select( + **{ + name: _as_callable(original_fields, expr, name, language) + for (name, expr) in fields.items() + }) + + +class SqlMappingProvider(yaml_provider.Provider): + def __init__(self, sql_provider=None): + if sql_provider is None: + sql_provider = yaml_provider.beam_jar( + urns={'Sql': 'beam:external:java:sql:v1'}, + gradle_target='sdks:java:extensions:sql:expansion-service:shadowJar') + self._sql_provider = sql_provider + + def available(self): + return self._sql_provider.available() + + def cache_artifacts(self): + return self._sql_provider.cache_artifacts() + + def provided_transforms(self) -> Iterable[str]: + return [ + 'Filter-sql', + 'Filter-calcite', + 'MapToFields-sql', + 'MapToFields-calcite' + ] + + def create_transform( + self, + typ: str, + args: Mapping[str, Any], + yaml_create_transform: Callable[ + [Mapping[str, Any], Iterable[beam.PCollection]], beam.PTransform] + ) -> beam.PTransform: + if typ.startswith('Filter-'): + return _SqlFilterTransform( + self._sql_provider, yaml_create_transform, **args) + if typ.startswith('MapToFields-'): + return _SqlMapToFieldsTransform( + self._sql_provider, yaml_create_transform, **args) + else: + raise NotImplementedError(typ) + + def underlying_provider(self): + return self._sql_provider + + def to_json(self): + return {'type': "SqlMappingProvider"} + + +@beam.ptransform.ptransform_fn +def _SqlFilterTransform( + pcoll, sql_provider, yaml_create_transform, keep, language): + return pcoll | sql_provider.create_transform( + 'Sql', {'query': f'SELECT * FROM PCOLLECTION WHERE {keep}'}, + yaml_create_transform) -def create_mapping_provider(): +@beam.ptransform.ptransform_fn +def _SqlMapToFieldsTransform( + pcoll, sql_provider, yaml_create_transform, **mapping_args): + _, fields = normalize_fields(pcoll, **mapping_args) + + def extract_expr(name, v): + if isinstance(v, str): + return v + elif 'expression' in v: + return v['expression'] + else: + raise ValueError("Only expressions allowed in SQL at {name}.") + + selects = [ + f'({extract_expr(name, expr)}) AS {name}' + for (name, expr) in fields.items() + ] + query = "SELECT " + ", ".join(selects) + " FROM PCOLLECTION" + return pcoll | sql_provider.create_transform( + 'Sql', {'query': query}, yaml_create_transform) + + +def create_mapping_providers(): # These are MetaInlineProviders because their expansion is in terms of other # YamlTransforms, but in a way that needs to be deferred until the input # schema 
is known. - return yaml_provider.MetaInlineProvider({ - 'MapToFields': MapToFields, - 'Filter': ( - lambda yaml_create_transform, - keep, - **kwargs: MapToFields( - yaml_create_transform, - keep=keep, - fields={}, - append=True, - transform_name='Filter', - **kwargs)), - 'Explode': ( - lambda yaml_create_transform, - explode, - **kwargs: MapToFields( - yaml_create_transform, - explode=explode, - fields={}, - append=True, - transform_name='Explode', - **kwargs)), - }) + return [ + yaml_provider.InlineProvider({ + 'Explode': _Explode, + 'Filter-python': _PyJsFilter, + 'Filter-javascript': _PyJsFilter, + 'MapToFields-python': _PyJsMapToFields, + 'MapToFields-javascript': _PyJsMapToFields, + 'MapToFields-generic': _PyJsMapToFields, + }), + SqlMappingProvider(), + ] diff --git a/sdks/python/apache_beam/yaml/yaml_mapping_test.py b/sdks/python/apache_beam/yaml/yaml_mapping_test.py index 728476b1fd5d..55032aeae52e 100644 --- a/sdks/python/apache_beam/yaml/yaml_mapping_test.py +++ b/sdks/python/apache_beam/yaml/yaml_mapping_test.py @@ -82,18 +82,18 @@ def test_filter(self): elements = p | beam.Create(DATA) result = elements | YamlTransform( ''' - type: MapToFields + type: Filter input: input config: language: python - fields: - label: label keep: "rank > 0" ''') assert_that( - result, equal_to([ - beam.Row(label='37a'), - beam.Row(label='389a'), + result + | beam.Map(lambda named_tuple: beam.Row(**named_tuple._asdict())), + equal_to([ + beam.Row(label='37a', conductor=37, rank=1), + beam.Row(label='389a', conductor=389, rank=2), ])) def test_explode(self): @@ -105,15 +105,19 @@ def test_explode(self): ]) result = elements | YamlTransform( ''' - type: MapToFields + type: chain input: input - config: - language: python - append: true - fields: - range: "range(a)" - explode: [range, b] - cross_product: true + transforms: + - type: MapToFields + config: + language: python + append: true + fields: + range: "range(a)" + - type: Explode + config: + fields: [range, b] + cross_product: true ''') assert_that( result, diff --git a/sdks/python/apache_beam/yaml/yaml_provider.py b/sdks/python/apache_beam/yaml/yaml_provider.py index 736b1cab4658..6f760f359b06 100644 --- a/sdks/python/apache_beam/yaml/yaml_provider.py +++ b/sdks/python/apache_beam/yaml/yaml_provider.py @@ -73,6 +73,16 @@ def provided_transforms(self) -> Iterable[str]: def config_schema(self, type): return None + def requires_inputs(self, typ: str, args: Mapping[str, Any]) -> bool: + """Returns whether this transform requires inputs. + + Specifically, if this returns True and inputs are not provided than an error + will be thrown. + + This is best-effort, primarily for better and earlier error messages. 
+    """
+    return not typ.startswith('Read')
+
   def create_transform(
       self,
       typ: str,
@@ -157,7 +167,15 @@ def config_schema(self, type):
       return named_tuple_to_schema(
           self.schema_transforms()[self._urns[type]].configuration_schema)

+  def requires_inputs(self, typ, args):
+    if self._urns[typ] in self.schema_transforms():
+      return bool(self.schema_transforms()[self._urns[typ]].inputs)
+    else:
+      return super().requires_inputs(typ, args)
+
   def create_transform(self, type, args, yaml_create_transform):
+    if callable(self._service):
+      self._service = self._service()
     urn = self._urns[type]
     if urn in self.schema_transforms():
       return external.SchemaAwareExternalTransform(
@@ -206,6 +224,7 @@ def provider_from_spec(cls, spec):
   def register_provider_type(cls, type_name):
     def apply(constructor):
       cls._provider_types[type_name] = constructor
+      return constructor

     return apply

@@ -375,8 +394,9 @@ def fn_takes_side_inputs(fn):

 class InlineProvider(Provider):
-  def __init__(self, transform_factories):
+  def __init__(self, transform_factories, no_input_transforms=()):
     self._transform_factories = transform_factories
+    self._no_input_transforms = set(no_input_transforms)

   def available(self):
     return True
@@ -418,6 +438,14 @@ def create_transform(self, type, args, yaml_create_transform):
   def to_json(self):
     return {'type': "InlineProvider"}

+  def requires_inputs(self, typ, args):
+    if typ in self._no_input_transforms:
+      return False
+    elif hasattr(self._transform_factories[typ], '_yaml_requires_inputs'):
+      return self._transform_factories[typ]._yaml_requires_inputs
+    else:
+      return super().requires_inputs(typ, args)
+

 class MetaInlineProvider(InlineProvider):
   def create_transform(self, type, args, yaml_create_transform):
@@ -552,30 +580,24 @@ def _parse_window_spec(spec):
   # TODO: Triggering, etc.
return beam.WindowInto(window_fn) - return InlineProvider( - dict({ - 'Create': create, - 'PyMap': lambda fn: beam.Map( - python_callable.PythonCallableWithSource(fn)), - 'PyMapTuple': lambda fn: beam.MapTuple( - python_callable.PythonCallableWithSource(fn)), - 'PyFlatMap': lambda fn: beam.FlatMap( - python_callable.PythonCallableWithSource(fn)), - 'PyFlatMapTuple': lambda fn: beam.FlatMapTuple( - python_callable.PythonCallableWithSource(fn)), - 'PyFilter': lambda keep: beam.Filter( - python_callable.PythonCallableWithSource(keep)), - 'PyTransform': fully_qualified_named_transform, - 'PyToRow': lambda fields: beam.Select( - **{ - name: python_callable.PythonCallableWithSource(fn) - for (name, fn) in fields.items() - }), - 'WithSchema': with_schema, - 'Flatten': Flatten, - 'WindowInto': WindowInto, - 'GroupByKey': beam.GroupByKey, - })) + return InlineProvider({ + 'Create': create, + 'PyMap': lambda fn: beam.Map( + python_callable.PythonCallableWithSource(fn)), + 'PyMapTuple': lambda fn: beam.MapTuple( + python_callable.PythonCallableWithSource(fn)), + 'PyFlatMap': lambda fn: beam.FlatMap( + python_callable.PythonCallableWithSource(fn)), + 'PyFlatMapTuple': lambda fn: beam.FlatMapTuple( + python_callable.PythonCallableWithSource(fn)), + 'PyFilter': lambda keep: beam.Filter( + python_callable.PythonCallableWithSource(keep)), + 'PyTransform': fully_qualified_named_transform, + 'WithSchemaExperimental': with_schema, + 'Flatten': Flatten, + 'WindowInto': WindowInto, + }, + no_input_transforms=('Create', )) class PypiExpansionService: @@ -696,6 +718,9 @@ def config_schema(self, type): dest in self._mappings[type].items() ]) + def requires_inputs(self, typ, args): + return self._underlying_provider.requires_inputs(typ, args) + def create_transform( self, typ: str, @@ -741,13 +766,15 @@ def merge_providers(*provider_sets): transform_type: [provider] for transform_type in provider.provided_transforms() } + elif isinstance(provider_set, list): + provider_set = merge_providers(*provider_set) for transform_type, providers in provider_set.items(): result[transform_type].extend(providers) return result def standard_providers(): - from apache_beam.yaml.yaml_mapping import create_mapping_provider + from apache_beam.yaml.yaml_mapping import create_mapping_providers from apache_beam.yaml.yaml_io import io_providers with open(os.path.join(os.path.dirname(__file__), 'standard_providers.yaml')) as fin: @@ -755,7 +782,7 @@ def standard_providers(): return merge_providers( create_builtin_provider(), - create_mapping_provider(), + create_mapping_providers(), io_providers(), parse_providers(standard_providers)) diff --git a/sdks/python/apache_beam/yaml/yaml_transform.py b/sdks/python/apache_beam/yaml/yaml_transform.py index 8bee2ccf2b98..78546aa28cb1 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform.py +++ b/sdks/python/apache_beam/yaml/yaml_transform.py @@ -76,6 +76,28 @@ def only_element(xs): return x +# These allow a user to explicitly pass no input to a transform (i.e. use it +# as a root transform) without an error even if the transform is not known to +# handle it. 
+def explicitly_empty(): + return {'__explicitly_empty__': None} + + +def is_explicitly_empty(io): + return io == explicitly_empty() + + +def is_empty(io): + return not io or is_explicitly_empty(io) + + +def empty_if_explicitly_empty(io): + if is_explicitly_empty(io): + return {} + else: + return io + + class SafeLineLoader(SafeLoader): """A yaml loader that attaches line information to mappings and strings.""" class TaggedString(str): @@ -186,7 +208,7 @@ def followers(self, transform_name): # TODO(yaml): Also trace through outputs and composites. for transform in self._transforms: if transform['type'] != 'composite': - for input in transform.get('input').values(): + for input in empty_if_explicitly_empty(transform['input']).values(): transform_id, _ = self.get_transform_id_and_output_name(input) self._all_followers[transform_id].append(transform['__uuid__']) return self._all_followers[self.get_transform_id(transform_name)] @@ -324,6 +346,12 @@ def create_ptransform(self, spec, input_pcolls): raise ValueError( 'Config for transform at %s must be a mapping.' % identify_object(spec)) + + if (not input_pcolls and not is_explicitly_empty(spec.get('input', {})) and + provider.requires_inputs(spec['type'], config)): + raise ValueError( + f'Missing inputs for transform at {identify_object(spec)}') + try: # pylint: disable=undefined-loop-variable ptransform = provider.create_transform( @@ -402,7 +430,7 @@ def expand_leaf_transform(spec, scope): spec = normalize_inputs_outputs(spec) inputs_dict = { key: scope.get_pcollection(value) - for (key, value) in spec['input'].items() + for (key, value) in empty_if_explicitly_empty(spec['input']).items() } input_type = spec.get('input_type', 'default') if input_type == 'list': @@ -442,10 +470,10 @@ def expand_composite_transform(spec, scope): spec = normalize_inputs_outputs(normalize_source_sink(spec)) inner_scope = Scope( - scope.root, { + scope.root, + { key: scope.get_pcollection(value) - for key, - value in spec['input'].items() + for (key, value) in empty_if_explicitly_empty(spec['input']).items() }, spec['transforms'], yaml_provider.merge_providers( @@ -470,8 +498,7 @@ def expand(inputs): _LOGGER.info("Expanding %s ", identify_object(spec)) return ({ key: scope.get_pcollection(value) - for key, - value in spec['input'].items() + for (key, value) in empty_if_explicitly_empty(spec['input']).items() } or scope.root) | scope.unique_name(spec, None) >> CompositePTransform() @@ -496,12 +523,25 @@ def is_not_output_of_last_transform(new_transforms, value): composite_spec = normalize_inputs_outputs(spec) new_transforms = [] for ix, transform in enumerate(composite_spec['transforms']): - if any(io in transform for io in ('input', 'output', 'input', 'output')): - raise ValueError( - f'Transform {identify_object(transform)} is part of a chain, ' - 'must have implicit inputs and outputs.') + if any(io in transform for io in ('input', 'output')): + if (ix == 0 and 'input' in transform and 'output' not in transform and + is_explicitly_empty(transform['input'])): + # This is OK as source clause sets an explicitly empty input. 
+ pass + else: + raise ValueError( + f'Transform {identify_object(transform)} is part of a chain, ' + 'must have implicit inputs and outputs.') if ix == 0: - transform['input'] = {key: key for key in composite_spec['input'].keys()} + if is_explicitly_empty(transform.get('input', None)): + pass + elif is_explicitly_empty(composite_spec['input']): + transform['input'] = composite_spec['input'] + else: + transform['input'] = { + key: key + for key in composite_spec['input'].keys() + } else: transform['input'] = new_transforms[-1]['__uuid__'] new_transforms.append(transform) @@ -554,6 +594,8 @@ def normalize_source_sink(spec): spec = dict(spec) spec['transforms'] = list(spec.get('transforms', [])) if 'source' in spec: + if 'input' not in spec['source']: + spec['source']['input'] = explicitly_empty() spec['transforms'].insert(0, spec.pop('source')) if 'sink' in spec: spec['transforms'].append(spec.pop('sink')) @@ -567,6 +609,13 @@ def preprocess_source_sink(spec): return spec +def tag_explicit_inputs(spec): + if 'input' in spec and not SafeLineLoader.strip_metadata(spec['input']): + return dict(spec, input=explicitly_empty()) + else: + return spec + + def normalize_inputs_outputs(spec): spec = dict(spec) @@ -611,7 +660,7 @@ def push_windowing_to_roots(spec): scope = LightweightScope(spec['transforms']) consumed_outputs_by_transform = collections.defaultdict(set) for transform in spec['transforms']: - for _, input_ref in transform['input'].items(): + for _, input_ref in empty_if_explicitly_empty(transform['input']).items(): try: transform_id, output = scope.get_transform_id_and_output_name(input_ref) consumed_outputs_by_transform[transform_id].add(output) @@ -620,7 +669,7 @@ def push_windowing_to_roots(spec): pass for transform in spec['transforms']: - if not transform['input'] and 'windowing' not in transform: + if is_empty(transform['input']) and 'windowing' not in transform: transform['windowing'] = spec['windowing'] transform['__consumed_outputs'] = consumed_outputs_by_transform[ transform['__uuid__']] @@ -647,7 +696,7 @@ def preprocess_windowing(spec): spec = push_windowing_to_roots(spec) windowing = spec.pop('windowing') - if spec['input']: + if not is_empty(spec['input']): # Apply the windowing to all inputs by wrapping it in a transform that # first applies windowing and then applies the original transform. 
    original_inputs = spec['input']
@@ -778,7 +827,7 @@ def ensure_errors_consumed(spec):
          raise ValueError(
              f'Missing output in error_handling of {identify_object(t)}')
        to_handle[t['__uuid__'], config['error_handling']['output']] = t
-      for _, input in t['input'].items():
+      for _, input in empty_if_explicitly_empty(t['input']).items():
        if input not in spec['input']:
          consumed.add(scope.get_transform_id_and_output_name(input))
    for error_pcoll, t in to_handle.items():
@@ -815,7 +864,7 @@ def preprocess(spec, verbose=False, known_transforms=None):
 
  def apply(phase, spec):
    spec = phase(spec)
-    if spec['type'] in {'composite', 'chain'}:
+    if spec['type'] in {'composite', 'chain'} and 'transforms' in spec:
      spec = dict(
          spec, transforms=[apply(phase, t) for t in spec['transforms']])
    return spec
@@ -830,11 +879,27 @@ def ensure_transforms_have_providers(spec):
          f'Unknown type or missing provider for {identify_object(spec)}')
    return spec
 
+  def preprocess_languages(spec):
+    if spec['type'] in ('Filter', 'MapToFields'):
+      language = spec.get('config', {}).get('language', 'generic')
+      new_type = spec['type'] + '-' + language
+      if known_transforms and new_type not in known_transforms:
+        if language == 'generic':
+          raise ValueError(f'Missing language for {identify_object(spec)}')
+        else:
+          raise ValueError(
+              f'Unknown language {language} for {identify_object(spec)}')
+      return dict(spec, type=new_type, name=spec.get('name', spec['type']))
+    else:
+      return spec
+
  for phase in [
      ensure_transforms_have_types,
+      preprocess_languages,
      ensure_transforms_have_providers,
      preprocess_source_sink,
      preprocess_chain,
+      tag_explicit_inputs,
      normalize_inputs_outputs,
      preprocess_flattened_inputs,
      ensure_errors_consumed,
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_scope_test.py b/sdks/python/apache_beam/yaml/yaml_transform_scope_test.py
index 733f47583a7f..85a99623332c 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_scope_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_scope_test.py
@@ -88,40 +88,13 @@ def test_create_ptransform(self):
      spec = '''
        transforms:
          - type: PyMap
+            input: something
            config:
              fn: "lambda x: x*x"
        '''
      scope, spec = self.get_scope_by_spec(p, spec)
 
-      result = scope.create_ptransform(spec['transforms'][0], [])
-      self.assertIsInstance(result, beam.transforms.ParDo)
-      self.assertEqual(result.label, 'Map(lambda x: x*x)')
-
-      result_annotations = {**result.annotations()}
-      target_annotations = {
-          'yaml_type': 'PyMap',
-          'yaml_args': '{"fn": "lambda x: x*x"}',
-          'yaml_provider': '{"type": "InlineProvider"}'
-      }
-
-      # Check if target_annotations is a subset of result_annotations
-      self.assertDictEqual(
-          result_annotations, {
-              **result_annotations, **target_annotations
-          })
-
-  def test_create_ptransform_with_inputs(self):
-    with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions(
-        pickle_library='cloudpickle')) as p:
-      spec = '''
-        transforms:
-          - type: PyMap
-            config:
-              fn: "lambda x: x*x"
-        '''
-      scope, spec = self.get_scope_by_spec(p, spec)
-
-      result = scope.create_ptransform(spec['transforms'][0], [])
+      result = scope.create_ptransform(spec['transforms'][0], ['something'])
      self.assertIsInstance(result, beam.transforms.ParDo)
      self.assertEqual(result.label, 'Map(lambda x: x*x)')
 
diff --git a/sdks/python/apache_beam/yaml/yaml_transform_test.py b/sdks/python/apache_beam/yaml/yaml_transform_test.py
index 26baebec86e4..ebf12710d3f2 100644
--- a/sdks/python/apache_beam/yaml/yaml_transform_test.py
+++ b/sdks/python/apache_beam/yaml/yaml_transform_test.py
@@ -250,6
+250,51 @@ def test_name_is_ambiguous(self): output: AnotherFilter ''') + def test_empty_inputs_throws_error(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + with self.assertRaisesRegex(ValueError, + 'Missing inputs for transform at ' + '"EmptyInputOkButYamlDoesntKnow" at line .*'): + _ = p | YamlTransform( + ''' + type: composite + transforms: + - type: PyTransform + name: EmptyInputOkButYamlDoesntKnow + config: + constructor: apache_beam.Impulse + ''') + + def test_empty_inputs_ok_in_source(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + # Does not throw an error like it does above. + _ = p | YamlTransform( + ''' + type: composite + source: + type: PyTransform + name: EmptyInputOkButYamlDoesntKnow + config: + constructor: apache_beam.Impulse + ''') + + def test_empty_inputs_ok_if_explicit(self): + with beam.Pipeline(options=beam.options.pipeline_options.PipelineOptions( + pickle_library='cloudpickle')) as p: + # Does not throw an error like it does above. + _ = p | YamlTransform( + ''' + type: composite + transforms: + - type: PyTransform + name: EmptyInputOkButYamlDoesntKnow + input: {} + config: + constructor: apache_beam.Impulse + ''') + def test_annotations(self): t = LinearTransform(5, b=100) annotations = t.annotations() @@ -269,6 +314,8 @@ def test_annotations(self): class CreateTimestamped(beam.PTransform): + _yaml_requires_inputs = False + def __init__(self, elements): self._elements = elements @@ -372,21 +419,27 @@ def test_mapping_errors(self): input: Create config: fn: "lambda x: beam.Row(num=x, str='a' * x or 'bbb')" + - type: Filter + input: ToRow + config: + language: python + keep: + str[1] >= 'a' + error_handling: + output: errors - type: MapToFields name: MapWithErrorHandling - input: ToRow + input: Filter config: language: python fields: num: num inverse: float(1 / num) - keep: - str[1] >= 'a' error_handling: output: errors - type: PyMap name: TrimErrors - input: MapWithErrorHandling.errors + input: [MapWithErrorHandling.errors, Filter.errors] config: fn: "lambda x: x.msg" - type: MapToFields diff --git a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py index d57a77d326fb..0d89360c6c3d 100644 --- a/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py +++ b/sdks/python/apache_beam/yaml/yaml_transform_unit_test.py @@ -347,6 +347,7 @@ def test_normalize_source_sink(self): expected = ''' transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap @@ -376,6 +377,7 @@ def test_normalize_source_sink_only_source(self): expected = ''' transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap @@ -445,6 +447,7 @@ def test_preprocess_source_sink_composite(self): type: composite transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap @@ -472,6 +475,7 @@ def test_preprocess_source_sink_chain(self): type: chain transforms: - type: Create + input: {'__explicitly_empty__': null} config: elements: [0,1,2] - type: PyMap diff --git a/sdks/python/apache_beam/yaml/yaml_udf_test.py b/sdks/python/apache_beam/yaml/yaml_udf_test.py index bb15cd494757..5e9faa08253c 100644 --- a/sdks/python/apache_beam/yaml/yaml_udf_test.py +++ b/sdks/python/apache_beam/yaml/yaml_udf_test.py @@ -28,6 +28,10 @@ from apache_beam.yaml.yaml_transform 
import YamlTransform +def AsRows(): + return beam.Map(lambda named_tuple: beam.Row(**named_tuple._asdict())) + + class YamlUDFMappingTest(unittest.TestCase): def __init__(self, method_name='runYamlMappingTest'): super().__init__(method_name) @@ -59,12 +63,11 @@ def test_map_to_fields_filter_inline_js(self): callable: "function label_map(x) {return x.label + 'x'}" conductor: callable: "function conductor_map(x) {return x.conductor + 1}" - keep: - callable: "function filter(x) {return x.rank > 0}" ''') assert_that( result, equal_to([ + beam.Row(label='11ax', conductor=12), beam.Row(label='37ax', conductor=38), beam.Row(label='389ax', conductor=390), ])) @@ -84,12 +87,11 @@ def test_map_to_fields_filter_inline_py(self): callable: "lambda x: x.label + 'x'" conductor: callable: "lambda x: x.conductor + 1" - keep: - callable: "lambda x: x.rank > 0" ''') assert_that( result, equal_to([ + beam.Row(label='11ax', conductor=12), beam.Row(label='37ax', conductor=38), beam.Row(label='389ax', conductor=390), ])) @@ -104,11 +106,11 @@ def test_filter_inline_js(self): input: input config: language: javascript - keep: + keep: callable: "function filter(x) {return x.rank > 0}" ''') assert_that( - result, + result | AsRows(), equal_to([ beam.Row(label='37a', conductor=37, rank=1), beam.Row(label='389a', conductor=389, rank=2), @@ -124,11 +126,11 @@ def test_filter_inline_py(self): input: input config: language: python - keep: + keep: callable: "lambda x: x.rank > 0" ''') assert_that( - result, + result | AsRows(), equal_to([ beam.Row(label='37a', conductor=37, rank=1), beam.Row(label='389a', conductor=389, rank=2), @@ -144,11 +146,12 @@ def test_filter_expression_js(self): input: input config: language: javascript - keep: + keep: expression: "label.toUpperCase().indexOf('3') == -1 && conductor" ''') assert_that( - result, equal_to([ + result | AsRows(), + equal_to([ beam.Row(label='11a', conductor=11, rank=0), ])) @@ -162,11 +165,12 @@ def test_filter_expression_py(self): input: input config: language: python - keep: + keep: expression: "'3' not in label" ''') assert_that( - result, equal_to([ + result | AsRows(), + equal_to([ beam.Row(label='11a', conductor=11, rank=0), ])) @@ -175,7 +179,7 @@ def test_filter_inline_js_file(self): function f(x) { return x.rank > 0 } - + function g(x) { return x.rank > 1 } @@ -193,12 +197,12 @@ def test_filter_inline_js_file(self): input: input config: language: javascript - keep: + keep: path: {path} name: "f" ''') assert_that( - result, + result | AsRows(), equal_to([ beam.Row(label='37a', conductor=37, rank=1), beam.Row(label='389a', conductor=389, rank=2), @@ -225,12 +229,12 @@ def g(x): input: input config: language: python - keep: + keep: path: {path} name: "f" ''') assert_that( - result, + result | AsRows(), equal_to([ beam.Row(label='37a', conductor=37, rank=1), beam.Row(label='389a', conductor=389, rank=2), diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle index 88d0abae83bd..762bed268d63 100644 --- a/sdks/python/build.gradle +++ b/sdks/python/build.gradle @@ -96,7 +96,7 @@ platform_identifiers_map.each { platform, idsuffix -> exec { environment CIBW_BUILD: "cp${pyversion}-${idsuffix}" environment CIBW_ENVIRONMENT: "SETUPTOOLS_USE_DISTUTILS=stdlib" - environment CIBW_BEFORE_BUILD: "pip install cython==0.29.36 numpy && pip install --upgrade setuptools" + environment CIBW_BEFORE_BUILD: "pip install cython==0.29.36 numpy --config-settings=setup-args='-Dallow-noblas=true' && pip install --upgrade setuptools" // note: sync cibuildwheel version with GitHub 
Action // .github/workflow/build_wheel.yml:build_wheels "Install cibuildwheel" step executable 'sh' diff --git a/sdks/python/test-suites/dataflow/common.gradle b/sdks/python/test-suites/dataflow/common.gradle index eee6e9d21887..8b8a56808996 100644 --- a/sdks/python/test-suites/dataflow/common.gradle +++ b/sdks/python/test-suites/dataflow/common.gradle @@ -418,7 +418,8 @@ task vertexAIInferenceTest { "test_opts": testOpts, "suite": "VertexAITests-df-py${pythonVersionSuffix}", "collect": "uses_vertex_ai and it_postcommit" , - "runner": "TestDataflowRunner" + "runner": "TestDataflowRunner", + "requirements_file": "$requirementsFile" ] def cmdArgs = mapToArgString(argMap) exec { diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 1ce1c23c0600..1caf25caf080 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -31,7 +31,7 @@ select = E3 # https://github.com/apache/beam/issues/25668 pip_pre = True # allow apps that support color to use it. -passenv=TERM +passenv=TERM,CLOUDSDK_CONFIG # Set [] options for pip installation of apache-beam tarball. extras = test,dataframe # Don't warn that these commands aren't installed. diff --git a/sdks/typescript/package-lock.json b/sdks/typescript/package-lock.json index 51a10d2b4a8f..e4556449fde4 100644 --- a/sdks/typescript/package-lock.json +++ b/sdks/typescript/package-lock.json @@ -1,15 +1,15 @@ { "name": "apache-beam", - "version": "2.50.0-SNAPSHOT", + "version": "2.50.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "apache-beam", - "version": "2.50.0-SNAPSHOT", + "version": "2.50.0", "dependencies": { "@google-cloud/pubsub": "^2.19.4", - "@grpc/grpc-js": "^1.8.8", + "@grpc/grpc-js": "^1.4.6", "@protobuf-ts/grpc-transport": "^2.1.0", "@protobuf-ts/plugin": "^2.1.0", "bson": "^4.6.0", @@ -19,7 +19,7 @@ "fast-deep-equal": "^3.1.3", "find-git-root": "^1.0.4", "long": "^4.0.0", - "protobufjs": "^7.2.4", + "protobufjs": "^6.11.3", "queue-typescript": "^1.0.1", "serialize-closures": "^0.2.7", "ts-closure-transform": "^0.1.7", @@ -190,73 +190,17 @@ } }, "node_modules/@grpc/grpc-js": { - "version": "1.8.8", - "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.8.8.tgz", - "integrity": "sha512-4gfDqMLXTrorvYTKA1jL22zLvVwiHJ73t6Re1OHwdCFRjdGTDOVtSJuaWhtHaivyeDGg0LeCkmU77MTKoV3wPA==", + "version": "1.4.6", + "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.4.6.tgz", + "integrity": "sha512-Byau4xiXfIixb1PnW30V/P9mkrZ05lknyNqiK+cVY9J5hj3gecxd/anwaUbAM8j834zg1x78NvAbwGnMfWEu7A==", "dependencies": { - "@grpc/proto-loader": "^0.7.0", + "@grpc/proto-loader": "^0.6.4", "@types/node": ">=12.12.47" }, "engines": { "node": "^8.13.0 || >=10.10.0" } }, - "node_modules/@grpc/grpc-js/node_modules/@grpc/proto-loader": { - "version": "0.7.7", - "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.7.tgz", - "integrity": "sha512-1TIeXOi8TuSCQprPItwoMymZXxWT0CPxUhkrkeCUH+D8U7QDwQ6b7SUz2MaLuWM2llT+J/TVFLmQI5KtML3BhQ==", - "dependencies": { - "@types/long": "^4.0.1", - "lodash.camelcase": "^4.3.0", - "long": "^4.0.0", - "protobufjs": "^7.0.0", - "yargs": "^17.7.2" - }, - "bin": { - "proto-loader-gen-types": "build/bin/proto-loader-gen-types.js" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/@grpc/grpc-js/node_modules/cliui": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "dependencies": { - "string-width": "^4.2.0", 
- "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@grpc/grpc-js/node_modules/yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "dependencies": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@grpc/grpc-js/node_modules/yargs-parser": { - "version": "21.1.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", - "engines": { - "node": ">=12" - } - }, "node_modules/@grpc/proto-loader": { "version": "0.6.9", "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.6.9.tgz", @@ -275,31 +219,6 @@ "node": ">=6" } }, - "node_modules/@grpc/proto-loader/node_modules/protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "hasInstallScript": true, - "dependencies": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - }, - "bin": { - "pbjs": "bin/pbjs", - "pbts": "bin/pbts" - } - }, "node_modules/@humanwhocodes/config-array": { "version": "0.9.5", "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.9.5.tgz", @@ -3293,7 +3212,7 @@ "protobufjs": "^6.11.2" } }, - "node_modules/proto3-json-serializer/node_modules/protobufjs": { + "node_modules/protobufjs": { "version": "6.11.3", "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", @@ -3318,34 +3237,6 @@ "pbts": "bin/pbts" } }, - "node_modules/protobufjs": { - "version": "7.2.4", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.4.tgz", - "integrity": "sha512-AT+RJgD2sH8phPmCf7OUZR8xGdcJRga4+1cOaXJ64hvcSkVhNcRHOwIxUatPH15+nj59WAGTDv3LSGZPEQbJaQ==", - "hasInstallScript": true, - "dependencies": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/node": ">=13.7.0", - "long": "^5.0.0" - }, - "engines": { - "node": ">=12.0.0" - } - }, - "node_modules/protobufjs/node_modules/long": { - "version": "5.2.3", - "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz", - "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==" - }, "node_modules/punycode": { "version": "1.4.1", "resolved": 
"https://registry.npmjs.org/punycode/-/punycode-1.4.1.tgz", @@ -4296,55 +4187,12 @@ } }, "@grpc/grpc-js": { - "version": "1.8.8", - "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.8.8.tgz", - "integrity": "sha512-4gfDqMLXTrorvYTKA1jL22zLvVwiHJ73t6Re1OHwdCFRjdGTDOVtSJuaWhtHaivyeDGg0LeCkmU77MTKoV3wPA==", + "version": "1.4.6", + "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.4.6.tgz", + "integrity": "sha512-Byau4xiXfIixb1PnW30V/P9mkrZ05lknyNqiK+cVY9J5hj3gecxd/anwaUbAM8j834zg1x78NvAbwGnMfWEu7A==", "requires": { - "@grpc/proto-loader": "^0.7.0", + "@grpc/proto-loader": "^0.6.4", "@types/node": ">=12.12.47" - }, - "dependencies": { - "@grpc/proto-loader": { - "version": "0.7.7", - "resolved": "https://registry.npmjs.org/@grpc/proto-loader/-/proto-loader-0.7.7.tgz", - "integrity": "sha512-1TIeXOi8TuSCQprPItwoMymZXxWT0CPxUhkrkeCUH+D8U7QDwQ6b7SUz2MaLuWM2llT+J/TVFLmQI5KtML3BhQ==", - "requires": { - "@types/long": "^4.0.1", - "lodash.camelcase": "^4.3.0", - "long": "^4.0.0", - "protobufjs": "^7.0.0", - "yargs": "^17.7.2" - } - }, - "cliui": { - "version": "8.0.1", - "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", - "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", - "requires": { - "string-width": "^4.2.0", - "strip-ansi": "^6.0.1", - "wrap-ansi": "^7.0.0" - } - }, - "yargs": { - "version": "17.7.2", - "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", - "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", - "requires": { - "cliui": "^8.0.1", - "escalade": "^3.1.1", - "get-caller-file": "^2.0.5", - "require-directory": "^2.1.1", - "string-width": "^4.2.3", - "y18n": "^5.0.5", - "yargs-parser": "^21.1.1" - } - }, - "yargs-parser": { - "version": "21.1.1", - "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", - "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==" - } } }, "@grpc/proto-loader": { @@ -4357,28 +4205,6 @@ "long": "^4.0.0", "protobufjs": "^6.10.0", "yargs": "^16.2.0" - }, - "dependencies": { - "protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "requires": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - "@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - } - } } }, "@humanwhocodes/config-array": { @@ -6583,34 +6409,12 @@ "integrity": "sha512-A60IisqvnuI45qNRygJjrnNjX2TMdQGMY+57tR3nul3ZgO2zXkR9OGR8AXxJhkqx84g0FTnrfi3D5fWMSdANdQ==", "requires": { "protobufjs": "^6.11.2" - }, - "dependencies": { - "protobufjs": { - "version": "6.11.3", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", - "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", - "requires": { - "@protobufjs/aspromise": "^1.1.2", - "@protobufjs/base64": "^1.1.2", - "@protobufjs/codegen": "^2.0.4", - "@protobufjs/eventemitter": "^1.1.0", - "@protobufjs/fetch": "^1.1.0", - 
"@protobufjs/float": "^1.0.2", - "@protobufjs/inquire": "^1.1.0", - "@protobufjs/path": "^1.1.2", - "@protobufjs/pool": "^1.1.0", - "@protobufjs/utf8": "^1.1.0", - "@types/long": "^4.0.1", - "@types/node": ">=13.7.0", - "long": "^4.0.0" - } - } } }, "protobufjs": { - "version": "7.2.4", - "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-7.2.4.tgz", - "integrity": "sha512-AT+RJgD2sH8phPmCf7OUZR8xGdcJRga4+1cOaXJ64hvcSkVhNcRHOwIxUatPH15+nj59WAGTDv3LSGZPEQbJaQ==", + "version": "6.11.3", + "resolved": "https://registry.npmjs.org/protobufjs/-/protobufjs-6.11.3.tgz", + "integrity": "sha512-xL96WDdCZYdU7Slin569tFX712BxsxslWwAfAhCYjQKGTq7dAU91Lomy6nLLhh/dyGhk/YH4TwTSRxTzhuHyZg==", "requires": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", @@ -6622,15 +6426,9 @@ "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.0", + "@types/long": "^4.0.1", "@types/node": ">=13.7.0", - "long": "^5.0.0" - }, - "dependencies": { - "long": { - "version": "5.2.3", - "resolved": "https://registry.npmjs.org/long/-/long-5.2.3.tgz", - "integrity": "sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q==" - } + "long": "^4.0.0" } }, "punycode": { diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json index 041121bd41ea..35a1e8134e29 100644 --- a/sdks/typescript/package.json +++ b/sdks/typescript/package.json @@ -36,7 +36,7 @@ }, "dependencies": { "@google-cloud/pubsub": "^2.19.4", - "@grpc/grpc-js": "^1.8.8", + "@grpc/grpc-js": "~1.4.6", "@protobuf-ts/grpc-transport": "^2.1.0", "@protobuf-ts/plugin": "^2.1.0", "bson": "^4.6.0", @@ -46,7 +46,7 @@ "fast-deep-equal": "^3.1.3", "find-git-root": "^1.0.4", "long": "^4.0.0", - "protobufjs": "^7.2.4", + "protobufjs": "~6.11.3", "queue-typescript": "^1.0.1", "serialize-closures": "^0.2.7", "ts-closure-transform": "^0.1.7", diff --git a/sdks/typescript/src/apache_beam/runners/dataflow.ts b/sdks/typescript/src/apache_beam/runners/dataflow.ts index 950e630d82d9..e7da1f7ada51 100644 --- a/sdks/typescript/src/apache_beam/runners/dataflow.ts +++ b/sdks/typescript/src/apache_beam/runners/dataflow.ts @@ -33,6 +33,8 @@ export function dataflowRunner(runnerOptions: { options: Object = {} ): Promise { var augmentedOptions = { experiments: [] as string[], ...options }; + augmentedOptions.experiments.push("use_runner_v2"); + augmentedOptions.experiments.push("use_portable_job_submission"); augmentedOptions.experiments.push("use_sibling_sdk_workers"); return new PortableRunner( runnerOptions as any, diff --git a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md index 24314dc11800..eae98b84d2c1 100644 --- a/website/www/site/content/en/documentation/io/built-in/google-bigquery.md +++ b/website/www/site/content/en/documentation/io/built-in/google-bigquery.md @@ -788,6 +788,8 @@ BigQuery Storage Write API for Python SDK currently has some limitations on supp {{< paragraph class="language-py" >}} **Note:** If you want to run WriteToBigQuery with Storage Write API from the source code, you need to run `./gradlew :sdks:java:io:google-cloud-platform:expansion-service:build` to build the expansion-service jar. If you are running from a released Beam SDK, the jar will already be included. +**Note:** Auto sharding is not currently supported for Python's Storage Write API. 
+
 {{< /paragraph >}}
 
 #### Exactly-once semantics
@@ -877,6 +879,8 @@ explicitly enable this using [`withAutoSharding`](https://beam.apache.org/releas
 
 ***Note:*** `STORAGE_WRITE_API` will default to dynamic sharding when
 `numStorageWriteApiStreams` is set to 0 or is unspecified.
+
+***Note:*** Auto sharding with `STORAGE_WRITE_API` is supported on Dataflow's legacy runner, but **not** on Runner V2.
 {{< /paragraph >}}
 
 When using `STORAGE_WRITE_API`, the PCollection returned by
diff --git a/website/www/site/content/en/documentation/sdks/python-dependencies.md b/website/www/site/content/en/documentation/sdks/python-dependencies.md
index a96d722d05c9..09c56adac430 100644
--- a/website/www/site/content/en/documentation/sdks/python-dependencies.md
+++ b/website/www/site/content/en/documentation/sdks/python-dependencies.md
@@ -32,7 +32,7 @@ Dependencies for your Beam SDK version are listed in `setup.py` in the Beam repo
 
 https://raw.githubusercontent.com/apache/beam/v<VERSION_NUMBER>/sdks/python/setup.py
 ```
-

Replace `<VERSION_NUMBER>` with the major.minor.patch version of the SDK. For example, https://raw.githubusercontent.com/apache/beam/v{{< param release_latest >}}/sdks/python/setup.py will provide the dependencies for the {{< param release_latest >}} release.

+

Replace `<VERSION_NUMBER>` with the major.minor.patch version of the SDK. For example, https://raw.githubusercontent.com/apache/beam/v{{< param release_latest >}}/sdks/python/setup.py will provide the dependencies for the {{< param release_latest >}} release.

2. Review the core dependency list under `REQUIRED_PACKAGES`. diff --git a/website/www/site/layouts/partials/header.html b/website/www/site/layouts/partials/header.html index 3fa8bdc9455c..957e3de2b1ed 100644 --- a/website/www/site/layouts/partials/header.html +++ b/website/www/site/layouts/partials/header.html @@ -208,9 +208,9 @@
diff --git a/website/www/site/static/images/college_2023_banner_desktop.png b/website/www/site/static/images/college_2023_banner_desktop.png new file mode 100644 index 000000000000..09e0cdfe24b9 Binary files /dev/null and b/website/www/site/static/images/college_2023_banner_desktop.png differ diff --git a/website/www/site/static/images/college_2023_banner_mobile.png b/website/www/site/static/images/college_2023_banner_mobile.png new file mode 100644 index 000000000000..e6b733a7e2ef Binary files /dev/null and b/website/www/site/static/images/college_2023_banner_mobile.png differ
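
Reviewer note: the `yaml_transform.py` changes above introduce an "intentionally empty input" marker, `{'__explicitly_empty__': None}`, so that input-free transforms such as `apache_beam.Impulse` can pass the new `Missing inputs` validation. Below is a minimal sketch of the two supported spellings, modeled directly on the new `test_empty_inputs_ok_in_source` and `test_empty_inputs_ok_if_explicit` tests; the `ViaSource` and `ViaEmptyInput` labels are illustrative only, and this assumes a Beam build that includes this patch.

```python
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.yaml.yaml_transform import YamlTransform

with beam.Pipeline(options=PipelineOptions(
    pickle_library='cloudpickle')) as p:
  # Spelling 1: a `source` clause. normalize_source_sink() injects the
  # explicitly-empty marker as its input, so preprocessing does not raise
  # 'Missing inputs for transform at ...'.
  _ = p | 'ViaSource' >> YamlTransform(
      '''
      type: composite
      source:
        type: PyTransform
        config:
          constructor: apache_beam.Impulse
      ''')

  # Spelling 2: an explicit empty mapping. tag_explicit_inputs() rewrites
  # `input: {}` to the same marker before inputs are normalized.
  _ = p | 'ViaEmptyInput' >> YamlTransform(
      '''
      type: composite
      transforms:
        - type: PyTransform
          input: {}
          config:
            constructor: apache_beam.Impulse
      ''')
```

Omitting both markers reproduces the failure asserted by `test_empty_inputs_throws_error`: preprocessing rejects the spec with `Missing inputs for transform at ...`.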