Skip to content

Commit

Permalink
Merge branch 'master' into issue24563_tobf_gradle
Browse files Browse the repository at this point in the history
  • Loading branch information
darkhan.nausharipov committed Jan 9, 2023
2 parents 6ebe89d + 95e5391 commit bb0b08e
Show file tree
Hide file tree
Showing 176 changed files with 17,902 additions and 1,351 deletions.
1 change: 1 addition & 0 deletions .github/workflows/playground_examples_ci_reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ jobs:
working-directory: playground/infrastructure
env:
BEAM_ROOT_DIR: "../.."
BEAM_EXAMPLE_CATEGORIES: "../categories.yaml"

ci_cd:
name: ${{ inputs.step }} ${{ inputs.sdk }} ${{ inputs.origin }}
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ website/www/yarn-error.log
**/.packages
**/generated_plugin_registrant.dart
playground/frontend/playground_components/pubspec.lock
playground/frontend/playground_components/test/tools/extract_symbols_java/dependencies
playground/frontend/playground_components_dev/pubspec.lock

# Ignore Beam Playground Terraform
**/.terraform
Expand All @@ -136,4 +138,4 @@ playground/frontend/playground_components/pubspec.lock
**/*.tfvars

# Ignore Katas auto-generated files
**/*-remote-info.yaml
**/*-remote-info.yaml
54 changes: 54 additions & 0 deletions .test-infra/jenkins/job_CloudMLBenchmarkTests_Python.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import CommonJobProperties as commonJobProperties
import PhraseTriggeringPostCommitBuilder
import CronJobBuilder

def cloudMLJob = { scope ->
scope.description('Runs the TFT Criteo Examples on the Dataflow runner.')

// Set common parameters.
commonJobProperties.setTopLevelMainJobProperties(scope, 'master', 360)

// Gradle goals for this job.
scope.steps {
gradle {
rootBuildScriptDir(commonJobProperties.checkoutDir)
commonJobProperties.setGradleSwitches(delegate)
tasks(':sdks:python:test-suites:dataflow:tftTests')
}
}
}

PhraseTriggeringPostCommitBuilder.postCommitJob(
'beam_CloudML_Benchmarks_Dataflow',
'Run TFT Criteo Benchmarks',
'TFT Criteo benchmarks on Dataflow(\"Run TFT Criteo Benchmarks"\"")',
this
) {
cloudMLJob(delegate)
}

CronJobBuilder.cronJob(
'beam_CloudML_Benchmarks_Dataflow',
'H 14 * * *',
this
) {
cloudMLJob(delegate)
}
3 changes: 1 addition & 2 deletions .test-infra/jenkins/job_LoadTests_Combine_Python.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ def loadTestConfigurations = { datasetName, mode ->

def addStreamingOptions(test){
test.pipelineOptions << [streaming: null,
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
experiments: "use_runner_v2,shuffle_mode=appliance"
experiments: "use_runner_v2"
]
}

Expand Down
3 changes: 1 addition & 2 deletions .test-infra/jenkins/job_LoadTests_GBK_Python.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ def addStreamingOptions(test) {
// Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs.
// See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2
// for more details.
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
experiments: 'use_runner_v2,shuffle_mode=appliance',
experiments: 'use_runner_v2',
]
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,7 @@ def addStreamingOptions(test) {
// Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs.
// See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2
// for more details.
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
experiments: 'use_runner_v2,shuffle_mode=appliance',
experiments: 'use_runner_v2',
]
}

Expand Down
3 changes: 1 addition & 2 deletions .test-infra/jenkins/job_LoadTests_ParDo_Python.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -131,8 +131,7 @@ def addStreamingOptions(test) {
// Use the new Dataflow runner, which offers improved efficiency of Dataflow jobs.
// See https://cloud.google.com/dataflow/docs/guides/deploying-a-pipeline#dataflow-runner-v2
// for more details.
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
experiments: 'use_runner_v2,shuffle_mode=appliance',
experiments: 'use_runner_v2',
]
}

Expand Down
3 changes: 1 addition & 2 deletions .test-infra/jenkins/job_LoadTests_SideInput_Python.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,7 @@ def fromTemplate = { mode, name, id, datasetName, testSpecificOptions ->
influx_measurement : "python_${mode}_sideinput_${id}",
num_workers : 10,
autoscaling_algorithm: 'NONE',
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
experiments : 'use_runner_v2,shuffle_mode=appliance',
experiments : 'use_runner_v2',
] << testSpecificOptions
]
}
Expand Down
3 changes: 1 addition & 2 deletions .test-infra/jenkins/job_PerformanceTests_KafkaIO_IT.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ job(jobName) {
readTimeout : '1500',
bigQueryTable : 'kafkaioit_results_runner_v2',
influxMeasurement : 'kafkaioit_results_runner_v2',
// TODO(https://github.com/apache/beam/issues/20806) remove shuffle_mode=appliance with runner v2 once issue is resolved.
experiments : 'use_runner_v2,shuffle_mode=appliance,use_unified_worker',
experiments : 'use_runner_v2,use_unified_worker',
]

steps {
Expand Down
187 changes: 187 additions & 0 deletions .test-infra/metrics/sync/github/sync_workflows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
'''
This module queries GitHub to collect Beam-related workflows metrics and put them in
PostgreSQL.
This Script is running every 3 hours in a cloud function in apache-beam-testing project.
This cloud function is triggered by a pubsub topic.
You can find the cloud function in the next link
https://console.cloud.google.com/functions/details/us-central1/github_actions_workflows_dashboard_sync?env=gen1&project=apache-beam-testing
Pub sub topic : https://console.cloud.google.com/cloudpubsub/topic/detail/github_actions_workflows_sync?project=apache-beam-testing
Cron Job : https://console.cloud.google.com/cloudscheduler/jobs/edit/us-central1/github_actions_workflows_dashboard_sync?project=apache-beam-testing
Writing the latest 10 runs of every postcommit workflow in master branch in a beammetrics database
'''

import os
import sys
import time
import re
import requests
import psycopg2

from datetime import datetime
from github import GithubIntegration

DB_HOST = os.environ['DB_HOST']
DB_PORT = os.environ['DB_PORT']
DB_NAME = os.environ['DB_NAME']
DB_USER_NAME = os.environ['DB_USER']
DB_PASSWORD = os.environ['DB_PASS']
GH_WORKFLOWS_TABLE_NAME = "github_workflows"
# Number of workflows that fetch github API
GH_NUMBER_OF_WORKFLOWS = 100
GH_WORKFLOWS_NUMBER_EXECUTIONS = 100
WORKFLOWS_OBJECT_LIST = []


class Workflow:
def __init__(self,id,name,filename):
self.id = id
self.name = name
self.filename = filename
self.listOfRuns = []
self.runUrl = []

# The table will save the latest ten run of every workflow
GH_WORKFLOWS_CREATE_TABLE_QUERY = f"""
CREATE TABLE IF NOT EXISTS {GH_WORKFLOWS_TABLE_NAME} (
job_name text PRIMARY KEY,
job_yml_filename text"""
for i in range(0,GH_WORKFLOWS_NUMBER_EXECUTIONS):
i = i + 1
GH_WORKFLOWS_CREATE_TABLE_QUERY += """,\n run{} text,
run{}Id text""".format(str(i),str(i))
GH_WORKFLOWS_CREATE_TABLE_QUERY += ")\n"

def githubWorkflowsGrafanaSync(data,context):
print('Started')
print('Updating table with recent workflow runs')
databaseOperations(initDbConnection(),fetchWorkflowData())
print('Done')
return "Completed"

def initDbConnection():
'''Init connection with the Database'''
connection = None
maxRetries = 3
i = 0
while connection == None and i < maxRetries:
try:
connection = psycopg2.connect(
f"dbname='{DB_NAME}' user='{DB_USER_NAME}' host='{DB_HOST}'"
f" port='{DB_PORT}' password='{DB_PASSWORD}'")
except Exception as e:
print('Failed to connect to DB; retrying in 1 minute')
print(e)
time.sleep(60)
i = i + 1
if i >= maxRetries:
print("Number of retries exceded ")
sys.exit(1)
return connection

def getToken():
git_integration = GithubIntegration(
os.environ["GH_APP_ID"],
os.environ["GH_PEM_KEY"])
token=git_integration.get_access_token(
os.environ["GH_APP_INSTALLATION_ID"]
).token
return token

def retriesRequest(request):
requestSucceeded = False
retryFactor = 1
while not requestSucceeded:
retryTime = 60 * retryFactor
if request.status_code != 200:
print('Failed to get the request with code {}'.format(request.status_code))
time.sleep(retryTime)
retryFactor = retryFactor + retryFactor
if retryFactor * 60 >= 3600:
print("Error: The request take more than an hour")
sys.exit(1)
else:
requestSucceeded = True
def fetchWorkflowData():
'''Return a json with all the workflows and the latests
ten executions'''
completed = False
page = 1
workflows = []
try:
while not completed:
url = "https://api.github.com/repos/apache/beam/actions/workflows"
queryOptions = { 'branch' : 'master', 'page': page, 'per_page' : GH_NUMBER_OF_WORKFLOWS }
response = requests.get(url = url, params = queryOptions)
retriesRequest(response)
jsonResponse = response.json()
if jsonResponse['total_count'] >= GH_NUMBER_OF_WORKFLOWS:
page = page + 1
workflowsPage = jsonResponse['workflows']
workflows.append(workflowsPage)
else:
completed = True
workflowsPage = jsonResponse['workflows']
workflows.append(workflowsPage)
for pageItem in workflows:
for item in pageItem:
path =item['path']
isPostCommit = re.search('(.*)postcommit(.*)',path)
if isPostCommit:
result = re.search('/(.*).yml', path)
path =(result.group(1)) + ".yml"
workflowObject = Workflow(item['id'],item['name'],path)
WORKFLOWS_OBJECT_LIST.append(workflowObject)
url = "https://api.github.com/repos/apache/beam/actions/workflows/"
queryOptions = { 'branch' : 'master', 'per_page' : GH_WORKFLOWS_NUMBER_EXECUTIONS,
'page' :'1', 'exclude_pull_request':True }
for workflow in WORKFLOWS_OBJECT_LIST:
response = requests.get(url = "{}{}/runs".format(url,workflow.id),
params=queryOptions)
retriesRequest(response)
responseJson = response.json()
workflowsRuns = responseJson['workflow_runs']
for item in workflowsRuns:
if item['status'] == 'completed':
workflow.runUrl.append(item['html_url'])
workflow.listOfRuns.append(item['conclusion'])
else:
workflow.listOfRuns.append(item['status'])
workflow.runUrl.append(item['html_url'])
for i in range(0,GH_WORKFLOWS_NUMBER_EXECUTIONS):
if i >= len(workflow.listOfRuns):
workflow.listOfRuns.append('None')
workflow.runUrl.append('None')
except Exception as e:
print('Failed to get GHA workflows')
print(e)

def databaseOperations(connection,fetchWorkflows):
'''Create the table if not exist and update the table with the latest runs
of the workflows '''
queryInsert = "INSERT INTO {} VALUES ".format(GH_WORKFLOWS_TABLE_NAME)
cursor = connection.cursor()
cursor.execute(GH_WORKFLOWS_CREATE_TABLE_QUERY)
cursor.execute("DELETE FROM {};".format(GH_WORKFLOWS_TABLE_NAME))
query = ""
for workflow in WORKFLOWS_OBJECT_LIST:
rowInsert = "(\'{}\',\'{}\'".format(workflow.name,workflow.filename)
for run, runUrl in zip(workflow.listOfRuns,workflow.runUrl):
rowInsert += ",\'{}\',\'{}\'".format(run,runUrl)
query = query + rowInsert
query += "),"
query = query[:-1] + ";"
query = queryInsert + query
cursor.execute(query)
cursor.close()
connection.commit()
connection.close()
6 changes: 5 additions & 1 deletion CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@

* RunInference Wrapper with Sklearn Model Handler support added in Go SDK ([#24497](https://github.com/apache/beam/issues/23382)).
* X feature added (Java/Python) ([#X](https://github.com/apache/beam/issues/X)).
* Adding override of allowed TLS algorithms (Java), now maintaining the disabled/legacy algorithms
present in 2.43.0 (up to 1.8.0_342, 11.0.16, 17.0.2 for respective Java versions). This is accompanied
by an explicit re-enabling of TLSv1 and TLSv1.1 for Java 8 and Java 11.

## Breaking Changes

Expand All @@ -81,6 +84,7 @@
## Bugfixes

* Avoids Cassandra syntax error when user-defined query has no where clause in it (Java) ([#24829](https://github.com/apache/beam/issues/24829)).
* Fixed JDBC connection failures (Java) during handshake due to deprecated TLSv1(.1) protocol for the JDK. ([#24623](https://github.com/apache/beam/issues/24623))

## Known Issues

Expand Down Expand Up @@ -147,7 +151,7 @@

* Decreased TextSource CPU utilization by 2.3x (Java) ([#23193](https://github.com/apache/beam/issues/23193)).
* Fixed bug when using SpannerIO with RuntimeValueProvider options (Java) ([#22146](https://github.com/apache/beam/issues/22146)).
* Fixed issue for unicode rendering on WriteToBigQuery ([#10785](https://github.com/apache/beam/issues/10785))
* Fixed issue for unicode rendering on WriteToBigQuery ([#22312](https://github.com/apache/beam/issues/22312))
* Remove obsolete variants of BigQuery Read and Write, always using Beam-native variant
([#23564](https://github.com/apache/beam/issues/23564) and [#23559](https://github.com/apache/beam/issues/23559)).
* Bumped google-cloud-spanner dependency version to 3.x for Python SDK ([#21198](https://github.com/apache/beam/issues/21198)).
Expand Down
Loading

0 comments on commit bb0b08e

Please sign in to comment.