From ad729f762ef1c056e4f4c4624fd7505ddc580924 Mon Sep 17 00:00:00 2001 From: Timur Sultanov Date: Wed, 11 Jan 2023 23:15:41 +0400 Subject: [PATCH] [Playground] Add an option for Datastore namespace value for ci_cd.py script (#24818) * Add option to pass an arbitrary Datastore namespace value into ci_cd.py script * Pass Google Cloud Datastore project value into ci_cd.py script via an argument instead of using an environment variable directly * Update README.md to reflect changes to ci_cd.py script arguments --- .../workflows/playground_deploy_examples.yml | 12 ++--- .../playground_examples_cd_reusable.yml | 1 + .../playground_examples_ci_reusable.yml | 1 + playground/README.md | 54 ++++++++++++------- playground/infrastructure/ci_cd.py | 18 +++++-- playground/infrastructure/config.py | 2 +- playground/infrastructure/datastore_client.py | 8 +-- playground/infrastructure/test_ci_cd.py | 3 +- .../infrastructure/test_datastore_client.py | 29 ++++------ 9 files changed, 72 insertions(+), 56 deletions(-) diff --git a/.github/workflows/playground_deploy_examples.yml b/.github/workflows/playground_deploy_examples.yml index c2072d7ea8c7..cede42c8f886 100644 --- a/.github/workflows/playground_deploy_examples.yml +++ b/.github/workflows/playground_deploy_examples.yml @@ -156,41 +156,41 @@ jobs: K8S_SERVER_ADDRESS=$(kubectl get svc -n $K8S_NAMESPACE -l "app=service-python" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') echo $K8S_SERVER_ADDRESS export SERVER_ADDRESS="$K8S_SERVER_ADDRESS:8081" - python3 ci_cd.py --step CI --sdk SDK_PYTHON --origin $ORIGIN --subdirs $SUBDIRS + python3 ci_cd.py --datastore-project $GOOGLE_CLOUD_PROJECT --step CI --sdk SDK_PYTHON --origin $ORIGIN --subdirs $SUBDIRS working-directory: playground/infrastructure - name: Run Python Examples CD run: | K8S_SERVER_ADDRESS=$(kubectl get svc -n $K8S_NAMESPACE -l "app=service-python" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') export SERVER_ADDRESS="$K8S_SERVER_ADDRESS:8081" - 
python3 ci_cd.py --step CD --sdk SDK_PYTHON --origin $ORIGIN --subdirs $SUBDIRS + python3 ci_cd.py --datastore-project $GOOGLE_CLOUD_PROJECT --step CD --sdk SDK_PYTHON --origin $ORIGIN --subdirs $SUBDIRS working-directory: playground/infrastructure - name: Run Go Examples CI run: | K8S_SERVER_ADDRESS=$(kubectl get svc -n $K8S_NAMESPACE -l "app=service-go" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') export SERVER_ADDRESS="$K8S_SERVER_ADDRESS:8082" - python3 ci_cd.py --step CI --sdk SDK_GO --origin $ORIGIN --subdirs $SUBDIRS + python3 ci_cd.py --datastore-project $GOOGLE_CLOUD_PROJECT --step CI --sdk SDK_GO --origin $ORIGIN --subdirs $SUBDIRS working-directory: playground/infrastructure - name: Run Go Examples CD run: | K8S_SERVER_ADDRESS=$(kubectl get svc -n $K8S_NAMESPACE -l "app=service-go" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') export SERVER_ADDRESS="$K8S_SERVER_ADDRESS:8082" - python3 ci_cd.py --step CD --sdk SDK_GO --origin $ORIGIN --subdirs $SUBDIRS + python3 ci_cd.py --datastore-project $GOOGLE_CLOUD_PROJECT --step CD --sdk SDK_GO --origin $ORIGIN --subdirs $SUBDIRS working-directory: playground/infrastructure - name: Run Java Examples CI run: | K8S_SERVER_ADDRESS=$(kubectl get svc -n $K8S_NAMESPACE -l "app=service-java" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') export SERVER_ADDRESS="$K8S_SERVER_ADDRESS:8080" - python3 ci_cd.py --step CI --sdk SDK_JAVA --origin $ORIGIN --subdirs $SUBDIRS + python3 ci_cd.py --datastore-project $GOOGLE_CLOUD_PROJECT --step CI --sdk SDK_JAVA --origin $ORIGIN --subdirs $SUBDIRS working-directory: playground/infrastructure - name: Run Java Examples CD run: | K8S_SERVER_ADDRESS=$(kubectl get svc -n $K8S_NAMESPACE -l "app=service-java" -o jsonpath='{.items[0].status.loadBalancer.ingress[0].ip}') export SERVER_ADDRESS="$K8S_SERVER_ADDRESS:8080" - python3 ci_cd.py --step CD --sdk SDK_JAVA --origin $ORIGIN --subdirs $SUBDIRS + python3 ci_cd.py --datastore-project 
$GOOGLE_CLOUD_PROJECT --step CD --sdk SDK_JAVA --origin $ORIGIN --subdirs $SUBDIRS working-directory: playground/infrastructure - name: Delete Helm Chart if: always() diff --git a/.github/workflows/playground_examples_cd_reusable.yml b/.github/workflows/playground_examples_cd_reusable.yml index 250143540be4..4c5b42e70e29 100644 --- a/.github/workflows/playground_examples_cd_reusable.yml +++ b/.github/workflows/playground_examples_cd_reusable.yml @@ -62,6 +62,7 @@ jobs: - name: Run ci_cd.py run: | python3 ci_cd.py \ + --datastore-project $GOOGLE_CLOUD_PROJECT \ --step $STEP \ --sdk SDK_${SDK^^} \ --origin $ORIGIN \ diff --git a/.github/workflows/playground_examples_ci_reusable.yml b/.github/workflows/playground_examples_ci_reusable.yml index 7c5f039c3488..8bf8e4ccad48 100644 --- a/.github/workflows/playground_examples_ci_reusable.yml +++ b/.github/workflows/playground_examples_ci_reusable.yml @@ -173,6 +173,7 @@ jobs: - name: Run ci_cd.py run: | python3 ci_cd.py \ + --datastore-project $GOOGLE_CLOUD_PROJECT \ --step $STEP \ --sdk SDK_${SDK^^} \ --origin $ORIGIN \ diff --git a/playground/README.md b/playground/README.md index 4b3d9a6e574c..e92effe1697a 100644 --- a/playground/README.md +++ b/playground/README.md @@ -132,24 +132,38 @@ The following requirements are needed for deploying examples manually: ## Run example deployment script Example deployment scripts uses following environment variables: -GOOGLE_CLOUD_PROJECT - GCP project id where Playground backend is deployed -BEAM_ROOT_DIR - root folder to search for playground examples -SDK_CONFIG - location of sdk and default example configuration file -BEAM_EXAMPLE_CATEGORIES - location of example category configuration file -BEAM_USE_WEBGRPC - use grpc-Web instead of grpc (default) -GRPC_TIMEOUT - timeout for grpc calls (defaults to 10 sec) -BEAM_CONCURRENCY - number of eaxmples to run in parallel (defaults to 10) -SERVER_ADDRESS - address of the backend runnner service for a particular SDK - -usage: ci_cd.py [-h] 
---step {CI,CD} ---sdk {SDK_JAVA,SDK_GO,SDK_PYTHON,SDK_SCIO} ---origin {PG_EXAMPLES,TB_EXAMPLES} ---subdirs SUBDIRS [SUBDIRS ...] +- GOOGLE_CLOUD_PROJECT - GCP project id where Playground backend is deployed +- BEAM_ROOT_DIR - root folder to search for playground examples +- SDK_CONFIG - location of sdk and default example configuration file +- BEAM_EXAMPLE_CATEGORIES - location of example category configuration file +- BEAM_USE_WEBGRPC - use grpc-Web instead of grpc (default) +- GRPC_TIMEOUT - timeout for grpc calls (defaults to 10 sec) +- BEAM_CONCURRENCY - number of examples to run in parallel (defaults to 10) +- SERVER_ADDRESS - address of the backend runner service for a particular SDK + +``` +usage: ci_cd.py [-h] --step {CI,CD} [--namespace NAMESPACE] --datastore-project DATASTORE_PROJECT --sdk {SDK_JAVA,SDK_GO,SDK_PYTHON,SDK_SCIO} --origin {PG_EXAMPLES,TB_EXAMPLES} --subdirs SUBDIRS [SUBDIRS ...] + +CI/CD Steps for Playground objects + +optional arguments: + -h, --help show this help message and exit + --step {CI,CD} CI step to verify all beam examples/tests/katas. CD step to save all beam examples/tests/katas and their outputs on the GCD + --namespace NAMESPACE + Datastore namespace to use when saving data (default: Playground) + --datastore-project DATASTORE_PROJECT + Datastore project to use when saving data + --sdk {SDK_JAVA,SDK_GO,SDK_PYTHON,SDK_SCIO} + Supported SDKs + --origin {PG_EXAMPLES,TB_EXAMPLES} + ORIGIN field of pg_examples/pg_snippets + --subdirs SUBDIRS [SUBDIRS ...] 
+ limit sub directories to walk through, relative to BEAM_ROOT_DIR +``` Helper script to deploy examples for all supported sdk's: -``` +```shell cd playground/infrastructure export BEAM_ROOT_DIR="../../" @@ -160,10 +174,12 @@ export BEAM_CONCURRENCY=4 export PLAYGROUND_DNS_NAME="your registered dns name for Playground" for sdk in go java python scio; do + export SDK=$sdk && + export SERVER_ADDRESS=https://${SDK}.$PLAYGROUND_DNS_NAME && -export SDK=$sdk && -export SERVER_ADDRESS=https://${SDK}.$PLAYGROUND_DNS_NAME && - -python3 ci_cd.py --step CD --sdk SDK_${SDK^^} --origin PG_EXAMPLES --subdirs ./learning/katas ./examples ./sdks + python3 ci_cd.py --datastore-project $GOOGLE_CLOUD_PROJECT \ + --step CD --sdk SDK_${SDK^^} \ + --origin PG_EXAMPLES \ + --subdirs ./learning/katas ./examples ./sdks done ``` diff --git a/playground/infrastructure/ci_cd.py b/playground/infrastructure/ci_cd.py index 9c3b8842a9aa..3c1d62583279 100644 --- a/playground/infrastructure/ci_cd.py +++ b/playground/infrastructure/ci_cd.py @@ -43,6 +43,18 @@ "beam examples/tests/katas and their outputs on the GCD", choices=[Config.CI_STEP_NAME, Config.CD_STEP_NAME], ) +parser.add_argument( + "--namespace", + dest="namespace", + help=f"Datastore namespace to use when saving data (default: {Config.DEFAULT_NAMESPACE})", + default=Config.DEFAULT_NAMESPACE +) +parser.add_argument( + "--datastore-project", + dest="datastore_project", + help="Datastore project to use when saving data", + required=True +) parser.add_argument( "--sdk", dest="sdk", @@ -78,7 +90,7 @@ def _check_envs(): ) -def _run_ci_cd(step: str, raw_sdk: str, origin: Origin, subdirs: List[str]): +def _run_ci_cd(step: str, raw_sdk: str, origin: Origin, project: str, namespace: str, subdirs: List[str]): sdk: SdkEnum = StringToSdkEnum(raw_sdk) load_supported_categories(categories_file) @@ -94,7 +106,7 @@ def _run_ci_cd(step: str, raw_sdk: str, origin: Origin, subdirs: List[str]): if step == Config.CD_STEP_NAME: logging.info("Start of sending 
Playground examples to the Cloud Datastore ...") - datastore_client = DatastoreClient() + datastore_client = DatastoreClient(project, namespace) datastore_client.save_catalogs() datastore_client.save_to_cloud_datastore(examples, sdk, origin) logging.info("Finish of sending Playground examples to the Cloud Datastore") @@ -104,4 +116,4 @@ def _run_ci_cd(step: str, raw_sdk: str, origin: Origin, subdirs: List[str]): parser = parser.parse_args() _check_envs() setup_logger() - _run_ci_cd(parser.step, parser.sdk, parser.origin, parser.subdirs) + _run_ci_cd(parser.step, parser.sdk, parser.origin, parser.datastore_project, parser.namespace, parser.subdirs) diff --git a/playground/infrastructure/config.py b/playground/infrastructure/config.py index 4cc5dbd43768..e06ee4490eb5 100644 --- a/playground/infrastructure/config.py +++ b/playground/infrastructure/config.py @@ -60,8 +60,8 @@ class Config: CD_STEP_NAME = "CD" CI_CD_LITERAL = Literal["CI", "CD"] URL_VCS_PREFIX = "https://github.com/apache/beam/blob/master" - GOOGLE_CLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT") SDK_CONFIG = os.getenv("SDK_CONFIG", "../../playground/sdks.yaml") + DEFAULT_NAMESPACE = "Playground" @dataclass(frozen=True) diff --git a/playground/infrastructure/datastore_client.py b/playground/infrastructure/datastore_client.py index 31065177db6c..8fd0c8500e7f 100644 --- a/playground/infrastructure/datastore_client.py +++ b/playground/infrastructure/datastore_client.py @@ -44,17 +44,13 @@ class DatastoreClient: _datastore_client: datastore.Client - def __init__(self): + def __init__(self, project:str, namespace: str): self._check_envs() self._datastore_client = datastore.Client( - namespace=DatastoreProps.NAMESPACE, project=Config.GOOGLE_CLOUD_PROJECT + namespace=namespace, project=project ) def _check_envs(self): - if Config.GOOGLE_CLOUD_PROJECT is None: - raise KeyError( - "GOOGLE_CLOUD_PROJECT environment variable should be specified in os" - ) if Config.SDK_CONFIG is None: raise KeyError("SDK_CONFIG 
environment variable should be specified in os") diff --git a/playground/infrastructure/test_ci_cd.py b/playground/infrastructure/test_ci_cd.py index 9e5c041e27ef..7daf631ccec4 100644 --- a/playground/infrastructure/test_ci_cd.py +++ b/playground/infrastructure/test_ci_cd.py @@ -18,7 +18,7 @@ from api.v1.api_pb2 import SDK_JAVA from ci_cd import _check_envs, _run_ci_cd -from config import Origin +from config import Origin, Config @pytest.mark.parametrize("step", ["CI", "CD"]) @@ -37,6 +37,7 @@ def test_ci_step( step, "SDK_JAVA", Origin.PG_EXAMPLES, + Config.DEFAULT_NAMESPACE, [ "../../examples", ], diff --git a/playground/infrastructure/test_datastore_client.py b/playground/infrastructure/test_datastore_client.py index 18879a243e1d..2043fcda749c 100644 --- a/playground/infrastructure/test_datastore_client.py +++ b/playground/infrastructure/test_datastore_client.py @@ -20,7 +20,7 @@ from mock.mock import call from google.cloud import datastore -from config import Origin +from config import Origin, Config from datastore_client import DatastoreClient, DatastoreException from models import SdkEnum from test_utils import _get_examples @@ -30,35 +30,22 @@ """ -@mock.patch("config.Config.GOOGLE_CLOUD_PROJECT") @mock.patch("google.cloud.datastore.Client") def test_save_to_cloud_datastore_when_schema_version_not_found( - mock_datastore_client, mock_config_project + mock_datastore_client ): """ Test saving examples to the cloud datastore when the schema version not found """ - mock_config_project.return_value = "MOCK_PROJECT_ID" with pytest.raises( DatastoreException, match="Schema versions not found. 
Schema versions must be downloaded during application startup", ): examples = _get_examples(1) - client = DatastoreClient() + client = DatastoreClient("MOCK_PROJECT_ID", Config.DEFAULT_NAMESPACE) client.save_to_cloud_datastore(examples, SdkEnum.JAVA, Origin.PG_EXAMPLES) -def test_save_to_cloud_datastore_when_google_cloud_project_id_not_set(): - """ - Test saving examples to the cloud datastore when the Google Cloud Project ID is not set - """ - with pytest.raises( - KeyError, - match="GOOGLE_CLOUD_PROJECT environment variable should be specified in os", - ): - DatastoreClient() - - @pytest.mark.parametrize("is_multifile", [False, True]) @pytest.mark.parametrize("with_kafka", [False, True]) @pytest.mark.parametrize( @@ -68,13 +55,12 @@ def test_save_to_cloud_datastore_when_google_cloud_project_id_not_set(): pytest.param(Origin.TB_EXAMPLES, "TB_EXAMPLES_", id="TB_EXAMPLES"), ], ) +@pytest.mark.parametrize("namespace", [Config.DEFAULT_NAMESPACE, "Staging"]) @mock.patch("datastore_client.DatastoreClient._get_all_examples") @mock.patch("datastore_client.DatastoreClient._get_actual_schema_version_key") -@mock.patch("config.Config.GOOGLE_CLOUD_PROJECT") @mock.patch("google.cloud.datastore.Client") def test_save_to_cloud_datastore_in_the_usual_case( mock_client, - mock_config_project, mock_get_schema, mock_get_examples, create_test_example, @@ -82,6 +68,7 @@ def test_save_to_cloud_datastore_in_the_usual_case( key_prefix, with_kafka, is_multifile, + namespace, ): """ Test saving examples to the cloud datastore in the usual case @@ -90,11 +77,13 @@ def test_save_to_cloud_datastore_in_the_usual_case( mock_get_schema.return_value = mock_schema_key mock_examples = MagicMock() mock_get_examples.return_value = mock_examples - mock_config_project.return_value = "MOCK_PROJECT_ID" + + project_id = "MOCK_PROJECT_ID" examples = [create_test_example(is_multifile=is_multifile, with_kafka=with_kafka)] - client = DatastoreClient() + client = DatastoreClient(project_id, namespace) 
client.save_to_cloud_datastore(examples, SdkEnum.JAVA, origin) + mock_client.assert_called_once_with(namespace=namespace, project=project_id) mock_client.assert_called_once() mock_get_schema.assert_called_once() mock_get_examples.assert_called_once()