Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add GoogleAdsToBigQuery template #359

Merged
merged 5 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ mvn clean package -pl v2/pubsub-binary-to-bigquery -am
- [Dataplex: Tier Data from BigQuery to Cloud Storage](https://github.com/search?q=repo%3AGoogleCloudPlatform%2FDataflowTemplates%20Dataplex_BigQuery_to_GCS&type=code)
- [Firestore (Datastore mode) to BigQuery](https://github.com/search?q=repo%3AGoogleCloudPlatform%2FDataflowTemplates%20Firestore_to_BigQuery_Flex&type=code)
- [Firestore (Datastore mode) to Text Files on Cloud Storage](https://github.com/search?q=repo%3AGoogleCloudPlatform%2FDataflowTemplates%20Firestore_to_GCS_Text&type=code)
- [Google Ads to BigQuery](https://github.com/search?q=repo%3AGoogleCloudPlatform%2FDataflowTemplates%20Google_Ads_to_BigQuery&type=code)
- [Google Cloud to Neo4j](https://github.com/search?q=repo%3AGoogleCloudPlatform%2FDataflowTemplates%20Google_Cloud_to_Neo4j&type=code)
- [JDBC to BigQuery](https://github.com/search?q=repo%3AGoogleCloudPlatform%2FDataflowTemplates%20Jdbc_to_BigQuery&type=code)
- [JDBC to BigQuery with BigQuery Storage API support](https://github.com/search?q=repo%3AGoogleCloudPlatform%2FDataflowTemplates%20Jdbc_to_BigQuery_Flex&type=code)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public final class TemplateParameter {
String description() default "";
}

/** Template Parameter containing numerical inputs (32 bits). */
/** Template Parameter containing integer numerical inputs (32 bits). */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface Integer {
Expand All @@ -167,7 +167,7 @@ public final class TemplateParameter {
String example() default "";
}

/** Template Parameter containing numerical inputs (64 bits). */
/** Template Parameter containing integer numerical inputs (64 bits). */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface Long {
Expand All @@ -193,6 +193,58 @@ public final class TemplateParameter {
String example() default "";
}

/**
 * Template Parameter containing floating point numerical inputs (32 bits).
 *
 * <p>Retained at runtime and applicable to fields and methods. {@code description} and
 * {@code helpText} declare no default and must always be supplied; every other element may be
 * omitted.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface Float {
/** Order of appearance. Defaults to 999 when not specified. */
int order() default 999;

/** Name of the parameter. Defaults to the empty string. */
String name() default "";

/** Group Name of the parameter. Defaults to the empty string. */
String groupName() default "";

/** Whether the parameter is optional. Defaults to {@code false}. */
boolean optional() default false;

/** Description of the parameter. Mandatory: no default is declared. */
String description();

/** Help text of the parameter. Mandatory: no default is declared. */
String helpText();

/** Example of the parameter. Defaults to the empty string. */
String example() default "";
}

/**
 * Template Parameter containing floating point numerical inputs (64 bits).
 *
 * <p>Retained at runtime and applicable to fields and methods. {@code description} and
 * {@code helpText} declare no default and must always be supplied; every other element may be
 * omitted.
 */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
public @interface Double {
/** Order of appearance. Defaults to 999 when not specified. */
int order() default 999;

/** Name of the parameter. Defaults to the empty string. */
String name() default "";

/** Group Name of the parameter. Defaults to the empty string. */
String groupName() default "";

/** Whether the parameter is optional. Defaults to {@code false}. */
boolean optional() default false;

/** Description of the parameter. Mandatory: no default is declared. */
String description();

/** Help text of the parameter. Mandatory: no default is declared. */
String helpText();

/** Example of the parameter. Defaults to the empty string. */
String example() default "";
}

/** Template Parameter containing logical inputs. */
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD, ElementType.METHOD})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ public final class MetadataUtils {
TemplateParameter.BigQueryTable.class,
TemplateParameter.Boolean.class,
TemplateParameter.DateTime.class,
TemplateParameter.Double.class,
TemplateParameter.Duration.class,
TemplateParameter.Enum.class,
TemplateParameter.Float.class,
TemplateParameter.GcsReadFile.class,
TemplateParameter.GcsReadFolder.class,
TemplateParameter.GcsWriteFile.class,
Expand Down Expand Up @@ -164,6 +166,12 @@ public static List<String> getRegexes(Annotation parameterAnnotation) {
case "Long":
TemplateParameter.Long longParam = (TemplateParameter.Long) parameterAnnotation;
return List.of("^[0-9]+$");
case "Float":
TemplateParameter.Float floatParam = (TemplateParameter.Float) parameterAnnotation;
return List.of("^-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?$");
case "Double":
TemplateParameter.Double doubleParam = (TemplateParameter.Double) parameterAnnotation;
return List.of("^-?(0|[1-9][0-9]*)(\\.[0-9]+)?([eE][-+]?[0-9]+)?$");
case "Enum":
TemplateParameter.Enum enumParam = (TemplateParameter.Enum) parameterAnnotation;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,32 @@ public void processParamType(Annotation parameterAnnotation) {
this.setOptional(longParam.optional());
this.setParamType(ImageSpecParameterType.NUMBER);
break;
case "Float":
TemplateParameter.Float floatParam = (TemplateParameter.Float) parameterAnnotation;
if (!floatParam.name().isEmpty()) {
this.setName(floatParam.name());
}
processDescriptions(
floatParam.groupName(),
floatParam.description(),
floatParam.helpText(),
floatParam.example());
this.setOptional(floatParam.optional());
this.setParamType(ImageSpecParameterType.NUMBER);
break;
case "Double":
TemplateParameter.Double doubleParam = (TemplateParameter.Double) parameterAnnotation;
if (!doubleParam.name().isEmpty()) {
this.setName(doubleParam.name());
}
processDescriptions(
doubleParam.groupName(),
doubleParam.description(),
doubleParam.helpText(),
doubleParam.example());
this.setOptional(doubleParam.optional());
this.setParamType(ImageSpecParameterType.NUMBER);
break;
case "Enum":
TemplateParameter.Enum enumParam = (TemplateParameter.Enum) parameterAnnotation;
if (!enumParam.name().isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ public static TableSchema createBigQuerySchema(
}

/** Handles proto field to BigQuery field conversion. */
private static TableFieldSchema convertProtoFieldDescriptorToBigQueryField(
public static TableFieldSchema convertProtoFieldDescriptorToBigQueryField(
FieldDescriptor fieldDescriptor,
boolean preserveProtoFieldNames,
@Nullable FieldDescriptor parent,
Expand Down
191 changes: 191 additions & 0 deletions v2/googlecloud-to-googlecloud/README_Google_Ads_to_BigQuery.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@

Google Ads to BigQuery template
---
A pipeline that reads Google Ads reports and writes to BigQuery.


:memo: This is a Google-provided template! Please
check [Provided templates documentation](https://cloud.google.com/dataflow/docs/guides/templates/provided-templates)
on how to use it without having to build from sources using [Create job from template](https://console.cloud.google.com/dataflow/createjob?template=Google_Ads_to_BigQuery).

:bulb: This documentation is generated based
on [Metadata Annotations](https://github.com/GoogleCloudPlatform/DataflowTemplates#metadata-annotations)
. Do not change this file directly.

## Parameters

### Required Parameters

* **customerIds** (Google Ads account IDs): A list of Google Ads account IDs for which to execute the query. (Example: 12345,67890).
* **query** (Google Ads Query Language query): See https://developers.google.com/google-ads/api/docs/query/overview. (Example: SELECT campaign.id, campaign.name FROM campaign).
* **qpsPerWorker** (Required Google Ads request rate per worker): Indicates rate of query requests per second to be submitted to Google Ads. Divide the desired per pipeline QPS by the maximum number of workers. Avoid exceeding per account or developer token limits. See https://developers.google.com/google-ads/api/docs/best-practices/rate-limits.
* **googleAdsClientId** (OAuth 2.0 Client ID identifying the application): See https://developers.google.com/google-ads/api/docs/oauth/overview.
* **googleAdsClientSecret** (OAuth 2.0 Client Secret for the specified Client ID): See https://developers.google.com/google-ads/api/docs/oauth/overview.
* **googleAdsRefreshToken** (OAuth 2.0 Refresh Token for the user connecting to the Google Ads API): See https://developers.google.com/google-ads/api/docs/oauth/overview.
* **googleAdsDeveloperToken** (Google Ads developer token for the user connecting to the Google Ads API): See https://developers.google.com/google-ads/api/docs/get-started/dev-token.
* **outputTableSpec** (BigQuery output table): BigQuery table location to write the output to. The name should be in the format <project>:<dataset>.<table_name>. The table's schema must match input objects.

### Optional Parameters

* **loginCustomerId** (Google Ads manager account ID): A Google Ads manager account ID for which to access the account IDs. (Example: 12345).
* **bigQueryTableSchemaPath** (BigQuery Table Schema Path): Cloud Storage path to the BigQuery schema JSON file. If this is not set, then the schema is inferred from the Proto schema. (Example: gs://MyBucket/bq_schema.json).
* **writeDisposition** (Write Disposition to use for BigQuery): BigQuery WriteDisposition. For example, WRITE_APPEND, WRITE_EMPTY or WRITE_TRUNCATE. Defaults to: WRITE_APPEND.
* **createDisposition** (Create Disposition to use for BigQuery): BigQuery CreateDisposition. For example, CREATE_IF_NEEDED, CREATE_NEVER. Defaults to: CREATE_IF_NEEDED.



## Getting Started

### Requirements

* Java 11
* Maven
* [gcloud CLI](https://cloud.google.com/sdk/gcloud), and execution of the
following commands:
* `gcloud auth login`
* `gcloud auth application-default login`

:star2: Those dependencies are pre-installed if you use Google Cloud Shell!
[![Open in Cloud Shell](http://gstatic.com/cloudssh/images/open-btn.svg)](https://console.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FGoogleCloudPlatform%2FDataflowTemplates.git&cloudshell_open_in_editor=v2/googlecloud-to-googlecloud/src/main/java/com/google/cloud/teleport/v2/templates/GoogleAdsToBigQuery.java)

### Templates Plugin

This README provides instructions using
the [Templates Plugin](https://github.com/GoogleCloudPlatform/DataflowTemplates#templates-plugin)
. Install the plugin with the following command before proceeding:

```shell
mvn clean install -pl plugins/templates-maven-plugin -am
```

### Building Template

This template is a Flex Template, meaning that the pipeline code will be
containerized and the container will be executed on Dataflow. Please
check [Use Flex Templates](https://cloud.google.com/dataflow/docs/guides/templates/using-flex-templates)
and [Configure Flex Templates](https://cloud.google.com/dataflow/docs/guides/templates/configuring-flex-templates)
for more information.

#### Staging the Template

If the plan is to just stage the template (i.e., make it available to use) by
the `gcloud` command or Dataflow "Create job from template" UI,
the `-PtemplatesStage` profile should be used:

```shell
export PROJECT=<my-project>
export BUCKET_NAME=<bucket-name>

mvn clean package -PtemplatesStage \
-DskipTests \
-DprojectId="$PROJECT" \
-DbucketName="$BUCKET_NAME" \
-DstagePrefix="templates" \
-DtemplateName="Google_Ads_to_BigQuery" \
-pl v2/googlecloud-to-googlecloud \
-am
```


The command should build and save the template to Google Cloud, and then print
the complete location on Cloud Storage:

```
Flex Template was staged! gs://<bucket-name>/templates/flex/Google_Ads_to_BigQuery
```

The specific path should be copied as it will be used in the following steps.

#### Running the Template

**Using the staged template**:

You can use the path above to run the template (or share it with others for execution).

To start a job with the template at any time using `gcloud`, you are going to
need valid resources for the required parameters.

Provided that, the following command line can be used:

```shell
export PROJECT=<my-project>
export BUCKET_NAME=<bucket-name>
export REGION=us-central1
export TEMPLATE_SPEC_GCSPATH="gs://$BUCKET_NAME/templates/flex/Google_Ads_to_BigQuery"

### Required
export CUSTOMER_IDS=<customerIds>
export QUERY=<query>
export QPS_PER_WORKER=<qpsPerWorker>
export GOOGLE_ADS_CLIENT_ID=<googleAdsClientId>
export GOOGLE_ADS_CLIENT_SECRET=<googleAdsClientSecret>
export GOOGLE_ADS_REFRESH_TOKEN=<googleAdsRefreshToken>
export GOOGLE_ADS_DEVELOPER_TOKEN=<googleAdsDeveloperToken>
export OUTPUT_TABLE_SPEC=<outputTableSpec>

### Optional
export LOGIN_CUSTOMER_ID=<loginCustomerId>
export BIG_QUERY_TABLE_SCHEMA_PATH=<bigQueryTableSchemaPath>
export WRITE_DISPOSITION=WRITE_APPEND
export CREATE_DISPOSITION=CREATE_IF_NEEDED

gcloud dataflow flex-template run "google-ads-to-bigquery-job" \
--project "$PROJECT" \
--region "$REGION" \
--template-file-gcs-location "$TEMPLATE_SPEC_GCSPATH" \
--parameters "loginCustomerId=$LOGIN_CUSTOMER_ID" \
--parameters "customerIds=$CUSTOMER_IDS" \
--parameters "query=$QUERY" \
--parameters "qpsPerWorker=$QPS_PER_WORKER" \
--parameters "bigQueryTableSchemaPath=$BIG_QUERY_TABLE_SCHEMA_PATH" \
--parameters "googleAdsClientId=$GOOGLE_ADS_CLIENT_ID" \
--parameters "googleAdsClientSecret=$GOOGLE_ADS_CLIENT_SECRET" \
--parameters "googleAdsRefreshToken=$GOOGLE_ADS_REFRESH_TOKEN" \
--parameters "googleAdsDeveloperToken=$GOOGLE_ADS_DEVELOPER_TOKEN" \
--parameters "outputTableSpec=$OUTPUT_TABLE_SPEC" \
--parameters "writeDisposition=$WRITE_DISPOSITION" \
--parameters "createDisposition=$CREATE_DISPOSITION"
```

For more information about the command, please check:
https://cloud.google.com/sdk/gcloud/reference/dataflow/flex-template/run


**Using the plugin**:

Instead of just generating the template in the folder, it is possible to stage
and run the template in a single command. This may be useful for testing when
changing the templates.

```shell
export PROJECT=<my-project>
export BUCKET_NAME=<bucket-name>
export REGION=us-central1

### Required
export CUSTOMER_IDS=<customerIds>
export QUERY=<query>
export QPS_PER_WORKER=<qpsPerWorker>
export GOOGLE_ADS_CLIENT_ID=<googleAdsClientId>
export GOOGLE_ADS_CLIENT_SECRET=<googleAdsClientSecret>
export GOOGLE_ADS_REFRESH_TOKEN=<googleAdsRefreshToken>
export GOOGLE_ADS_DEVELOPER_TOKEN=<googleAdsDeveloperToken>
export OUTPUT_TABLE_SPEC=<outputTableSpec>

### Optional
export LOGIN_CUSTOMER_ID=<loginCustomerId>
export BIG_QUERY_TABLE_SCHEMA_PATH=<bigQueryTableSchemaPath>
export WRITE_DISPOSITION=WRITE_APPEND
export CREATE_DISPOSITION=CREATE_IF_NEEDED

mvn clean package -PtemplatesRun \
-DskipTests \
-DprojectId="$PROJECT" \
-DbucketName="$BUCKET_NAME" \
-Dregion="$REGION" \
-DjobName="google-ads-to-bigquery-job" \
-DtemplateName="Google_Ads_to_BigQuery" \
-Dparameters="loginCustomerId=$LOGIN_CUSTOMER_ID,customerIds=$CUSTOMER_IDS,query=$QUERY,qpsPerWorker=$QPS_PER_WORKER,bigQueryTableSchemaPath=$BIG_QUERY_TABLE_SCHEMA_PATH,googleAdsClientId=$GOOGLE_ADS_CLIENT_ID,googleAdsClientSecret=$GOOGLE_ADS_CLIENT_SECRET,googleAdsRefreshToken=$GOOGLE_ADS_REFRESH_TOKEN,googleAdsDeveloperToken=$GOOGLE_ADS_DEVELOPER_TOKEN,outputTableSpec=$OUTPUT_TABLE_SPEC,writeDisposition=$WRITE_DISPOSITION,createDisposition=$CREATE_DISPOSITION" \
-pl v2/googlecloud-to-googlecloud \
-am
```
9 changes: 9 additions & 0 deletions v2/googlecloud-to-googlecloud/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@
</build>

<dependencies>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-google-ads</artifactId>
</dependency>
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-io-hadoop-common</artifactId>
Expand All @@ -105,6 +109,11 @@
<artifactId>jdbc-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.google.api-ads</groupId>
<artifactId>google-ads</artifactId>
<version>26.0.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
Expand Down
Loading