-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[CdapIO] Complete examples for CDAP Hubspot plugins (#24568)
* Add examples for Cdap Hubspot plugins * Fix dependencies * Move Cdap Hubspot examples to the separate gradle module * Move common classes to Examples Cdap module * Fix readme * Refactoring * Fix typo
- Loading branch information
Showing
21 changed files
with
1,227 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* License); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an AS IS BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
import groovy.json.JsonOutput | ||
|
||
plugins { | ||
id 'java' | ||
id 'org.apache.beam.module' | ||
id 'com.github.johnrengelman.shadow' | ||
} | ||
|
||
applyJavaNature( | ||
exportJavadoc: false, | ||
automaticModuleName: 'org.apache.beam.examples.complete.cdap.hubspot', | ||
) | ||
|
||
description = "Apache Beam :: Examples :: Java :: CDAP :: Hubspot" | ||
ext.summary = """Apache Beam SDK provides a simple, Java-based | ||
interface for processing virtually any size data. This | ||
artifact includes CDAP Hubspot Apache Beam Java SDK examples.""" | ||
|
||
/** Define the list of runners which execute a precommit test. | ||
* Some runners are run from separate projects, see the preCommit task below | ||
* for details. | ||
*/ | ||
def preCommitRunners = ["directRunner", "flinkRunner"] | ||
for (String runner : preCommitRunners) { | ||
configurations.create(runner + "PreCommit") | ||
} | ||
|
||
dependencies { | ||
implementation enforcedPlatform(library.java.google_cloud_platform_libraries_bom) | ||
implementation project(path: ":sdks:java:core", configuration: "shadow") | ||
implementation project(":examples:java:cdap") | ||
implementation project(":sdks:java:io:cdap") | ||
implementation project(":sdks:java:io:hadoop-common") | ||
implementation library.java.cdap_api | ||
implementation library.java.cdap_api_commons | ||
permitUnusedDeclared library.java.cdap_api_commons | ||
implementation library.java.cdap_etl_api | ||
permitUnusedDeclared library.java.cdap_etl_api | ||
implementation library.java.cdap_etl_api_spark | ||
permitUnusedDeclared library.java.cdap_etl_api_spark | ||
implementation library.java.cdap_hydrator_common | ||
//TODO: modify to 'implementation library.java.cdap_plugin_hubspot', | ||
// when new release with HasOffset interface will be published | ||
implementation "com.akvelon:cdap-hubspot-plugins:1.1.0" | ||
implementation library.java.google_code_gson | ||
implementation library.java.hadoop_common | ||
implementation library.java.slf4j_api | ||
implementation library.java.vendored_guava_26_0_jre | ||
runtimeOnly project(path: ":runners:direct-java", configuration: "shadow") | ||
|
||
// Add dependencies for the PreCommit configurations | ||
// For each runner a project level dependency on the examples project. | ||
for (String runner : preCommitRunners) { | ||
delegate.add(runner + "PreCommit", project(":examples:java:cdap:hubspot")) | ||
delegate.add(runner + "PreCommit", project(path: ":examples:java:cdap:hubspot", configuration: "testRuntimeMigration")) | ||
} | ||
directRunnerPreCommit project(path: ":runners:direct-java", configuration: "shadow") | ||
flinkRunnerPreCommit project(":runners:flink:${project.ext.latestFlinkVersion}") | ||
} | ||
|
||
/* | ||
* Create a ${runner}PreCommit task for each runner which runs a set | ||
* of integration tests for WordCount and WindowedWordCount. | ||
*/ | ||
def preCommitRunnerClass = [ | ||
directRunner: "org.apache.beam.runners.direct.DirectRunner", | ||
flinkRunner: "org.apache.beam.runners.flink.TestFlinkRunner" | ||
] | ||
|
||
for (String runner : preCommitRunners) { | ||
tasks.create(name: runner + "PreCommit", type: Test) { | ||
def preCommitBeamTestPipelineOptions = [ | ||
"--runner=" + preCommitRunnerClass[runner], | ||
] | ||
classpath = configurations."${runner}PreCommit" | ||
forkEvery 1 | ||
maxParallelForks 4 | ||
systemProperty "beamTestPipelineOptions", JsonOutput.toJson(preCommitBeamTestPipelineOptions) | ||
} | ||
} | ||
|
||
/* Define a common precommit task which depends on all the individual precommits. */ | ||
task preCommit() { | ||
for (String runner : preCommitRunners) { | ||
dependsOn runner + "PreCommit" | ||
} | ||
} | ||
|
||
task executeCdap (type:JavaExec) { | ||
mainClass = System.getProperty("mainClass") | ||
classpath = sourceSets.main.runtimeClasspath | ||
systemProperties System.getProperties() | ||
args System.getProperty("exec.args", "").split() | ||
} |
146 changes: 146 additions & 0 deletions
146
...c/main/java/org/apache/beam/examples/complete/cdap/hubspot/CdapHubspotStreamingToTxt.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.beam.examples.complete.cdap.hubspot; | ||
|
||
import java.util.Map; | ||
import org.apache.beam.examples.complete.cdap.hubspot.options.CdapHubspotStreamingSourceOptions; | ||
import org.apache.beam.examples.complete.cdap.hubspot.transforms.FormatInputTransform; | ||
import org.apache.beam.examples.complete.cdap.hubspot.utils.PluginConfigOptionsConverter; | ||
import org.apache.beam.sdk.Pipeline; | ||
import org.apache.beam.sdk.PipelineResult; | ||
import org.apache.beam.sdk.coders.KvCoder; | ||
import org.apache.beam.sdk.coders.NullableCoder; | ||
import org.apache.beam.sdk.coders.StringUtf8Coder; | ||
import org.apache.beam.sdk.io.TextIO; | ||
import org.apache.beam.sdk.io.hadoop.WritableCoder; | ||
import org.apache.beam.sdk.options.PipelineOptionsFactory; | ||
import org.apache.beam.sdk.transforms.Values; | ||
import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime; | ||
import org.apache.beam.sdk.transforms.windowing.GlobalWindows; | ||
import org.apache.beam.sdk.transforms.windowing.Repeatedly; | ||
import org.apache.beam.sdk.transforms.windowing.Window; | ||
import org.apache.beam.sdk.values.KV; | ||
import org.apache.hadoop.io.NullWritable; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* The {@link CdapHubspotStreamingToTxt} pipeline is a streaming pipeline which ingests data in JSON | ||
* format from CDAP Hubspot, and outputs the resulting records to .txt file. Hubspot parameters and | ||
* output .txt file path are specified by the user as template parameters. <br> | ||
* | ||
* <p><b>Example Usage</b> | ||
* | ||
* <pre> | ||
* # Gradle preparation | ||
* | ||
* To run this example your {@code build.gradle} file should contain the following task | ||
* to execute the pipeline: | ||
* {@code | ||
* task executeCdap (type:JavaExec) { | ||
* mainClass = System.getProperty("mainClass") | ||
* classpath = sourceSets.main.runtimeClasspath | ||
* systemProperties System.getProperties() | ||
* args System.getProperty("exec.args", "").split() | ||
* } | ||
* } | ||
* | ||
* This task allows to run the pipeline via the following command: | ||
* {@code | ||
* gradle clean executeCdap -DmainClass=org.apache.beam.examples.complete.cdap.hubspot.CdapHubspotStreamingToTxt \ | ||
* -Dexec.args="--<argument>=<value> --<argument>=<value>" | ||
* } | ||
* | ||
* # Running the pipeline | ||
* To execute this pipeline, specify the parameters in the following format: | ||
* {@code | ||
* --authToken=your-private-app-access-token \ | ||
* --referenceName=your-reference-name \ | ||
* --objectType=Contacts \ | ||
* --outputTxtFilePathPrefix=your-path-to-output-folder-with-filename-prefix \ | ||
* --pullFrequencySec=1 \ | ||
* --startOffset=0 | ||
* } | ||
* | ||
* By default this will run the pipeline locally with the DirectRunner. To change the runner, specify: | ||
* {@code | ||
* --runner=YOUR_SELECTED_RUNNER | ||
* } | ||
* </pre> | ||
*/ | ||
public class CdapHubspotStreamingToTxt { | ||
|
||
/* Logger for class.*/ | ||
private static final Logger LOG = LoggerFactory.getLogger(CdapHubspotStreamingToTxt.class); | ||
|
||
/** | ||
* Main entry point for pipeline execution. | ||
* | ||
* @param args Command line arguments to the pipeline. | ||
*/ | ||
public static void main(String[] args) { | ||
CdapHubspotStreamingSourceOptions options = | ||
PipelineOptionsFactory.fromArgs(args) | ||
.withValidation() | ||
.as(CdapHubspotStreamingSourceOptions.class); | ||
|
||
// Create the pipeline | ||
Pipeline pipeline = Pipeline.create(options); | ||
run(pipeline, options); | ||
} | ||
|
||
/** | ||
* Runs a pipeline which reads records from CDAP Hubspot and writes it to .txt file. | ||
* | ||
* @param options arguments to the pipeline | ||
*/ | ||
public static PipelineResult run(Pipeline pipeline, CdapHubspotStreamingSourceOptions options) { | ||
Map<String, Object> paramsMap = PluginConfigOptionsConverter.hubspotOptionsToParamsMap(options); | ||
LOG.info("Starting Cdap-Hubspot-streaming-to-txt pipeline with parameters: {}", paramsMap); | ||
|
||
/* | ||
* Steps: | ||
* 1) Read messages in from Cdap Hubspot | ||
* 2) Extract values only | ||
* 3) Write successful records to .txt file | ||
*/ | ||
|
||
pipeline | ||
.apply( | ||
"readFromCdapHubspotStreaming", | ||
FormatInputTransform.readFromCdapHubspotStreaming( | ||
paramsMap, options.getPullFrequencySec(), options.getStartOffset())) | ||
.setCoder( | ||
KvCoder.of( | ||
NullableCoder.of(WritableCoder.of(NullWritable.class)), StringUtf8Coder.of())) | ||
.apply( | ||
"globalwindow", | ||
Window.<KV<NullWritable, String>>into(new GlobalWindows()) | ||
.triggering(Repeatedly.forever(AfterProcessingTime.pastFirstElementInPane())) | ||
.discardingFiredPanes()) | ||
.apply(Values.create()) | ||
.apply( | ||
"writeToTxt", | ||
TextIO.write() | ||
.withWindowedWrites() | ||
.withNumShards(1) | ||
.to(options.getOutputTxtFilePathPrefix())); | ||
|
||
return pipeline.run(); | ||
} | ||
} |
Oops, something went wrong.