Commit
docs(tutorials): add new tutorials directory and JsonWriterDefaultStream tutorial (#1498)

* docs(tutorials): add new tutorials directory and JsonWriterDefaultStream tutorial

porting over tutorial from #1497

* exclude the templated files from owlbot

* rename className

* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
stephaniewang526 and gcf-owl-bot[bot] authored Jan 28, 2022
1 parent 0d414f8 commit fd1ecf2
Showing 7 changed files with 223 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .github/workflows/approve-readme.yaml
@@ -1,5 +1,7 @@
 on:
   pull_request:
+    paths-ignore:
+      - 'tutorials/**'
 name: auto-merge-readme
 jobs:
   approve:
2 changes: 2 additions & 0 deletions .github/workflows/auto-release.yaml
@@ -1,5 +1,7 @@
 on:
   pull_request:
+    paths-ignore:
+      - 'tutorials/**'
 name: auto-release
 jobs:
   approve:
2 changes: 2 additions & 0 deletions .github/workflows/ci.yaml
@@ -1,5 +1,7 @@
 on:
   push:
+    paths-ignore:
+      - 'tutorials/**'
     branches:
       - main
   pull_request:
2 changes: 2 additions & 0 deletions .github/workflows/samples.yaml
@@ -1,5 +1,7 @@
 on:
   pull_request:
+    paths-ignore:
+      - 'tutorials/**'
 name: samples
 jobs:
   checkstyle:
6 changes: 5 additions & 1 deletion owlbot.py
@@ -27,6 +27,10 @@
         ".kokoro/build.sh",
         ".kokoro/nightly/samples.cfg",
         ".kokoro/presubmit/samples.cfg",
-        ".kokoro/dependencies.sh"
+        ".kokoro/dependencies.sh",
+        ".github/workflows/approve-readme.yaml",
+        ".github/workflows/auto-release.yaml",
+        ".github/workflows/ci.yaml",
+        ".github/workflows/samples.yaml"
     ]
 )
126 changes: 126 additions & 0 deletions tutorials/JsonWriterDefaultStream/JsonWriterDefaultStream.java
@@ -0,0 +1,126 @@
/*
 * Copyright 2022 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.example.bigquerystorage;

import com.google.api.core.ApiFuture;
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.StandardTableDefinition;
import com.google.cloud.bigquery.Table;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.storage.v1.AppendRowsResponse;
import com.google.cloud.bigquery.storage.v1.JsonStreamWriter;
import com.google.cloud.bigquery.storage.v1.TableName;
import com.google.cloud.bigquery.storage.v1.TableSchema;
import com.google.protobuf.Descriptors.DescriptorValidationException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.concurrent.ExecutionException;
import org.json.JSONArray;
import org.json.JSONObject;

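/**
 * Tutorial entry point: creates the destination table (if it does not already
 * exist) and streams newline-delimited JSON records from a local file into it
 * through the BigQuery Storage Write API default stream.
 */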
public class JsonWriterDefaultStream {

  public static void main(String[] args) throws Exception {
    if (args.length < 4) {
      System.out.println("Arguments: project, dataset, table, source_file");
      return;
    }

    String projectId = args[0];
    String datasetName = args[1];
    String tableName = args[2];
    String dataFile = args[3];
    createDestinationTable(projectId, datasetName, tableName);
    writeToDefaultStream(projectId, datasetName, tableName, dataFile);
  }

  // createDestinationTable: Creates the destination table for streaming with the desired schema.
  public static void createDestinationTable(
      String projectId, String datasetName, String tableName) {
    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();
    // Create a schema that matches the source data.
    Schema schema =
        Schema.of(
            Field.of("commit", StandardSQLTypeName.STRING),
            Field.newBuilder("parent", StandardSQLTypeName.STRING)
                .setMode(Field.Mode.REPEATED)
                .build(),
            Field.of("author", StandardSQLTypeName.STRING),
            Field.of("committer", StandardSQLTypeName.STRING),
            Field.of("time_sec", StandardSQLTypeName.INT64),
            Field.of("subject", StandardSQLTypeName.STRING),
            Field.of("message", StandardSQLTypeName.STRING),
            Field.of("repo_name", StandardSQLTypeName.STRING));

    // Create a table that uses this schema.
    TableId tableId = TableId.of(projectId, datasetName, tableName);
    Table table = bigquery.getTable(tableId);
    if (table == null) {
      TableInfo tableInfo =
          TableInfo.newBuilder(tableId, StandardTableDefinition.of(schema)).build();
      bigquery.create(tableInfo);
    }
  }

  // writeToDefaultStream: Writes records from the source file to the destination table.
  public static void writeToDefaultStream(
      String projectId, String datasetName, String tableName, String dataFile)
      throws DescriptorValidationException, InterruptedException, IOException {

    BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

    // Get the schema of the destination table and convert to the equivalent BigQueryStorage type.
    // BqToBqStorageSchemaConverter is compiled in from ../snippets via the
    // build-helper-maven-plugin (see pom.xml).
    Table table = bigquery.getTable(datasetName, tableName);
    Schema schema = table.getDefinition().getSchema();
    TableSchema tableSchema = BqToBqStorageSchemaConverter.convertTableSchema(schema);

    // Use the JSON stream writer to send records in JSON format.
    TableName parentTable = TableName.of(projectId, datasetName, tableName);
    try (JsonStreamWriter writer =
            JsonStreamWriter.newBuilder(parentTable.toString(), tableSchema).build();
        // Open the reader in the same try-with-resources block so it is always closed.
        BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
      // Read JSON data from the source file and send it to the Write API.
      String line = reader.readLine();
      while (line != null) {
        // As a best practice, send batches of records instead of one record at a time.
        JSONArray jsonArr = new JSONArray();
        for (int i = 0; i < 100; i++) {
          JSONObject record = new JSONObject(line);
          jsonArr.put(record);
          line = reader.readLine();
          if (line == null) {
            break;
          }
        } // End of one batch (up to 100 records).
        ApiFuture<AppendRowsResponse> future = writer.append(jsonArr);
        AppendRowsResponse response = future.get();
      }
      System.out.println("Appended records successfully.");
    } catch (ExecutionException e) {
      // If the wrapped exception is a StatusRuntimeException, check the state of the operation.
      // If the state is INTERNAL, CANCELLED, or ABORTED, you can retry. For more information,
      // see: https://grpc.github.io/grpc-java/javadoc/io/grpc/StatusRuntimeException.html
      System.out.println("Failed to append records.\n" + e);
    }
  }
}
84 changes: 84 additions & 0 deletions tutorials/JsonWriterDefaultStream/pom.xml
@@ -0,0 +1,84 @@
<project>
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.google.cloud</groupId>
  <artifactId>google-cloud-bigquerystorage</artifactId>

  <parent>
    <groupId>com.google.cloud.samples</groupId>
    <artifactId>shared-configuration</artifactId>
    <version>1.2.0</version>
  </parent>

  <properties>
    <maven.compiler.target>1.8</maven.compiler.target>
    <maven.compiler.source>1.8</maven.compiler.source>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <dependency>
      <groupId>com.google.cloud</groupId>
      <artifactId>google-cloud-bigquerystorage</artifactId>
      <version>2.8.2</version>
    </dependency>
    <dependency>
      <groupId>com.google.cloud</groupId>
      <artifactId>google-cloud-bigquery</artifactId>
      <version>2.6.0</version>
    </dependency>
    <dependency>
      <groupId>org.apache.avro</groupId>
      <artifactId>avro</artifactId>
      <version>1.10.2</version>
    </dependency>
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-vector</artifactId>
      <version>6.0.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.arrow</groupId>
      <artifactId>arrow-memory-netty</artifactId>
      <version>6.0.1</version>
    </dependency>
  </dependencies>
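
  <!-- Note: org.json, used by JsonWriterDefaultStream, is assumed to arrive
       transitively through google-cloud-bigquerystorage (whose JsonStreamWriter
       API is built around JSONArray), so it is not declared here. -->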

  <build>
    <plugins>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>build-helper-maven-plugin</artifactId>
        <version>3.0.0</version>
        <executions>
          <execution>
            <phase>generate-sources</phase>
            <goals>
              <goal>add-source</goal>
            </goals>
            <configuration>
              <sources>
                <source>src/main/java</source>
                <source>../snippets/src/main/java/com/example/bigquerystorage</source>
              </sources>
            </configuration>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.codehaus.mojo</groupId>
        <artifactId>exec-maven-plugin</artifactId>
        <version>3.0.0</version>
        <executions>
          <execution>
            <goals>
              <goal>exec</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <executable>mvn</executable>
        </configuration>
      </plugin>
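
      <!-- A possible invocation (assumes application default credentials are set up):
           mvn compile exec:java
             -Dexec.mainClass=com.example.bigquerystorage.JsonWriterDefaultStream
             -Dexec.args="PROJECT_ID DATASET TABLE SOURCE_FILE" -->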
    </plugins>
  </build>
</project>
