Fix pane info in BigQuery load job id #28272

Closed · wants to merge 5 commits
runners/google-cloud-dataflow-java/build.gradle (0 additions, 3 deletions)
@@ -612,9 +612,6 @@ task googleCloudPlatformRunnerV2IntegrationTest(type: Test) {
  exclude '**/FhirIOLROIT.class'
  exclude '**/FhirIOSearchIT.class'
  exclude '**/FhirIOPatientEverythingIT.class'
- // failing due to pane index not incrementing after Reshuffle:
- // https://github.com/apache/beam/issues/28219
- exclude '**/FileLoadsStreamingIT.class'

  maxParallelForks 4
  classpath = configurations.googleCloudPlatformIntegrationTest
@@ -1755,7 +1755,7 @@ void maybeRecordPCollectionWithAutoSharding(PCollection<?> pcol) {
options.isEnableStreamingEngine(),
"Runner determined sharding not available in Dataflow for GroupIntoBatches for"
+ " non-Streaming-Engine jobs. In order to use runner determined sharding, please use"
+ " --streaming --enable_streaming_engine");
+ " --streaming --experiments=enable_streaming_engine");
pCollectionsPreservedKeys.add(pcol);
pcollectionsRequiringAutoSharding.add(pcol);
}
@@ -17,11 +17,15 @@
*/
package org.apache.beam.sdk.transforms;

+import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;

import org.apache.beam.sdk.annotations.Internal;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
+import org.checkerframework.checker.nullness.qual.Nullable;
import org.joda.time.Duration;
import org.joda.time.Instant;

@@ -34,28 +38,58 @@
*/
public class PeriodicImpulse extends PTransform<PBegin, PCollection<Instant>> {

- Instant startTimestamp = Instant.now();
- Instant stopTimestamp = BoundedWindow.TIMESTAMP_MAX_VALUE;
- Duration fireInterval = Duration.standardMinutes(1);
+ Instant startTimestamp;
+ Instant stopTimestamp;
+ @Nullable Duration stopDuration;
+ Duration fireInterval;
  boolean applyWindowing = false;
  boolean catchUpToNow = true;

- private PeriodicImpulse() {}
+ private PeriodicImpulse() {
+   this.startTimestamp = Instant.now();
+   this.stopTimestamp = BoundedWindow.TIMESTAMP_MAX_VALUE;
+   this.fireInterval = Duration.standardMinutes(1);
+ }

public static PeriodicImpulse create() {
return new PeriodicImpulse();
}

/**
* Assign a timestamp when the pipeline starts to produce data.
*
* <p>Cannot be used along with {@link #stopAfter}.
*/
public PeriodicImpulse startAt(Instant startTime) {
checkArgument(stopDuration == null, "startAt and stopAfter cannot be set at the same time");
this.startTimestamp = startTime;
return this;
}

/**
* Assign a timestamp when the pipeline stops producing data.
*
* <p>Cannot be used along with {@link #stopAfter}.
*/
public PeriodicImpulse stopAt(Instant stopTime) {
checkArgument(stopDuration == null, "stopAt and stopAfter cannot be set at the same time");
this.stopTimestamp = stopTime;
return this;
}

/**
* <b><i>For internal use only; no backwards-compatibility guarantees.</i></b>
*
* <p>Assign a time interval at which the pipeliene produces data. This is different from setting
* {@link #startAt} and {@link #stopAt}, as the first timestamp is determined at run time
* (pipeline starts processing).
*/
@Internal
public PeriodicImpulse stopAfter(Duration duration) {
Contributor: mark with @Internal?

Contributor: P.S. also add to catchUpToNow()

Author: added

this.stopDuration = duration;
return this;
}
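
For orientation, a minimal usage sketch contrasting the two configurations (not part of this PR; `p` is an assumed Pipeline, and the timestamps and intervals are made up):

    // Fixed range: absolute start/stop timestamps known when the pipeline is built.
    PCollection<Instant> fixed =
        p.apply(
            "FixedRange",
            PeriodicImpulse.create()
                .startAt(Instant.parse("2023-09-01T00:00:00Z"))
                .stopAt(Instant.parse("2023-09-01T01:00:00Z"))
                .withInterval(Duration.standardSeconds(10)));

    // Runtime-relative range: the first timestamp is taken when the pipeline
    // starts processing; impulses then fire every 10 seconds for one hour.
    PCollection<Instant> relative =
        p.apply(
            "RelativeRange",
            PeriodicImpulse.create()
                .stopAfter(Duration.standardHours(1))
                .withInterval(Duration.standardSeconds(10)));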

public PeriodicImpulse withInterval(Duration interval) {
this.fireInterval = interval;
return this;
@@ -67,31 +101,65 @@ public PeriodicImpulse applyWindowing() {
}

/**
-  * The default behavior is that PeriodicImpulse emits all instants until Instant.now(), then
+  * <b><i>For internal use only; no backwards-compatibility guarantees.</i></b>
+  *
+  * <p>The default behavior is that PeriodicImpulse emits all instants until Instant.now(), then
* starts firing at the specified interval. If this is set to false, the PeriodicImpulse will
* perform the interval wait before firing each instant.
*/
@Internal
public PeriodicImpulse catchUpToNow(boolean catchUpToNow) {
this.catchUpToNow = catchUpToNow;
return this;
}

@Override
public PCollection<Instant> expand(PBegin input) {
PCollection<Instant> result =
input
.apply(
Create.<PeriodicSequence.SequenceDefinition>of(
new PeriodicSequence.SequenceDefinition(
startTimestamp, stopTimestamp, fireInterval, catchUpToNow)))
.apply(PeriodicSequence.create());
PCollection<PeriodicSequence.SequenceDefinition> seqDef;
if (stopDuration != null) {
// nonnull guaranteed
Duration d = stopDuration;
seqDef =
input
.apply(Impulse.create())
.apply(ParDo.of(new RuntimeSequenceFn(d, fireInterval, catchUpToNow)));
} else {
seqDef =
input.apply(
Create.of(
new PeriodicSequence.SequenceDefinition(
startTimestamp, stopTimestamp, fireInterval, catchUpToNow)));
}
PCollection<Instant> result = seqDef.apply(PeriodicSequence.create());

if (this.applyWindowing) {
result =
-       result.apply(
-           Window.<Instant>into(FixedWindows.of(Duration.millis(fireInterval.getMillis()))));
+       result.apply(Window.into(FixedWindows.of(Duration.millis(fireInterval.getMillis()))));
}

return result;
}

/**
* A DoFn that generates a SequenceDefinition at run time. This enables setting the first
* element's timestamp when the pipeline starts processing data.
*/
private static class RuntimeSequenceFn extends DoFn<byte[], PeriodicSequence.SequenceDefinition> {
Duration stopDuration;
Duration fireInterval;
boolean catchUpToNow;

RuntimeSequenceFn(Duration stopDuration, Duration fireInterval, boolean catchUpToNow) {
this.stopDuration = stopDuration;
this.fireInterval = fireInterval;
this.catchUpToNow = catchUpToNow;
}

@ProcessElement
public void process(ProcessContext c) {
Instant now = Instant.now();
c.output(
new PeriodicSequence.SequenceDefinition(
now, now.plus(stopDuration), fireInterval, catchUpToNow));
}
}
}
@@ -22,6 +22,7 @@
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;

import java.util.Objects;
+import org.apache.beam.sdk.annotations.Internal;
import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.schemas.JavaFieldSchema;
import org.apache.beam.sdk.schemas.annotations.DefaultSchema;
@@ -67,6 +68,8 @@ public SequenceDefinition(Instant first, Instant last, Duration duration) {
this.catchUpToNow = true;
}

/** <b><i>catchUpToNow is experimental; no backwards-compatibility guarantees.</i></b> */
@Internal
public SequenceDefinition(
Instant first, Instant last, Duration duration, boolean catchUpToNow) {
this.first = first;
@@ -28,6 +28,7 @@
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.ListCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.io.gcp.bigquery.WriteBundlesToFiles.Result;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;
@@ -48,25 +49,31 @@ class WritePartition<DestinationT>
@AutoValue
abstract static class Result {
public abstract List<String> getFilenames();

    // Downstream operations may rely on pane info, which will get lost after a Reshuffle
    abstract Boolean isFirstPane();

+   abstract Long getPaneIndex();
}
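
The comment above is the crux of this PR. A hypothetical illustration (not from the PR; element and key types assumed) of why the pane index must ride along inside the element:

    // After a Reshuffle, c.pane() describes the post-shuffle firing, not the
    // trigger firing that produced the element, so read the stored value instead.
    PCollection<KV<ShardedKey<String>, Result>> shuffled =
        partitions.apply(Reshuffle.of());
    shuffled.apply(
        ParDo.of(
            new DoFn<KV<ShardedKey<String>, Result>, Void>() {
              @ProcessElement
              public void process(ProcessContext c) {
                long unreliable = c.pane().getIndex(); // may have been reset by the shuffle
                long stable = c.element().getValue().getPaneIndex(); // captured upstream
              }
            }));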

static class ResultCoder extends AtomicCoder<Result> {
private static final Coder<List<String>> FILENAMES_CODER = ListCoder.of(StringUtf8Coder.of());
private static final Coder<Boolean> FIRST_PANE_CODER = BooleanCoder.of();
private static final Coder<Long> PANE_INDEX_CODER = VarLongCoder.of();
static final ResultCoder INSTANCE = new ResultCoder();

@Override
public void encode(Result value, OutputStream outStream) throws IOException {
FILENAMES_CODER.encode(value.getFilenames(), outStream);
FIRST_PANE_CODER.encode(value.isFirstPane(), outStream);
+     PANE_INDEX_CODER.encode(value.getPaneIndex(), outStream);
}

@Override
public Result decode(InputStream inStream) throws IOException {
      return new AutoValue_WritePartition_Result(
-         FILENAMES_CODER.decode(inStream), FIRST_PANE_CODER.decode(inStream));
+         FILENAMES_CODER.decode(inStream),
+         FIRST_PANE_CODER.decode(inStream),
+         PANE_INDEX_CODER.decode(inStream));
}
}
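
A test-style round trip for the extended coder (a hypothetical snippet assuming same-package access to the generated AutoValue constructor; CoderUtils is Beam's coder test helper):

    // The pane index should survive an encode/decode cycle.
    WritePartition.Result original =
        new AutoValue_WritePartition_Result(
            ImmutableList.of("gs://bucket/temp/file1"), /* isFirstPane= */ false, /* paneIndex= */ 3L);
    byte[] bytes = CoderUtils.encodeToByteArray(WritePartition.ResultCoder.INSTANCE, original);
    WritePartition.Result copy =
        CoderUtils.decodeFromByteArray(WritePartition.ResultCoder.INSTANCE, bytes);
    assert copy.getPaneIndex() == 3L;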

@@ -234,7 +241,7 @@ public void processElement(ProcessContext c) throws Exception {
KV.of(
ShardedKey.of(destination, i + 1),
new AutoValue_WritePartition_Result(
-                 partitionData.getFilenames(), c.pane().isFirst())));
+                 partitionData.getFilenames(), c.pane().isFirst(), c.pane().getIndex())));
}
}
}
@@ -208,6 +208,20 @@ private PendingJobData startWriteRename(
BigQueryResourceNaming.createJobIdWithDestination(
c.sideInput(jobIdToken), finalTableDestination, -1, c.pane().getIndex());

if (isFirstPane) {
Author: this is another code path relying on pane info. I checked that the upstream transform has a GBK and no Reshuffle in between: https://github.com/apache/beam/blob/0ed4c78a799cf5a6cc6a0b40b23ca498096769c5/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java#L404C5-L404C5 — so this one should be fine. Nevertheless, it's good to add logging to monitor it.

Contributor: Unrelated to this PR, but we're actually seeing a bug due to relying on the pane index in this code path: #28309

LOG.info(
"Setup write disposition {}, create disposition {} for first pane BigQuery job {}",
writeDisposition,
createDisposition,
jobIdPrefix);
} else {
LOG.debug(
"Setup write disposition {}, create disposition {} for BigQuery job {}",
writeDisposition,
createDisposition,
jobIdPrefix);
}

BigQueryHelpers.PendingJob retryJob =
startCopy(
bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)),
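For readers unfamiliar with the naming scheme: the load/copy job id must be deterministic so that retries deduplicate while distinct panes do not collide. A simplified sketch of the idea (hypothetical helper, not Beam's actual BigQueryResourceNaming code):

    // Same (token, destination, partition, pane) -> same id, so a retried
    // work item re-attaches to the already-submitted BigQuery job.
    // A later pane -> different id, so its job is not mistaken for a duplicate.
    static String loadJobId(String token, String destination, int partition, long paneIndex) {
      return String.format("beam_bq_job_%s_%s_%d_pane%d", token, destination, partition, paneIndex);
    }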
@@ -36,7 +36,6 @@
import java.util.stream.Collectors;
import org.apache.beam.sdk.coders.AtomicCoder;
import org.apache.beam.sdk.coders.BooleanCoder;
-import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VoidCoder;
@@ -75,10 +74,8 @@
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps;
-import org.checkerframework.checker.initialization.qual.Initialized;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
-import org.checkerframework.checker.nullness.qual.UnknownKeyFor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -101,25 +98,21 @@ class WriteTables<DestinationT extends @NonNull Object>
@AutoValue
abstract static class Result {
abstract String getTableName();

    // Downstream operations may rely on pane info, which will get lost after a Reshuffle
abstract Boolean isFirstPane();
}

static class ResultCoder extends AtomicCoder<WriteTables.Result> {
static final ResultCoder INSTANCE = new ResultCoder();

@Override
-    public void encode(Result value, @UnknownKeyFor @NonNull @Initialized OutputStream outStream)
-        throws @UnknownKeyFor @NonNull @Initialized CoderException, @UnknownKeyFor @NonNull
-            @Initialized IOException {
+    public void encode(Result value, OutputStream outStream) throws IOException {
StringUtf8Coder.of().encode(value.getTableName(), outStream);
BooleanCoder.of().encode(value.isFirstPane(), outStream);
}

@Override
-    public Result decode(@UnknownKeyFor @NonNull @Initialized InputStream inStream)
-        throws @UnknownKeyFor @NonNull @Initialized CoderException, @UnknownKeyFor @NonNull
-            @Initialized IOException {
+    public Result decode(InputStream inStream) throws IOException {
return new AutoValue_WriteTables_Result(
StringUtf8Coder.of().decode(inStream), BooleanCoder.of().decode(inStream));
}
@@ -156,27 +149,36 @@ private class WriteTablesDoFn
private class PendingJobData {
final BoundedWindow window;
final BigQueryHelpers.PendingJob retryJob;
final List<String> partitionFiles;
final WritePartition.Result partitionResult;
final TableDestination tableDestination;
final TableReference tableReference;
final DestinationT destinationT;
-     final boolean isFirstPane;

public PendingJobData(
BoundedWindow window,
BigQueryHelpers.PendingJob retryJob,
List<String> partitionFiles,
WritePartition.Result partitionResult,
TableDestination tableDestination,
TableReference tableReference,
-         DestinationT destinationT,
-         boolean isFirstPane) {
+         DestinationT destinationT) {
this.window = window;
this.retryJob = retryJob;
-       this.partitionFiles = partitionFiles;
+       this.partitionResult = partitionResult;
this.tableDestination = tableDestination;
this.tableReference = tableReference;
this.destinationT = destinationT;
-       this.isFirstPane = isFirstPane;
}

public List<String> paritionFiles() {
return partitionResult.getFilenames();
}

public boolean isFirstPane() {
return partitionResult.isFirstPane();
}

public long paneIndex() {
Author: I am going to preserve the change to PendingJobData, as the pane index info would be helpful to resolve a possible race condition between CREATE_TRUNCATE pending jobs.

Contributor: Do you mean for debugging purposes? Will note that paneIndex() is still not being called anywhere.

return partitionResult.getPaneIndex();
}
}
// All pending load jobs.
@@ -251,7 +253,10 @@ public void processElement(
List<String> partitionFiles = Lists.newArrayList(element.getValue().getFilenames());
String jobIdPrefix =
BigQueryResourceNaming.createJobIdWithDestination(
-             c.sideInput(loadJobIdPrefixView), tableDestination, partition, c.pane().getIndex());
+             c.sideInput(loadJobIdPrefixView),
+             tableDestination,
+             partition,
+             element.getValue().getPaneIndex());
Member: My concern is that if we have this:

  GBK -> processing -> reshuffle -> this DoFn

can you tell me which case we are in?

  1. The processing left the key the same, so the reshuffle is just for checkpointing.
  2. The processing changed the key, so that the current element key + pane index is no longer unique.

Author: Well, it's a little more subtle. It is the same key, but it became a ShardedKey after processing:

  After the GBK, the key is DestinationT.
  After processing, the key is ShardedKey<DestinationT> (the shard is added in WritePartition).

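A small sketch of the keying described in that reply (hypothetical variable names):

    // After the GBK the key is DestinationT; WritePartition re-keys each
    // partition with an explicit shard, so (key, paneIndex) stays unique
    // across the downstream Reshuffle.
    ShardedKey<DestinationT> key = ShardedKey.of(destination, shardNumber);
    c.output(KV.of(key, partitionResult));
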

if (tempTable) {
if (tempDataset != null) {
@@ -291,13 +296,7 @@ public void processElement(

pendingJobs.add(
new PendingJobData(
-           window,
-           retryJob,
-           partitionFiles,
-           tableDestination,
-           tableReference,
-           destination,
-           element.getValue().isFirstPane()));
+           window, retryJob, element.getValue(), tableDestination, tableReference, destination));
}

@Teardown
@@ -361,13 +360,13 @@ public void finishBundle(FinishBundleContext c) throws Exception {
Result result =
new AutoValue_WriteTables_Result(
BigQueryHelpers.toJsonString(pendingJob.tableReference),
-                 pendingJob.isFirstPane);
+                 pendingJob.isFirstPane());
c.output(
mainOutputTag,
KV.of(pendingJob.destinationT, result),
pendingJob.window.maxTimestamp(),
pendingJob.window);
-       for (String file : pendingJob.partitionFiles) {
+       for (String file : pendingJob.partitionFiles()) {
c.output(
temporaryFilesTag,
file,