[SPARK-25299] Yet another attempt to integrate API with scheduler #559

Closed · wants to merge 5 commits
ShuffleDataIO.java
@@ -18,6 +18,7 @@
package org.apache.spark.api.shuffle;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.api.java.Optional;

/**
* :: Experimental ::
@@ -31,4 +32,6 @@ public interface ShuffleDataIO {

ShuffleDriverComponents driver();
ShuffleExecutorComponents executor();
Optional<ShuffleLocationComponents> shuffleLocations();

}
ShuffleDriverComponents.java
@@ -30,4 +30,5 @@ public interface ShuffleDriverComponents {
void cleanupApplication() throws IOException;

void removeShuffleData(int shuffleId, boolean blocking) throws IOException;

}
ShuffleLocationComponents.java
@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.api.shuffle;

import org.apache.spark.annotation.Experimental;

/**
* :: Experimental ::
* An interface for interaction with shuffle locations.
*
* @since 3.0.0
*/
@Experimental
public interface ShuffleLocationComponents {

/**
* Returns whether the MapShuffleLocations is now missing data as a result of
* removing the lost shuffle location.
*/
boolean shouldRemoveMapOutputOnLostBlock(
ShuffleLocation lostLocation,
MapShuffleLocations mapOutputLocations);
}
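
As an illustration of the contract above (not part of this change), a hypothetical plugin that keeps shuffle blocks on remote storage could report that no map output is lost when an executor's location disappears; a minimal Scala sketch:

// Hypothetical plugin implementation (illustrative only): blocks live on remote
// storage, so a lost executor location never invalidates registered map output.
import org.apache.spark.api.shuffle.{MapShuffleLocations, ShuffleLocation, ShuffleLocationComponents}

class RemoteStorageShuffleLocationComponents extends ShuffleLocationComponents {
  override def shouldRemoveMapOutputOnLostBlock(
      lostLocation: ShuffleLocation,
      mapOutputLocations: MapShuffleLocations): Boolean = false
}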
DefaultShuffleDataIO.java
@@ -18,9 +18,10 @@
package org.apache.spark.shuffle.sort.io;

import org.apache.spark.SparkConf;
import org.apache.spark.api.shuffle.ShuffleDriverComponents;
import org.apache.spark.api.shuffle.ShuffleExecutorComponents;
import org.apache.spark.api.shuffle.ShuffleDataIO;
import org.apache.spark.api.java.Optional;
import org.apache.spark.api.shuffle.*;
import org.apache.spark.internal.config.package$;
import org.apache.spark.shuffle.sort.DefaultMapShuffleLocations;
import org.apache.spark.shuffle.sort.lifecycle.DefaultShuffleDriverComponents;

public class DefaultShuffleDataIO implements ShuffleDataIO {
@@ -40,4 +41,9 @@ public ShuffleExecutorComponents executor() {
public ShuffleDriverComponents driver() {
return new DefaultShuffleDriverComponents();
}

@Override
public Optional<ShuffleLocationComponents> shuffleLocations() {
return Optional.of(new DefaultShuffleLocationComponents(sparkConf));
}
}
DefaultShuffleLocationComponents.java
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.shuffle.sort.io;

import org.apache.spark.SparkConf;
import org.apache.spark.api.shuffle.MapShuffleLocations;
import org.apache.spark.api.shuffle.ShuffleLocation;
import org.apache.spark.api.shuffle.ShuffleLocationComponents;
import org.apache.spark.internal.config.package$;
import org.apache.spark.shuffle.sort.DefaultMapShuffleLocations;

public class DefaultShuffleLocationComponents implements ShuffleLocationComponents {

private final boolean externalShuffleServiceEnabled;
private final boolean unRegisterOutputHostOnFetchFailure;

public DefaultShuffleLocationComponents(SparkConf sparkConf) {
externalShuffleServiceEnabled = (boolean) sparkConf.get(package$.MODULE$.SHUFFLE_SERVICE_ENABLED());
unRegisterOutputHostOnFetchFailure = (boolean)
sparkConf.get(package$.MODULE$.UNREGISTER_OUTPUT_ON_HOST_ON_FETCH_FAILURE());
}

@Override
public boolean shouldRemoveMapOutputOnLostBlock(
ShuffleLocation lostLocation,
MapShuffleLocations mapOutputLocations) {
DefaultMapShuffleLocations mapStatusLoc = (DefaultMapShuffleLocations) mapOutputLocations;
DefaultMapShuffleLocations lostLoc = (DefaultMapShuffleLocations) lostLocation;
if (externalShuffleServiceEnabled && unRegisterOutputHostOnFetchFailure) {
return mapStatusLoc.getBlockManagerId().host().equals(lostLoc.getBlockManagerId().host());
} else {
return mapStatusLoc.getBlockManagerId().executorId().equals(lostLoc.getBlockManagerId().executorId());
}
}
}
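
For context, the two configs read above decide whether a lost location invalidates map outputs per host or per executor. A minimal sketch of wiring this up (the second config key is assumed here; check the fork's config package for the exact name):

// Illustrative only: with both flags true, a fetch failure removes every map output
// registered on the failed host; otherwise only outputs of the failed executor.
import org.apache.spark.SparkConf
import org.apache.spark.shuffle.sort.io.DefaultShuffleLocationComponents

val conf = new SparkConf()
  .set("spark.shuffle.service.enabled", "true")
  .set("spark.files.fetchFailure.unRegisterOutputOnHost", "true") // assumed key
val locationComponents = new DefaultShuffleLocationComponents(conf)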
DefaultShuffleDriverComponents.java
@@ -18,8 +18,13 @@
package org.apache.spark.shuffle.sort.lifecycle;

import com.google.common.collect.ImmutableMap;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkEnv;
import org.apache.spark.api.shuffle.MapShuffleLocations;
import org.apache.spark.api.shuffle.ShuffleDriverComponents;
import org.apache.spark.api.shuffle.ShuffleLocation;
import org.apache.spark.internal.config.package$;
import org.apache.spark.shuffle.sort.DefaultMapShuffleLocations;
import org.apache.spark.storage.BlockManagerMaster;

import java.io.IOException;
46 changes: 37 additions & 9 deletions core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -28,7 +28,7 @@ import scala.concurrent.duration.Duration
import scala.reflect.ClassTag
import scala.util.control.NonFatal

import org.apache.spark.api.shuffle.{MapShuffleLocations, ShuffleLocation}
import org.apache.spark.api.shuffle.{MapShuffleLocations, ShuffleDriverComponents, ShuffleLocation, ShuffleLocationComponents}
import org.apache.spark.broadcast.{Broadcast, BroadcastManager}
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
@@ -98,12 +98,12 @@ private class ShuffleStatus(numPartitions: Int) {
}

/**
* Remove the map output which was served by the specified block manager.
* This is a no-op if there is no registered map output or if the registered output is from a
* different block manager.
* Remove the map output which contains the specific shuffle location for the given reduce Id.
*/
def removeMapOutput(mapId: Int, bmAddress: BlockManagerId): Unit = synchronized {
if (mapStatuses(mapId) != null && mapStatuses(mapId).location == bmAddress) {
def removeMapOutput(mapId: Int, reduceId: Int, shuffleLoc: ShuffleLocation)
: Unit = synchronized {
if (mapStatuses(mapId) != null && mapStatuses(mapId).mapShuffleLocations != null &&
mapStatuses(mapId).mapShuffleLocations.getLocationForBlock(reduceId) == shuffleLoc) {
_numAvailableOutputs -= 1
mapStatuses(mapId) = null
Review comment:

I think I understand what's going on with removeMapAtLocation, but I'm confused by this part. When there is a fetch failure from host X for map M from reduce R, the DAGScheduler removes shuffle outputs in two different ways (I'm not sure why): (1) it uses the path you're changing below in removeMapAtLocation to remove all map output on host X, and I see how that logic is moving into ShuffleLocationComponents; (2) here, it removes map M from host X. Before this change, it removed the entire map output of map M. You're changing it to take the reduce R as a parameter, but you still remove all map output of map M (mapStatuses(mapId) = null), which doesn't seem consistent.

Or is there some special logic in the extra condition mapStatuses(mapId).mapShuffleLocations.getLocationForBlock(reduceId) == shuffleLoc? I don't understand when it would be true or false; it seems like it should always be true. Can one ShuffleLocation really represent multiple locations? I'd still expect them to be equal here, since the MapOutputTracker would have stored multiple locations and the fetch failure would also send back multiple locations, right?

(The discussion above ignores the host/executor distinction for the external shuffle service in the current implementation, just to keep things simple.)

Author reply:

On the first point about this function taking a reducer parameter R: it uses R to validate that the ShuffleLocation does indeed exist in the MapStatus for mapper M. I was mimicking the previous behavior, which checks that the bmAddress exists in the MapStatus before removing it, although I wasn't exactly sure why that check is needed (could it be that we might have received an obsolete FetchFailed?). However, you're right that I can replace the mapStatuses(mapId).mapShuffleLocations.getLocationForBlock(reduceId) == shuffleLoc logic with something that calls into ShuffleLocationComponents, which would be more consistent.

On ShuffleLocation representing multiple locations: yes, that's right. I tried to code up what that would look like in DAGSchedulerFileServerSuite. A fetch failure would then send back a single ShuffleLocation that encodes multiple host/port combos.

Review comment:

Can an obsolete FetchFailed be a problem? I would leave the check.

Does the order of the multiple host/port combos matter in this equality check?

invalidateSerializedMapOutputStatusCache()
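
A minimal sketch of the author's suggestion in the thread above (illustrative only; the parameter wiring is assumed, not the PR's final API): the location-equality check could be delegated to the plugin's shouldRemoveMapOutputOnLostBlock.

// Hypothetical variant of ShuffleStatus.removeMapOutput: instead of comparing
// getLocationForBlock(reduceId) to the lost location directly, it asks the
// plugin whether this map output should be treated as lost.
def removeMapOutput(
    mapId: Int,
    lostLocation: ShuffleLocation,
    shouldRemove: (ShuffleLocation, MapShuffleLocations) => Boolean): Unit = synchronized {
  val status = mapStatuses(mapId)
  if (status != null && status.mapShuffleLocations != null &&
      shouldRemove(lostLocation, status.mapShuffleLocations)) {
    _numAvailableOutputs -= 1
    mapStatuses(mapId) = null
    invalidateSerializedMapOutputStatusCache()
  }
}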
@@ -141,6 +141,18 @@ private class ShuffleStatus(numPartitions: Int) {
}
}

def removeOutputsByShuffleLocation(
shuffleLoc: ShuffleLocation,
f: (ShuffleLocation, MapShuffleLocations) => Boolean) : Unit = synchronized {
for (mapId <- 0 until mapStatuses.length) {
if (mapStatuses(mapId) != null && f(shuffleLoc, mapStatuses(mapId).mapShuffleLocations)) {
_numAvailableOutputs -= 1
mapStatuses(mapId) = null
invalidateSerializedMapOutputStatusCache()
}
}
}

/**
* Number of partitions that have shuffle outputs.
*/
@@ -319,6 +331,7 @@ private[spark] abstract class MapOutputTracker(conf: SparkConf) extends Logging
private[spark] class MapOutputTrackerMaster(
conf: SparkConf,
broadcastManager: BroadcastManager,
shuffleLocationComponents: Option[ShuffleLocationComponents],
isLocal: Boolean)
extends MapOutputTracker(conf) {

@@ -423,17 +436,32 @@ private[spark] class MapOutputTrackerMaster(
shuffleStatuses(shuffleId).addMapOutput(mapId, status)
}

/** Unregister map output information of the given shuffle, mapper and block manager */
def unregisterMapOutput(shuffleId: Int, mapId: Int, bmAddress: BlockManagerId) {
/** Unregister map output information of the given shuffle, mapper, reducer and location */
def unregisterMapOutput(
shuffleId: Int,
mapId: Int,
reduceId: Int,
shuffleLoc: ShuffleLocation): Unit = {
shuffleStatuses.get(shuffleId) match {
case Some(shuffleStatus) =>
shuffleStatus.removeMapOutput(mapId, bmAddress)
shuffleStatus.removeMapOutput(mapId, reduceId, shuffleLoc)
incrementEpoch()
case None =>
throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID")
}
}

def removeMapAtLocation(shuffleLoc: ShuffleLocation): Unit = {
shuffleStatuses.valuesIterator.foreach { mapStatuses =>
if (shuffleLocationComponents.isDefined) {
Review comment: Can just do shuffleLocationComponents.foreach.

mapStatuses.removeOutputsByShuffleLocation(
shuffleLoc,
shuffleLocationComponents.get.shouldRemoveMapOutputOnLostBlock)
}
}
incrementEpoch()
}
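
A minimal sketch of the review suggestion above, assuming shuffleLocationComponents is a Scala Option (illustrative, not the PR's code):

// Hypothetical rewrite using Option.foreach instead of isDefined/get;
// the intended behavior is identical to the version above.
def removeMapAtLocation(shuffleLoc: ShuffleLocation): Unit = {
  shuffleLocationComponents.foreach { components =>
    shuffleStatuses.valuesIterator.foreach { shuffleStatus =>
      shuffleStatus.removeOutputsByShuffleLocation(
        shuffleLoc, components.shouldRemoveMapOutputOnLostBlock)
    }
  }
  incrementEpoch()
}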

/** Unregister all map output information of the given shuffle. */
def unregisterAllMapOutput(shuffleId: Int) {
shuffleStatuses.get(shuffleId) match {
35 changes: 22 additions & 13 deletions core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -43,7 +43,7 @@ import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFor
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.api.conda.CondaEnvironment
import org.apache.spark.api.conda.CondaEnvironment.CondaSetupInstructions
import org.apache.spark.api.shuffle.{ShuffleDataIO, ShuffleDriverComponents}
import org.apache.spark.api.shuffle.{ShuffleDataIO, ShuffleDriverComponents, ShuffleLocationComponents}
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.deploy.{CondaRunner, LocalSparkCluster, SparkHadoopUtil}
import org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat}
@@ -216,7 +216,7 @@ class SparkContext(config: SparkConf) extends SafeLogging {
private var _shutdownHookRef: AnyRef = _
private var _statusStore: AppStatusStore = _
private var _heartbeater: Heartbeater = _
private var _shuffleDriverComponents: ShuffleDriverComponents = _
private var _shuffleDataIo: ShuffleDataIO = _

/* ------------------------------------------------------------------------------------- *
| Accessors and public fields. These provide access to the internal state of the |
@@ -257,8 +257,10 @@ class SparkContext(config: SparkConf) extends SafeLogging {
private[spark] def createSparkEnv(
conf: SparkConf,
isLocal: Boolean,
listenerBus: LiveListenerBus): SparkEnv = {
SparkEnv.createDriverEnv(conf, isLocal, listenerBus, SparkContext.numDriverCores(master, conf))
listenerBus: LiveListenerBus,
shuffleDataIO: ShuffleDataIO): SparkEnv = {
SparkEnv.createDriverEnv(conf, isLocal, listenerBus, SparkContext.numDriverCores(master, conf),
shuffleDataIO)
}

private[spark] def env: SparkEnv = _env
@@ -308,6 +310,10 @@ class SparkContext(config: SparkConf) extends SafeLogging {
_dagScheduler = ds
}

private[spark] def shuffleLocationComponents: Some[ShuffleLocationComponents] = {
Some(_shuffleDataIo.shuffleLocations().orNull())
}
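
One note on the accessor above: Some(...orNull()) yields Some(null) when the plugin supplies no ShuffleLocationComponents. A minimal alternative sketch, assuming the intent is an empty Option in that case:

// Illustrative alternative, not the PR's code: Option(...) maps null to None,
// so callers can use foreach/map without a null check.
private[spark] def shuffleLocationComponents: Option[ShuffleLocationComponents] =
  Option(_shuffleDataIo.shuffleLocations().orNull())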

/**
* A unique identifier for the Spark application.
* Its format depends on the scheduler implementation.
@@ -429,8 +435,14 @@ class SparkContext(config: SparkConf) extends SafeLogging {
_statusStore = AppStatusStore.createLiveStore(conf, appStatusSource)
listenerBus.addToStatusQueue(_statusStore.listener.get)


val configuredPluginClasses = conf.get(SHUFFLE_IO_PLUGIN_CLASS)
val maybeIO = Utils.loadExtensions(
classOf[ShuffleDataIO], Seq(configuredPluginClasses), conf)
require(maybeIO.size == 1, s"Failed to load plugins of type $configuredPluginClasses")
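
For reference, the plugin loaded here is selected by the SHUFFLE_IO_PLUGIN_CLASS setting read above; a hedged usage sketch, using the built-in DefaultShuffleDataIO as the example value:

// Illustrative only: selecting the ShuffleDataIO plugin by class name.
import org.apache.spark.SparkConf
import org.apache.spark.internal.config.SHUFFLE_IO_PLUGIN_CLASS
import org.apache.spark.shuffle.sort.io.DefaultShuffleDataIO

val conf = new SparkConf()
  .set(SHUFFLE_IO_PLUGIN_CLASS.key, classOf[DefaultShuffleDataIO].getName)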

// Create the Spark execution environment (cache, map output tracker, etc)
_env = createSparkEnv(_conf, isLocal, listenerBus)
_env = createSparkEnv(_conf, isLocal, listenerBus, maybeIO.head)
SparkEnv.set(_env)

// If running the REPL, register the repl's output dir with the file server.
@@ -493,12 +505,9 @@ class SparkContext(config: SparkConf) extends SafeLogging {
executorEnvs ++= _conf.getExecutorEnv
executorEnvs("SPARK_USER") = sparkUser

val configuredPluginClasses = conf.get(SHUFFLE_IO_PLUGIN_CLASS)
val maybeIO = Utils.loadExtensions(
classOf[ShuffleDataIO], Seq(configuredPluginClasses), conf)
require(maybeIO.size == 1, s"Failed to load plugins of type $configuredPluginClasses")
_shuffleDriverComponents = maybeIO.head.driver()
_shuffleDriverComponents.initializeApplication().asScala.foreach {
_shuffleDataIo = maybeIO.head
maybeIO.head.driver()
.initializeApplication().asScala.foreach {
Review comment: Indentation seems weird.

case (k, v) => _conf.set(ShuffleDataIO.SHUFFLE_SPARK_CONF_PREFIX + k, v) }

// We need to register "HeartbeatReceiver" before "createTaskScheduler" because Executor will
@@ -570,7 +579,7 @@ class SparkContext(config: SparkConf) extends SafeLogging {

_cleaner =
if (_conf.get(CLEANER_REFERENCE_TRACKING)) {
Some(new ContextCleaner(this, _shuffleDriverComponents))
Some(new ContextCleaner(this, _shuffleDataIo.driver()))
} else {
None
}
@@ -1960,7 +1969,7 @@ class SparkContext(config: SparkConf) extends SafeLogging {
}
_heartbeater = null
}
_shuffleDriverComponents.cleanupApplication()
_shuffleDataIo.driver().cleanupApplication()
if (env != null && _heartbeatReceiver != null) {
Utils.tryLogNonFatalError {
env.rpcEnv.stop(_heartbeatReceiver)
8 changes: 7 additions & 1 deletion core/src/main/scala/org/apache/spark/SparkEnv.scala
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.api.conda.CondaEnvironment.CondaSetupInstructions
import org.apache.spark.api.python.PythonWorkerFactory
import org.apache.spark.api.shuffle.ShuffleDataIO
import org.apache.spark.broadcast.BroadcastManager
import org.apache.spark.internal.{config, Logging}
import org.apache.spark.internal.config._
@@ -200,6 +201,7 @@ object SparkEnv extends Logging {
isLocal: Boolean,
listenerBus: LiveListenerBus,
numCores: Int,
shuffleDataIO: ShuffleDataIO,
mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = {
assert(conf.contains(DRIVER_HOST_ADDRESS),
s"${DRIVER_HOST_ADDRESS.key} is not set on the driver!")
@@ -221,6 +223,7 @@
isLocal,
numCores,
ioEncryptionKey,
shuffleDataIO = Some(shuffleDataIO),
listenerBus = listenerBus,
mockOutputCommitCoordinator = mockOutputCommitCoordinator
)
@@ -254,6 +257,7 @@
/**
* Helper method to create a SparkEnv for a driver or an executor.
*/
// scalastyle:off
private def create(
conf: SparkConf,
executorId: String,
@@ -263,6 +267,7 @@
isLocal: Boolean,
numUsableCores: Int,
ioEncryptionKey: Option[Array[Byte]],
shuffleDataIO: Option[ShuffleDataIO] = Option.empty,
listenerBus: LiveListenerBus = null,
mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = {

@@ -341,7 +346,8 @@ object SparkEnv extends Logging {
val broadcastManager = new BroadcastManager(isDriver, conf, securityManager)

val mapOutputTracker = if (isDriver) {
new MapOutputTrackerMaster(conf, broadcastManager, isLocal)
new MapOutputTrackerMaster(
conf, broadcastManager, Some(shuffleDataIO.get.shuffleLocations().orNull()), isLocal)
} else {
new MapOutputTrackerWorker(conf)
}