From 7df859a3bba5a9fbb85324db21565cbae5e27b54 Mon Sep 17 00:00:00 2001
From: Julio Merino
Date: Fri, 14 Jul 2023 08:57:25 -0700
Subject: [PATCH] Add the experimental_remote_require_cached flag

When set to true, this flag causes Bazel to abort the build whenever it
encounters an action that is not cached. This is very useful when trying
to troubleshoot action caching issues across machines because it allows
running a build on one and having it fail on another as soon as there is
a problem without tainting what already exists in the cache.

My workflow is to essentially do:

1. Machine 1: bazel clean
2. Machine 1: bazel build ...
3. Machine 2: bazel clean
4. Machine 2: bazel build --experimental_remote_require_cached ...

which makes step 4 fail on the first action that wasn't cached as
expected. Then I can address that problem and re-run step 4 to
encounter the next issue.
---
 .../build/lib/remote/RemoteSpawnRunner.java   | 13 ++++++++
 .../lib/remote/options/RemoteOptions.java     | 12 ++++++++
 .../bazel/remote/remote_execution_test.sh     | 30 +++++++++++++++++++
 3 files changed, 55 insertions(+)

diff --git a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
index 04e3976876c7d1..9a0d47cdaf8067 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java
@@ -236,6 +236,19 @@ public SpawnResult exec(Spawn spawn, SpawnExecutionContext context)
       return execLocallyAndUploadOrFail(action, spawn, context, uploadLocalResults, e);
     }
 
+    if (remoteOptions.remoteRequireCached) {
+      return new SpawnResult.Builder()
+          .setStatus(SpawnResult.Status.EXECUTION_DENIED)
+          .setExitCode(1)
+          .setFailureMessage("Action must be cached due to --experimental_remote_require_cached but it is not")
+          .setFailureDetail(
+              FailureDetail.newBuilder()
+                  .setSpawn(FailureDetails.Spawn.newBuilder().setCode(FailureDetails.Spawn.Code.EXECUTION_DENIED))
+                  .build())
+          .setRunnerName("remote")
+          .build();
+    }
+
     AtomicBoolean useCachedResult = new AtomicBoolean(acceptCachedResult);
     AtomicBoolean forceUploadInput = new AtomicBoolean(false);
     try {
diff --git a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java
index 022fec36748f95..89135eb29b5534 100644
--- a/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java
+++ b/src/main/java/com/google/devtools/build/lib/remote/options/RemoteOptions.java
@@ -229,6 +229,18 @@ public final class RemoteOptions extends CommonRemoteOptions {
       help = "Whether to accept remotely cached action results.")
   public boolean remoteAcceptCached;
 
+  @Option(
+      name = "experimental_remote_require_cached",
+      defaultValue = "false",
+      documentationCategory = OptionDocumentationCategory.REMOTE,
+      effectTags = {OptionEffectTag.UNKNOWN},
+      help =
+          "If set to true, enforce that all actions that can run remotely are cached, or else "
+              + "fail the build. This is useful to troubleshoot non-determinism issues as it "
+              + "allows checking whether actions that should be cached are actually cached "
+              + "without spuriously injecting new results into the cache.")
+  public boolean remoteRequireCached;
+
   @Option(
       name = "remote_local_fallback",
       defaultValue = "false",
diff --git a/src/test/shell/bazel/remote/remote_execution_test.sh b/src/test/shell/bazel/remote/remote_execution_test.sh
index 88c0682ad14dda..470fc77520c713 100755
--- a/src/test/shell/bazel/remote/remote_execution_test.sh
+++ b/src/test/shell/bazel/remote/remote_execution_test.sh
@@ -1263,6 +1263,36 @@ EOF
   expect_not_log "1 local"
 }
 
+function test_require_cached() {
+  mkdir -p a
+  cat > a/BUILD <<'EOF'
+genrule(
+  name = "foo",
+  srcs = ["foo.in"],
+  outs = ["foo.out"],
+  cmd = "cp \"$<\" \"$@\"",
+)
+EOF
+
+  echo "input 1" >a/foo.in
+  bazel build \
+      --remote_executor=grpc://localhost:${worker_port} \
+      //a:foo >& $TEST_log || fail "Failed to build //a:foo"
+
+  expect_log "1 remote"
+
+  echo "input 2" >a/foo.in
+  if bazel build \
+      --remote_executor=grpc://localhost:${worker_port} \
+      --experimental_remote_require_cached \
+      //a:foo >& $TEST_log; then
+    fail "Build of //a:foo succeeded but it should have failed"
+  fi
+
+  expect_log "Action must be cached due to --experimental_remote_require_cached but it is not"
+  expect_not_log "remote cache hit"
+}
+
 function test_nobuild_runfile_links() {
   mkdir data && echo "hello" > data/hello && echo "world" > data/world
   cat > WORKSPACE <