summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Klaus Aehlig <klaus.aehlig@huawei.com> 2024-10-28 14:23:03 +0100
committer Klaus Aehlig <klaus.aehlig@huawei.com> 2024-10-28 16:21:34 +0100
commit 08bf03382124fe5fbe58dd068d4ebb546b5c3e87 (patch)
tree 047aa0a4bde4d777a32429de20f17f25579c2832 /src
parent 70893762d10b25cdba70d72b78a640bdb29c0dcd (diff)
download justbuild-08bf03382124fe5fbe58dd068d4ebb546b5c3e87.tar.gz
Retry Execution on FAILED_PRECONDITION
The specification for this status code is as follows: "One or more errors occurred in setting up the action requested, such as a missing input or command or no worker being available. The client may be able to fix the errors and retry." We routinely ensure all inputs are available to the remote execution before we start an action, so all prerequisites will be there on a compliant server; however, they might not actually be there on a server where the CAS only has eventual consistency or is incorrect (due to old cache entries on CAS purge) in its answer to FindMissingBlobs. While we have no guarantee that a retry will help, we still retry; at least in the case of an unavailable worker or CAS entries not yet available due to eventual consistency, this will help. Also, we log at debug level the full response, including the repeated Any message. In this way, we can find out what useful information (if any) is sent by popular remote-execution services and implement more specific mitigations in the future.
Diffstat (limited to 'src')
-rw-r--r-- src/buildtool/execution_api/remote/TARGETS 1
-rw-r--r-- src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp 24
2 files changed, 25 insertions, 0 deletions
diff --git a/src/buildtool/execution_api/remote/TARGETS b/src/buildtool/execution_api/remote/TARGETS
index 2c356660..a5290001 100644
--- a/src/buildtool/execution_api/remote/TARGETS
+++ b/src/buildtool/execution_api/remote/TARGETS
@@ -51,6 +51,7 @@
, "private-deps":
[ ["@", "fmt", "", "fmt"]
, ["@", "grpc", "", "grpc++"]
+ , ["@", "protoc", "", "libprotobuf"]
, ["src/buildtool/common", "artifact_digest_factory"]
, ["src/buildtool/common", "bazel_digest_factory"]
, ["src/buildtool/common", "protocol_traits"]
diff --git a/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp b/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp
index 8eb26a0c..09f16b8f 100644
--- a/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp
+++ b/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp
@@ -16,6 +16,8 @@
#include <utility> // std::move
+#include "fmt/core.h"
+#include "google/protobuf/text_format.h"
#include "grpcpp/grpcpp.h"
#include "src/buildtool/common/remote/client_common.hpp"
#include "src/buildtool/common/remote/retry.hpp"
@@ -40,6 +42,17 @@ void LogExecutionStatus(gsl::not_null<Logger const*> const& logger,
"Execution could not be started.\n{}",
s.ShortDebugString());
break;
+ case grpc::StatusCode::FAILED_PRECONDITION:
+ // quote from remote_execution.proto:
+ // One or more errors occurred in setting up the
+ // action requested, such as a missing input or command or no worker
+ // being available. The client may be able to fix the errors and
+ // retry.
+ logger->Emit(LogLevel::Progress,
+ "Some precondition for the action failed.\n{}",
+ s.message());
+ break;
+
default:
// fallback to default status logging
LogStatus(logger, LogLevel::Error, s);
@@ -231,6 +244,17 @@ auto BazelExecutionClient::ExtractContents(
if (status_code == grpc::StatusCode::UNAVAILABLE) {
response.state = ExecutionResponse::State::Retry;
}
+ else if (status_code == grpc::StatusCode::FAILED_PRECONDITION) {
+ logger_.Emit(LogLevel::Debug, [&exec_response] {
+ std::string text_repr;
+ google::protobuf::TextFormat::PrintToString(exec_response,
+ &text_repr);
+ return fmt::format(
+ "Full exec_response of precondition failure\n{}",
+ text_repr);
+ });
+ response.state = ExecutionResponse::State::Retry;
+ }
else {
response.state = ExecutionResponse::State::Failed;
}