diff options
author | Klaus Aehlig <klaus.aehlig@huawei.com> | 2024-10-28 14:23:03 +0100 |
---|---|---|
committer | Klaus Aehlig <klaus.aehlig@huawei.com> | 2024-10-28 16:21:34 +0100 |
commit | 08bf03382124fe5fbe58dd068d4ebb546b5c3e87 (patch) | |
tree | 047aa0a4bde4d777a32429de20f17f25579c2832 /src | |
parent | 70893762d10b25cdba70d72b78a640bdb29c0dcd (diff) | |
download | justbuild-08bf03382124fe5fbe58dd068d4ebb546b5c3e87.tar.gz |
Retry Execution on FAILED_PRECONDITION
The specification for this status code is as follows.
One or more errors occurred in setting up the action requested,
such as a missing input or command or no worker being available.
The client may be able to fix the errors and retry.
We routinely ensure all inputs are available to the remote execution
before we start an action, so all prerequisites will be there on a
compliant server; however, they might not actually be there on a server
where the CAS only has eventual consistency or is incorrect (due to old
cache entries on CAS purge) in its answer to FindMissingBlobs.
While we have no guarantee that a retry will help, we still retry;
at least in the case of an unavailable worker or CAS entries not
yet available due to eventual consistency, this will help. Also,
we log at debug level the full response, including the repeated Any
message. In this way, we can find out what useful information (if
any) is sent by popular remote-execution services and implement
more specific mitigations in the future.
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/execution_api/remote/TARGETS | 1 | ||||
-rw-r--r-- | src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp | 24 |
2 files changed, 25 insertions, 0 deletions
diff --git a/src/buildtool/execution_api/remote/TARGETS b/src/buildtool/execution_api/remote/TARGETS index 2c356660..a5290001 100644 --- a/src/buildtool/execution_api/remote/TARGETS +++ b/src/buildtool/execution_api/remote/TARGETS @@ -51,6 +51,7 @@ , "private-deps": [ ["@", "fmt", "", "fmt"] , ["@", "grpc", "", "grpc++"] + , ["@", "protoc", "", "libprotobuf"] , ["src/buildtool/common", "artifact_digest_factory"] , ["src/buildtool/common", "bazel_digest_factory"] , ["src/buildtool/common", "protocol_traits"] diff --git a/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp b/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp index 8eb26a0c..09f16b8f 100644 --- a/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp +++ b/src/buildtool/execution_api/remote/bazel/bazel_execution_client.cpp @@ -16,6 +16,8 @@ #include <utility> // std::move +#include "fmt/core.h" +#include "google/protobuf/text_format.h" #include "grpcpp/grpcpp.h" #include "src/buildtool/common/remote/client_common.hpp" #include "src/buildtool/common/remote/retry.hpp" @@ -40,6 +42,17 @@ void LogExecutionStatus(gsl::not_null<Logger const*> const& logger, "Execution could not be started.\n{}", s.ShortDebugString()); break; + case grpc::StatusCode::FAILED_PRECONDITION: + // quote from remote_execution.proto: + // One or more errors occurred in setting up the + // action requested, such as a missing input or command or no worker + // being available. The client may be able to fix the errors and + // retry. 
+ logger->Emit(LogLevel::Progress, + "Some precondition for the action failed.\n{}", + s.message()); + break; + default: // fallback to default status logging LogStatus(logger, LogLevel::Error, s); @@ -231,6 +244,17 @@ auto BazelExecutionClient::ExtractContents( if (status_code == grpc::StatusCode::UNAVAILABLE) { response.state = ExecutionResponse::State::Retry; } + else if (status_code == grpc::StatusCode::FAILED_PRECONDITION) { + logger_.Emit(LogLevel::Debug, [&exec_response] { + std::string text_repr; + google::protobuf::TextFormat::PrintToString(exec_response, + &text_repr); + return fmt::format( + "Full exec_response of precondition failure\n{}", + text_repr); + }); + response.state = ExecutionResponse::State::Retry; + } else { response.state = ExecutionResponse::State::Failed; } |