diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/common/remote/TARGETS | 18 | ||||
-rw-r--r-- | src/buildtool/common/remote/retry.hpp | 178 | ||||
-rw-r--r-- | src/buildtool/common/remote/retry_parameters.hpp | 161 |
3 files changed, 357 insertions, 0 deletions
diff --git a/src/buildtool/common/remote/TARGETS b/src/buildtool/common/remote/TARGETS
index 97966431..087a0ea4 100644
--- a/src/buildtool/common/remote/TARGETS
+++ b/src/buildtool/common/remote/TARGETS
@@ -29,4 +29,22 @@
   , "deps": [["@", "fmt", "", "fmt"], ["@", "json", "", "json"], "port"]
   , "stage": ["src", "buildtool", "common", "remote"]
   }
+, "retry_parameters":
+  { "type": ["@", "rules", "CC", "library"]
+  , "name": ["retry_parameters"]
+  , "hdrs": ["retry_parameters.hpp"]
+  , "deps": [["src/buildtool/logging", "logging"]]
+  , "stage": ["src", "buildtool", "common", "remote"]
+  }
+, "retry":
+  { "type": ["@", "rules", "CC", "library"]
+  , "name": ["retry"]
+  , "hdrs": ["retry.hpp"]
+  , "deps":
+    [ ["src/buildtool/logging", "logging"]
+    , ["@", "grpc", "", "grpc++"]
+    , "retry_parameters"
+    ]
+  , "stage": ["src", "buildtool", "common", "remote"]
+  }
 }
diff --git a/src/buildtool/common/remote/retry.hpp b/src/buildtool/common/remote/retry.hpp
new file mode 100644
index 00000000..7abfcc2e
--- /dev/null
+++ b/src/buildtool/common/remote/retry.hpp
@@ -0,0 +1,178 @@
+// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef INCLUDED_SRC_BUILDTOOL_COMMON_REMOTE_RETRY_HPP
+#define INCLUDED_SRC_BUILDTOOL_COMMON_REMOTE_RETRY_HPP
+
+#include <chrono>
+#include <optional>
+#include <string>
+#include <thread>
+#include <utility>
+
+#include <grpcpp/grpcpp.h>
+
+#include "src/buildtool/common/remote/retry_parameters.hpp"
+#include "src/buildtool/logging/logger.hpp"
+
+/// \brief Outcome of one attempt of a function executed in a retry loop.
+///
+/// This class can be used when the failure cannot be immediately detected by
+/// the return value of the function, e.g., when using a grpc stream.
+///
+/// Please note that it is the user's responsibility not to set both flags to
+/// true.
+///
+/// Design note: even though only one bool could be sufficient (e.g., exit),
+/// this would require checking twice whether we exited because of a success
+/// or a failure: first inside the retry loop, and then again by the caller.
+struct RetryResponse {
+    /// When set to true, it means the function ran successfully.
+    bool ok{false};
+    /// When set to true, it means that it is not worth retrying.
+    bool exit_retry_loop{false};
+    /// Error message logged when exit_retry_loop was set to true or when the
+    /// last retry attempt failed.
+    std::optional<std::string> error_msg{std::nullopt};
+};
+
+template <typename F>
+concept CallableReturningRetryResponse = requires(F const& f) {
+    {RetryResponse{f()}};
+};
+
+/// \brief Invoke \p f with a back off algorithm.
+///
+/// The retry loop is interrupted when one of the two flags of the returned
+/// RetryResponse object is set to true, or when the maximum number of
+/// attempts has been reached.
+/// \param f       The callable to be invoked.
+/// \param logger  Where failed attempts are reported.
+/// \returns true if an attempt succeeded, false otherwise.
+template <CallableReturningRetryResponse F>
+[[nodiscard]] auto WithRetry(F const& f, Logger const& logger) noexcept
+    -> bool {
+    try {
+        auto const attempts = Retry::GetMaxAttempts();
+        for (auto attempt = 1U; attempt <= attempts; ++attempt) {
+            auto const [ok, fatal, error_msg] = f();
+            if (ok) {
+                return true;
+            }
+            if (fatal) {
+                if (error_msg) {
+                    // The message is passed as an argument, not as the format
+                    // string, so that braces it may contain are kept as-is.
+                    logger.Emit(LogLevel::Error, "{}", *error_msg);
+                }
+                return false;
+            }
+            // don't wait if it was the last attempt
+            if (attempt < attempts) {
+                auto const sleep_for_seconds =
+                    Retry::GetSleepTimeSeconds(attempt);
+                if (error_msg) {
+                    logger.Emit(
+                        kRetryLogLevel,
+                        "Attempt {}/{} failed: {} Retrying in {} seconds.",
+                        attempt,
+                        attempts,
+                        *error_msg,
+                        sleep_for_seconds);
+                }
+                else {
+                    logger.Emit(kRetryLogLevel,
+                                "Attempt {}/{} failed. Retrying in {} seconds.",
+                                attempt,
+                                attempts,
+                                sleep_for_seconds);
+                }
+                std::this_thread::sleep_for(
+                    std::chrono::seconds(sleep_for_seconds));
+            }
+            else if (error_msg) {
+                logger.Emit(LogLevel::Error,
+                            "After {} attempts: {}",
+                            attempt,
+                            *error_msg);
+            }
+        }
+    } catch (...) {
+        logger.Emit(LogLevel::Error, "WithRetry: caught unknown exception");
+    }
+    return false;
+}
+
+template <typename F>
+concept CallableReturningGrpcStatus = requires(F const& f) {
+    {grpc::Status{f()}};
+};
+
+/// \brief Invoke \p f with a back off algorithm.
+///
+/// A new attempt is made only if \p f reported grpc::StatusCode::UNAVAILABLE
+/// and the maximum number of attempts has not been reached yet.
+/// \param f       The callable to be invoked.
+/// \param logger  Where failed attempts are reported.
+/// \returns A pair whose first member is true only if \p f returned an OK
+/// status, and whose second member is the last status obtained from \p f
+/// (default-constructed if none was obtained).
+template <CallableReturningGrpcStatus F>
+[[nodiscard]] auto WithRetry(F const& f, Logger const& logger) noexcept
+    -> std::pair<bool, grpc::Status> {
+    grpc::Status status{};
+    try {
+        auto const attempts = Retry::GetMaxAttempts();
+        for (auto attempt = 1U; attempt <= attempts; ++attempt) {
+            status = f();
+            if (status.ok() or
+                status.error_code() != grpc::StatusCode::UNAVAILABLE) {
+                return {status.ok(), std::move(status)};
+            }
+            // don't wait if it was the last attempt
+            if (attempt < attempts) {
+                auto const sleep_for_seconds =
+                    Retry::GetSleepTimeSeconds(attempt);
+                logger.Emit(
+                    kRetryLogLevel,
+                    "Attempt {}/{} failed: {}: {}: Retrying in {} seconds.",
+                    attempt,
+                    attempts,
+                    static_cast<int>(status.error_code()),
+                    status.error_message(),
+                    sleep_for_seconds);
+                std::this_thread::sleep_for(
+                    std::chrono::seconds(sleep_for_seconds));
+            }
+            else {
+                // The caller performs a second check on the
+                // status.error_code() and, if needed, emits to Error level
+                // there.
+                //
+                // To avoid duplication of similar errors, we emit to Debug
+                // level.
+                logger.Emit(LogLevel::Debug,
+                            "After {} attempts: {}: {}",
+                            attempt,
+                            static_cast<int>(status.error_code()),
+                            status.error_message());
+            }
+        }
+    } catch (...) {
+        logger.Emit(LogLevel::Error, "WithRetry: caught unknown exception");
+    }
+    return {false, std::move(status)};
+}
+
+#endif  // INCLUDED_SRC_BUILDTOOL_COMMON_REMOTE_RETRY_HPP
diff --git a/src/buildtool/common/remote/retry_parameters.hpp b/src/buildtool/common/remote/retry_parameters.hpp
new file mode 100644
index 00000000..73b92db7
--- /dev/null
+++ b/src/buildtool/common/remote/retry_parameters.hpp
@@ -0,0 +1,161 @@
+// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef INCLUDED_SRC_BUILDTOOL_COMMON_REMOTE_RETRY_PARAMETERS_HPP
+#define INCLUDED_SRC_BUILDTOOL_COMMON_REMOTE_RETRY_PARAMETERS_HPP
+
+#include <algorithm>
+#include <limits>
+#include <mutex>
+#include <random>
+
+#include "src/buildtool/logging/logger.hpp"
+
+constexpr unsigned int kDefaultInitialBackoffSeconds{1};
+constexpr unsigned int kDefaultMaxBackoffSeconds{60};
+constexpr unsigned int kDefaultAttempts{1};
+constexpr auto kRetryLogLevel = LogLevel::Progress;
+
+/// \brief Parameters of the back off algorithm used when retrying.
+///
+/// All values live in a single instance that is accessed through the static
+/// member functions. Only the random number generator is guarded by a mutex;
+/// the setters and getters are not synchronized.
+class Retry {
+    using dist_type = std::uniform_int_distribution<std::mt19937::result_type>;
+
+  public:
+    Retry() = default;
+    [[nodiscard]] static auto Instance() -> Retry& {
+        static Retry instance{};
+        return instance;
+    }
+
+    /// \brief Set the base waiting time used after the first failed attempt.
+    /// \returns false, after logging an error, if \p x is 0.
+    [[nodiscard]] static auto SetInitialBackoffSeconds(unsigned int x) noexcept
+        -> bool {
+        if (x < 1) {
+            Logger::Log(
+                LogLevel::Error,
+                "Invalid initial amount of seconds provided: {}. Value must "
+                "be strictly greater than 0.",
+                x);
+            return false;
+        }
+        Instance().initial_backoff_seconds_ = x;
+        return true;
+    }
+
+    [[nodiscard]] static auto GetInitialBackoffSeconds() noexcept
+        -> unsigned int {
+        return Instance().initial_backoff_seconds_;
+    }
+
+    /// \brief Set the cap applied to the exponentially increased waiting time.
+    /// \returns false, after logging an error, if \p x is 0.
+    [[nodiscard]] static auto SetMaxBackoffSeconds(unsigned int x) noexcept
+        -> bool {
+        if (x < 1) {
+            Logger::Log(LogLevel::Error,
+                        "Invalid max backoff provided: {}. Value must be "
+                        "strictly greater than 0.",
+                        x);
+            return false;
+        }
+        Instance().max_backoff_seconds_ = x;
+        return true;
+    }
+
+    [[nodiscard]] static auto GetMaxBackoffSeconds() noexcept -> unsigned int {
+        return Instance().max_backoff_seconds_;
+    }
+
+    /// \brief Set how many times a function is invoked at most.
+    /// \returns false, after logging an error, if \p x is 0.
+    [[nodiscard]] static auto SetMaxAttempts(unsigned int x) noexcept -> bool {
+        if (x < 1) {
+            Logger::Log(LogLevel::Error,
+                        "Invalid number of max number of attempts provided: "
+                        "{}. Value must be strictly greater than 0",
+                        x);
+            return false;
+        }
+        Instance().attempts_ = x;
+        return true;
+    }
+
+    [[nodiscard]] static auto GetMaxAttempts() noexcept -> unsigned int {
+        return Instance().attempts_;
+    }
+
+    /// \brief Random value drawn uniformly from [0, 3 * \p backoff].
+    ///
+    /// The upper bound saturates at the largest representable value instead
+    /// of wrapping around.
+    /// \returns the random value, or 0 if an exception was caught.
+    [[nodiscard]] static auto Jitter(unsigned int backoff) noexcept ->
+        typename dist_type::result_type {
+        using result_t = typename dist_type::result_type;
+        constexpr auto kMax = std::numeric_limits<result_t>::max();
+        try {
+            auto const upper = backoff > kMax / 3
+                                   ? kMax
+                                   : static_cast<result_t>(backoff) * 3;
+            dist_type dist{0, upper};
+            auto& inst = Instance();
+            std::unique_lock lock(inst.mutex_);
+            return dist(inst.rng_);
+        } catch (...) {
+            return 0;
+        }
+    }
+
+    /// \brief The waiting time is exponentially increased at each \p attempt
+    /// until it reaches max_backoff_seconds, which acts as a cap starting from
+    /// the very first attempt.
+    ///
+    /// To avoid overloading of the reachable resources, a jitter (aka, random
+    /// value) is added to distribute the workload. Since the jitter is added
+    /// after capping, the returned value may exceed max_backoff_seconds.
+    [[nodiscard]] static auto GetSleepTimeSeconds(unsigned int attempt) noexcept
+        -> unsigned int {
+        auto const max_backoff = Retry::GetMaxBackoffSeconds();
+        // the cap also applies to an initial backoff larger than the maximum
+        auto backoff = std::min(Retry::GetInitialBackoffSeconds(), max_backoff);
+        // on the first attempt, we don't double the backoff time
+        for (auto x = 1U; x < attempt and backoff < max_backoff; ++x) {
+            // compare before doubling, so that the doubling cannot overflow
+            backoff = backoff > max_backoff / 2 ? max_backoff : backoff * 2;
+        }
+        // clamp the sum so that it fits into the return type
+        auto const headroom =
+            std::numeric_limits<unsigned int>::max() - backoff;
+        auto const jitter = Retry::Jitter(backoff);
+        if (jitter >= headroom) {
+            return std::numeric_limits<unsigned int>::max();
+        }
+        return backoff + static_cast<unsigned int>(jitter);
+    }
+
+  private:
+    unsigned int initial_backoff_seconds_{kDefaultInitialBackoffSeconds};
+    unsigned int max_backoff_seconds_{kDefaultMaxBackoffSeconds};
+    unsigned int attempts_{kDefaultAttempts};
+    std::mutex mutex_;
+    std::random_device dev_;
+    std::mt19937 rng_{dev_()};
+};
+
+#endif  // INCLUDED_SRC_BUILDTOOL_COMMON_REMOTE_RETRY_PARAMETERS_HPP