summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/buildtool/common/remote/TARGETS 18
-rw-r--r-- src/buildtool/common/remote/retry.hpp 146
-rw-r--r-- src/buildtool/common/remote/retry_parameters.hpp 127
3 files changed, 291 insertions, 0 deletions
diff --git a/src/buildtool/common/remote/TARGETS b/src/buildtool/common/remote/TARGETS
index 97966431..087a0ea4 100644
--- a/src/buildtool/common/remote/TARGETS
+++ b/src/buildtool/common/remote/TARGETS
@@ -29,4 +29,22 @@
, "deps": [["@", "fmt", "", "fmt"], ["@", "json", "", "json"], "port"]
, "stage": ["src", "buildtool", "common", "remote"]
}
+, "retry_parameters":
+ { "type": ["@", "rules", "CC", "library"]
+ , "name": ["retry_parameters"]
+ , "hdrs": ["retry_parameters.hpp"]
+ , "deps": [["src/buildtool/logging", "logging"]]
+ , "stage": ["src", "buildtool", "common", "remote"]
+ }
+, "retry":
+ { "type": ["@", "rules", "CC", "library"]
+ , "name": ["retry"]
+ , "hdrs": ["retry.hpp"]
+ , "deps":
+ [ ["src/buildtool/logging", "logging"]
+ , ["@", "grpc", "", "grpc++"]
+ , "retry_parameters"
+ ]
+ , "stage": ["src", "buildtool", "common", "remote"]
+ }
}
diff --git a/src/buildtool/common/remote/retry.hpp b/src/buildtool/common/remote/retry.hpp
new file mode 100644
index 00000000..7abfcc2e
--- /dev/null
+++ b/src/buildtool/common/remote/retry.hpp
@@ -0,0 +1,146 @@
+// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <chrono>
+#include <exception>
+#include <optional>
+#include <string>
+#include <thread>
+#include <utility>
+
+#include <grpcpp/grpcpp.h>
+
+#include "src/buildtool/common/remote/retry_parameters.hpp"
+#include "src/buildtool/logging/logger.hpp"
+
+// Outcome reported by a callable that is driven by the retry loop. It is meant
+// for operations whose failure cannot be read directly off the return value
+// of the function, e.g., when a grpc stream is involved.
+//
+// The user is responsible for never setting both flags to true at once.
+//
+// Design note: a single bool (e.g., exit) would be enough to leave the loop,
+// but then the reason for leaving, success or failure, would have to be
+// checked twice: once inside the retry loop and once more by the caller.
+struct RetryResponse {
+    // True when the function ran successfully.
+    bool ok = false;
+    // True when it is not worth retrying.
+    bool exit_retry_loop = false;
+    // Error message that is logged when exit_retry_loop was set to true or
+    // when the last retry attempt failed.
+    std::optional<std::string> error_msg{};
+};
+
+// Satisfied by every F that can be called without arguments through a const
+// reference and whose result can brace-initialize a RetryResponse.
+template <typename F>
+concept CallableReturningRetryResponse = requires(const F& callable) {
+    RetryResponse{callable()};
+};
+
+/// \brief Invoke \p f with a back off algorithm until it succeeds, asks to
+/// leave the retry loop, or Retry::GetMaxAttempts() attempts have been made.
+///
+/// Between two attempts the calling thread sleeps for
+/// Retry::GetSleepTimeSeconds(attempt) seconds. There is no sleep after the
+/// last attempt.
+/// \param f        Callable invoked without arguments on every attempt.
+/// \param logger   Receives one kRetryLogLevel message for every failed
+///                 attempt that is retried, and an Error message when the loop
+///                 is left with an error_msg.
+/// \returns true if an invocation of \p f reported ok. false if \p f set
+/// exit_retry_loop, if every attempt failed, or if an exception was caught.
+template <CallableReturningRetryResponse F>
+[[nodiscard]] auto WithRetry(F const& f, Logger const& logger) noexcept
+    -> bool {
+    try {
+        auto const attempts = Retry::GetMaxAttempts();
+        for (auto attempt = 1U; attempt <= attempts; ++attempt) {
+            auto [ok, fatal, error_msg] = f();
+            if (ok) {
+                return true;
+            }
+            if (fatal) {
+                if (error_msg) {
+                    logger.Emit(LogLevel::Error, *error_msg);
+                }
+                return false;
+            }
+            // don't wait if it was the last attempt
+            if (attempt < attempts) {
+                auto const sleep_for_seconds =
+                    Retry::GetSleepTimeSeconds(attempt);
+                // The optional part ends with its own full stop so that the
+                // error text does not run into "Retrying".
+                logger.Emit(kRetryLogLevel,
+                            "Attempt {}/{} failed{} Retrying in {} seconds.",
+                            attempt,
+                            attempts,
+                            error_msg ? fmt::format(": {}.", *error_msg)
+                                      : std::string{"."},
+                            sleep_for_seconds);
+                std::this_thread::sleep_for(
+                    std::chrono::seconds(sleep_for_seconds));
+            }
+            else {
+                if (error_msg) {
+                    logger.Emit(LogLevel::Error,
+                                "After {} attempts: {}",
+                                attempt,
+                                *error_msg);
+                }
+            }
+        }
+    } catch (std::exception const& ex) {
+        // keep the reason instead of reporting every exception as unknown
+        logger.Emit(
+            LogLevel::Error, "WithRetry: caught exception: {}", ex.what());
+    } catch (...) {
+        logger.Emit(LogLevel::Error, "WithRetry: caught unknown exception");
+    }
+    return false;
+}
+
+// Satisfied by every F that can be called without arguments through a const
+// reference and whose result can brace-initialize a grpc::Status.
+template <typename F>
+concept CallableReturningGrpcStatus = requires(const F& callable) {
+    grpc::Status{callable()};
+};
+
+/// \brief Invoke \p f with a back off algorithm for as long as it returns a
+/// status with code grpc::StatusCode::UNAVAILABLE, up to
+/// Retry::GetMaxAttempts() attempts.
+///
+/// Any other status, OK included, ends the loop immediately. Between two
+/// attempts the calling thread sleeps for Retry::GetSleepTimeSeconds(attempt)
+/// seconds. There is no sleep after the last attempt.
+/// \param f        Callable invoked without arguments on every attempt.
+/// \param logger   Receives one kRetryLogLevel message per retried attempt, a
+///                 Debug message after the last failed attempt, and an Error
+///                 message if an exception is caught.
+/// \returns The pair (success, last status). Whenever success is false the
+/// returned status is not OK.
+template <CallableReturningGrpcStatus F>
+[[nodiscard]] auto WithRetry(F const& f, Logger const& logger) noexcept
+    -> std::pair<bool, grpc::Status> {
+    grpc::Status status{};
+    try {
+        auto const attempts = Retry::GetMaxAttempts();
+        for (auto attempt = 1U; attempt <= attempts; ++attempt) {
+            status = f();
+            if (status.ok() or
+                status.error_code() != grpc::StatusCode::UNAVAILABLE) {
+                // read the flag before the status is moved from
+                auto const succeeded = status.ok();
+                return {succeeded, std::move(status)};
+            }
+            // don't wait if it was the last attempt
+            if (attempt < attempts) {
+                auto const sleep_for_seconds =
+                    Retry::GetSleepTimeSeconds(attempt);
+                logger.Emit(
+                    kRetryLogLevel,
+                    "Attempt {}/{} failed: {}: {}: Retrying in {} seconds.",
+                    attempt,
+                    attempts,
+                    static_cast<int>(status.error_code()),
+                    status.error_message(),
+                    sleep_for_seconds);
+                std::this_thread::sleep_for(
+                    std::chrono::seconds(sleep_for_seconds));
+            }
+            else {
+                // The caller performs a second check on the
+                // status.error_code(), and, eventually, emits to Error level
+                // there.
+                //
+                // To avoid duplication of similar errors, we emit to Debug
+                // level.
+                logger.Emit(LogLevel::Debug,
+                            "After {} attempts: {}: {}",
+                            attempt,
+                            static_cast<int>(status.error_code()),
+                            status.error_message());
+            }
+        }
+    } catch (std::exception const& ex) {
+        // keep the reason instead of reporting every exception as unknown
+        logger.Emit(
+            LogLevel::Error, "WithRetry: caught exception: {}", ex.what());
+    } catch (...) {
+        logger.Emit(LogLevel::Error, "WithRetry: caught unknown exception");
+    }
+    // The status is still the default (OK) one when the first call of f threw
+    // before returning anything. Never hand out "failed" together with an OK
+    // status, since callers inspect the status to report the error.
+    if (status.ok()) {
+        status = grpc::Status{
+            grpc::StatusCode::UNKNOWN,
+            "WithRetry: no status was obtained from the callable"};
+    }
+    return {false, std::move(status)};
+}
diff --git a/src/buildtool/common/remote/retry_parameters.hpp b/src/buildtool/common/remote/retry_parameters.hpp
new file mode 100644
index 00000000..73b92db7
--- /dev/null
+++ b/src/buildtool/common/remote/retry_parameters.hpp
@@ -0,0 +1,127 @@
+// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <mutex>
+#include <random>
+
+#include "src/buildtool/logging/logger.hpp"
+
+// Values the members of Retry start with until a setter replaces them.
+constexpr unsigned int kDefaultInitialBackoffSeconds = 1;
+constexpr unsigned int kDefaultMaxBackoffSeconds = 60;
+constexpr unsigned int kDefaultAttempts = 1;
+// Log level of the retry messages.
+constexpr LogLevel kRetryLogLevel{LogLevel::Progress};
+/// \brief Parameters of the retry strategy: initial and maximal back off (in
+/// seconds) and the maximal number of attempts.
+///
+/// The state lives in the function-local static object returned by
+/// Instance(); all static member functions read or write that object.
+/// NOTE(review): getters and setters access the plain unsigned members without
+/// synchronization; only the random number generator is guarded by the mutex.
+class Retry {
+    using dist_type = std::uniform_int_distribution<std::mt19937::result_type>;
+
+  public:
+    Retry() = default;
+
+    /// \brief Access the shared instance, created on first use.
+    [[nodiscard]] static auto Instance() -> Retry& {
+        static Retry instance{};
+        return instance;
+    }
+
+    /// \brief Set the back off used for the first retry.
+    /// \returns false, after logging an Error, if \p x is 0.
+    [[nodiscard]] static auto SetInitialBackoffSeconds(unsigned int x) noexcept
+        -> bool {
+        if (x == 0) {
+            Logger::Log(
+                LogLevel::Error,
+                "Invalid initial amount of seconds provided: {}. Value must "
+                "be strictly greater than 0.",
+                x);
+            return false;
+        }
+        Instance().initial_backoff_seconds_ = x;
+        return true;
+    }
+
+    /// \brief Set the cap applied to the back off.
+    /// \returns false, after logging an Error, if \p x is 0.
+    [[nodiscard]] static auto SetMaxBackoffSeconds(unsigned int x) noexcept
+        -> bool {
+        if (x == 0) {
+            Logger::Log(LogLevel::Error,
+                        "Invalid max backoff provided: {}. Value must be "
+                        "strictly greater than 0.",
+                        x);
+            return false;
+        }
+        Instance().max_backoff_seconds_ = x;
+        return true;
+    }
+
+    [[nodiscard]] static auto GetMaxBackoffSeconds() noexcept -> unsigned int {
+        return Instance().max_backoff_seconds_;
+    }
+
+    /// \brief Set how many times an operation is attempted at most.
+    /// \returns false, after logging an Error, if \p x is 0.
+    [[nodiscard]] static auto SetMaxAttempts(unsigned int x) noexcept -> bool {
+        if (x == 0) {
+            Logger::Log(LogLevel::Error,
+                        "Invalid number of max number of attempts provided: "
+                        "{}. Value must be strictly greater than 0",
+                        x);
+            return false;
+        }
+        Instance().attempts_ = x;
+        return true;
+    }
+
+    [[nodiscard]] static auto GetInitialBackoffSeconds() noexcept
+        -> unsigned int {
+        return Instance().initial_backoff_seconds_;
+    }
+
+    [[nodiscard]] static auto GetMaxAttempts() noexcept -> unsigned int {
+        return Instance().attempts_;
+    }
+
+    /// \brief Draw a random value, uniformly distributed in
+    /// [0, 3 * \p backoff]. The upper bound saturates at std::mt19937::max()
+    /// instead of wrapping around for very large \p backoff.
+    /// \returns The drawn value, or 0 if anything throws.
+    [[nodiscard]] static auto Jitter(unsigned int backoff) noexcept ->
+        typename dist_type::result_type {
+        using value_type = typename dist_type::result_type;
+        try {
+            // Inside the try block: creating the instance seeds the generator
+            // from std::random_device, which is allowed to throw.
+            auto& inst = Instance();
+            constexpr value_type kLimit = std::mt19937::max();
+            value_type const base = backoff;
+            value_type const upper = base > kLimit / 3 ? kLimit : base * 3;
+            dist_type dist{0, upper};
+            // the generator is shared, serialize access to it
+            std::lock_guard lock{inst.mutex_};
+            return dist(inst.rng_);
+        } catch (...) {
+            return 0;
+        }
+    }
+
+    /// \brief The waiting time is exponentially increased at each \p attempt
+    /// until it reaches max_backoff_seconds: the first attempt uses the
+    /// initial back off, every further attempt doubles it. The result never
+    /// exceeds the maximum before the jitter is added, not even when the
+    /// initial back off was configured larger than the maximum.
+    ///
+    /// To avoid overloading the reachable resources, a jitter (aka, random
+    /// value) is added to distribute the workload.
+    [[nodiscard]] static auto GetSleepTimeSeconds(unsigned int attempt) noexcept
+        -> unsigned int {
+        auto const max_backoff = Retry::GetMaxBackoffSeconds();
+        auto backoff = Retry::GetInitialBackoffSeconds();
+        if (backoff > max_backoff) {
+            backoff = max_backoff;
+        }
+        // on the first attempt, we don't double the backoff time
+        for (auto x = 1U; x < attempt and backoff < max_backoff; ++x) {
+            // Compare against the remaining headroom instead of doubling
+            // first, so that the doubling itself can never wrap around.
+            backoff = backoff > max_backoff - backoff ? max_backoff
+                                                      : backoff * 2U;
+        }
+        auto const jitter = Retry::Jitter(backoff);
+        // saturate the sum as well
+        constexpr unsigned int kMaxSleep = ~0U;
+        if (jitter > kMaxSleep - backoff) {
+            return kMaxSleep;
+        }
+        return backoff + static_cast<unsigned int>(jitter);
+    }
+
+  private:
+    unsigned int initial_backoff_seconds_{kDefaultInitialBackoffSeconds};
+    unsigned int max_backoff_seconds_{kDefaultMaxBackoffSeconds};
+    unsigned int attempts_{kDefaultAttempts};
+    // guards rng_
+    std::mutex mutex_;
+    // seeds rng_, hence declared before it
+    std::random_device dev_;
+    std::mt19937 rng_{dev_()};
+};