summaryrefslogtreecommitdiff
path: root/src/buildtool/graph_traverser/graph_traverser.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/buildtool/graph_traverser/graph_traverser.hpp')
-rw-r--r--src/buildtool/graph_traverser/graph_traverser.hpp569
1 files changed, 569 insertions, 0 deletions
diff --git a/src/buildtool/graph_traverser/graph_traverser.hpp b/src/buildtool/graph_traverser/graph_traverser.hpp
new file mode 100644
index 00000000..c92fbbf8
--- /dev/null
+++ b/src/buildtool/graph_traverser/graph_traverser.hpp
@@ -0,0 +1,569 @@
+#ifndef INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP
+#define INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP
+
+#include <unistd.h>
+
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <exception>
+#include <filesystem>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <map>
+#include <memory>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "fmt/core.h"
+#include "gsl-lite/gsl-lite.hpp"
+#include "src/buildtool/common/cli.hpp"
+#include "src/buildtool/common/statistics.hpp"
+#include "src/buildtool/common/tree.hpp"
+#include "src/buildtool/execution_api/bazel_msg/bazel_blob_container.hpp"
+#include "src/buildtool/execution_api/local/local_api.hpp"
+#include "src/buildtool/execution_api/remote/bazel/bazel_api.hpp"
+#include "src/buildtool/execution_api/remote/config.hpp"
+#include "src/buildtool/execution_engine/dag/dag.hpp"
+#include "src/buildtool/execution_engine/executor/executor.hpp"
+#include "src/buildtool/execution_engine/traverser/traverser.hpp"
+#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/file_system/jsonfs.hpp"
+#include "src/buildtool/file_system/object_type.hpp"
+#include "src/buildtool/logging/log_sink_cmdline.hpp"
+#include "src/buildtool/logging/log_sink_file.hpp"
+#include "src/buildtool/logging/logger.hpp"
+#include "src/utils/cpp/json.hpp"
+
+class GraphTraverser {
+ public:
+ struct CommandLineArguments {  // options consumed by GraphTraverser
+ std::size_t jobs;  // forwarded to Traverser as its jobs argument
+ EndpointArguments endpoint;  // read by CreateExecutionApi() to pick remote vs. local API
+ BuildArguments build;  // platform properties and output printing/dumping options
+ std::optional<StageArguments> stage;  // if unset, BuildAndStage() does not retrieve outputs to disk
+ std::optional<RebuildArguments> rebuild;  // if set, TraverseRebuild() is used instead of Traverse()
+ };
+
+ explicit GraphTraverser(CommandLineArguments clargs)  // takes the arguments by value and moves them into clargs_
+ : clargs_{std::move(clargs)},
+ api_{CreateExecutionApi(clargs_.endpoint)} {}  // reads the member clargs_, not the moved-from parameter; relies on clargs_ being declared before api_
+
+ /// \brief Parses actions and blobs into graph, traverses it and retrieves
+ /// outputs specified by command line arguments.
+ /// \param[in] artifact_descriptions output artifacts, keyed by logical path
+ /// \param[in] runfile_descriptions  output runfiles, keyed by logical path
+ /// \param[in] action_descriptions   actions to add to the dependency graph
+ /// \param[in] blobs                 blob contents to upload before traversal
+ /// \param[in] trees                 trees whose actions are added to the graph
+ /// \returns std::nullopt if building, collecting the object infos or
+ /// retrieving the outputs failed. Otherwise a pair of paths and a flag: the
+ /// paths are the retrieved output paths if staging was requested and the
+ /// logical paths otherwise; the flag is true if any of the collected object
+ /// infos is marked as failed.
+ [[nodiscard]] auto BuildAndStage(
+     std::map<std::string, ArtifactDescription> const& artifact_descriptions,
+     std::map<std::string, ArtifactDescription> const& runfile_descriptions,
+     std::vector<ActionDescription> const& action_descriptions,
+     std::vector<std::string> const& blobs,
+     std::vector<Tree> const& trees) const
+     -> std::optional<std::pair<std::vector<std::filesystem::path>, bool>> {
+     DependencyGraph graph;  // must outlive artifact_nodes
+     auto artifacts = BuildArtifacts(&graph,
+                                     artifact_descriptions,
+                                     runfile_descriptions,
+                                     action_descriptions,
+                                     trees,
+                                     blobs);
+     if (not artifacts) {
+         return std::nullopt;
+     }
+     // Bind by reference; binding by value would copy both vectors.
+     auto const& [rel_paths, artifact_nodes] = *artifacts;
+
+     auto object_infos = CollectObjectInfos(artifact_nodes);
+     if (not object_infos) {
+         return std::nullopt;
+     }
+     bool failed_artifacts = false;
+     for (auto const& obj_info : *object_infos) {
+         failed_artifacts = failed_artifacts || obj_info.failed;
+     }
+
+     if (not clargs_.stage) {
+         PrintOutputs("Artifacts built, logical paths are:",
+                      rel_paths,
+                      artifact_nodes,
+                      runfile_descriptions);
+         MaybePrintToStdout(*artifacts);
+         // rel_paths aliases artifacts->first and is not used past this point.
+         return std::make_pair(std::move(artifacts->first),
+                               failed_artifacts);
+     }
+
+     auto output_paths = RetrieveOutputs(rel_paths, *object_infos);
+     if (not output_paths) {
+         return std::nullopt;
+     }
+     PrintOutputs("Artifacts can be found in:",
+                  *output_paths,
+                  artifact_nodes,
+                  runfile_descriptions);
+
+     MaybePrintToStdout(*artifacts);
+
+     // The local vector is not needed anymore, so hand it over without a copy.
+     return std::make_pair(std::move(*output_paths), failed_artifacts);
+ }
+
+ /// \brief Parses graph description into graph, traverses it and retrieves
+ /// outputs specified by command line arguments.
+ /// \param[in] graph_description path of the graph description file
+ /// \param[in] artifacts json object mapping relative output paths to
+ /// artifact descriptions
+ /// \returns std::nullopt if the graph description, an action, a tree or an
+ /// artifact description can not be parsed; otherwise the result of building
+ /// and staging the parsed descriptions (without any runfiles).
+ [[nodiscard]] auto BuildAndStage(
+     std::filesystem::path const& graph_description,
+     nlohmann::json const& artifacts) const
+     -> std::optional<std::pair<std::vector<std::filesystem::path>, bool>> {
+     // Read blobs to upload and actions from graph description file
+     auto const desc = ReadGraphDescription(graph_description);
+     if (not desc) {
+         return std::nullopt;
+     }
+     // Bind by reference; binding by value would deep-copy all three values.
+     auto const& [blobs, tree_descs, actions] = *desc;
+
+     std::vector<ActionDescription> action_descriptions{};
+     action_descriptions.reserve(actions.size());
+     for (auto const& [id, description] : actions.items()) {
+         auto action = ActionDescription::FromJson(id, description);
+         if (not action) {
+             return std::nullopt;  // Error already logged
+         }
+         action_descriptions.emplace_back(std::move(*action));
+     }
+
+     std::vector<Tree> trees{};
+     trees.reserve(tree_descs.size());
+     for (auto const& [id, description] : tree_descs.items()) {
+         auto tree = Tree::FromJson(id, description);
+         if (not tree) {
+             return std::nullopt;
+         }
+         trees.emplace_back(std::move(*tree));
+     }
+
+     std::map<std::string, ArtifactDescription> artifact_descriptions{};
+     for (auto const& [rel_path, description] : artifacts.items()) {
+         auto artifact = ArtifactDescription::FromJson(description);
+         if (not artifact) {
+             return std::nullopt;  // Error already logged
+         }
+         artifact_descriptions.emplace(rel_path, std::move(*artifact));
+     }
+
+     // No runfiles are known when building from a graph description.
+     return BuildAndStage(
+         artifact_descriptions, {}, action_descriptions, blobs, trees);
+ }
+
+ /// \brief Non-owning access to the execution API held by this traverser.
+ [[nodiscard]] auto ExecutionApi() const -> gsl::not_null<IExecutionApi*> {
+     auto* const raw_api = &(*api_);
+     return raw_api;
+ }
+
+ private:
+ CommandLineArguments const clargs_;  // immutable copy of the arguments given to the constructor
+ gsl::not_null<IExecutionApi::Ptr> const api_;  // created from clargs_.endpoint in the constructor, so it must stay declared after clargs_
+
+ /// \brief Reads the graph description file as json and extracts the values
+ /// stored under the keys "blobs", "trees" and "actions". If the file can not
+ /// be read as json, or any of the three values can not be retrieved with
+ /// the appropriate type, an error is logged and std::nullopt is returned.
+ /// All three values are extracted before checking for failure, so that
+ /// every problem is reported.
+ /// \param[in] graph_description path of the graph description file
+ /// \returns A tuple containing the blobs to upload (as a vector of strings),
+ /// the trees as a json object and the actions as a json object.
+ [[nodiscard]] static auto ReadGraphDescription(
+     std::filesystem::path const& graph_description)
+     -> std::optional<std::tuple<std::vector<std::string>,
+                                 nlohmann::json,
+                                 nlohmann::json>> {
+     auto const graph_description_opt = Json::ReadFile(graph_description);
+     if (not graph_description_opt.has_value()) {
+         Logger::Log(LogLevel::Error,
+                     "parsing graph from {}",
+                     graph_description.string());
+         return std::nullopt;
+     }
+     auto blobs_opt = ExtractValueAs<std::vector<std::string>>(
+         *graph_description_opt, "blobs", [](std::string const& s) {
+             Logger::Log(LogLevel::Error,
+                         "{}\ncan not retrieve value for \"blobs\" from "
+                         "graph description.",
+                         s);
+         });
+     auto trees_opt = ExtractValueAs<nlohmann::json>(
+         *graph_description_opt, "trees", [](std::string const& s) {
+             Logger::Log(LogLevel::Error,
+                         "{}\ncan not retrieve value for \"trees\" from "
+                         "graph description.",
+                         s);
+         });
+     auto actions_opt = ExtractValueAs<nlohmann::json>(
+         *graph_description_opt, "actions", [](std::string const& s) {
+             Logger::Log(LogLevel::Error,
+                         "{}\ncan not retrieve value for \"actions\" from "
+                         "graph description.",
+                         s);
+         });
+     if (not blobs_opt or not trees_opt or not actions_opt) {
+         return std::nullopt;
+     }
+     // The blobs keep their extracted type; wrapping them in json again would
+     // only force the caller to convert them back to strings.
+     return std::make_tuple(std::move(*blobs_opt),
+                            std::move(*trees_opt),
+                            std::move(*actions_opt));
+ }
+
+ /// \brief Creates the execution API selected by the endpoint arguments: a
+ /// remote (Bazel) API if a remote execution address is given, a local API
+ /// otherwise. If the address can not be parsed, an error is logged and the
+ /// process exits with EXIT_FAILURE.
+ [[nodiscard]] static auto CreateExecutionApi(
+     EndpointArguments const& clargs) -> gsl::not_null<IExecutionApi::Ptr> {
+     if (not clargs.remote_execution_address) {
+         return std::make_unique<LocalApi>();
+     }
+
+     auto const& address = *clargs.remote_execution_address;
+     RemoteExecutionConfig remote{};
+     if (not remote.SetAddress(address)) {
+         Logger::Log(LogLevel::Error,
+                     "parsing remote execution address '{}' failed.",
+                     address);
+         std::exit(EXIT_FAILURE);
+     }
+
+     ExecutionConfiguration config;
+     config.skip_cache_lookup = false;
+
+     return std::make_unique<BazelApi>(
+         "remote-execution", remote.Host(), remote.Port(), config);
+ }
+
+ /// \brief Collects the given blobs into a container and asks the execution
+ /// API to upload them to CAS.
+ /// \param[in] blobs blobs to be uploaded
+ /// \returns false if a blob could not be added to the container (the error
+ /// is logged), otherwise the result of the upload.
+ [[nodiscard]] auto UploadBlobs(
+     std::vector<std::string> const& blobs) const noexcept -> bool {
+     BlobContainer container;
+     for (auto const& blob : blobs) {
+         auto digest = ArtifactDigest{ComputeHash(blob), blob.size()};
+         Logger::Log(LogLevel::Trace, [&]() {
+             return fmt::format(
+                 "Uploaded blob {}, its digest has id {} and size {}.",
+                 nlohmann::json(blob).dump(),
+                 digest.hash(),
+                 digest.size());
+         });
+         try {
+             container.Emplace(BazelBlob{std::move(digest), blob});
+         } catch (std::exception const& ex) {
+             // The placeholder is required; without it the exception text
+             // would be dropped from the message.
+             Logger::Log(LogLevel::Error,
+                         "failed to create blob with: {}",
+                         ex.what());
+             return false;
+         }
+     }
+     return api_->Upload(container);
+ }
+
+ /// \brief Adds the artifacts to be retrieved to the graph
+ /// \param[in] g dependency graph
+ /// \param[in] artifacts output artifact map
+ /// \param[in] runfiles output runfile map
+ /// \returns pair of vectors where the first vector contains the relative
+ /// paths (the keys of both maps, artifacts first, then runfiles) and the
+ /// second one contains, in the same order, the ids of the artifacts to be
+ /// retrieved. The optional is kept for interface compatibility; this
+ /// function always returns a value.
+ [[nodiscard]] static auto AddArtifactsToRetrieve(
+     gsl::not_null<DependencyGraph*> const& g,
+     std::map<std::string, ArtifactDescription> const& artifacts,
+     std::map<std::string, ArtifactDescription> const& runfiles)
+     -> std::optional<std::pair<std::vector<std::filesystem::path>,
+                                std::vector<ArtifactIdentifier>>> {
+     std::vector<std::filesystem::path> rel_paths;
+     std::vector<ArtifactIdentifier> ids;
+     auto const total_size = artifacts.size() + runfiles.size();
+     rel_paths.reserve(total_size);
+     ids.reserve(total_size);
+     auto const add_descriptions =
+         [&g, &rel_paths, &ids](
+             std::map<std::string, ArtifactDescription> const& descriptions) {
+             for (auto const& [rel_path, artifact] : descriptions) {
+                 rel_paths.emplace_back(rel_path);
+                 ids.emplace_back(g->AddArtifact(artifact));
+             }
+         };
+     add_descriptions(artifacts);
+     add_descriptions(runfiles);
+     return std::make_pair(std::move(rel_paths), std::move(ids));
+ }
+
+ /// \brief Traverses the graph for the given artifact ids, using an executor
+ /// backed by the execution API and the configured platform properties.
+ /// \returns the result reported by the traverser.
+ [[nodiscard]] auto Traverse(
+     DependencyGraph const& g,
+     std::vector<ArtifactIdentifier> const& artifact_ids) const -> bool {
+     Executor executor{&(*api_), clargs_.build.platform_properties};
+     Traverser traverser{executor, g, clargs_.jobs};
+     return traverser.Traverse({artifact_ids.begin(), artifact_ids.end()});
+ }
+
+ /// \brief Traverses the graph with a Rebuilder that is given both the
+ /// regular execution API and a second API for the cache endpoint. After a
+ /// successful traversal the flaky actions are dumped to a file, if one was
+ /// requested.
+ /// \pre clargs_.rebuild is set.
+ /// \returns the result reported by the traverser; a failure to write the
+ /// dump file is logged but does not change the result.
+ [[nodiscard]] auto TraverseRebuild(
+     DependencyGraph const& g,
+     std::vector<ArtifactIdentifier> const& artifact_ids) const -> bool {
+     // create second configuration for cache endpoint
+     auto cache_args = clargs_.endpoint;
+     if (not clargs_.rebuild->cache_endpoint.value_or("").empty()) {
+         cache_args.remote_execution_address =
+             *clargs_.rebuild->cache_endpoint == "local"
+                 ? std::nullopt                      // disable
+                 : clargs_.rebuild->cache_endpoint;  // set endpoint
+     }
+
+     // setup rebuilder with api for cache endpoint
+     auto api_cached = CreateExecutionApi(cache_args);
+     Rebuilder executor{
+         &(*api_), &(*api_cached), clargs_.build.platform_properties};
+     bool success{false};
+     {
+         // The traverser is scoped so that it is destroyed before the
+         // executor is queried below.
+         Traverser t{executor, g, clargs_.jobs};
+         success =
+             t.Traverse({std::begin(artifact_ids), std::end(artifact_ids)});
+     }
+
+     if (success and clargs_.rebuild->dump_flaky) {
+         std::ofstream file{*clargs_.rebuild->dump_flaky};
+         if (file) {
+             file << executor.DumpFlakyActions().dump(2);
+         }
+         else {
+             // Report the problem instead of silently dropping the dump.
+             Logger::Log(
+                 LogLevel::Warning,
+                 "Could not open {} for dumping flaky actions",
+                 std::filesystem::path{*clargs_.rebuild->dump_flaky}.string());
+         }
+     }
+     return success;
+ }
+
+ /// \brief Retrieves nodes corresponding to artifacts with ids in artifacts.
+ /// \param[in] g dependency graph to look the ids up in
+ /// \param[in] artifact_ids ids of the artifacts to look up
+ /// \returns the nodes in the order of the given ids, or std::nullopt (after
+ /// logging an error) if any of the identifiers doesn't correspond to a node
+ /// inside the graph.
+ [[nodiscard]] static auto GetArtifactNodes(
+     DependencyGraph const& g,
+     std::vector<ArtifactIdentifier> const& artifact_ids) noexcept
+     -> std::optional<std::vector<DependencyGraph::ArtifactNode const*>> {
+     std::vector<DependencyGraph::ArtifactNode const*> nodes{};
+     nodes.reserve(artifact_ids.size());  // one node per requested id
+
+     for (auto const& art_id : artifact_ids) {
+         auto const* node = g.ArtifactNodeWithId(art_id);
+         if (node == nullptr) {
+             Logger::Log(
+                 LogLevel::Error, "Artifact {} not found in graph.", art_id);
+             return std::nullopt;
+         }
+         nodes.push_back(node);
+     }
+     return nodes;
+ }
+
+ /// \brief Logs a summary of the statistics counters at info level. In
+ /// rebuild mode the summary reports compared, flaky and missing actions,
+ /// otherwise processed actions and cache hits.
+ void LogStatistics() const noexcept {
+     auto const& stats = Statistics::Instance();
+     if (not clargs_.rebuild) {
+         Logger::Log(LogLevel::Info,
+                     "Processed {} actions, {} cache hits.",
+                     stats.ActionsQueuedCounter(),
+                     stats.ActionsCachedCounter());
+         return;
+     }
+
+     std::stringstream summary{};
+     summary << stats.RebuiltActionComparedCounter()
+             << " actions compared with cache";
+     if (stats.ActionsFlakyCounter() > 0) {
+         summary << ", " << stats.ActionsFlakyCounter()
+                 << " flaky actions found";
+         summary << " (" << stats.ActionsFlakyTaintedCounter()
+                 << " of which tainted)";
+     }
+     if (stats.RebuiltActionMissingCounter() > 0) {
+         summary << ", no cache entry found for "
+                 << stats.RebuiltActionMissingCounter() << " actions";
+     }
+     summary << ".";
+     Logger::Log(LogLevel::Info, summary.str());
+ }
+
+ /// \brief Uploads the blobs, registers the requested artifacts and runfiles
+ /// in the graph, adds the actions and the tree actions, traverses the graph
+ /// (in rebuild mode if requested) and looks up the requested nodes.
+ /// \returns std::nullopt if any of these steps fails, otherwise the pair of
+ /// relative output paths and the corresponding artifact nodes.
+ [[nodiscard]] auto BuildArtifacts(
+     gsl::not_null<DependencyGraph*> const& graph,
+     std::map<std::string, ArtifactDescription> const& artifacts,
+     std::map<std::string, ArtifactDescription> const& runfiles,
+     std::vector<ActionDescription> const& actions,
+     std::vector<Tree> const& trees,
+     std::vector<std::string> const& blobs) const
+     -> std::optional<
+         std::pair<std::vector<std::filesystem::path>,
+                   std::vector<DependencyGraph::ArtifactNode const*>>> {
+     if (not UploadBlobs(blobs)) {
+         return std::nullopt;
+     }
+
+     auto requested = AddArtifactsToRetrieve(graph, artifacts, runfiles);
+     if (not requested.has_value()) {
+         return std::nullopt;
+     }
+     auto& [output_paths, artifact_ids] = *requested;
+
+     // Every tree contributes one action to the graph.
+     std::vector<ActionDescription> tree_actions{};
+     tree_actions.reserve(trees.size());
+     for (auto const& tree : trees) {
+         tree_actions.emplace_back(tree.Action());
+     }
+
+     // Tree actions are only added if adding the regular actions succeeded.
+     bool const graph_built =
+         graph->Add(actions) and graph->Add(tree_actions);
+     if (not graph_built) {
+         Logger::Log(LogLevel::Error, [&actions]() {
+             auto dumped = nlohmann::json::array();
+             for (auto const& action : actions) {
+                 dumped.push_back(action.ToJson());
+             }
+             return fmt::format(
+                 "could not build the dependency graph from the actions "
+                 "described in {}.",
+                 dumped.dump());
+         });
+         return std::nullopt;
+     }
+
+     bool const traversed = clargs_.rebuild
+                                ? TraverseRebuild(*graph, artifact_ids)
+                                : Traverse(*graph, artifact_ids);
+     if (not traversed) {
+         Logger::Log(LogLevel::Error, "traversing graph failed.");
+         return std::nullopt;
+     }
+
+     LogStatistics();
+
+     auto nodes = GetArtifactNodes(*graph, artifact_ids);
+     if (not nodes.has_value()) {
+         return std::nullopt;
+     }
+     return std::make_pair(std::move(output_paths), std::move(*nodes));
+ }
+
+ /// \brief Computes the output path of every relative path below the stage
+ /// output directory and removes files already present at those paths.
+ /// \pre clargs_.stage is set.
+ /// \returns the output paths in the order of rel_paths, or std::nullopt
+ /// (after logging an error) if an existing file could not be removed.
+ [[nodiscard]] auto PrepareOutputPaths(
+     std::vector<std::filesystem::path> const& rel_paths) const
+     -> std::optional<std::vector<std::filesystem::path>> {
+     std::vector<std::filesystem::path> staged_paths{};
+     staged_paths.reserve(rel_paths.size());
+     for (auto const& rel_path : rel_paths) {
+         auto staged = clargs_.stage->output_dir / rel_path;
+         if (FileSystemManager::IsFile(staged)) {
+             if (not FileSystemManager::RemoveFile(staged)) {
+                 Logger::Log(LogLevel::Error,
+                             "Could not clean output path {}",
+                             staged.string());
+                 return std::nullopt;
+             }
+         }
+         staged_paths.emplace_back(std::move(staged));
+     }
+     return staged_paths;
+ }
+
+ /// \brief Collects the object info of every given artifact node.
+ /// \returns the object infos in the order of the nodes, or std::nullopt
+ /// (after logging an error) as soon as a node has no object info.
+ [[nodiscard]] static auto CollectObjectInfos(
+     std::vector<DependencyGraph::ArtifactNode const*> const& artifact_nodes)
+     -> std::optional<std::vector<Artifact::ObjectInfo>> {
+     std::vector<Artifact::ObjectInfo> collected;
+     collected.reserve(artifact_nodes.size());
+     for (auto const* node : artifact_nodes) {
+         auto const& info = node->Content().Info();
+         if (not info) {
+             Logger::Log(LogLevel::Error,
+                         "artifact {} could not be retrieved, it can not be "
+                         "found in CAS.",
+                         node->Content().Id());
+             return std::nullopt;
+         }
+         collected.push_back(*info);
+     }
+     return collected;
+ }
+
+ /// \brief Creates the stage output directory, prepares the output paths and
+ /// asks the execution API to retrieve the given objects to those paths.
+ /// \pre clargs_.stage is set.
+ /// \returns the output paths, or std::nullopt if the directory could not be
+ /// created, the paths could not be prepared or the retrieval failed.
+ [[nodiscard]] auto RetrieveOutputs(
+     std::vector<std::filesystem::path> const& rel_paths,
+     std::vector<Artifact::ObjectInfo> const& object_infos) const
+     -> std::optional<std::vector<std::filesystem::path>> {
+     // Create output directory
+     bool const dir_ready =
+         FileSystemManager::CreateDirectory(clargs_.stage->output_dir);
+     if (not dir_ready) {
+         return std::nullopt;  // Message logged in the file system manager
+     }
+
+     auto output_paths = PrepareOutputPaths(rel_paths);
+
+     // Retrieval is only attempted if the paths could be prepared.
+     bool const retrieved =
+         output_paths.has_value() and
+         api_->RetrieveToPaths(object_infos, *output_paths);
+     if (not retrieved) {
+         Logger::Log(LogLevel::Error, "Could not retrieve outputs.");
+         return std::nullopt;
+     }
+
+     return std::move(*output_paths);
+ }
+
+ /// \brief Logs the message followed by one line (path and object info) per
+ /// reported artifact at info level, logs the ids of all artifacts at debug
+ /// level and, if requested, dumps the reported artifacts as json to stdout
+ /// ("-") or to a file.
+ /// \param[in] message headline; the artifact lines are appended to it
+ /// \param[in] paths paths to report; entry i belongs to artifact_nodes[i]
+ /// \param[in] artifact_nodes graph nodes of the artifacts
+ /// \param[in] runfiles runfiles; they are skipped unless show_runfiles is set
+ void PrintOutputs(
+     std::string message,
+     std::vector<std::filesystem::path> const& paths,
+     std::vector<DependencyGraph::ArtifactNode const*> const& artifact_nodes,
+     std::map<std::string, ArtifactDescription> const& runfiles) const {
+     std::string msg_dbg{"Artifact ids:"};
+     nlohmann::json json{};
+     for (std::size_t pos = 0; pos < paths.size(); ++pos) {
+         auto path = paths[pos].string();
+         auto id = IdentifierToString(artifact_nodes[pos]->Content().Id());
+         // When staging, the path is made relative to the output directory
+         // before it is looked up in the runfiles.
+         if (clargs_.build.show_runfiles or
+             not runfiles.contains(clargs_.stage
+                                       ? std::filesystem::proximate(
+                                             path, clargs_.stage->output_dir)
+                                             .string()
+                                       : path)) {
+             auto info = artifact_nodes[pos]->Content().Info();
+             if (info) {
+                 message += fmt::format("\n  {} {}", path, info->ToString());
+                 if (clargs_.build.dump_artifacts) {
+                     json[path] = info->ToJson();
+                 }
+             }
+             else {
+                 Logger::Log(
+                     LogLevel::Error, "Missing info for artifact {}.", id);
+             }
+         }
+         msg_dbg += fmt::format("\n  {}: {}", path, id);
+     }
+
+     if (not clargs_.build.show_runfiles and not runfiles.empty()) {
+         message += fmt::format("\n({} runfiles omitted.)", runfiles.size());
+     }
+
+     Logger::Log(LogLevel::Info, "{}", message);
+     Logger::Log(LogLevel::Debug, "{}", msg_dbg);
+
+     if (clargs_.build.dump_artifacts) {
+         if (*clargs_.build.dump_artifacts == "-") {
+             std::cout << std::setw(2) << json << std::endl;
+         }
+         else {
+             std::ofstream os(*clargs_.build.dump_artifacts);
+             if (os) {
+                 os << std::setw(2) << json << std::endl;
+             }
+             else {
+                 // Report the problem instead of silently dropping the dump.
+                 Logger::Log(
+                     LogLevel::Error,
+                     "Could not open {} for dumping artifacts",
+                     std::filesystem::path{*clargs_.build.dump_artifacts}
+                         .string());
+             }
+         }
+     }
+ }
+
+ /// \brief If printing to stdout was requested, looks for the artifact whose
+ /// logical path equals the requested one and asks the execution API to
+ /// write its content to a duplicate of the stdout file descriptor. Only the
+ /// first matching path is considered. Failures are logged; a warning is
+ /// logged if no path matches.
+ /// \param[in] artifacts pair of logical paths and artifact nodes, where
+ /// entry i of the first vector belongs to entry i of the second one. Taken
+ /// by reference to avoid copying both vectors.
+ void MaybePrintToStdout(
+     std::pair<std::vector<std::filesystem::path>,
+               std::vector<DependencyGraph::ArtifactNode const*>> const&
+         artifacts) const {
+     if (not clargs_.build.print_to_stdout) {
+         return;
+     }
+     auto const& [paths, nodes] = artifacts;
+     for (std::size_t i = 0; i < paths.size(); ++i) {
+         if (paths[i] != *(clargs_.build.print_to_stdout)) {
+             continue;
+         }
+         auto info = nodes[i]->Content().Info();
+         if (not info) {
+             Logger::Log(LogLevel::Error,
+                         "Failed to obtain object information for {}",
+                         *(clargs_.build.print_to_stdout));
+             return;
+         }
+         // The API is handed a duplicate, so stdout itself is not passed on.
+         int const fd = dup(fileno(stdout));
+         if (fd < 0) {
+             Logger::Log(LogLevel::Error,
+                         "Failed to duplicate stdout to print {}",
+                         *(clargs_.build.print_to_stdout));
+             return;
+         }
+         if (not api_->RetrieveToFds({*info}, {fd})) {
+             Logger::Log(LogLevel::Error,
+                         "Failed to retrieve {}",
+                         *(clargs_.build.print_to_stdout));
+         }
+         return;
+     }
+     Logger::Log(LogLevel::Warning,
+                 "{} not a logical path of the specified target",
+                 *(clargs_.build.print_to_stdout));
+ }
+};
+
+#endif // INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP