Diffstat (limited to 'src/buildtool/graph_traverser/graph_traverser.hpp')
-rw-r--r-- | src/buildtool/graph_traverser/graph_traverser.hpp | 569
1 file changed, 569 insertions, 0 deletions
diff --git a/src/buildtool/graph_traverser/graph_traverser.hpp b/src/buildtool/graph_traverser/graph_traverser.hpp
new file mode 100644
index 00000000..c92fbbf8
--- /dev/null
+++ b/src/buildtool/graph_traverser/graph_traverser.hpp
@@ -0,0 +1,569 @@
+#ifndef INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP
+#define INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP
+
+#include <cstdlib>
+#include <filesystem>
+#include <map>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+
+#include "fmt/core.h"
+#include "gsl-lite/gsl-lite.hpp"
+#include "src/buildtool/common/cli.hpp"
+#include "src/buildtool/common/statistics.hpp"
+#include "src/buildtool/common/tree.hpp"
+#include "src/buildtool/execution_api/bazel_msg/bazel_blob_container.hpp"
+#include "src/buildtool/execution_api/local/local_api.hpp"
+#include "src/buildtool/execution_api/remote/bazel/bazel_api.hpp"
+#include "src/buildtool/execution_api/remote/config.hpp"
+#include "src/buildtool/execution_engine/dag/dag.hpp"
+#include "src/buildtool/execution_engine/executor/executor.hpp"
+#include "src/buildtool/execution_engine/traverser/traverser.hpp"
+#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/file_system/jsonfs.hpp"
+#include "src/buildtool/file_system/object_type.hpp"
+#include "src/buildtool/logging/log_sink_cmdline.hpp"
+#include "src/buildtool/logging/log_sink_file.hpp"
+#include "src/buildtool/logging/logger.hpp"
+#include "src/utils/cpp/json.hpp"
+
+class GraphTraverser {
+  public:
+    struct CommandLineArguments {
+        std::size_t jobs;
+        EndpointArguments endpoint;
+        BuildArguments build;
+        std::optional<StageArguments> stage;
+        std::optional<RebuildArguments> rebuild;
+    };
+
+    explicit GraphTraverser(CommandLineArguments clargs)
+        : clargs_{std::move(clargs)},
+          api_{CreateExecutionApi(clargs_.endpoint)} {}
+
+    /// \brief Parses actions and blobs into graph, traverses it and retrieves
+    /// outputs specified by command line arguments
+    [[nodiscard]] auto BuildAndStage(
+        std::map<std::string, ArtifactDescription> const& artifact_descriptions,
+        std::map<std::string, ArtifactDescription> const& runfile_descriptions,
+        std::vector<ActionDescription> const& action_descriptions,
+        std::vector<std::string> const& blobs,
+        std::vector<Tree> const& trees) const
+        -> std::optional<std::pair<std::vector<std::filesystem::path>, bool>> {
+        DependencyGraph graph;  // must outlive artifact_nodes
+        auto artifacts = BuildArtifacts(&graph,
+                                        artifact_descriptions,
+                                        runfile_descriptions,
+                                        action_descriptions,
+                                        trees,
+                                        blobs);
+        if (not artifacts) {
+            return std::nullopt;
+        }
+        auto const [rel_paths, artifact_nodes] = *artifacts;
+
+        auto const object_infos = CollectObjectInfos(artifact_nodes);
+        if (not object_infos) {
+            return std::nullopt;
+        }
+        bool failed_artifacts = false;
+        for (auto const& obj_info : *object_infos) {
+            failed_artifacts = failed_artifacts || obj_info.failed;
+        }
+
+        if (not clargs_.stage) {
+            PrintOutputs("Artifacts built, logical paths are:",
+                         rel_paths,
+                         artifact_nodes,
+                         runfile_descriptions);
+            MaybePrintToStdout(*artifacts);
+            return std::make_pair(std::move(artifacts->first),
+                                  failed_artifacts);
+        }
+
+        auto output_paths = RetrieveOutputs(rel_paths, *object_infos);
+        if (not output_paths) {
+            return std::nullopt;
+        }
+        PrintOutputs("Artifacts can be found in:",
+                     *output_paths,
+                     artifact_nodes,
+                     runfile_descriptions);
+
+        MaybePrintToStdout(*artifacts);
+
+        return std::make_pair(*output_paths, failed_artifacts);
+    }
+
+    /// \brief Parses graph description into graph, traverses it and retrieves
+    /// outputs specified by command line arguments
+    [[nodiscard]] auto BuildAndStage(
+        std::filesystem::path const& graph_description,
+        nlohmann::json const& artifacts) const
+        -> std::optional<std::pair<std::vector<std::filesystem::path>, bool>> {
+        // Read blobs to upload and actions from graph description file
+        auto desc = ReadGraphDescription(graph_description);
+        if (not desc) {
+            return std::nullopt;
+        }
+        auto const [blobs, tree_descs, actions] = *desc;
+
+        std::vector<ActionDescription> action_descriptions{};
+        action_descriptions.reserve(actions.size());
+        for (auto const& [id, description] : actions.items()) {
+            auto action = ActionDescription::FromJson(id, description);
+            if (not action) {
+                return std::nullopt;  // Error already logged
+            }
+            action_descriptions.emplace_back(std::move(*action));
+        }
+
+        std::vector<Tree> trees{};
+        for (auto const& [id, description] : tree_descs.items()) {
+            auto tree = Tree::FromJson(id, description);
+            if (not tree) {
+                return std::nullopt;
+            }
+            trees.emplace_back(std::move(*tree));
+        }
+
+        std::map<std::string, ArtifactDescription> artifact_descriptions{};
+        for (auto const& [rel_path, description] : artifacts.items()) {
+            auto artifact = ArtifactDescription::FromJson(description);
+            if (not artifact) {
+                return std::nullopt;  // Error already logged
+            }
+            artifact_descriptions.emplace(rel_path, std::move(*artifact));
+        }
+
+        return BuildAndStage(
+            artifact_descriptions, {}, action_descriptions, blobs, trees);
+    }
+
+    [[nodiscard]] auto ExecutionApi() const -> gsl::not_null<IExecutionApi*> {
+        return &(*api_);
+    }
+
+  private:
+    CommandLineArguments const clargs_;
+    gsl::not_null<IExecutionApi::Ptr> const api_;
+
+    /// \brief Reads contents of graph description file as json object. In case
+    /// the description is missing "blobs" or "actions" key/value pairs or they
+    /// can't be retrieved with the appropriate types, execution is terminated
+    /// after logging error
+    /// \returns A pair containing the blobs to upload (as a vector of strings)
+    /// and the actions as a json object.
+    [[nodiscard]] static auto ReadGraphDescription(
+        std::filesystem::path const& graph_description)
+        -> std::optional<
+            std::tuple<nlohmann::json, nlohmann::json, nlohmann::json>> {
+        auto const graph_description_opt = Json::ReadFile(graph_description);
+        if (not graph_description_opt.has_value()) {
+            Logger::Log(LogLevel::Error,
+                        "parsing graph from {}",
+                        graph_description.string());
+            return std::nullopt;
+        }
+        auto blobs_opt = ExtractValueAs<std::vector<std::string>>(
+            *graph_description_opt, "blobs", [](std::string const& s) {
+                Logger::Log(LogLevel::Error,
+                            "{}\ncan not retrieve value for \"blobs\" from "
+                            "graph description.",
+                            s);
+            });
+        auto trees_opt = ExtractValueAs<nlohmann::json>(
+            *graph_description_opt, "trees", [](std::string const& s) {
+                Logger::Log(LogLevel::Error,
+                            "{}\ncan not retrieve value for \"trees\" from "
+                            "graph description.",
+                            s);
+            });
+        auto actions_opt = ExtractValueAs<nlohmann::json>(
+            *graph_description_opt, "actions", [](std::string const& s) {
+                Logger::Log(LogLevel::Error,
+                            "{}\ncan not retrieve value for \"actions\" from "
+                            "graph description.",
+                            s);
+            });
+        if (not blobs_opt or not trees_opt or not actions_opt) {
+            return std::nullopt;
+        }
+        return std::make_tuple(std::move(*blobs_opt),
+                               std::move(*trees_opt),
+                               std::move(*actions_opt));
+    }
+
+    [[nodiscard]] static auto CreateExecutionApi(
+        EndpointArguments const& clargs) -> gsl::not_null<IExecutionApi::Ptr> {
+        if (clargs.remote_execution_address) {
+            auto remote = RemoteExecutionConfig{};
+            if (not remote.SetAddress(*clargs.remote_execution_address)) {
+                Logger::Log(LogLevel::Error,
+                            "parsing remote execution address '{}' failed.",
+                            *clargs.remote_execution_address);
+                std::exit(EXIT_FAILURE);
+            }
+
+            ExecutionConfiguration config;
+            config.skip_cache_lookup = false;
+
+            return std::make_unique<BazelApi>(
+                "remote-execution", remote.Host(), remote.Port(), config);
+        }
+        return std::make_unique<LocalApi>();
+    }
+
+    /// \brief Requires for the executor to upload blobs to CAS. In the case any
+    /// of the uploads fails, execution is terminated
+    /// \param[in] blobs blobs to be uploaded
+    [[nodiscard]] auto UploadBlobs(
+        std::vector<std::string> const& blobs) const noexcept -> bool {
+        BlobContainer container;
+        for (auto const& blob : blobs) {
+            auto digest = ArtifactDigest{ComputeHash(blob), blob.size()};
+            Logger::Log(LogLevel::Trace, [&]() {
+                return fmt::format(
+                    "Uploaded blob {}, its digest has id {} and size {}.",
+                    nlohmann::json(blob).dump(),
+                    digest.hash(),
+                    digest.size());
+            });
+            try {
+                container.Emplace(BazelBlob{std::move(digest), blob});
+            } catch (std::exception const& ex) {
+                Logger::Log(
+                    LogLevel::Error, "failed to create blob with: ", ex.what());
+                return false;
+            }
+        }
+        return api_->Upload(container);
+    }
+
+    /// \brief Adds the artifacts to be retrieved to the graph
+    /// \param[in] g dependency graph
+    /// \param[in] artifacts output artifact map
+    /// \param[in] runfiles output runfile map
+    /// \returns pair of vectors where the first vector contains the absolute
+    /// paths to which the artifacts will be retrieved and the second one
+    /// contains the ids of the artifacts to be retrieved
+    [[nodiscard]] static auto AddArtifactsToRetrieve(
+        gsl::not_null<DependencyGraph*> const& g,
+        std::map<std::string, ArtifactDescription> const& artifacts,
+        std::map<std::string, ArtifactDescription> const& runfiles)
+        -> std::optional<std::pair<std::vector<std::filesystem::path>,
+                                   std::vector<ArtifactIdentifier>>> {
+        std::vector<std::filesystem::path> rel_paths;
+        std::vector<ArtifactIdentifier> ids;
+        auto total_size = artifacts.size() + runfiles.size();
+        rel_paths.reserve(total_size);
+        ids.reserve(total_size);
+        auto add_and_get_info =
+            [&g, &rel_paths, &ids](
+                std::map<std::string, ArtifactDescription> const& descriptions)
+            -> bool {
+            for (auto const& [rel_path, artifact] : descriptions) {
+                rel_paths.emplace_back(rel_path);
+                ids.emplace_back(g->AddArtifact(artifact));
+            }
+            return true;
+        };
+        if (add_and_get_info(artifacts) and add_and_get_info(runfiles)) {
+            return std::make_pair(std::move(rel_paths), std::move(ids));
+        }
+        return std::nullopt;
+    }
+
+    /// \brief Traverses the graph. In case any of the artifact ids
+    /// specified by the command line arguments is duplicated, execution is
+    /// terminated.
+    [[nodiscard]] auto Traverse(
+        DependencyGraph const& g,
+        std::vector<ArtifactIdentifier> const& artifact_ids) const -> bool {
+        Executor executor{&(*api_), clargs_.build.platform_properties};
+        Traverser t{executor, g, clargs_.jobs};
+        return t.Traverse({std::begin(artifact_ids), std::end(artifact_ids)});
+    }
+
+    [[nodiscard]] auto TraverseRebuild(
+        DependencyGraph const& g,
+        std::vector<ArtifactIdentifier> const& artifact_ids) const -> bool {
+        // create second configuration for cache endpoint
+        auto cache_args = clargs_.endpoint;
+        if (not clargs_.rebuild->cache_endpoint.value_or("").empty()) {
+            cache_args.remote_execution_address =
+                *clargs_.rebuild->cache_endpoint == "local"
+                    ? std::nullopt                      // disable
+                    : clargs_.rebuild->cache_endpoint;  // set endpoint
+        }
+
+        // setup rebuilder with api for cache endpoint
+        auto api_cached = CreateExecutionApi(cache_args);
+        Rebuilder executor{
+            &(*api_), &(*api_cached), clargs_.build.platform_properties};
+        bool success{false};
+        {
+            Traverser t{executor, g, clargs_.jobs};
+            success =
+                t.Traverse({std::begin(artifact_ids), std::end(artifact_ids)});
+        }
+
+        if (success and clargs_.rebuild->dump_flaky) {
+            std::ofstream file{*clargs_.rebuild->dump_flaky};
+            file << executor.DumpFlakyActions().dump(2);
+        }
+        return success;
+    }
+
+    /// \brief Retrieves nodes corresponding to artifacts with ids in artifacts.
+    /// In case any of the identifiers doesn't correspond to a node inside the
+    /// graph, we write out error message and stop execution with failure code
+    [[nodiscard]] static auto GetArtifactNodes(
+        DependencyGraph const& g,
+        std::vector<ArtifactIdentifier> const& artifact_ids) noexcept
+        -> std::optional<std::vector<DependencyGraph::ArtifactNode const*>> {
+        std::vector<DependencyGraph::ArtifactNode const*> nodes{};
+
+        for (auto const& art_id : artifact_ids) {
+            auto const* node = g.ArtifactNodeWithId(art_id);
+            if (node == nullptr) {
+                Logger::Log(
+                    LogLevel::Error, "Artifact {} not found in graph.", art_id);
+                return std::nullopt;
+            }
+            nodes.push_back(node);
+        }
+        return nodes;
+    }
+
+    void LogStatistics() const noexcept {
+        auto const& stats = Statistics::Instance();
+        if (clargs_.rebuild) {
+            std::stringstream ss{};
+            ss << stats.RebuiltActionComparedCounter()
+               << " actions compared with cache";
+            if (stats.ActionsFlakyCounter() > 0) {
+                ss << ", " << stats.ActionsFlakyCounter()
+                   << " flaky actions found";
+                ss << " (" << stats.ActionsFlakyTaintedCounter()
+                   << " of which tainted)";
+            }
+            if (stats.RebuiltActionMissingCounter() > 0) {
+                ss << ", no cache entry found for "
+                   << stats.RebuiltActionMissingCounter() << " actions";
+            }
+            ss << ".";
+            Logger::Log(LogLevel::Info, ss.str());
+        }
+        else {
+            Logger::Log(LogLevel::Info,
+                        "Processed {} actions, {} cache hits.",
+                        stats.ActionsQueuedCounter(),
+                        stats.ActionsCachedCounter());
+        }
+    }
+
+    [[nodiscard]] auto BuildArtifacts(
+        gsl::not_null<DependencyGraph*> const& graph,
+        std::map<std::string, ArtifactDescription> const& artifacts,
+        std::map<std::string, ArtifactDescription> const& runfiles,
+        std::vector<ActionDescription> const& actions,
+        std::vector<Tree> const& trees,
+        std::vector<std::string> const& blobs) const
+        -> std::optional<
+            std::pair<std::vector<std::filesystem::path>,
+                      std::vector<DependencyGraph::ArtifactNode const*>>> {
+        if (not UploadBlobs(blobs)) {
+            return std::nullopt;
+        }
+
+        auto artifact_infos =
+            AddArtifactsToRetrieve(graph, artifacts, runfiles);
+        if (not artifact_infos) {
+            return std::nullopt;
+        }
+        auto& [output_paths, artifact_ids] = *artifact_infos;
+
+        std::vector<ActionDescription> tree_actions{};
+        tree_actions.reserve(trees.size());
+        for (auto const& tree : trees) {
+            tree_actions.emplace_back(tree.Action());
+        }
+
+        if (not graph->Add(actions) or not graph->Add(tree_actions)) {
+            Logger::Log(LogLevel::Error, [&actions]() {
+                auto json = nlohmann::json::array();
+                for (auto const& desc : actions) {
+                    json.push_back(desc.ToJson());
+                }
+                return fmt::format(
+                    "could not build the dependency graph from the actions "
+                    "described in {}.",
+                    json.dump());
+            });
+            return std::nullopt;
+        }
+
+        if (clargs_.rebuild ? not TraverseRebuild(*graph, artifact_ids)
+                            : not Traverse(*graph, artifact_ids)) {
+            Logger::Log(LogLevel::Error, "traversing graph failed.");
+            return std::nullopt;
+        }
+
+        LogStatistics();
+
+        auto artifact_nodes = GetArtifactNodes(*graph, artifact_ids);
+        if (not artifact_nodes) {
+            return std::nullopt;
+        }
+        return std::make_pair(std::move(output_paths),
+                              std::move(*artifact_nodes));
+    }
+
+    [[nodiscard]] auto PrepareOutputPaths(
+        std::vector<std::filesystem::path> const& rel_paths) const
+        -> std::optional<std::vector<std::filesystem::path>> {
+        std::vector<std::filesystem::path> output_paths{};
+        output_paths.reserve(rel_paths.size());
+        for (auto const& rel_path : rel_paths) {
+            auto output_path = clargs_.stage->output_dir / rel_path;
+            if (FileSystemManager::IsFile(output_path) and
+                not FileSystemManager::RemoveFile(output_path)) {
+                Logger::Log(LogLevel::Error,
+                            "Could not clean output path {}",
+                            output_path.string());
+                return std::nullopt;
+            }
+            output_paths.emplace_back(std::move(output_path));
+        }
+        return output_paths;
+    }
+
+    [[nodiscard]] static auto CollectObjectInfos(
+        std::vector<DependencyGraph::ArtifactNode const*> const& artifact_nodes)
+        -> std::optional<std::vector<Artifact::ObjectInfo>> {
+        std::vector<Artifact::ObjectInfo> object_infos;
+        object_infos.reserve(artifact_nodes.size());
+        for (auto const* art_ptr : artifact_nodes) {
+            auto const& info = art_ptr->Content().Info();
+            if (info) {
+                object_infos.push_back(*info);
+            }
+            else {
+                Logger::Log(LogLevel::Error,
+                            "artifact {} could not be retrieved, it can not be "
+                            "found in CAS.",
+                            art_ptr->Content().Id());
+                return std::nullopt;
+            }
+        }
+        return object_infos;
+    }
+
+    /// \brief Asks execution API to copy output artifacts to paths specified by
+    /// command line arguments and writes location info. In case the executor
+    /// couldn't retrieve any of the outputs, execution is terminated.
+    [[nodiscard]] auto RetrieveOutputs(
+        std::vector<std::filesystem::path> const& rel_paths,
+        std::vector<Artifact::ObjectInfo> const& object_infos) const
+        -> std::optional<std::vector<std::filesystem::path>> {
+        // Create output directory
+        if (not FileSystemManager::CreateDirectory(clargs_.stage->output_dir)) {
+            return std::nullopt;  // Message logged in the file system manager
+        }
+
+        auto output_paths = PrepareOutputPaths(rel_paths);
+
+        if (not output_paths or
+            not api_->RetrieveToPaths(object_infos, *output_paths)) {
+            Logger::Log(LogLevel::Error, "Could not retrieve outputs.");
+            return std::nullopt;
+        }
+
+        return std::move(*output_paths);
+    }
+
+    void PrintOutputs(
+        std::string message,
+        std::vector<std::filesystem::path> const& paths,
+        std::vector<DependencyGraph::ArtifactNode const*> const& artifact_nodes,
+        std::map<std::string, ArtifactDescription> const& runfiles) const {
+        std::string msg_dbg{"Artifact ids:"};
+        nlohmann::json json{};
+        for (std::size_t pos = 0; pos < paths.size(); ++pos) {
+            auto path = paths[pos].string();
+            auto id = IdentifierToString(artifact_nodes[pos]->Content().Id());
+            if (clargs_.build.show_runfiles or
+                not runfiles.contains(clargs_.stage
+                                          ? std::filesystem::proximate(
+                                                path, clargs_.stage->output_dir)
+                                                .string()
+                                          : path)) {
+                auto info = artifact_nodes[pos]->Content().Info();
+                if (info) {
+                    message += fmt::format("\n {} {}", path, info->ToString());
+                    if (clargs_.build.dump_artifacts) {
+                        json[path] = info->ToJson();
+                    }
+                }
+                else {
+                    Logger::Log(
+                        LogLevel::Error, "Missing info for artifact {}.", id);
+                }
+            }
+            msg_dbg += fmt::format("\n {}: {}", path, id);
+        }
+
+        if (not clargs_.build.show_runfiles and !runfiles.empty()) {
+            message += fmt::format("\n({} runfiles omitted.)", runfiles.size());
+        }
+
+        Logger::Log(LogLevel::Info, "{}", message);
+        Logger::Log(LogLevel::Debug, "{}", msg_dbg);
+
+        if (clargs_.build.dump_artifacts) {
+            if (*clargs_.build.dump_artifacts == "-") {
+                std::cout << std::setw(2) << json << std::endl;
+            }
+            else {
+                std::ofstream os(*clargs_.build.dump_artifacts);
+                os << std::setw(2) << json << std::endl;
+            }
+        }
+    }
+
+    void MaybePrintToStdout(
+        std::pair<std::vector<std::filesystem::path>,
+                  std::vector<DependencyGraph::ArtifactNode const*>> artifacts)
+        const {
+        if (clargs_.build.print_to_stdout) {
+            for (size_t i = 0; i < artifacts.first.size(); i++) {
+                if (artifacts.first[i] == *(clargs_.build.print_to_stdout)) {
+                    auto info = artifacts.second[i]->Content().Info();
+                    if (info) {
+                        if (not api_->RetrieveToFds({*info},
+                                                    {dup(fileno(stdout))})) {
+                            Logger::Log(LogLevel::Error,
+                                        "Failed to retrieve {}",
+                                        *(clargs_.build.print_to_stdout));
+                        }
+                    }
+                    else {
+                        Logger::Log(
+                            LogLevel::Error,
+                            "Failed to obtain object information for {}",
+                            *(clargs_.build.print_to_stdout));
+                    }
+                    return;
+                }
+            }
+            Logger::Log(LogLevel::Warning,
+                        "{} not a logical path of the specified target",
+                        *(clargs_.build.print_to_stdout));
+        }
+    }
+};
+
+#endif  // INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP