Diffstat (limited to 'src/buildtool/graph_traverser/graph_traverser.hpp')
-rw-r--r-- | src/buildtool/graph_traverser/graph_traverser.hpp | 569
1 file changed, 569 insertions, 0 deletions
diff --git a/src/buildtool/graph_traverser/graph_traverser.hpp b/src/buildtool/graph_traverser/graph_traverser.hpp
new file mode 100644
index 00000000..c92fbbf8
--- /dev/null
+++ b/src/buildtool/graph_traverser/graph_traverser.hpp
@@ -0,0 +1,569 @@
+#ifndef INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP
+#define INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP
+
+#include <cstdlib>
+#include <filesystem>
+#include <map>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+
+#include "fmt/core.h"
+#include "gsl-lite/gsl-lite.hpp"
+#include "src/buildtool/common/cli.hpp"
+#include "src/buildtool/common/statistics.hpp"
+#include "src/buildtool/common/tree.hpp"
+#include "src/buildtool/execution_api/bazel_msg/bazel_blob_container.hpp"
+#include "src/buildtool/execution_api/local/local_api.hpp"
+#include "src/buildtool/execution_api/remote/bazel/bazel_api.hpp"
+#include "src/buildtool/execution_api/remote/config.hpp"
+#include "src/buildtool/execution_engine/dag/dag.hpp"
+#include "src/buildtool/execution_engine/executor/executor.hpp"
+#include "src/buildtool/execution_engine/traverser/traverser.hpp"
+#include "src/buildtool/file_system/file_system_manager.hpp"
+#include "src/buildtool/file_system/jsonfs.hpp"
+#include "src/buildtool/file_system/object_type.hpp"
+#include "src/buildtool/logging/log_sink_cmdline.hpp"
+#include "src/buildtool/logging/log_sink_file.hpp"
+#include "src/buildtool/logging/logger.hpp"
+#include "src/utils/cpp/json.hpp"
+
+class GraphTraverser {
+  public:
+    struct CommandLineArguments {
+        std::size_t jobs;
+        EndpointArguments endpoint;
+        BuildArguments build;
+        std::optional<StageArguments> stage;
+        std::optional<RebuildArguments> rebuild;
+    };
+
+    explicit GraphTraverser(CommandLineArguments clargs)
+        : clargs_{std::move(clargs)},
+          api_{CreateExecutionApi(clargs_.endpoint)} {}
+
+    /// \brief Parses actions and blobs into graph, traverses it and retrieves
+    /// outputs specified by command line arguments
+    [[nodiscard]] auto BuildAndStage(
+        std::map<std::string, ArtifactDescription> const& artifact_descriptions,
+        std::map<std::string, ArtifactDescription> const& runfile_descriptions,
+        std::vector<ActionDescription> const& action_descriptions,
+        std::vector<std::string> const& blobs,
+        std::vector<Tree> const& trees) const
+        -> std::optional<std::pair<std::vector<std::filesystem::path>, bool>> {
+        DependencyGraph graph;  // must outlive artifact_nodes
+        auto artifacts = BuildArtifacts(&graph,
+                                        artifact_descriptions,
+                                        runfile_descriptions,
+                                        action_descriptions,
+                                        trees,
+                                        blobs);
+        if (not artifacts) {
+            return std::nullopt;
+        }
+        auto const [rel_paths, artifact_nodes] = *artifacts;
+
+        auto const object_infos = CollectObjectInfos(artifact_nodes);
+        if (not object_infos) {
+            return std::nullopt;
+        }
+        bool failed_artifacts = false;
+        for (auto const& obj_info : *object_infos) {
+            failed_artifacts = failed_artifacts || obj_info.failed;
+        }
+
+        if (not clargs_.stage) {
+            PrintOutputs("Artifacts built, logical paths are:",
+                         rel_paths,
+                         artifact_nodes,
+                         runfile_descriptions);
+            MaybePrintToStdout(*artifacts);
+            return std::make_pair(std::move(artifacts->first),
+                                  failed_artifacts);
+        }
+
+        auto output_paths = RetrieveOutputs(rel_paths, *object_infos);
+        if (not output_paths) {
+            return std::nullopt;
+        }
+        PrintOutputs("Artifacts can be found in:",
+                     *output_paths,
+                     artifact_nodes,
+                     runfile_descriptions);
+
+        MaybePrintToStdout(*artifacts);
+
+        return std::make_pair(*output_paths, failed_artifacts);
+    }
+
+    /// \brief Parses graph description into graph, traverses it and retrieves
+    /// outputs specified by command line arguments
+    [[nodiscard]] auto BuildAndStage(
+        std::filesystem::path const& graph_description,
+        nlohmann::json const& artifacts) const
+        -> std::optional<std::pair<std::vector<std::filesystem::path>, bool>> {
+        // Read blobs to upload and actions from graph description file
+        auto desc = ReadGraphDescription(graph_description);
+        if (not desc) {
+            return std::nullopt;
+        }
+        auto const [blobs, tree_descs, actions] = *desc;
+
+        std::vector<ActionDescription> action_descriptions{};
+        action_descriptions.reserve(actions.size());
+        for (auto const& [id, description] : actions.items()) {
+            auto action = ActionDescription::FromJson(id, description);
+            if (not action) {
+                return std::nullopt;  // Error already logged
+            }
+            action_descriptions.emplace_back(std::move(*action));
+        }
+
+        std::vector<Tree> trees{};
+        for (auto const& [id, description] : tree_descs.items()) {
+            auto tree = Tree::FromJson(id, description);
+            if (not tree) {
+                return std::nullopt;
+            }
+            trees.emplace_back(std::move(*tree));
+        }
+
+        std::map<std::string, ArtifactDescription> artifact_descriptions{};
+        for (auto const& [rel_path, description] : artifacts.items()) {
+            auto artifact = ArtifactDescription::FromJson(description);
+            if (not artifact) {
+                return std::nullopt;  // Error already logged
+            }
+            artifact_descriptions.emplace(rel_path, std::move(*artifact));
+        }
+
+        return BuildAndStage(
+            artifact_descriptions, {}, action_descriptions, blobs, trees);
+    }
+
+    [[nodiscard]] auto ExecutionApi() const -> gsl::not_null<IExecutionApi*> {
+        return &(*api_);
+    }
+
+  private:
+    CommandLineArguments const clargs_;
+    gsl::not_null<IExecutionApi::Ptr> const api_;
+
+    /// \brief Reads contents of graph description file as json object. In case
+    /// the description is missing "blobs" or "actions" key/value pairs or they
+    /// can't be retrieved with the appropriate types, execution is terminated
+    /// after logging error
+    /// \returns A pair containing the blobs to upload (as a vector of strings)
+    /// and the actions as a json object.
+    [[nodiscard]] static auto ReadGraphDescription(
+        std::filesystem::path const& graph_description)
+        -> std::optional<
+            std::tuple<nlohmann::json, nlohmann::json, nlohmann::json>> {
+        auto const graph_description_opt = Json::ReadFile(graph_description);
+        if (not graph_description_opt.has_value()) {
+            Logger::Log(LogLevel::Error,
+                        "parsing graph from {}",
+                        graph_description.string());
+            return std::nullopt;
+        }
+        auto blobs_opt = ExtractValueAs<std::vector<std::string>>(
+            *graph_description_opt, "blobs", [](std::string const& s) {
+                Logger::Log(LogLevel::Error,
+                            "{}\ncan not retrieve value for \"blobs\" from "
+                            "graph description.",
+                            s);
+            });
+        auto trees_opt = ExtractValueAs<nlohmann::json>(
+            *graph_description_opt, "trees", [](std::string const& s) {
+                Logger::Log(LogLevel::Error,
+                            "{}\ncan not retrieve value for \"trees\" from "
+                            "graph description.",
+                            s);
+            });
+        auto actions_opt = ExtractValueAs<nlohmann::json>(
+            *graph_description_opt, "actions", [](std::string const& s) {
+                Logger::Log(LogLevel::Error,
+                            "{}\ncan not retrieve value for \"actions\" from "
+                            "graph description.",
+                            s);
+            });
+        if (not blobs_opt or not trees_opt or not actions_opt) {
+            return std::nullopt;
+        }
+        return std::make_tuple(std::move(*blobs_opt),
+                               std::move(*trees_opt),
+                               std::move(*actions_opt));
+    }
+
+    [[nodiscard]] static auto CreateExecutionApi(
+        EndpointArguments const& clargs) -> gsl::not_null<IExecutionApi::Ptr> {
+        if (clargs.remote_execution_address) {
+            auto remote = RemoteExecutionConfig{};
+            if (not remote.SetAddress(*clargs.remote_execution_address)) {
+                Logger::Log(LogLevel::Error,
+                            "parsing remote execution address '{}' failed.",
+                            *clargs.remote_execution_address);
+                std::exit(EXIT_FAILURE);
+            }
+
+            ExecutionConfiguration config;
+            config.skip_cache_lookup = false;
+
+            return std::make_unique<BazelApi>(
+                "remote-execution", remote.Host(), remote.Port(), config);
+        }
+        return std::make_unique<LocalApi>();
+    }
+
+    /// \brief Requires for the executor to upload blobs to CAS. In the case any
+    /// of the uploads fails, execution is terminated
+    /// \param[in] blobs blobs to be uploaded
+    [[nodiscard]] auto UploadBlobs(
+        std::vector<std::string> const& blobs) const noexcept -> bool {
+        BlobContainer container;
+        for (auto const& blob : blobs) {
+            auto digest = ArtifactDigest{ComputeHash(blob), blob.size()};
+            Logger::Log(LogLevel::Trace, [&]() {
+                return fmt::format(
+                    "Uploaded blob {}, its digest has id {} and size {}.",
+                    nlohmann::json(blob).dump(),
+                    digest.hash(),
+                    digest.size());
+            });
+            try {
+                container.Emplace(BazelBlob{std::move(digest), blob});
+            } catch (std::exception const& ex) {
+                Logger::Log(
+                    LogLevel::Error, "failed to create blob with: ", ex.what());
+                return false;
+            }
+        }
+        return api_->Upload(container);
+    }
+
+    /// \brief Adds the artifacts to be retrieved to the graph
+    /// \param[in] g dependency graph
+    /// \param[in] artifacts output artifact map
+    /// \param[in] runfiles output runfile map
+    /// \returns pair of vectors where the first vector contains the absolute
+    /// paths to which the artifacts will be retrieved and the second one
+    /// contains the ids of the artifacts to be retrieved
+    [[nodiscard]] static auto AddArtifactsToRetrieve(
+        gsl::not_null<DependencyGraph*> const& g,
+        std::map<std::string, ArtifactDescription> const& artifacts,
+        std::map<std::string, ArtifactDescription> const& runfiles)
+        -> std::optional<std::pair<std::vector<std::filesystem::path>,
+                                   std::vector<ArtifactIdentifier>>> {
+        std::vector<std::filesystem::path> rel_paths;
+        std::vector<ArtifactIdentifier> ids;
+        auto total_size = artifacts.size() + runfiles.size();
+        rel_paths.reserve(total_size);
+        ids.reserve(total_size);
+        auto add_and_get_info =
+            [&g, &rel_paths, &ids](
+                std::map<std::string, ArtifactDescription> const& descriptions)
+            -> bool {
+            for (auto const& [rel_path, artifact] : descriptions) {
+                rel_paths.emplace_back(rel_path);
+                ids.emplace_back(g->AddArtifact(artifact));
+            }
+            return true;
+        };
+        if (add_and_get_info(artifacts) and add_and_get_info(runfiles)) {
+            return std::make_pair(std::move(rel_paths), std::move(ids));
+        }
+        return std::nullopt;
+    }
+
+    /// \brief Traverses the graph. In case any of the artifact ids
+    /// specified by the command line arguments is duplicated, execution is
+    /// terminated.
+    [[nodiscard]] auto Traverse(
+        DependencyGraph const& g,
+        std::vector<ArtifactIdentifier> const& artifact_ids) const -> bool {
+        Executor executor{&(*api_), clargs_.build.platform_properties};
+        Traverser t{executor, g, clargs_.jobs};
+        return t.Traverse({std::begin(artifact_ids), std::end(artifact_ids)});
+    }
+
+    [[nodiscard]] auto TraverseRebuild(
+        DependencyGraph const& g,
+        std::vector<ArtifactIdentifier> const& artifact_ids) const -> bool {
+        // create second configuration for cache endpoint
+        auto cache_args = clargs_.endpoint;
+        if (not clargs_.rebuild->cache_endpoint.value_or("").empty()) {
+            cache_args.remote_execution_address =
+                *clargs_.rebuild->cache_endpoint == "local"
+                    ? std::nullopt                      // disable
+                    : clargs_.rebuild->cache_endpoint;  // set endpoint
+        }
+
+        // setup rebuilder with api for cache endpoint
+        auto api_cached = CreateExecutionApi(cache_args);
+        Rebuilder executor{
+            &(*api_), &(*api_cached), clargs_.build.platform_properties};
+        bool success{false};
+        {
+            Traverser t{executor, g, clargs_.jobs};
+            success =
+                t.Traverse({std::begin(artifact_ids), std::end(artifact_ids)});
+        }
+
+        if (success and clargs_.rebuild->dump_flaky) {
+            std::ofstream file{*clargs_.rebuild->dump_flaky};
+            file << executor.DumpFlakyActions().dump(2);
+        }
+        return success;
+    }
+
+    /// \brief Retrieves nodes corresponding to artifacts with ids in artifacts.
+    /// In case any of the identifiers doesn't correspond to a node inside the
+    /// graph, we write out error message and stop execution with failure code
+    [[nodiscard]] static auto GetArtifactNodes(
+        DependencyGraph const& g,
+        std::vector<ArtifactIdentifier> const& artifact_ids) noexcept
+        -> std::optional<std::vector<DependencyGraph::ArtifactNode const*>> {
+        std::vector<DependencyGraph::ArtifactNode const*> nodes{};
+
+        for (auto const& art_id : artifact_ids) {
+            auto const* node = g.ArtifactNodeWithId(art_id);
+            if (node == nullptr) {
+                Logger::Log(
+                    LogLevel::Error, "Artifact {} not found in graph.", art_id);
+                return std::nullopt;
+            }
+            nodes.push_back(node);
+        }
+        return nodes;
+    }
+
+    void LogStatistics() const noexcept {
+        auto const& stats = Statistics::Instance();
+        if (clargs_.rebuild) {
+            std::stringstream ss{};
+            ss << stats.RebuiltActionComparedCounter()
+               << " actions compared with cache";
+            if (stats.ActionsFlakyCounter() > 0) {
+                ss << ", " << stats.ActionsFlakyCounter()
+                   << " flaky actions found";
+                ss << " (" << stats.ActionsFlakyTaintedCounter()
+                   << " of which tainted)";
+            }
+            if (stats.RebuiltActionMissingCounter() > 0) {
+                ss << ", no cache entry found for "
+                   << stats.RebuiltActionMissingCounter() << " actions";
+            }
+            ss << ".";
+            Logger::Log(LogLevel::Info, ss.str());
+        }
+        else {
+            Logger::Log(LogLevel::Info,
+                        "Processed {} actions, {} cache hits.",
+                        stats.ActionsQueuedCounter(),
+                        stats.ActionsCachedCounter());
+        }
+    }
+
+    [[nodiscard]] auto BuildArtifacts(
+        gsl::not_null<DependencyGraph*> const& graph,
+        std::map<std::string, ArtifactDescription> const& artifacts,
+        std::map<std::string, ArtifactDescription> const& runfiles,
+        std::vector<ActionDescription> const& actions,
+        std::vector<Tree> const& trees,
+        std::vector<std::string> const& blobs) const
+        -> std::optional<
+            std::pair<std::vector<std::filesystem::path>,
+                      std::vector<DependencyGraph::ArtifactNode const*>>> {
+        if (not UploadBlobs(blobs)) {
+            return std::nullopt;
+        }
+
+        auto artifact_infos =
+            AddArtifactsToRetrieve(graph, artifacts, runfiles);
+        if (not artifact_infos) {
+            return std::nullopt;
+        }
+        auto& [output_paths, artifact_ids] = *artifact_infos;
+
+        std::vector<ActionDescription> tree_actions{};
+        tree_actions.reserve(trees.size());
+        for (auto const& tree : trees) {
+            tree_actions.emplace_back(tree.Action());
+        }
+
+        if (not graph->Add(actions) or not graph->Add(tree_actions)) {
+            Logger::Log(LogLevel::Error, [&actions]() {
+                auto json = nlohmann::json::array();
+                for (auto const& desc : actions) {
+                    json.push_back(desc.ToJson());
+                }
+                return fmt::format(
+                    "could not build the dependency graph from the actions "
+                    "described in {}.",
+                    json.dump());
+            });
+            return std::nullopt;
+        }
+
+        if (clargs_.rebuild ? not TraverseRebuild(*graph, artifact_ids)
+                            : not Traverse(*graph, artifact_ids)) {
+            Logger::Log(LogLevel::Error, "traversing graph failed.");
+            return std::nullopt;
+        }
+
+        LogStatistics();
+
+        auto artifact_nodes = GetArtifactNodes(*graph, artifact_ids);
+        if (not artifact_nodes) {
+            return std::nullopt;
+        }
+        return std::make_pair(std::move(output_paths),
+                              std::move(*artifact_nodes));
+    }
+
+    [[nodiscard]] auto PrepareOutputPaths(
+        std::vector<std::filesystem::path> const& rel_paths) const
+        -> std::optional<std::vector<std::filesystem::path>> {
+        std::vector<std::filesystem::path> output_paths{};
+        output_paths.reserve(rel_paths.size());
+        for (auto const& rel_path : rel_paths) {
+            auto output_path = clargs_.stage->output_dir / rel_path;
+            if (FileSystemManager::IsFile(output_path) and
+                not FileSystemManager::RemoveFile(output_path)) {
+                Logger::Log(LogLevel::Error,
+                            "Could not clean output path {}",
+                            output_path.string());
+                return std::nullopt;
+            }
+            output_paths.emplace_back(std::move(output_path));
+        }
+        return output_paths;
+    }
+
+    [[nodiscard]] static auto CollectObjectInfos(
+        std::vector<DependencyGraph::ArtifactNode const*> const& artifact_nodes)
+        -> std::optional<std::vector<Artifact::ObjectInfo>> {
+        std::vector<Artifact::ObjectInfo> object_infos;
+        object_infos.reserve(artifact_nodes.size());
+        for (auto const* art_ptr : artifact_nodes) {
+            auto const& info = art_ptr->Content().Info();
+            if (info) {
+                object_infos.push_back(*info);
+            }
+            else {
+                Logger::Log(LogLevel::Error,
+                            "artifact {} could not be retrieved, it can not be "
+                            "found in CAS.",
+                            art_ptr->Content().Id());
+                return std::nullopt;
+            }
+        }
+        return object_infos;
+    }
+
+    /// \brief Asks execution API to copy output artifacts to paths specified by
+    /// command line arguments and writes location info. In case the executor
+    /// couldn't retrieve any of the outputs, execution is terminated.
+    [[nodiscard]] auto RetrieveOutputs(
+        std::vector<std::filesystem::path> const& rel_paths,
+        std::vector<Artifact::ObjectInfo> const& object_infos) const
+        -> std::optional<std::vector<std::filesystem::path>> {
+        // Create output directory
+        if (not FileSystemManager::CreateDirectory(clargs_.stage->output_dir)) {
+            return std::nullopt;  // Message logged in the file system manager
+        }
+
+        auto output_paths = PrepareOutputPaths(rel_paths);
+
+        if (not output_paths or
+            not api_->RetrieveToPaths(object_infos, *output_paths)) {
+            Logger::Log(LogLevel::Error, "Could not retrieve outputs.");
+            return std::nullopt;
+        }
+
+        return std::move(*output_paths);
+    }
+
+    void PrintOutputs(
+        std::string message,
+        std::vector<std::filesystem::path> const& paths,
+        std::vector<DependencyGraph::ArtifactNode const*> const& artifact_nodes,
+        std::map<std::string, ArtifactDescription> const& runfiles) const {
+        std::string msg_dbg{"Artifact ids:"};
+        nlohmann::json json{};
+        for (std::size_t pos = 0; pos < paths.size(); ++pos) {
+            auto path = paths[pos].string();
+            auto id = IdentifierToString(artifact_nodes[pos]->Content().Id());
+            if (clargs_.build.show_runfiles or
+                not runfiles.contains(clargs_.stage
+                                          ? std::filesystem::proximate(
+                                                path, clargs_.stage->output_dir)
+                                                .string()
+                                          : path)) {
+                auto info = artifact_nodes[pos]->Content().Info();
+                if (info) {
+                    message += fmt::format("\n {} {}", path, info->ToString());
+                    if (clargs_.build.dump_artifacts) {
+                        json[path] = info->ToJson();
+                    }
+                }
+                else {
+                    Logger::Log(
+                        LogLevel::Error, "Missing info for artifact {}.", id);
+                }
+            }
+            msg_dbg += fmt::format("\n {}: {}", path, id);
+        }
+
+        if (not clargs_.build.show_runfiles and !runfiles.empty()) {
+            message += fmt::format("\n({} runfiles omitted.)", runfiles.size());
+        }
+
+        Logger::Log(LogLevel::Info, "{}", message);
+        Logger::Log(LogLevel::Debug, "{}", msg_dbg);
+
+        if (clargs_.build.dump_artifacts) {
+            if (*clargs_.build.dump_artifacts == "-") {
+                std::cout << std::setw(2) << json << std::endl;
+            }
+            else {
+                std::ofstream os(*clargs_.build.dump_artifacts);
+                os << std::setw(2) << json << std::endl;
+            }
+        }
+    }
+
+    void MaybePrintToStdout(
+        std::pair<std::vector<std::filesystem::path>,
+                  std::vector<DependencyGraph::ArtifactNode const*>> artifacts)
+        const {
+        if (clargs_.build.print_to_stdout) {
+            for (size_t i = 0; i < artifacts.first.size(); i++) {
+                if (artifacts.first[i] == *(clargs_.build.print_to_stdout)) {
+                    auto info = artifacts.second[i]->Content().Info();
+                    if (info) {
+                        if (not api_->RetrieveToFds({*info},
+                                                    {dup(fileno(stdout))})) {
+                            Logger::Log(LogLevel::Error,
+                                        "Failed to retrieve {}",
+                                        *(clargs_.build.print_to_stdout));
+                        }
+                    }
+                    else {
+                        Logger::Log(
+                            LogLevel::Error,
+                            "Failed to obtain object information for {}",
+                            *(clargs_.build.print_to_stdout));
+                    }
+                    return;
+                }
+            }
+            Logger::Log(LogLevel::Warning,
+                        "{} not a logical path of the specified target",
+                        *(clargs_.build.print_to_stdout));
+        }
+    }
+};
+
+#endif  // INCLUDED_SRC_BUILDTOOL_GRAPH_TRAVERSER_GRAPH_TRAVERSER_HPP