diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2023-03-21 14:02:44 +0100 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2023-03-22 17:35:18 +0100 |
commit | 17143ed02545efab55c175db66a9e31db4cce0f0 (patch) | |
tree | 953bb233a3b0fcd7b3bd3bec04ef196f6739ab82 /src | |
parent | 854745ad36f54559360a7dfc937c382cd0faf057 (diff) | |
download | justbuild-17143ed02545efab55c175db66a9e31db4cce0f0.tar.gz |
just-mr: Shell out to system Git for fetches over SSH...
...due to limited SSH support in libgit2. In order to allow the
fetches to still be parallel, we execute:
git fetch --no-auto-gc --no-write-fetch-head <repo> [<branch>]
This only fetches the packs without updating any refs, at the slight
cost of sometimes fetching some redundant information, which for our
purposes is practically a non-issue.
(If really needed, a 'git gc' call can be done eventually to try to
compact the fetched packs, although a save in disk space is not
actually guaranteed.)
Diffstat (limited to 'src')
-rw-r--r-- | src/buildtool/file_system/git_cas.hpp | 2 | ||||
-rw-r--r-- | src/other_tools/git_operations/TARGETS | 2 | ||||
-rw-r--r-- | src/other_tools/git_operations/git_repo_remote.cpp | 146 | ||||
-rw-r--r-- | src/other_tools/git_operations/git_repo_remote.hpp | 22 | ||||
-rw-r--r-- | src/other_tools/ops_maps/import_to_git_map.cpp | 14 | ||||
-rw-r--r-- | src/other_tools/ops_maps/import_to_git_map.hpp | 1 | ||||
-rw-r--r-- | src/other_tools/root_maps/commit_git_map.cpp | 17 | ||||
-rw-r--r-- | src/other_tools/root_maps/commit_git_map.hpp | 1 | ||||
-rw-r--r-- | src/other_tools/root_maps/tree_id_git_map.cpp | 2 |
9 files changed, 145 insertions, 62 deletions
diff --git a/src/buildtool/file_system/git_cas.hpp b/src/buildtool/file_system/git_cas.hpp index 03b772bc..44b61cb4 100644 --- a/src/buildtool/file_system/git_cas.hpp +++ b/src/buildtool/file_system/git_cas.hpp @@ -64,7 +64,7 @@ class GitCAS { // IMPORTANT: the GitContext needs to be initialized before any git object! GitContext git_context_{}; // maintains a Git context while CAS is alive std::unique_ptr<git_odb, decltype(&odb_closer)> odb_{nullptr, odb_closer}; - // git folder path of repo; used for logging + // git folder path of repo std::filesystem::path git_path_{}; [[nodiscard]] auto OpenODB(std::filesystem::path const& repo_path) noexcept diff --git a/src/other_tools/git_operations/TARGETS b/src/other_tools/git_operations/TARGETS index 19dabe4f..8a135ecf 100644 --- a/src/other_tools/git_operations/TARGETS +++ b/src/other_tools/git_operations/TARGETS @@ -32,6 +32,8 @@ , ["src/buildtool/file_system", "git_utils"] , ["@", "fmt", "", "fmt"] , ["", "libgit2"] + , ["@", "json", "", "json"] + , ["src/buildtool/system", "system_command"] , "git_config_settings" ] } diff --git a/src/other_tools/git_operations/git_repo_remote.cpp b/src/other_tools/git_operations/git_repo_remote.cpp index 4965f121..6cff2a82 100644 --- a/src/other_tools/git_operations/git_repo_remote.cpp +++ b/src/other_tools/git_operations/git_repo_remote.cpp @@ -15,8 +15,10 @@ #include <src/other_tools/git_operations/git_repo_remote.hpp> #include "fmt/core.h" +#include "nlohmann/json.hpp" #include "src/buildtool/file_system/git_utils.hpp" #include "src/buildtool/logging/logger.hpp" +#include "src/buildtool/system/system_command.hpp" #include "src/other_tools/git_operations/git_config_settings.hpp" extern "C" { @@ -26,6 +28,21 @@ extern "C" { namespace { +/// \brief Basic check for libgit2 protocols we support. For all other cases, we +/// should default to shell out to git instead. +[[nodiscard]] auto IsSupported(std::string const& url) -> bool { + // look for explicit schemes + if (url.starts_with("git://") or url.starts_with("http://") or + url.starts_with("https://") or url.starts_with("file://")) { + return true; + } + // look for url as existing filesystem directory path + if (FileSystemManager::IsDirectory(std::filesystem::path(url))) { + return true; + } + return false; // as default +} + struct FetchIntoODBBackend { git_odb_backend parent; git_odb* target_odb; // the odb where the fetch objects will end up into @@ -414,56 +431,105 @@ auto GitRepoRemote::UpdateCommitViaTmpRepo( } } -auto GitRepoRemote::FetchViaTmpRepo(std::filesystem::path const& tmp_repo_path, +auto GitRepoRemote::FetchViaTmpRepo(std::filesystem::path const& tmp_dir, std::string const& repo_url, std::optional<std::string> const& branch, + std::vector<std::string> const& launcher, anon_logger_ptr const& logger) noexcept -> bool { try { - // preferably with a "fake" repository! - if (not IsRepoFake()) { - Logger::Log(LogLevel::Debug, - "Branch fetch called on a real repository"); - } - // create the temporary real repository - // it can be bare, as the refspecs for this fetch will be given - // explicitly. - auto tmp_repo = - GitRepoRemote::InitAndOpen(tmp_repo_path, /*is_bare=*/true); - if (tmp_repo == std::nullopt) { + // check for internally supported protocols + if (IsSupported(repo_url)) { + // preferably with a "fake" repository! + if (not IsRepoFake()) { + Logger::Log(LogLevel::Debug, + "Branch fetch called on a real repository"); + } + // create the temporary real repository + // it can be bare, as the refspecs for this fetch will be given + // explicitly. + auto tmp_repo = + GitRepoRemote::InitAndOpen(tmp_dir, /*is_bare=*/true); + if (tmp_repo == std::nullopt) { + return false; + } + // add backend, with max priority + FetchIntoODBBackend b{kFetchIntoODBParent, GetGitOdb().get()}; + if (git_odb_add_backend( + tmp_repo->GetGitOdb().get(), + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + reinterpret_cast<git_odb_backend*>(&b), + std::numeric_limits<int>::max()) == 0) { + // setup wrapped logger + auto wrapped_logger = std::make_shared<anon_logger_t>( + [logger](auto const& msg, bool fatal) { + (*logger)(fmt::format("While doing branch fetch via " + "tmp repo:\n{}", + msg), + fatal); + }); + // get the config of the correct target repo + auto cfg = GetConfigSnapshot(); + if (cfg == nullptr) { + (*logger)( + fmt::format("retrieving config object in fetch via " + "tmp repo failed with:\n{}", + GitLastError()), + true /*fatal*/); + return false; + } + return tmp_repo->FetchFromRemote( + cfg, repo_url, branch, wrapped_logger); + } return false; } - // add backend, with max priority - FetchIntoODBBackend b{kFetchIntoODBParent, GetGitOdb().get()}; - if (git_odb_add_backend( - tmp_repo->GetGitOdb().get(), - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - reinterpret_cast<git_odb_backend*>(&b), - std::numeric_limits<int>::max()) == 0) { - // setup wrapped logger - auto wrapped_logger = std::make_shared<anon_logger_t>( - [logger](auto const& msg, bool fatal) { - (*logger)( - fmt::format( - "While doing branch fetch via tmp repo:\n{}", msg), - fatal); - }); - // get the config of the correct target repo - auto cfg = GetConfigSnapshot(); - if (cfg == nullptr) { - (*logger)(fmt::format("retrieving config object in fetch via " - "tmp repo failed with:\n{}", - GitLastError()), - true /*fatal*/); - return false; + // default to shelling out to git for non-explicitly supported protocols + auto cmdline = launcher; + // Note: Because we fetch with URL, not a known remote, no refs are + // being updated by default, so git has no reason to get a lock + // file. This however does not imply automatically that fetches + // might not internally wait for each other through other means. + cmdline.insert(cmdline.end(), + {"git", + "fetch", + "--no-auto-gc", + "--no-write-fetch-head", + repo_url}); + if (branch) { + cmdline.push_back(*branch); + } + // run command + SystemCommand system{repo_url}; + auto const command_output = system.Execute(cmdline, + {}, // caller env + GetGitPath(), + tmp_dir); + if (not command_output) { + std::string out_str{}; + std::string err_str{}; + auto cmd_out = + FileSystemManager::ReadFile(command_output->stdout_file); + auto cmd_err = + FileSystemManager::ReadFile(command_output->stderr_file); + if (cmd_out) { + out_str = *cmd_out; + } + if (cmd_err) { + err_str = *cmd_err; + } + std::string output{}; + if (!out_str.empty() || !err_str.empty()) { + output = fmt::format(" with output:\n{}{}", out_str, err_str); } - return tmp_repo->FetchFromRemote( - cfg, repo_url, branch, wrapped_logger); + (*logger)(fmt::format("Fetch command {} failed{}", + nlohmann::json(cmdline).dump(), + output), + /*fatal=*/true); + return false; } - return false; + return true; // fetch successful } catch (std::exception const& ex) { - Logger::Log( - LogLevel::Error, "fetch via tmp repo failed with:\n{}", ex.what()); + Logger::Log(LogLevel::Error, "Fetch failed with:\n{}", ex.what()); return false; } } diff --git a/src/other_tools/git_operations/git_repo_remote.hpp b/src/other_tools/git_operations/git_repo_remote.hpp index 8b0021b8..64c827c4 100644 --- a/src/other_tools/git_operations/git_repo_remote.hpp +++ b/src/other_tools/git_operations/git_repo_remote.hpp @@ -88,20 +88,22 @@ class GitRepoRemote : public GitRepo { anon_logger_ptr const& logger) const noexcept -> std::optional<std::string>; - /// \brief Fetch from a remote via a temporary repository. - /// Calling it from a fake repository allows thread-safe use. - /// Creates a temporary real repository at the given location and uses a - /// custom backend to redirect the fetched objects into the desired odb. + /// \brief Fetch from a remote. If URL is SSH, shells out to system git to + /// retrieve packs in a safe manner, with the only side-effect being that + /// there can be some redundancy in the fetched packs. The tmp dir is used + /// to pipe the stdout and stderr to. + /// If URL is non-SSH, uses tmp dir to fetch asynchronously using libgit2. /// Caller needs to make sure the temporary directory exists and that the /// given path is thread- and process-safe! - /// Uses either a given branch, or fetches using base refspecs. + /// Uses either a given branch, or fetches all (with base refspecs). /// Returns a success flag. /// It guarantees the logger is called exactly once with fatal if failure. - [[nodiscard]] auto FetchViaTmpRepo( - std::filesystem::path const& tmp_repo_path, - std::string const& repo_url, - std::optional<std::string> const& branch, - anon_logger_ptr const& logger) noexcept -> bool; + [[nodiscard]] auto FetchViaTmpRepo(std::filesystem::path const& tmp_dir, + std::string const& repo_url, + std::optional<std::string> const& branch, + std::vector<std::string> const& launcher, + anon_logger_ptr const& logger) noexcept + -> bool; /// \brief Get a snapshot of the repository configuration. /// Returns nullptr on errors. diff --git a/src/other_tools/ops_maps/import_to_git_map.cpp b/src/other_tools/ops_maps/import_to_git_map.cpp index 88e107f5..278e9df1 100644 --- a/src/other_tools/ops_maps/import_to_git_map.cpp +++ b/src/other_tools/ops_maps/import_to_git_map.cpp @@ -85,12 +85,13 @@ void KeepCommitAndSetTree( auto CreateImportToGitMap( gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + std::vector<std::string> const& launcher, std::size_t jobs) -> ImportToGitMap { - auto import_to_git = [critical_git_op_map](auto ts, - auto setter, - auto logger, - auto /*unused*/, - auto const& key) { + auto import_to_git = [critical_git_op_map, launcher](auto ts, + auto setter, + auto logger, + auto /*unused*/, + auto const& key) { // Perform initial commit at location: init + add . + commit GitOpKey op_key = { { @@ -106,6 +107,7 @@ auto CreateImportToGitMap( {std::move(op_key)}, [critical_git_op_map, target_path = key.target_path, + launcher, ts, setter, logger](auto const& values) { @@ -134,6 +136,7 @@ auto CreateImportToGitMap( commit = *op_result.result, target_path, git_cas = op_result.git_cas, + launcher, ts, setter, logger](auto const& values) { @@ -179,6 +182,7 @@ auto CreateImportToGitMap( just_git_repo->FetchViaTmpRepo(*tmp_repo_path, target_path.string(), std::nullopt, + launcher, wrapped_logger); if (not success) { return; diff --git a/src/other_tools/ops_maps/import_to_git_map.hpp b/src/other_tools/ops_maps/import_to_git_map.hpp index 096d8146..77f0983a 100644 --- a/src/other_tools/ops_maps/import_to_git_map.hpp +++ b/src/other_tools/ops_maps/import_to_git_map.hpp @@ -59,6 +59,7 @@ using ImportToGitMap = [[nodiscard]] auto CreateImportToGitMap( gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + std::vector<std::string> const& launcher, std::size_t jobs) -> ImportToGitMap; #endif // INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_IMPORT_TO_GIT_MAP_HPP
\ No newline at end of file diff --git a/src/other_tools/root_maps/commit_git_map.cpp b/src/other_tools/root_maps/commit_git_map.cpp index fae205ed..3500245b 100644 --- a/src/other_tools/root_maps/commit_git_map.cpp +++ b/src/other_tools/root_maps/commit_git_map.cpp @@ -36,6 +36,7 @@ void EnsureCommit(GitRepoInfo const& repo_info, std::filesystem::path const& repo_root, GitCASPtr const& git_cas, gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, + std::vector<std::string> const& launcher, gsl::not_null<TaskSystem*> const& ts, CommitGitMap::SetterPtr const& ws_setter, CommitGitMap::LoggerPtr const& logger) { @@ -76,6 +77,7 @@ void EnsureCommit(GitRepoInfo const& repo_info, if (not git_repo->FetchViaTmpRepo(tmp_dir->GetPath(), repo_info.repo_url, repo_info.branch, + launcher, wrapped_logger)) { return; } @@ -184,12 +186,14 @@ void EnsureCommit(GitRepoInfo const& repo_info, auto CreateCommitGitMap( gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, JustMR::PathsPtr const& just_mr_paths, + std::vector<std::string> const& launcher, std::size_t jobs) -> CommitGitMap { - auto commit_to_git = [critical_git_op_map, just_mr_paths](auto ts, - auto setter, - auto logger, - auto /* unused */, - auto const& key) { + auto commit_to_git = [critical_git_op_map, just_mr_paths, launcher]( + auto ts, + auto setter, + auto logger, + auto /* unused */, + auto const& key) { // get root for repo (making sure that if repo is a path, it is // absolute) std::string fetch_repo = key.repo_url; @@ -213,7 +217,7 @@ auto CreateCommitGitMap( critical_git_op_map->ConsumeAfterKeysReady( ts, {std::move(op_key)}, - [key, repo_root, critical_git_op_map, ts, setter, logger]( + [key, repo_root, critical_git_op_map, launcher, ts, setter, logger]( auto const& values) { GitOpValue op_result = *values[0]; // check flag @@ -236,6 +240,7 @@ auto CreateCommitGitMap( repo_root, op_result.git_cas, critical_git_op_map, + launcher, ts, setter, wrapped_logger); diff --git a/src/other_tools/root_maps/commit_git_map.hpp b/src/other_tools/root_maps/commit_git_map.hpp index 413d83ac..c021382c 100644 --- a/src/other_tools/root_maps/commit_git_map.hpp +++ b/src/other_tools/root_maps/commit_git_map.hpp @@ -58,6 +58,7 @@ using CommitGitMap = [[nodiscard]] auto CreateCommitGitMap( gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map, JustMR::PathsPtr const& just_mr_paths, + std::vector<std::string> const& launcher, std::size_t jobs) -> CommitGitMap; #endif // INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_COMMIT_GIT_MAP_HPP diff --git a/src/other_tools/root_maps/tree_id_git_map.cpp b/src/other_tools/root_maps/tree_id_git_map.cpp index a0afb061..7d750b8f 100644 --- a/src/other_tools/root_maps/tree_id_git_map.cpp +++ b/src/other_tools/root_maps/tree_id_git_map.cpp @@ -190,6 +190,7 @@ auto CreateTreeIdGitMap( cmdline, command_output, key, + launcher, ts, setter, logger](auto const& values) { @@ -293,6 +294,7 @@ auto CreateTreeIdGitMap( *tmp_repo_path, target_path.string(), std::nullopt, + launcher, wrapped_logger); if (not success) { return; |