summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2023-03-21 14:02:44 +0100
committerPaul Cristian Sarbu <paul.cristian.sarbu@huawei.com>2023-03-22 17:35:18 +0100
commit17143ed02545efab55c175db66a9e31db4cce0f0 (patch)
tree953bb233a3b0fcd7b3bd3bec04ef196f6739ab82 /src
parent854745ad36f54559360a7dfc937c382cd0faf057 (diff)
downloadjustbuild-17143ed02545efab55c175db66a9e31db4cce0f0.tar.gz
just-mr: Shell out to system Git for fetches over SSH...
...due to limited SSH support in libgit2. In order to allow the fetches to still be parallel, we execute: git fetch --no-auto-gc --no-write-fetch-head <repo> [<branch>] This only fetches the packs without updating any refs, at the slight cost of sometimes fetching some redundant information, which for our purposes is practically a non-issue. (If really needed, a 'git gc' call can be done eventually to try to compact the fetched packs, although a save in disk space is not actually guaranteed.)
Diffstat (limited to 'src')
-rw-r--r--src/buildtool/file_system/git_cas.hpp2
-rw-r--r--src/other_tools/git_operations/TARGETS2
-rw-r--r--src/other_tools/git_operations/git_repo_remote.cpp146
-rw-r--r--src/other_tools/git_operations/git_repo_remote.hpp22
-rw-r--r--src/other_tools/ops_maps/import_to_git_map.cpp14
-rw-r--r--src/other_tools/ops_maps/import_to_git_map.hpp1
-rw-r--r--src/other_tools/root_maps/commit_git_map.cpp17
-rw-r--r--src/other_tools/root_maps/commit_git_map.hpp1
-rw-r--r--src/other_tools/root_maps/tree_id_git_map.cpp2
9 files changed, 145 insertions, 62 deletions
diff --git a/src/buildtool/file_system/git_cas.hpp b/src/buildtool/file_system/git_cas.hpp
index 03b772bc..44b61cb4 100644
--- a/src/buildtool/file_system/git_cas.hpp
+++ b/src/buildtool/file_system/git_cas.hpp
@@ -64,7 +64,7 @@ class GitCAS {
// IMPORTANT: the GitContext needs to be initialized before any git object!
GitContext git_context_{}; // maintains a Git context while CAS is alive
std::unique_ptr<git_odb, decltype(&odb_closer)> odb_{nullptr, odb_closer};
- // git folder path of repo; used for logging
+ // git folder path of repo
std::filesystem::path git_path_{};
[[nodiscard]] auto OpenODB(std::filesystem::path const& repo_path) noexcept
diff --git a/src/other_tools/git_operations/TARGETS b/src/other_tools/git_operations/TARGETS
index 19dabe4f..8a135ecf 100644
--- a/src/other_tools/git_operations/TARGETS
+++ b/src/other_tools/git_operations/TARGETS
@@ -32,6 +32,8 @@
, ["src/buildtool/file_system", "git_utils"]
, ["@", "fmt", "", "fmt"]
, ["", "libgit2"]
+ , ["@", "json", "", "json"]
+ , ["src/buildtool/system", "system_command"]
, "git_config_settings"
]
}
diff --git a/src/other_tools/git_operations/git_repo_remote.cpp b/src/other_tools/git_operations/git_repo_remote.cpp
index 4965f121..6cff2a82 100644
--- a/src/other_tools/git_operations/git_repo_remote.cpp
+++ b/src/other_tools/git_operations/git_repo_remote.cpp
@@ -15,8 +15,10 @@
#include <src/other_tools/git_operations/git_repo_remote.hpp>
#include "fmt/core.h"
+#include "nlohmann/json.hpp"
#include "src/buildtool/file_system/git_utils.hpp"
#include "src/buildtool/logging/logger.hpp"
+#include "src/buildtool/system/system_command.hpp"
#include "src/other_tools/git_operations/git_config_settings.hpp"
extern "C" {
@@ -26,6 +28,21 @@ extern "C" {
namespace {
+/// \brief Basic check for libgit2 protocols we support. For all other cases, we
+/// should default to shell out to git instead.
+[[nodiscard]] auto IsSupported(std::string const& url) -> bool {
+ // look for explicit schemes
+ if (url.starts_with("git://") or url.starts_with("http://") or
+ url.starts_with("https://") or url.starts_with("file://")) {
+ return true;
+ }
+ // look for url as existing filesystem directory path
+ if (FileSystemManager::IsDirectory(std::filesystem::path(url))) {
+ return true;
+ }
+ return false; // as default
+}
+
struct FetchIntoODBBackend {
git_odb_backend parent;
git_odb* target_odb; // the odb where the fetch objects will end up into
@@ -414,56 +431,105 @@ auto GitRepoRemote::UpdateCommitViaTmpRepo(
}
}
-auto GitRepoRemote::FetchViaTmpRepo(std::filesystem::path const& tmp_repo_path,
+auto GitRepoRemote::FetchViaTmpRepo(std::filesystem::path const& tmp_dir,
std::string const& repo_url,
std::optional<std::string> const& branch,
+ std::vector<std::string> const& launcher,
anon_logger_ptr const& logger) noexcept
-> bool {
try {
- // preferably with a "fake" repository!
- if (not IsRepoFake()) {
- Logger::Log(LogLevel::Debug,
- "Branch fetch called on a real repository");
- }
- // create the temporary real repository
- // it can be bare, as the refspecs for this fetch will be given
- // explicitly.
- auto tmp_repo =
- GitRepoRemote::InitAndOpen(tmp_repo_path, /*is_bare=*/true);
- if (tmp_repo == std::nullopt) {
+ // check for internally supported protocols
+ if (IsSupported(repo_url)) {
+ // preferably with a "fake" repository!
+ if (not IsRepoFake()) {
+ Logger::Log(LogLevel::Debug,
+ "Branch fetch called on a real repository");
+ }
+ // create the temporary real repository
+ // it can be bare, as the refspecs for this fetch will be given
+ // explicitly.
+ auto tmp_repo =
+ GitRepoRemote::InitAndOpen(tmp_dir, /*is_bare=*/true);
+ if (tmp_repo == std::nullopt) {
+ return false;
+ }
+ // add backend, with max priority
+ FetchIntoODBBackend b{kFetchIntoODBParent, GetGitOdb().get()};
+ if (git_odb_add_backend(
+ tmp_repo->GetGitOdb().get(),
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+ reinterpret_cast<git_odb_backend*>(&b),
+ std::numeric_limits<int>::max()) == 0) {
+ // setup wrapped logger
+ auto wrapped_logger = std::make_shared<anon_logger_t>(
+ [logger](auto const& msg, bool fatal) {
+ (*logger)(fmt::format("While doing branch fetch via "
+ "tmp repo:\n{}",
+ msg),
+ fatal);
+ });
+ // get the config of the correct target repo
+ auto cfg = GetConfigSnapshot();
+ if (cfg == nullptr) {
+ (*logger)(
+ fmt::format("retrieving config object in fetch via "
+ "tmp repo failed with:\n{}",
+ GitLastError()),
+ true /*fatal*/);
+ return false;
+ }
+ return tmp_repo->FetchFromRemote(
+ cfg, repo_url, branch, wrapped_logger);
+ }
return false;
}
- // add backend, with max priority
- FetchIntoODBBackend b{kFetchIntoODBParent, GetGitOdb().get()};
- if (git_odb_add_backend(
- tmp_repo->GetGitOdb().get(),
- // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
- reinterpret_cast<git_odb_backend*>(&b),
- std::numeric_limits<int>::max()) == 0) {
- // setup wrapped logger
- auto wrapped_logger = std::make_shared<anon_logger_t>(
- [logger](auto const& msg, bool fatal) {
- (*logger)(
- fmt::format(
- "While doing branch fetch via tmp repo:\n{}", msg),
- fatal);
- });
- // get the config of the correct target repo
- auto cfg = GetConfigSnapshot();
- if (cfg == nullptr) {
- (*logger)(fmt::format("retrieving config object in fetch via "
- "tmp repo failed with:\n{}",
- GitLastError()),
- true /*fatal*/);
- return false;
+ // default to shelling out to git for non-explicitly supported protocols
+ auto cmdline = launcher;
+ // Note: Because we fetch with URL, not a known remote, no refs are
+ // being updated by default, so git has no reason to get a lock
+ // file. This however does not imply automatically that fetches
+ // might not internally wait for each other through other means.
+ cmdline.insert(cmdline.end(),
+ {"git",
+ "fetch",
+ "--no-auto-gc",
+ "--no-write-fetch-head",
+ repo_url});
+ if (branch) {
+ cmdline.push_back(*branch);
+ }
+ // run command
+ SystemCommand system{repo_url};
+ auto const command_output = system.Execute(cmdline,
+ {}, // caller env
+ GetGitPath(),
+ tmp_dir);
+ if (not command_output) {
+ std::string out_str{};
+ std::string err_str{};
+ auto cmd_out =
+ FileSystemManager::ReadFile(command_output->stdout_file);
+ auto cmd_err =
+ FileSystemManager::ReadFile(command_output->stderr_file);
+ if (cmd_out) {
+ out_str = *cmd_out;
+ }
+ if (cmd_err) {
+ err_str = *cmd_err;
+ }
+ std::string output{};
+ if (!out_str.empty() || !err_str.empty()) {
+ output = fmt::format(" with output:\n{}{}", out_str, err_str);
}
- return tmp_repo->FetchFromRemote(
- cfg, repo_url, branch, wrapped_logger);
+ (*logger)(fmt::format("Fetch command {} failed{}",
+ nlohmann::json(cmdline).dump(),
+ output),
+ /*fatal=*/true);
+ return false;
}
- return false;
+ return true; // fetch successful
} catch (std::exception const& ex) {
- Logger::Log(
- LogLevel::Error, "fetch via tmp repo failed with:\n{}", ex.what());
+ Logger::Log(LogLevel::Error, "Fetch failed with:\n{}", ex.what());
return false;
}
}
diff --git a/src/other_tools/git_operations/git_repo_remote.hpp b/src/other_tools/git_operations/git_repo_remote.hpp
index 8b0021b8..64c827c4 100644
--- a/src/other_tools/git_operations/git_repo_remote.hpp
+++ b/src/other_tools/git_operations/git_repo_remote.hpp
@@ -88,20 +88,22 @@ class GitRepoRemote : public GitRepo {
anon_logger_ptr const& logger) const noexcept
-> std::optional<std::string>;
- /// \brief Fetch from a remote via a temporary repository.
- /// Calling it from a fake repository allows thread-safe use.
- /// Creates a temporary real repository at the given location and uses a
- /// custom backend to redirect the fetched objects into the desired odb.
+ /// \brief Fetch from a remote. If URL is SSH, shells out to system git to
+ /// retrieve packs in a safe manner, with the only side-effect being that
+ /// there can be some redundancy in the fetched packs. The tmp dir is used
+ /// to pipe the stdout and stderr to.
+ /// If URL is non-SSH, uses tmp dir to fetch asynchronously using libgit2.
/// Caller needs to make sure the temporary directory exists and that the
/// given path is thread- and process-safe!
- /// Uses either a given branch, or fetches using base refspecs.
+ /// Uses either a given branch, or fetches all (with base refspecs).
/// Returns a success flag.
/// It guarantees the logger is called exactly once with fatal if failure.
- [[nodiscard]] auto FetchViaTmpRepo(
- std::filesystem::path const& tmp_repo_path,
- std::string const& repo_url,
- std::optional<std::string> const& branch,
- anon_logger_ptr const& logger) noexcept -> bool;
+ [[nodiscard]] auto FetchViaTmpRepo(std::filesystem::path const& tmp_dir,
+ std::string const& repo_url,
+ std::optional<std::string> const& branch,
+ std::vector<std::string> const& launcher,
+ anon_logger_ptr const& logger) noexcept
+ -> bool;
/// \brief Get a snapshot of the repository configuration.
/// Returns nullptr on errors.
diff --git a/src/other_tools/ops_maps/import_to_git_map.cpp b/src/other_tools/ops_maps/import_to_git_map.cpp
index 88e107f5..278e9df1 100644
--- a/src/other_tools/ops_maps/import_to_git_map.cpp
+++ b/src/other_tools/ops_maps/import_to_git_map.cpp
@@ -85,12 +85,13 @@ void KeepCommitAndSetTree(
auto CreateImportToGitMap(
gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
+ std::vector<std::string> const& launcher,
std::size_t jobs) -> ImportToGitMap {
- auto import_to_git = [critical_git_op_map](auto ts,
- auto setter,
- auto logger,
- auto /*unused*/,
- auto const& key) {
+ auto import_to_git = [critical_git_op_map, launcher](auto ts,
+ auto setter,
+ auto logger,
+ auto /*unused*/,
+ auto const& key) {
// Perform initial commit at location: init + add . + commit
GitOpKey op_key = {
{
@@ -106,6 +107,7 @@ auto CreateImportToGitMap(
{std::move(op_key)},
[critical_git_op_map,
target_path = key.target_path,
+ launcher,
ts,
setter,
logger](auto const& values) {
@@ -134,6 +136,7 @@ auto CreateImportToGitMap(
commit = *op_result.result,
target_path,
git_cas = op_result.git_cas,
+ launcher,
ts,
setter,
logger](auto const& values) {
@@ -179,6 +182,7 @@ auto CreateImportToGitMap(
just_git_repo->FetchViaTmpRepo(*tmp_repo_path,
target_path.string(),
std::nullopt,
+ launcher,
wrapped_logger);
if (not success) {
return;
diff --git a/src/other_tools/ops_maps/import_to_git_map.hpp b/src/other_tools/ops_maps/import_to_git_map.hpp
index 096d8146..77f0983a 100644
--- a/src/other_tools/ops_maps/import_to_git_map.hpp
+++ b/src/other_tools/ops_maps/import_to_git_map.hpp
@@ -59,6 +59,7 @@ using ImportToGitMap =
[[nodiscard]] auto CreateImportToGitMap(
gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
+ std::vector<std::string> const& launcher,
std::size_t jobs) -> ImportToGitMap;
#endif // INCLUDED_SRC_OTHER_TOOLS_OPS_MAPS_IMPORT_TO_GIT_MAP_HPP \ No newline at end of file
diff --git a/src/other_tools/root_maps/commit_git_map.cpp b/src/other_tools/root_maps/commit_git_map.cpp
index fae205ed..3500245b 100644
--- a/src/other_tools/root_maps/commit_git_map.cpp
+++ b/src/other_tools/root_maps/commit_git_map.cpp
@@ -36,6 +36,7 @@ void EnsureCommit(GitRepoInfo const& repo_info,
std::filesystem::path const& repo_root,
GitCASPtr const& git_cas,
gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
+ std::vector<std::string> const& launcher,
gsl::not_null<TaskSystem*> const& ts,
CommitGitMap::SetterPtr const& ws_setter,
CommitGitMap::LoggerPtr const& logger) {
@@ -76,6 +77,7 @@ void EnsureCommit(GitRepoInfo const& repo_info,
if (not git_repo->FetchViaTmpRepo(tmp_dir->GetPath(),
repo_info.repo_url,
repo_info.branch,
+ launcher,
wrapped_logger)) {
return;
}
@@ -184,12 +186,14 @@ void EnsureCommit(GitRepoInfo const& repo_info,
auto CreateCommitGitMap(
gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
JustMR::PathsPtr const& just_mr_paths,
+ std::vector<std::string> const& launcher,
std::size_t jobs) -> CommitGitMap {
- auto commit_to_git = [critical_git_op_map, just_mr_paths](auto ts,
- auto setter,
- auto logger,
- auto /* unused */,
- auto const& key) {
+ auto commit_to_git = [critical_git_op_map, just_mr_paths, launcher](
+ auto ts,
+ auto setter,
+ auto logger,
+ auto /* unused */,
+ auto const& key) {
// get root for repo (making sure that if repo is a path, it is
// absolute)
std::string fetch_repo = key.repo_url;
@@ -213,7 +217,7 @@ auto CreateCommitGitMap(
critical_git_op_map->ConsumeAfterKeysReady(
ts,
{std::move(op_key)},
- [key, repo_root, critical_git_op_map, ts, setter, logger](
+ [key, repo_root, critical_git_op_map, launcher, ts, setter, logger](
auto const& values) {
GitOpValue op_result = *values[0];
// check flag
@@ -236,6 +240,7 @@ auto CreateCommitGitMap(
repo_root,
op_result.git_cas,
critical_git_op_map,
+ launcher,
ts,
setter,
wrapped_logger);
diff --git a/src/other_tools/root_maps/commit_git_map.hpp b/src/other_tools/root_maps/commit_git_map.hpp
index 413d83ac..c021382c 100644
--- a/src/other_tools/root_maps/commit_git_map.hpp
+++ b/src/other_tools/root_maps/commit_git_map.hpp
@@ -58,6 +58,7 @@ using CommitGitMap =
[[nodiscard]] auto CreateCommitGitMap(
gsl::not_null<CriticalGitOpMap*> const& critical_git_op_map,
JustMR::PathsPtr const& just_mr_paths,
+ std::vector<std::string> const& launcher,
std::size_t jobs) -> CommitGitMap;
#endif // INCLUDED_SRC_OTHER_TOOLS_ROOT_MAPS_COMMIT_GIT_MAP_HPP
diff --git a/src/other_tools/root_maps/tree_id_git_map.cpp b/src/other_tools/root_maps/tree_id_git_map.cpp
index a0afb061..7d750b8f 100644
--- a/src/other_tools/root_maps/tree_id_git_map.cpp
+++ b/src/other_tools/root_maps/tree_id_git_map.cpp
@@ -190,6 +190,7 @@ auto CreateTreeIdGitMap(
cmdline,
command_output,
key,
+ launcher,
ts,
setter,
logger](auto const& values) {
@@ -293,6 +294,7 @@ auto CreateTreeIdGitMap(
*tmp_repo_path,
target_path.string(),
std::nullopt,
+ launcher,
wrapped_logger);
if (not success) {
return;