From ca54778751856d77f7da2dba051c473a488f7d1e Mon Sep 17 00:00:00 2001 From: Klaus Aehlig Date: Mon, 19 Feb 2024 13:04:07 +0100 Subject: Git URL scheme handling: whitelist path instead of non-path protocols At some point we have to decide if a given git repository URL is a path. So far, we assumed that anything not starting with ssh://, http://, or https:// is a path. This ignores the facts that - the file:// scheme, while referring to a file, does not denote a relative path starting file://, - the [user@]host:path scheme is not a path on the local machine, - there exist the URL schemes git://, ftp://, and ftps://, and - future extension might add additional schemes. To also correctly handle new schemes that git might add (which we indeed can handle, as we simply shell out to the git binary), we reverse the approach: we give the user the means to unambigiously specify that they refer to a path on the local machine, by either - using the file:// scheme, - providing an absolute path starting with /, or - providing a relative path starting with ./ All other schemes will not be modified. The file scheme, as well as the git://, http://, and https:// scheme, are handled interally using libgit2; all others are passed on to git in an unmodified form. --- CHANGELOG.md | 7 +++++ src/other_tools/root_maps/commit_git_map.cpp | 42 +++++++++++++++++++--------- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9b1fb0b..96aba158 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,13 @@ A feature release on top of `1.2.0`, backwards compatible. the upgrade. - The taintedness of `"configure"` targets is now propagated correctly in analysis. +- It is no longer incorrectly assumed that every `git` URL not + starting with `ssh://`, `http://`, nor `https://` is a file on the + local disk. Now, only URLs starting with `/`, `./`, or `file://` + are considered file URLs. File URLs, as well as URLs starting + with `git://`, `http://`, or `https://`, are handled by `just-mr` + using `libgit2`; for every other URL, `just-mr` shells out to + `git` for fetching and the URL is passed to `git` unchanged. - Improved portability and update of the bundled dependencies. - Various minor improvements and typo fixes in the documentation. - Fixed a race condition in an internal cache of `just execute` diff --git a/src/other_tools/root_maps/commit_git_map.cpp b/src/other_tools/root_maps/commit_git_map.cpp index ddbe3a88..08680531 100644 --- a/src/other_tools/root_maps/commit_git_map.cpp +++ b/src/other_tools/root_maps/commit_git_map.cpp @@ -15,6 +15,8 @@ #include "src/other_tools/root_maps/commit_git_map.hpp" #include +#include +#include #include "fmt/core.h" #include "src/buildtool/file_system/file_root.hpp" @@ -31,13 +33,23 @@ namespace { -[[nodiscard]] auto GitURLIsPath(std::string const& url) noexcept -> bool { - auto prefixes = std::vector{"ssh://", "http://", "https://"}; - // return true if no URL prefix exists - return std::none_of( - prefixes.begin(), prefixes.end(), [url](auto const& prefix) { - return (url.rfind(prefix, 0) == 0); - }); +[[nodiscard]] auto GitURLIsPath(std::string const& url) noexcept + -> std::optional { + static auto const kAbsPath = std::string{"/"}; + static auto const kRelPath = std::string{"./"}; + static auto const kFileScheme = std::string{"file://"}; + + if (url.starts_with(kAbsPath)) { + return url; + } + if (url.starts_with(kRelPath)) { + return url.substr(kRelPath.length()); + } + if (url.starts_with(kFileScheme)) { + return url.substr(kFileScheme.length()); + } + + return std::nullopt; } /// \brief Helper function for ensuring the serve endpoint, if given, has the @@ -485,8 +497,9 @@ void EnsureCommit( auto local_mirrors = MirrorsUtils::GetLocalMirrors(additional_mirrors, fetch_repo); for (auto mirror : local_mirrors) { - if (GitURLIsPath(mirror)) { - mirror = std::filesystem::absolute(mirror).string(); + auto mirror_path = GitURLIsPath(mirror); + if (mirror_path) { + mirror = std::filesystem::absolute(*mirror_path).string(); } auto wrapped_logger = std::make_shared( [mirror, &err_messages](auto const& msg, bool /*fatal*/) { @@ -572,8 +585,10 @@ void EnsureCommit( remotes_buffer.append(fmt::format("\n> {}", fetch_repo)); // now try to fetch from mirrors, in order, if given for (auto mirror : repo_info.mirrors) { - if (GitURLIsPath(mirror)) { - mirror = std::filesystem::absolute(mirror).string(); + auto mirror_path = GitURLIsPath(mirror); + if (mirror_path) { + mirror = std::filesystem::absolute(*mirror_path) + .string(); } else { // if non-path, try each of the preferred hostnames @@ -811,8 +826,9 @@ auto CreateCommitGitMap( // get root for repo (making sure that if repo is a path, it is // absolute) std::string fetch_repo = key.repo_url; - if (GitURLIsPath(fetch_repo)) { - fetch_repo = std::filesystem::absolute(fetch_repo).string(); + auto fetch_repo_path = GitURLIsPath(fetch_repo); + if (fetch_repo_path) { + fetch_repo = std::filesystem::absolute(*fetch_repo_path).string(); } std::filesystem::path repo_root = StorageUtils::GetGitRoot(just_mr_paths, fetch_repo); -- cgit v1.2.3