diff options
author | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2023-02-09 18:31:41 +0100 |
---|---|---|
committer | Paul Cristian Sarbu <paul.cristian.sarbu@huawei.com> | 2023-03-03 16:20:49 +0100 |
commit | 23f51ef2062b09c9212d1b1bb7325df4b1bb014f (patch) | |
tree | 90fb1d226f282c0f868ef82399896e891d4aa6dc /src | |
parent | 16b04bcb5f8a84409ae1a864882d768acac9a680 (diff) | |
download | justbuild-23f51ef2062b09c9212d1b1bb7325df4b1bb014f.tar.gz |
utils: Add utility class for handling URLs
It uses libcurl to parse URLs. The current version contains
the methods needed for correctly matching gitconfig
entries and no_proxy-style environment variable patterns.
Diffstat (limited to 'src')
-rw-r--r-- | src/other_tools/utils/TARGETS | 9 | ||||
-rw-r--r-- | src/other_tools/utils/curl_url_handle.cpp | 787 | ||||
-rw-r--r-- | src/other_tools/utils/curl_url_handle.hpp | 164 |
3 files changed, 960 insertions, 0 deletions
diff --git a/src/other_tools/utils/TARGETS b/src/other_tools/utils/TARGETS index 883ab841..cf80bc47 100644 --- a/src/other_tools/utils/TARGETS +++ b/src/other_tools/utils/TARGETS @@ -29,4 +29,13 @@ , ["", "libcurl"] ] } +, "curl_url_handle": + { "type": ["@", "rules", "CC", "library"] + , "name": ["curl_url_handle"] + , "hdrs": ["curl_url_handle.hpp"] + , "srcs": ["curl_url_handle.cpp"] + , "deps": ["curl_context", ["@", "gsl-lite", "", "gsl-lite"]] + , "stage": ["src", "other_tools", "utils"] + , "private-deps": [["src/buildtool/logging", "logging"], ["", "libcurl"]] + } } diff --git a/src/other_tools/utils/curl_url_handle.cpp b/src/other_tools/utils/curl_url_handle.cpp new file mode 100644 index 00000000..8895bf84 --- /dev/null +++ b/src/other_tools/utils/curl_url_handle.cpp @@ -0,0 +1,787 @@ +// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "src/other_tools/utils/curl_url_handle.hpp" + +#include <regex> +#include <sstream> + +#include "src/buildtool/logging/logger.hpp" + +extern "C" { +#include "curl/curl.h" +} + +void curl_url_closer(gsl::owner<CURLU*> handle) { + curl_url_cleanup(handle); +} + +namespace { + +/// \brief Compares the two hosts as '.'-delimited substrings until there is a +/// mismatch. Wildcard ("*") matches any substring. +/// Returns a success flag. 
[[nodiscard]] auto HostsMatch(std::string const& key_host,
                              std::string const& url_host) noexcept -> bool {
    // Both hosts are walked label by label, in lockstep, directly on the
    // input strings. No temporary token containers or streams are created, so
    // nothing in here allocates and the noexcept promise genuinely holds
    // (an allocation failure in a noexcept function would call
    // std::terminate instead of reaching the callers' error handling).
    //
    // Label boundaries follow the std::getline(..., '.') convention:
    //   - an empty host has no labels at all;
    //   - a single trailing '.' does not open a new (empty) label;
    //   - leading or doubled dots do yield empty labels.
    using size_type = std::string::size_type;
    auto const npos = std::string::npos;

    size_type key_pos{0};
    size_type url_pos{0};
    while (true) {
        bool const key_done = key_pos >= key_host.size();
        bool const url_done = url_pos >= url_host.size();
        if (key_done or url_done) {
            // hosts match only if they have the same number of labels, i.e.,
            // both ran out of labels at the same time
            return key_done and url_done;
        }

        // locate the end of the current label in each host
        auto const key_dot = key_host.find('.', key_pos);
        auto const url_dot = url_host.find('.', url_pos);
        size_type const key_end = key_dot == npos ? key_host.size() : key_dot;
        size_type const url_end = url_dot == npos ? url_host.size() : url_dot;
        size_type const key_len = key_end - key_pos;
        size_type const url_len = url_end - url_pos;

        // a key label consisting of exactly "*" matches any url label;
        // otherwise the labels must be identical
        bool const is_wildcard = key_len == 1 and key_host[key_pos] == '*';
        if (not is_wildcard and
            key_host.compare(key_pos, key_len, url_host, url_pos, url_len) !=
                0) {
            return false;
        }

        // step over the delimiter (if any) to the start of the next label
        key_pos = key_dot == npos ? key_host.size() : key_dot + 1;
        url_pos = url_dot == npos ? url_host.size() : url_dot + 1;
    }
}

/// \brief Compares the two paths as '/'-delimited substrings until there is a
/// mismatch or the end of the key path.
/// Returns the size of the key path if match successful, otherwise nullopt.
+[[nodiscard]] auto PathMatchSize(std::string const& key_path, + std::string const& url_path) noexcept + -> std::optional<size_t> { + // split key path + std::vector<std::string> key_tokens{}; + std::string token{}; + std::istringstream iss(key_path); + while (std::getline(iss, token, '/')) { + key_tokens.emplace_back(token); + } + // split url path + std::vector<std::string> url_tokens{}; + iss = std::istringstream{url_path}; + while (std::getline(iss, token, '/')) { + url_tokens.emplace_back(token); + } + // key path should not have more tokens than the url path + if (key_tokens.size() > url_tokens.size()) { + return std::nullopt; + } + // check for substring mismatch + auto key_it = key_tokens.begin(); + auto url_it = url_tokens.begin(); + for (; key_it != key_tokens.end(); ++key_it, ++url_it) { + if (*key_it != *url_it) { + return std::nullopt; + } + } + // on success, return size of key path + return key_path.size(); +} + +/// \brief Parses the given string according to the scheme: +/// [[.]<dot-separated-host-prefixes>.]<domain>[:<port>] +/// The parsing ignores a single leading '.' character, if present. +/// Does not perform any other validity check (e.g., for port value). +[[nodiscard]] auto ParseNoproxyPattern(std::string const& pattern) noexcept + -> NoproxyPattern { + // get the host part + std::string host{}; + std::istringstream iss(pattern); + std::getline(iss, host, ':'); // stop at port part or end of string + // check if port part exists + std::optional<std::string> port{std::nullopt}; + if (host.size() != pattern.size()) { + port = std::string( + pattern.begin() + + static_cast<std::string::difference_type>(host.size()) + 1, + pattern.end()); + } + // remove one leading '.' 
char from host part, if present + if (host[0] == '.') { + host = std::string(host.begin() + 1, host.end()); + } + // split the host part + std::vector<std::string> host_tokens{}; + std::string token{}; + iss = std::istringstream(host); + while (std::getline(iss, token, '.')) { + host_tokens.emplace_back(token); + } + return NoproxyPattern{host_tokens, port}; +} + +/// \brief Check whether a given test pattern matches a target pattern with +/// respect to the matching rules for the no_proxy envariable. +[[nodiscard]] auto NoproxyPatternMatches(NoproxyPattern const& test_pattern, + NoproxyPattern const& target_pattern) + -> bool { + // check if port matches, if given + if (test_pattern.port and test_pattern.port != target_pattern.port) { + return false; + } + // host tokens must exist + if (test_pattern.host_tokens.empty() or + target_pattern.host_tokens.empty() or + test_pattern.host_tokens.size() > target_pattern.host_tokens.size()) { + return false; + } + // check if the host/domain substrings match, in reverse order + auto test_it = test_pattern.host_tokens.end() - 1; + auto target_it = target_pattern.host_tokens.end() - 1; + for (; test_it != test_pattern.host_tokens.begin() - 1; + --test_it, --target_it) { + if (*test_it != *target_it) { + return false; + } + } + return true; +} + +} // namespace + +auto CurlURLHandle::Create(std::string const& url) noexcept + -> std::optional<CurlURLHandlePtr> { + try { + auto url_h = std::make_shared<CurlURLHandle>(); + auto* handle = curl_url(); + // try to parse the given url + auto rc = curl_url_set(handle, CURLUPART_URL, url.c_str(), 0U); + if (rc != CURLUE_OK) { + Logger::Log(LogLevel::Debug, + "CurlURLHandle: parsing URL failed with:\n{}", + curl_url_strerror(rc)); + curl_url_cleanup(handle); + return nullptr; + } + url_h->handle_.reset(handle); + return std::make_optional<CurlURLHandlePtr>(url_h); + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: creating curl URL handle failed " + 
"unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} + +auto CurlURLHandle::CreatePermissive(std::string const& url, + bool use_guess_scheme, + bool use_default_scheme, + bool use_non_support_scheme, + bool use_no_authority, + bool use_path_as_is, + bool use_allow_space) noexcept + -> std::optional<CurlURLHandlePtr> { + try { + auto url_h = std::make_shared<CurlURLHandle>(); + auto* handle = curl_url(); + // set up flags + // NOLINTNEXTLINE(hicpp-signed-bitwise) + auto flags{use_guess_scheme ? CURLU_GUESS_SCHEME : 0U}; + if (use_default_scheme) { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + flags |= CURLU_DEFAULT_SCHEME; + } + if (use_non_support_scheme) { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + flags |= CURLU_NON_SUPPORT_SCHEME; + } + if (use_no_authority) { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + flags |= CURLU_NO_AUTHORITY; + } + if (use_path_as_is) { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + flags |= CURLU_PATH_AS_IS; + } + if (use_allow_space) { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + flags |= CURLU_ALLOW_SPACE; + } + // try to parse the given url + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + auto rc = curl_url_set(handle, CURLUPART_URL, url.c_str(), flags); + if (rc != CURLUE_OK) { + Logger::Log( + LogLevel::Debug, + "CurlURLHandle: parsing URL permissively failed with:\n{}", + curl_url_strerror(rc)); + curl_url_cleanup(handle); + return nullptr; + } + url_h->handle_.reset(handle); + return std::make_optional<CurlURLHandlePtr>(url_h); + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: creating permissive curl URL handle failed " + "unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} + +auto CurlURLHandle::Duplicate() noexcept -> CurlURLHandlePtr { + try { + auto url_h = std::make_shared<CurlURLHandle>(); + url_h->handle_.reset(curl_url_dup(handle_.get())); + return url_h; + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + 
"CurlURLHandle: duplicating curl URL handle failed " + "unexpectedly with:\n{}", + ex.what()); + return nullptr; + } +} + +auto CurlURLHandle::GetURL(bool use_default_port, + bool use_default_scheme, + bool use_no_default_port) noexcept + -> std::optional<std::string> { + try { + // set up flags + // NOLINTNEXTLINE(hicpp-signed-bitwise) + auto flags{use_default_port ? CURLU_DEFAULT_PORT : 0U}; + if (use_default_scheme) { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + flags |= CURLU_DEFAULT_SCHEME; + } + if (use_no_default_port) { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + flags |= CURLU_NO_DEFAULT_PORT; + } + // get the URL + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url = nullptr; + auto rc = curl_url_get(handle_.get(), CURLUPART_URL, &url, flags); + if (rc != CURLUE_OK) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving URL failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + std::string url_str{url}; + // free memory + curl_free(url); + return url_str; + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, + "CurlURLHandle: retrieving URL failed unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} + +auto CurlURLHandle::GetScheme(bool use_default_scheme) noexcept + -> std::optional<OptionalString> { + try { + // NOLINTNEXTLINE(hicpp-signed-bitwise) + auto flags{use_default_scheme ? 
CURLU_DEFAULT_SCHEME : 0U}; + // get the scheme + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* scheme = nullptr; + auto rc = curl_url_get(handle_.get(), CURLUPART_SCHEME, &scheme, flags); + if (rc != CURLUE_OK and rc != CURLUE_NO_SCHEME) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving scheme failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + auto res = OptionalString{std::nullopt}; + if (rc != CURLUE_NO_SCHEME) { + res = OptionalString{std::string{scheme}}; + } + // free memory + curl_free(scheme); + return res; + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, + "CurlURLHandle: retrieving scheme failed unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} + +auto CurlURLHandle::GetConfigStructFromKey(std::string const& key) noexcept + -> std::optional<GitConfigKeyPtr> { + try { + auto parsed_key = Create(key); + if (not parsed_key) { + return std::nullopt; // report exception + } + if (*parsed_key == nullptr) { + return nullptr; // unparsable key + } + // populate all useful components + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* field = nullptr; + auto* h = parsed_key.value()->handle_.get(); + auto gconfig = std::make_shared<GitConfigKey>(); + + auto rc = curl_url_get(h, CURLUPART_SCHEME, &field, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_SCHEME) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving scheme in get config struct " + "failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + if (rc != CURLUE_NO_SCHEME) { + gconfig->scheme = std::string(field); + } + curl_free(field); + field = nullptr; + + rc = curl_url_get(h, CURLUPART_USER, &field, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_USER) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving user in get config struct " + "failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + if (rc != CURLUE_NO_USER) { + 
gconfig->user = std::string(field); + } + curl_free(field); + field = nullptr; + + rc = curl_url_get(h, CURLUPART_HOST, &field, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_HOST) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving host in get config struct " + "failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + if (rc != CURLUE_NO_HOST) { + gconfig->host = std::string(field); + } + curl_free(field); + field = nullptr; + + rc = curl_url_get(h, + CURLUPART_PORT, + &field, + // NOLINTNEXTLINE(hicpp-signed-bitwise) + CURLU_DEFAULT_PORT); // enforce port existence + if (rc != CURLUE_OK and rc != CURLUE_NO_PORT) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving port in get config struct " + "failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + if (rc != CURLUE_NO_PORT) { + gconfig->port = std::string(field); + } + curl_free(field); + field = nullptr; + + // stored path will contain also query and fragment, if existing, and + // must end with a '/' + rc = curl_url_get(h, CURLUPART_PATH, &field, 0U); + if (rc != CURLUE_OK) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving path in get config struct " + "failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + auto running_path = std::filesystem::path{"/"} / std::string(field); + curl_free(field); + field = nullptr; + + rc = curl_url_get(h, CURLUPART_QUERY, &field, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_QUERY) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving query in get config struct " + "failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + if (rc != CURLUE_NO_QUERY) { + running_path += std::string("?") + std::string(field); + } + curl_free(field); + field = nullptr; + + rc = curl_url_get(h, CURLUPART_FRAGMENT, &field, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_FRAGMENT) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving fragment in get config " + "struct failed 
with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + if (rc != CURLUE_NO_FRAGMENT) { + running_path += std::string("#") + std::string(field); + } + curl_free(field); + + running_path /= ""; // make sure it ends with a '/' + gconfig->path = running_path; + + return gconfig; + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: get config struct from parsed key failed " + "unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} + +auto CurlURLHandle::ParseConfigKey(std::string const& key) noexcept + -> std::optional<GitConfigKeyPtr> { + try { + // if key has no asterisks, parse as usual + if (key.find('*') == std::string::npos) { + return GetConfigStructFromKey(key); + } + + // replace all '*' wildcards with '.' + std::string tmp_key{key}; + std::replace(tmp_key.begin(), tmp_key.end(), '*', '.'); + + // parse and extract hostname + auto tmp_parsed = Create(tmp_key); + if (not tmp_parsed) { + return std::nullopt; // exception + } + if (tmp_parsed == nullptr) { + return nullptr; // unparsable + } + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* host_ptr = nullptr; + auto rc = curl_url_get( + tmp_parsed.value()->handle_.get(), CURLUPART_HOST, &host_ptr, 0U); + if (rc != CURLUE_OK) { + Logger::Log( + LogLevel::Error, + "CurlURLHandle: retrieving host in parse config key failed " + "with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + std::string parsed_host{host_ptr}; + curl_free(host_ptr); // release memory + + // create regex to find all possible matches of the parsed host in the + // original key, where any '.' 
can also be a '*' + std::stringstream pattern{}; + size_t old_index{}; + size_t index{}; + while ((index = parsed_host.find('.', old_index)) != + std::string::npos) { + pattern << parsed_host.substr(old_index, index - old_index); + pattern << R"([\.\*])"; + old_index = index + 1; + } + pattern << parsed_host.substr(old_index); + std::regex re(pattern.str()); + + // for every match, replace the parsed host in the found position and + // try to parse as usual + size_t host_len = parsed_host.length(); + for (auto it = std::sregex_iterator(key.begin(), key.end(), re); + it != std::sregex_iterator(); + ++it) { + std::string new_key{key}; + new_key.replace( + static_cast<size_t>(it->position()), host_len, parsed_host); + + // try to parse new key + auto try_config_key = GetConfigStructFromKey(new_key); + if (try_config_key and *try_config_key != nullptr) { + // replace the parsed hostname with the match + try_config_key.value()->host = it->str(); + return try_config_key; + } + } + // no match was parsable + return nullptr; + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, + "CurlURLHandle: parse config key failed unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} + +auto CurlURLHandle::MatchConfigKey(std::string const& key) noexcept + -> std::optional<ConfigKeyMatchDegree> { + try { + size_t host_len{}; + bool user_matched{false}; + + // parse the given key + auto parsed_key = ParseConfigKey(key); + if (not parsed_key) { + return std::nullopt; // an exception occurred that shouldn't have + } + if (*parsed_key == nullptr) { + return ConfigKeyMatchDegree{}; // non-parsable, so return no match + } + + // check that scheme matches + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_scheme = nullptr; + auto rc = + curl_url_get(handle_.get(), CURLUPART_SCHEME, &url_scheme, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_SCHEME) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url scheme in 
matching " + "config key failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + auto url_scheme_str = url_scheme == nullptr + ? std::nullopt + : std::make_optional<std::string>(url_scheme); + curl_free(url_scheme); + + if (parsed_key.value()->scheme != url_scheme_str) { + return ConfigKeyMatchDegree{}; // mismatch + } + + // check the user, if the config key has the field + if (parsed_key.value()->user) { + // check the user field in stored URL + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_user = nullptr; + rc = curl_url_get(handle_.get(), CURLUPART_USER, &url_user, 0U); + if (rc != CURLUE_OK) { // if key has user field, url must as well + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url user in matching " + "config key failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + auto url_user_str = url_user == nullptr + ? std::nullopt + : std::make_optional<std::string>(url_user); + curl_free(url_user); + + if (not url_user_str or parsed_key.value()->user != *url_user_str) { + return ConfigKeyMatchDegree{}; // mismatch + } + // signal the match + user_matched = true; + } + + // check that host/domain name matches + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_host = nullptr; + rc = curl_url_get(handle_.get(), CURLUPART_HOST, &url_host, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_HOST) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url host in matching " + "config key failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + auto url_host_str = url_host == nullptr + ? 
std::nullopt + : std::make_optional<std::string>(url_host); + curl_free(url_host); + + if (parsed_key.value()->host != url_host_str) { + if (not(parsed_key.value()->host and url_host_str and + HostsMatch(parsed_key.value()->host.value(), + *url_host_str))) { + return ConfigKeyMatchDegree{}; // mismatch + } + } + // store matched host length + host_len += + parsed_key.value()->host ? parsed_key.value()->host->size() : 0U; + + // check port match; get with default value if not existing + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_port = nullptr; + rc = curl_url_get(handle_.get(), + CURLUPART_PORT, + &url_port, + // NOLINTNEXTLINE(hicpp-signed-bitwise) + CURLU_DEFAULT_PORT); // enforce port existence + if (rc != CURLUE_OK) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url port in matching " + "config key failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + auto url_port_str = url_port == nullptr + ? std::nullopt + : std::make_optional<std::string>(url_port); + curl_free(url_port); + + if (parsed_key.value()->port != url_port_str) { + return ConfigKeyMatchDegree{}; // mismatch + } + + // check path match; this is done up to any '/'-delimited prefix; + // we need the complete path, so path + query + fragment (if existing) + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_path = nullptr; + rc = curl_url_get(handle_.get(), CURLUPART_PATH, &url_path, 0U); + if (rc != CURLUE_OK) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url path in matching " + "config key failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + // parsed path is never empty + auto url_path_str = std::filesystem::path{"/"} / std::string(url_path); + curl_free(url_path); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_query = nullptr; + rc = curl_url_get(handle_.get(), CURLUPART_QUERY, &url_query, 0U); + if (rc != CURLUE_OK and rc != 
CURLUE_NO_QUERY) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url query in matching " + "config key failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + // append to path + url_path_str += url_query == nullptr + ? std::string() + : std::string("?") + std::string(url_query); + curl_free(url_query); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_fragment = nullptr; + rc = curl_url_get(handle_.get(), CURLUPART_FRAGMENT, &url_fragment, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_FRAGMENT) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url fragment in matching " + "config key failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + // append to path + url_path_str += url_fragment == nullptr + ? std::string() + : std::string("#") + std::string(url_fragment); + curl_free(url_fragment); + + // make sure path ends with '/' for comparison purposes + url_path_str /= ""; + + auto path_len = PathMatchSize(parsed_key.value()->path.string(), + url_path_str.string()); + if (not path_len) { + return ConfigKeyMatchDegree{}; // paths do not match + } + + // key matches; success! 
+ return ConfigKeyMatchDegree{ + true /*matched*/, host_len, *path_len, user_matched}; + } catch (std::exception const& ex) { + Logger::Log( + LogLevel::Error, + "CurlURLHandle: match config key failed unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} + +auto CurlURLHandle::NoproxyStringMatches(std::string const& no_proxy) noexcept + -> std::optional<bool> { + try { + // split no_proxy string by both spaces and commas + std::vector<std::string> patterns{}; + std::string token1{}; + std::istringstream iss1(no_proxy); + // split by spaces + while (std::getline(iss1, token1, ' ')) { + std::istringstream iss2(token1); + std::string token2{}; + // for each such token, split by commas + while (std::getline(iss2, token2, ',')) { + if (not token2.empty()) { + patterns.emplace_back(token2); + } + } + } + // get the stored URL host (mandatory) and port (optional) as a + // NoproxyPattern object + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_host = nullptr; + auto rc = curl_url_get(handle_.get(), CURLUPART_HOST, &url_host, 0U); + if (rc != CURLUE_OK) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url host in no_proxy string " + "matching failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + std::string tmp_pattern{url_host}; + curl_free(url_host); + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg,hicpp-vararg) + char* url_port = nullptr; + rc = curl_url_get(handle_.get(), CURLUPART_PORT, &url_port, 0U); + if (rc != CURLUE_OK and rc != CURLUE_NO_PORT) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: retrieving url port in no_proxy string " + "matching failed with:\n{}", + curl_url_strerror(rc)); + return std::nullopt; + } + // it's simpler to (re)use the existing pattern parser + if (url_port != nullptr) { + tmp_pattern += ":"; + tmp_pattern += std::string(url_port); + } + auto url_hostport_as_pattern = ParseNoproxyPattern(tmp_pattern); + + // check for match with any pattern + for 
(auto const& pattern : patterns) { + // ignore an empty pattern + if (pattern.empty()) { + continue; + } + // check for trivial wildcard + if (pattern == "*") { + return true; + } + // parse pattern and check for match + auto parsed_pattern = ParseNoproxyPattern(pattern); + if (NoproxyPatternMatches(parsed_pattern, + url_hostport_as_pattern)) { + return true; + } + } + return false; + } catch (std::exception const& ex) { + Logger::Log(LogLevel::Error, + "CurlURLHandle: no_proxy string matching failed " + "unexpectedly with:\n{}", + ex.what()); + return std::nullopt; + } +} diff --git a/src/other_tools/utils/curl_url_handle.hpp b/src/other_tools/utils/curl_url_handle.hpp new file mode 100644 index 00000000..d7192142 --- /dev/null +++ b/src/other_tools/utils/curl_url_handle.hpp @@ -0,0 +1,164 @@ +// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef INCLUDED_SRC_OTHER_TOOLS_UTILS_CURL_URL_HANDLE_HPP +#define INCLUDED_SRC_OTHER_TOOLS_UTILS_CURL_URL_HANDLE_HPP + +#include <filesystem> +#include <functional> +#include <memory> +#include <optional> +#include <vector> + +#include "gsl-lite/gsl-lite.hpp" +#include "src/other_tools/utils/curl_context.hpp" + +extern "C" { +using CURLU = struct Curl_URL; +} + +class CurlURLHandle; +using CurlURLHandlePtr = std::shared_ptr<CurlURLHandle>; + +/// \brief Type describing a possibly missing string. Used to store a retrieved +/// field of a parsed URL. 
+using OptionalString = std::optional<std::string>; + +void curl_url_closer(gsl::owner<CURLU*> handle); + +struct GitConfigKey { + OptionalString scheme{std::nullopt}; + OptionalString user{std::nullopt}; + // might contain wildcards + OptionalString host{std::nullopt}; + OptionalString port{std::nullopt}; + // will include query and fragment, if existing + std::filesystem::path path{"/"}; +}; + +using GitConfigKeyPtr = std::shared_ptr<GitConfigKey>; + +/// \brief Structure storing the information needed to quantify precedence with +/// respect to the gitconfig keys matching rules. +/// Non-default values are set ONLY if matching rules are satisfied. +struct ConfigKeyMatchDegree { + // if a matching happened; + bool matched{false}; + // length of config key's host field if host was matched + size_t host_len{}; + // length of config key's path field if path was matched; + // comparison ends on a '/' char or the end of the path + size_t path_len{}; + // signals a match for the user field between config key and remote URL, + // only if user field exists in config key + bool user_matched{false}; +}; + +/// \brief Stores the components of a valid no_proxy envariable pattern +struct NoproxyPattern { + // stores the substrings of the host portion of the pattern, obtained by + // splitting with delimiter '.' + std::vector<std::string> host_tokens{}; + // port number as string, or nullopt if port missing + std::optional<std::string> port; +}; + +/// \brief Class handling URLs using libcurl API. +/// As with libcurl, only limited checks are performed in order to parse the +/// required fields for a given URL string. 
+class CurlURLHandle { + public: + CurlURLHandle() noexcept = default; + ~CurlURLHandle() noexcept = default; + + // prohibit moves & copies + CurlURLHandle(CurlURLHandle const&) = delete; + CurlURLHandle(CurlURLHandle&& other) = delete; + auto operator=(CurlURLHandle const&) = delete; + auto operator=(CurlURLHandle&& other) = delete; + + /// \brief Creates a CurlURLHandle object by parsing the given URL. + /// It performs also a normalization step of the path. Requires the protocol + /// to be explicitly specified, i.e., it must have a non-empty scheme field. + /// Returns nullptr on failure to parse, and nullopt on an unexpected + /// exception. + [[nodiscard]] auto static Create(std::string const& url) noexcept + -> std::optional<CurlURLHandlePtr>; + + /// \brief Creates a CurlURLHandle object by parsing the given URL. + /// It allows the user to be very permissive with the types of URL strings + /// it can parse by providing configuration arguments that mirror those + /// provided by the libcurl API (see libcurl docs for effects of each flag). + /// Returns nullptr on failure to parse with given arguments, and nullopt on + /// an unexpected exception. + [[nodiscard]] auto static CreatePermissive( + std::string const& url, + bool use_guess_scheme = false, + bool use_default_scheme = false, + bool use_non_support_scheme = false, + bool use_no_authority = false, + bool use_path_as_is = false, + bool use_allow_space = false) noexcept + -> std::optional<CurlURLHandlePtr>; + + /// \brief Creates a duplicate CurlURLHandle object. + /// Returns nullptr on errors. + [[nodiscard]] auto Duplicate() noexcept -> CurlURLHandlePtr; + + /// \brief Recomposes the URL from the fields in the stored handle. + /// Flags parallel the libcurl API for handling the scheme and port fields. + /// Returns the recomposed URL as a string, or nullopt on errors. 
+ [[nodiscard]] auto GetURL(bool use_default_port = false, + bool use_default_scheme = false, + bool use_no_default_port = false) noexcept + -> std::optional<std::string>; + + /// \brief Gets the parsed scheme field. + [[nodiscard]] auto GetScheme(bool use_default_scheme = false) noexcept + -> std::optional<OptionalString>; + + /// \brief While libcurl's URL API correctly checks that valid hostnames + /// don't contain special characters, gitconfig key URLs (*.<key>.*) allow + /// asterisks ('*'). This function recognizes such hostnames and returns a + /// struct containing all the relevant parsed fields required for matching. + /// Returns nullopt if errors, nullptr if unparsable. + [[nodiscard]] auto static ParseConfigKey(std::string const& key) noexcept + -> std::optional<GitConfigKeyPtr>; + + /// \brief Parses a given gitconfig key url component (e.g., http.<key>.*) + /// and returns to what degree it matches the stored URL. + /// In particular, a non-parsable key returns a non-match. + /// Returns nullopt on errors. + [[nodiscard]] auto MatchConfigKey(std::string const& key) noexcept + -> std::optional<ConfigKeyMatchDegree>; + + /// \brief Checks if the stored URL matches a given "no_proxy"-style string. + /// Returns whether a match was found, or nullopt on errors. + [[nodiscard]] auto NoproxyStringMatches( + std::string const& no_proxy) noexcept -> std::optional<bool>; + + private: + // IMPORTANT: the CurlContext must be initialized before any curl + // object! + CurlContext curl_context_{}; + std::unique_ptr<CURLU, decltype(&curl_url_closer)> handle_{nullptr, + curl_url_closer}; + + /// \brief Try to parse the given key as a valid URL and, if successful, + /// populate a struct with the parsed components needed for config matching. + [[nodiscard]] auto static GetConfigStructFromKey( + std::string const& key) noexcept -> std::optional<GitConfigKeyPtr>; +}; + +#endif // INCLUDED_SRC_OTHER_TOOLS_UTILS_CURL_URL_HANDLE_HPP |