Diffstat (limited to 'src/other_tools/root_maps/content_git_map.cpp')
-rw-r--r--  src/other_tools/root_maps/content_git_map.cpp  808
1 file changed, 336 insertions(+), 472 deletions(-)
diff --git a/src/other_tools/root_maps/content_git_map.cpp b/src/other_tools/root_maps/content_git_map.cpp
index 344fec0b..924f678b 100644
--- a/src/other_tools/root_maps/content_git_map.cpp
+++ b/src/other_tools/root_maps/content_git_map.cpp
@@ -415,140 +415,150 @@ auto CreateContentGitMap(
});
}
else {
- if (key.absent) {
- // check if content already in CAS
- auto const& cas = Storage::Instance().CAS();
- auto digest = ArtifactDigest(key.archive.content, 0, false);
- if (auto content_cas_path =
- cas.BlobPath(digest, /*is_executable=*/false)) {
- ExtractAndImportToGit(*content_cas_path,
- archive_tree_id_file,
- key.repo_type,
- key.archive.content,
- key.subdir,
- key.pragma_special,
- key.absent,
- fetch_absent,
- import_to_git_map,
- resolve_symlinks_map,
- ts,
- setter,
- logger);
- // done
- return;
- }
- JustMRProgress::Instance().TaskTracker().Start(
+ // check if content already in CAS
+ auto const& cas = Storage::Instance().CAS();
+ auto digest = ArtifactDigest(key.archive.content, 0, false);
+ if (auto content_cas_path =
+ cas.BlobPath(digest, /*is_executable=*/false)) {
+ ExtractAndImportToGit(*content_cas_path,
+ archive_tree_id_file,
+ key.repo_type,
+ key.archive.content,
+ key.subdir,
+ key.pragma_special,
+ key.absent,
+ fetch_absent,
+ import_to_git_map,
+ resolve_symlinks_map,
+ ts,
+ setter,
+ logger);
+ // done
+ return;
+ }
+ JustMRProgress::Instance().TaskTracker().Start(key.archive.origin);
+ // add distfile to CAS
+ auto repo_distfile =
+ (key.archive.distfile
+ ? key.archive.distfile.value()
+ : std::filesystem::path(key.archive.fetch_url)
+ .filename()
+ .string());
+ StorageUtils::AddDistfileToCAS(repo_distfile, just_mr_paths);
+ // check if content is in CAS now
+ if (auto content_cas_path =
+ cas.BlobPath(digest, /*is_executable=*/false)) {
+ JustMRProgress::Instance().TaskTracker().Stop(
key.archive.origin);
- // add disfile to CAS
- auto repo_distfile =
- (key.archive.distfile
- ? key.archive.distfile.value()
- : std::filesystem::path(key.archive.fetch_url)
- .filename()
- .string());
- StorageUtils::AddDistfileToCAS(repo_distfile, just_mr_paths);
- // check if content is in CAS now
- if (auto content_cas_path =
- cas.BlobPath(digest, /*is_executable=*/false)) {
- JustMRProgress::Instance().TaskTracker().Stop(
- key.archive.origin);
- ExtractAndImportToGit(*content_cas_path,
- archive_tree_id_file,
- key.repo_type,
- key.archive.content,
+ ExtractAndImportToGit(*content_cas_path,
+ archive_tree_id_file,
+ key.repo_type,
+ key.archive.content,
+ key.subdir,
+ key.pragma_special,
+ key.absent,
+ fetch_absent,
+ import_to_git_map,
+ resolve_symlinks_map,
+ ts,
+ setter,
+ logger);
+ // done
+ return;
+ }
+ // check if content is known to remote serve service
+ if (serve_api_exists) {
+ // if purely absent, request the resolved subdir tree directly
+ if (key.absent and not fetch_absent) {
+ if (auto tree_id = ServeApi::RetrieveTreeFromArchive(
+ key.archive.content,
+ key.repo_type,
+ key.subdir,
+ key.pragma_special,
+ /*sync_tree = */ false)) {
+ // set the workspace root as absent
+ JustMRProgress::Instance().TaskTracker().Stop(
+ key.archive.origin);
+ (*setter)(
+ std::pair(nlohmann::json::array(
+ {FileRoot::kGitTreeMarker, *tree_id}),
+ /*is_cache_hit = */ false));
+ return;
+ }
+ // give warning
+ (*logger)(fmt::format("Tree at subdir {} for archive {} "
+ "could not be served",
key.subdir,
- key.pragma_special,
- key.absent,
- fetch_absent,
- import_to_git_map,
- resolve_symlinks_map,
- ts,
- setter,
- logger);
- // done
- return;
+ key.archive.content),
+ /*fatal=*/false);
}
- // check if content is known to remote serve service
- if (serve_api_exists) {
- // if fetching absent, request (and sync) the whole archive
- // tree, UNRESOLVED, to ensure we maintain the id file
- // association
- if (fetch_absent) {
- if (auto root_tree_id =
- ServeApi::RetrieveTreeFromArchive(
- key.archive.content,
- key.repo_type,
- /*subdir = */ ".",
- /* resolve_symlinks = */ std::nullopt,
- /*sync_tree = */ true)) {
- // ensure Git cache
- // define Git operation to be done
- GitOpKey op_key = {
- .params =
- {
- StorageConfig::
- GitRoot(), // target_path
- "", // git_hash
- "", // branch
- std::nullopt, // message
- true // init_bare
- },
- .op_type = GitOpType::ENSURE_INIT};
- critical_git_op_map->ConsumeAfterKeysReady(
- ts,
- {std::move(op_key)},
- [root_tree_id,
- repo_type = key.repo_type,
- subdir = key.subdir,
- archive_tree_id_file,
- content = key.archive.content,
- fetch_url = key.archive.fetch_url,
- mirrors = key.archive.mirrors,
- sha256 = key.archive.sha256,
- sha512 = key.archive.sha512,
- pragma_special = key.pragma_special,
- additional_mirrors,
- ca_info,
- origin = key.archive.origin,
- absent = key.absent,
- fetch_absent,
- import_to_git_map,
- resolve_symlinks_map,
- local_api,
- remote_api,
- ts,
- setter,
- logger](auto const& values) {
- GitOpValue op_result = *values[0];
- // check flag
- if (not op_result.result) {
- (*logger)("Git init failed",
- /*fatal=*/true);
- return;
- }
- // verify if we know the tree already
- // locally
- auto just_git_cas =
- GitCAS::Open(StorageConfig::GitRoot());
- if (not just_git_cas) {
- (*logger)(
- "Could not open Git cache object "
- "database!",
- /*fatal=*/true);
- return;
- }
- auto just_git_repo =
- GitRepoRemote::Open(just_git_cas);
- if (not just_git_repo) {
- (*logger)(
- "Could not open Git cache "
- "repository!",
- /*fatal=*/true);
- return;
- }
- // setup wrapped logger
- auto wrapped_logger = std::make_shared<
- AsyncMapConsumerLogger>(
+ // otherwise, request (and sync) the whole archive tree,
+ // UNRESOLVED, to ensure we maintain the id file association
+ else {
+ if (auto root_tree_id = ServeApi::RetrieveTreeFromArchive(
+ key.archive.content,
+ key.repo_type,
+ /*subdir = */ ".",
+ /* resolve_symlinks = */ std::nullopt,
+ /*sync_tree = */ true)) {
+ // ensure Git cache
+ // define Git operation to be done
+ GitOpKey op_key = {
+ .params =
+ {
+ StorageConfig::GitRoot(), // target_path
+ "", // git_hash
+ "", // branch
+ std::nullopt, // message
+ true // init_bare
+ },
+ .op_type = GitOpType::ENSURE_INIT};
+ critical_git_op_map->ConsumeAfterKeysReady(
+ ts,
+ {std::move(op_key)},
+ [root_tree_id,
+ archive_tree_id_file,
+ key,
+ additional_mirrors,
+ ca_info,
+ fetch_absent,
+ content_cas_map,
+ import_to_git_map,
+ resolve_symlinks_map,
+ local_api,
+ remote_api,
+ ts,
+ setter,
+ logger](auto const& values) {
+ GitOpValue op_result = *values[0];
+ // check flag
+ if (not op_result.result) {
+ (*logger)("Git init failed",
+ /*fatal=*/true);
+ return;
+ }
+ // verify if we already know the tree locally
+ auto just_git_cas =
+ GitCAS::Open(StorageConfig::GitRoot());
+ if (not just_git_cas) {
+ (*logger)(
+ "Could not open Git cache object "
+ "database!",
+ /*fatal=*/true);
+ return;
+ }
+ auto just_git_repo =
+ GitRepoRemote::Open(just_git_cas);
+ if (not just_git_repo) {
+ (*logger)(
+ "Could not open Git cache "
+ "repository!",
+ /*fatal=*/true);
+ return;
+ }
+ // setup wrapped logger
+ auto wrapped_logger =
+ std::make_shared<AsyncMapConsumerLogger>(
[&logger, tree = *root_tree_id](
auto const& msg, bool fatal) {
(*logger)(
@@ -559,370 +569,222 @@ auto CreateContentGitMap(
msg),
fatal);
});
- auto tree_present =
- just_git_repo->CheckTreeExists(
- *root_tree_id, wrapped_logger);
- if (not tree_present) {
+ auto tree_present =
+ just_git_repo->CheckTreeExists(
+ *root_tree_id, wrapped_logger);
+ if (not tree_present) {
+ return;
+ }
+ if (*tree_present) {
+ JustMRProgress::Instance()
+ .TaskTracker()
+ .Stop(key.archive.origin);
+ // write to id file and process subdir tree
+ WriteIdFileAndSetWSRoot(
+ *root_tree_id,
+ just_git_cas,
+ archive_tree_id_file,
+ key.archive.content,
+ key.subdir,
+ key.pragma_special,
+ key.absent,
+ fetch_absent,
+ resolve_symlinks_map,
+ ts,
+ setter,
+ logger);
+ // done
+ return;
+ }
+ // try to get root tree from remote execution
+ // endpoint
+ auto root_digest = ArtifactDigest{
+ *root_tree_id, 0, /*is_tree=*/true};
+ if (remote_api != nullptr and
+ local_api != nullptr and
+ remote_api->RetrieveToCas(
+ {Artifact::ObjectInfo{
+ .digest = root_digest,
+ .type = ObjectType::Tree}},
+ local_api)) {
+ JustMRProgress::Instance()
+ .TaskTracker()
+ .Stop(key.archive.origin);
+ // Move tree from CAS to local git storage
+ auto tmp_dir =
+ StorageUtils::CreateTypedTmpDir(
+ "fetch-absent-root");
+ if (not tmp_dir) {
+ (*logger)(
+ fmt::format(
+ "Failed to create tmp "
+ "directory after fetching root "
+ "tree {} for absent archive {}",
+ *root_tree_id,
+ key.archive.content),
+ true);
return;
}
- if (*tree_present) {
+ if (not local_api->RetrieveToPaths(
+ {Artifact::ObjectInfo{
+ .digest = root_digest,
+ .type = ObjectType::Tree}},
+ {tmp_dir->GetPath()})) {
+ (*logger)(
+ fmt::format(
+ "Failed to copy fetched root "
+ "tree {} to {}",
+ *root_tree_id,
+ tmp_dir->GetPath().string()),
+ true);
+ return;
+ }
+ CommitInfo c_info{tmp_dir->GetPath(),
+ "tree",
+ *root_tree_id};
+ import_to_git_map->ConsumeAfterKeysReady(
+ ts,
+ {std::move(c_info)},
+ [tmp_dir, // keep tmp_dir alive
+ root_tree_id,
+ content = key.archive.content,
+ subdir = key.subdir,
+ pragma_special = key.pragma_special,
+ absent = key.absent,
+ fetch_absent,
+ just_git_cas,
+ archive_tree_id_file,
+ resolve_symlinks_map,
+ ts,
+ setter,
+ logger](auto const& values) {
+ if (not values[0]->second) {
+ (*logger)(
+ "Importing to git failed",
+ /*fatal=*/true);
+ return;
+ }
+ // write to id file and process
+ // subdir tree
+ WriteIdFileAndSetWSRoot(
+ *root_tree_id,
+ just_git_cas,
+ archive_tree_id_file,
+ content,
+ subdir,
+ pragma_special,
+ absent,
+ fetch_absent,
+ resolve_symlinks_map,
+ ts,
+ setter,
+ logger);
+ },
+ [logger, tmp_dir, root_tree_id](
+ auto const& msg, bool fatal) {
+ (*logger)(
+ fmt::format(
+ "While moving root tree {} "
+ "from {} to local git:\n{}",
+ *root_tree_id,
+ tmp_dir->GetPath().string(),
+ msg),
+ fatal);
+ });
+ // done
+ return;
+ }
+ // try to fetch content from network
+ content_cas_map->ConsumeAfterKeysReady(
+ ts,
+ {key.archive},
+ [archive_tree_id_file,
+ archive_origin = key.archive.origin,
+ repo_type = key.repo_type,
+ content_id = key.archive.content,
+ subdir = key.subdir,
+ pragma_special = key.pragma_special,
+ absent = key.absent,
+ fetch_absent,
+ import_to_git_map,
+ resolve_symlinks_map,
+ ts,
+ setter,
+ logger](
+ [[maybe_unused]] auto const& values) {
JustMRProgress::Instance()
.TaskTracker()
- .Stop(origin);
- // write to id file and process subdir
- // tree
- WriteIdFileAndSetWSRoot(
- *root_tree_id,
- just_git_cas,
+ .Stop(archive_origin);
+ // content is in CAS
+ auto const& cas =
+ Storage::Instance().CAS();
+ auto content_cas_path =
+ cas.BlobPath(
+ ArtifactDigest(
+ content_id, 0, false),
+ /*is_executable=*/false)
+ .value();
+ ExtractAndImportToGit(
+ content_cas_path,
archive_tree_id_file,
- content,
+ repo_type,
+ content_id,
subdir,
pragma_special,
absent,
fetch_absent,
+ import_to_git_map,
resolve_symlinks_map,
ts,
setter,
logger);
- // done
- return;
- }
- // try to get root tree from remote
- // execution endpoint
- auto root_digest = ArtifactDigest{
- *root_tree_id, 0, /*is_tree=*/true};
- if (remote_api != nullptr and
- local_api != nullptr and
- remote_api->RetrieveToCas(
- {Artifact::ObjectInfo{
- .digest = root_digest,
- .type = ObjectType::Tree}},
- local_api)) {
- JustMRProgress::Instance()
- .TaskTracker()
- .Stop(origin);
- // Move tree from CAS to local git
- // storage
- auto tmp_dir =
- StorageUtils::CreateTypedTmpDir(
- "fetch-absent-root");
- if (not tmp_dir) {
- (*logger)(
- fmt::format(
- "Failed to create tmp "
- "directory after fetching "
- "root tree {} for absent "
- "archive {}",
- *root_tree_id,
- content),
- true);
- return;
- }
- if (not local_api->RetrieveToPaths(
- {Artifact::ObjectInfo{
- .digest = root_digest,
- .type = ObjectType::Tree}},
- {tmp_dir->GetPath()})) {
- (*logger)(
- fmt::format("Failed to copy "
- "fetched root tree "
- "{} to {}",
- *root_tree_id,
- tmp_dir->GetPath()
- .string()),
- true);
- return;
- }
- CommitInfo c_info{tmp_dir->GetPath(),
- "tree",
- *root_tree_id};
- import_to_git_map
- ->ConsumeAfterKeysReady(
- ts,
- {std::move(c_info)},
- [tmp_dir, // keep tmp_dir alive
- root_tree_id,
- content,
- subdir,
- pragma_special,
- absent,
- fetch_absent,
- just_git_cas,
- archive_tree_id_file,
- resolve_symlinks_map,
- ts,
- setter,
- logger](auto const& values) {
- if (not values[0]->second) {
- (*logger)(
- "Importing to git "
- "failed",
- /*fatal=*/true);
- return;
- }
- // write to id file and
- // process subdir tree
- WriteIdFileAndSetWSRoot(
- *root_tree_id,
- just_git_cas,
- archive_tree_id_file,
- content,
- subdir,
- pragma_special,
- absent,
- fetch_absent,
- resolve_symlinks_map,
- ts,
- setter,
- logger);
- },
- [logger, tmp_dir, root_tree_id](
- auto const& msg,
- bool fatal) {
- (*logger)(
- fmt::format(
- "While moving root "
- "tree {} from {} "
- "to local git:\n{}",
- *root_tree_id,
- tmp_dir->GetPath()
- .string(),
- msg),
- fatal);
- });
- // done
- return;
- }
- // just serve should have made the tree
- // available in the remote CAS, so log this
- // attempt and revert to network fetch
- (*logger)(
- fmt::format("Tree {} marked as served, "
- "but not found on remote",
- *root_tree_id),
- /*fatal=*/false);
- // revert to network fetch and import_to_git
- // before any network fetching, check that
- // mandatory fields are provided
- if (fetch_url.empty()) {
- (*logger)(
- "Failed to provide archive fetch "
- "url!",
- /*fatal=*/true);
- return;
- }
- // now do the actual fetch
- auto data = NetworkFetchWithMirrors(
- fetch_url,
- mirrors,
- ca_info,
- additional_mirrors);
- if (not data) {
+ },
+ [logger, content = key.archive.content](
+ auto const& msg, bool fatal) {
(*logger)(
- fmt::format("Failed to fetch a "
- "file with content {} "
- "from provided remotes",
- content),
- /*fatal=*/true);
- return;
- }
- // check content wrt checksums
- if (sha256) {
- auto actual_sha256 = GetContentHash<
- Hasher::HashType::SHA256>(*data);
- if (actual_sha256 != sha256.value()) {
- (*logger)(
- fmt::format(
- "SHA256 mismatch for {}: "
- "expected {}, got {}",
- fetch_url,
- sha256.value(),
- actual_sha256),
- /*fatal=*/true);
- return;
- }
- }
- if (sha512) {
- auto actual_sha512 = GetContentHash<
- Hasher::HashType::SHA512>(*data);
- if (actual_sha512 != sha512.value()) {
- (*logger)(
- fmt::format(
- "SHA512 mismatch for {}: "
- "expected {}, got {}",
- fetch_url,
- sha512.value(),
- actual_sha512),
- /*fatal=*/true);
- return;
- }
- }
- // add the fetched data to CAS
- auto path = StorageUtils::AddToCAS(*data);
- // check one last time if content is in CAS
- // now
- if (not path) {
- (*logger)(fmt::format(
- "Failed to fetch a file "
- "with id {} from {}",
- content,
- fetch_url),
- /*fatal=*/true);
- return;
- }
- JustMRProgress::Instance()
- .TaskTracker()
- .Stop(origin);
- ExtractAndImportToGit(*path,
- archive_tree_id_file,
- repo_type,
- content,
- subdir,
- pragma_special,
- absent,
- fetch_absent,
- import_to_git_map,
- resolve_symlinks_map,
- ts,
- setter,
- logger);
- },
- [logger,
- target_path = StorageConfig::GitRoot()](
- auto const& msg, bool fatal) {
- (*logger)(fmt::format(
- "While running critical Git "
- "op ENSURE_INIT for "
- "target {}:\n{}",
- target_path.string(),
- msg),
- fatal);
- });
- // done
- return;
- }
- // give warning
- (*logger)(fmt::format("Root tree for content {} could "
- "not be served",
- key.archive.content),
- /*fatal=*/false);
- }
- // if not fetching absent, request the resolved subdir
- // tree directly
- else {
- if (auto tree_id = ServeApi::RetrieveTreeFromArchive(
- key.archive.content,
- key.repo_type,
- key.subdir,
- key.pragma_special,
- /*sync_tree = */ false)) {
- // set the workspace root as absent
- JustMRProgress::Instance().TaskTracker().Stop(
- key.archive.origin);
- (*setter)(std::pair(
- nlohmann::json::array(
- {FileRoot::kGitTreeMarker, *tree_id}),
- /*is_cache_hit = */ false));
- return;
- }
- // give warning
- (*logger)(fmt::format("Tree at subdir {} for archive "
- "{} could not be served",
- key.subdir,
- key.archive.content),
- /*fatal=*/false);
- }
- }
- else {
- if (not fetch_absent) {
- // give warning
- (*logger)(fmt::format("Missing serve endpoint for "
- "content {} marked absent "
- "requires slower network fetch.",
- key.archive.content),
- /*fatal=*/false);
- }
- }
- // revert to network fetch and import_to_git
- // before any network fetching, check that mandatory fields
- // are provided
- if (key.archive.fetch_url.empty()) {
- (*logger)("Failed to provide archive fetch url!",
- /*fatal=*/true);
- return;
- }
- // now do the actual fetch
- auto data = NetworkFetch(key.archive.fetch_url, ca_info);
- if (data == std::nullopt) {
- (*logger)(fmt::format("Failed to fetch a file with id "
- "{} from {}",
- key.archive.content,
- key.archive.fetch_url),
- /*fatal=*/true);
- return;
- }
- // check content wrt checksums
- if (key.archive.sha256) {
- auto actual_sha256 =
- GetContentHash<Hasher::HashType::SHA256>(*data);
- if (actual_sha256 != key.archive.sha256.value()) {
- (*logger)(fmt::format("SHA256 mismatch for {}: "
- "expected {}, got {}",
- key.archive.fetch_url,
- key.archive.sha256.value(),
- actual_sha256),
- /*fatal=*/true);
- return;
- }
- }
- if (key.archive.sha512) {
- auto actual_sha512 =
- GetContentHash<Hasher::HashType::SHA512>(*data);
- if (actual_sha512 != key.archive.sha512.value()) {
- (*logger)(fmt::format("SHA512 mismatch for {}: "
- "expected {}, got {}",
- key.archive.fetch_url,
- key.archive.sha512.value(),
- actual_sha512),
- /*fatal=*/true);
+ fmt::format("While ensuring "
+ "content {} is in "
+ "CAS:\n{}",
+ content,
+ msg),
+ fatal);
+ });
+ },
+ [logger, target_path = StorageConfig::GitRoot()](
+ auto const& msg, bool fatal) {
+ (*logger)(fmt::format(
+ "While running critical Git op "
+ "ENSURE_INIT for target {}:\n{}",
+ target_path.string(),
+ msg),
+ fatal);
+ });
+ // done
return;
}
+ // give warning
+ (*logger)(fmt::format("Root tree for content {} could not "
+ "be served",
+ key.archive.content),
+ /*fatal=*/false);
}
- // add the fetched data to CAS
- auto path = StorageUtils::AddToCAS(*data);
- // check one last time if content is in CAS now
- if (not path) {
- (*logger)(fmt::format("Failed to fetch a file with id "
- "{} from {}",
- key.archive.content,
- key.archive.fetch_url),
- /*fatal=*/true);
- return;
+ }
+ else {
+ if (key.absent) {
+ // give warning
+ (*logger)(fmt::format("Missing serve endpoint for content "
+ "{} marked absent requires slower "
+ "network fetch.",
+ key.archive.content),
+ /*fatal=*/false);
}
- JustMRProgress::Instance().TaskTracker().Stop(
- key.archive.origin);
- ExtractAndImportToGit(*path,
- archive_tree_id_file,
- key.repo_type,
- key.archive.content,
- key.subdir,
- key.pragma_special,
- key.absent,
- fetch_absent,
- import_to_git_map,
- resolve_symlinks_map,
- ts,
- setter,
- logger);
- // done
- return;
}
- // if not marked absent, do regular fetch to CAS and
- // import_to_git
+ // revert to network fetch
content_cas_map->ConsumeAfterKeysReady(
ts,
{key.archive},
[archive_tree_id_file,
+ archive_origin = key.archive.origin,
repo_type = key.repo_type,
content_id = key.archive.content,
subdir = key.subdir,
@@ -934,6 +796,8 @@ auto CreateContentGitMap(
ts,
setter,
logger]([[maybe_unused]] auto const& values) {
+ JustMRProgress::Instance().TaskTracker().Stop(
+ archive_origin);
// content is in CAS
auto const& cas = Storage::Instance().CAS();
auto content_cas_path =