1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
|
// Copyright 2023 Huawei Cloud Computing Technology Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_TPP
#define INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_TPP
// IWYU pragma: private, include "src/buildtool/storage/local_cas.hpp"
#include <cstddef>
#include <utility> // std::move
#include "fmt/core.h"
#include "src/buildtool/common/artifact_digest_factory.hpp"
#include "src/buildtool/common/bazel_types.hpp"
#include "src/buildtool/file_system/git_repo.hpp"
#include "src/buildtool/logging/log_level.hpp"
#include "src/buildtool/storage/local_cas.hpp"
/// \brief Uplink a blob from this generation to the latest generation.
/// If this generation does not hold the blob as a regular entry, a splice
/// from the large-object CAS is attempted.
/// \param latest         The latest generation to uplink to.
/// \param digest         The digest of the blob to uplink.
/// \param is_executable  Whether the executable variant of the blob is meant.
/// \param skip_sync      Use BlobPathNoSync instead of BlobPath for the lookup
///                       in this generation.
/// \param splice_result  Only relevant if the blob had to be spliced: if
///                       false, the spliced blob itself is not stored in the
///                       latest generation (the large entry is still
///                       uplinked).
/// \returns True if the blob is already in the latest generation, if it was
/// spliced and splice_result is false, or if storing it in the latest
/// generation succeeded; false otherwise.
template <bool kDoGlobalUplink>
template <bool kIsLocalGeneration>
    requires(kIsLocalGeneration)
auto LocalCAS<kDoGlobalUplink>::LocalUplinkBlob(
    LocalGenerationCAS const& latest,
    ArtifactDigest const& digest,
    bool is_executable,
    bool skip_sync,
    bool splice_result) const noexcept -> bool {
    // Nothing to do if the latest generation already knows the blob.
    if (latest.BlobPathNoSync(digest, is_executable)) {
        return true;
    }

    // Look the blob up in this generation; fall back to splicing it from
    // the large-object CAS if there is no regular entry.
    auto source_path = skip_sync ? BlobPathNoSync(digest, is_executable)
                                 : BlobPath(digest, is_executable);
    TmpFile::Ptr spliced_file;
    if (not source_path) {
        spliced_file = TrySplice<ObjectType::File>(digest);
        if (spliced_file == nullptr) {
            return false;
        }
        source_path = std::optional{spliced_file->GetPath()};
    }

    bool const was_spliced = spliced_file != nullptr;
    if (was_spliced) {
        // The outcome of uplinking the large entry is deliberately ignored:
        // it must not influence the overall result. Otherwise two sequential
        // calls to BlobPath could disagree: the first one splices and uplinks
        // the object but fails on the large entry, the second one finds the
        // object in the youngest generation and succeeds.
        std::ignore = LocalUplinkLargeObject<ObjectType::File>(latest, digest);
        if (not splice_result) {
            return true;
        }
    }

    // Store the blob in the latest generation.
    if (was_spliced and is_executable) {
        // During multithreaded splicing, the main process can be forked
        // (inheriting open file descriptors). An executable saved via
        // hardlinking would become inaccessible in that case, so executables
        // must be stored as copies made in a child process.
        auto const copied =
            latest.StoreBlob</*kOwner=*/false>(*source_path, is_executable);
        return copied.has_value();
    }
    auto const linked =
        latest.StoreBlob</*kOwner=*/true>(*source_path, is_executable);
    return linked.has_value();
}
/// \brief Uplink a tree from this generation to the latest generation.
/// Dispatches on the hash function type: native trees are handled by
/// LocalUplinkGitTree, all others by LocalUplinkBazelDirectory.
/// \param latest         The latest generation to uplink to.
/// \param digest         The digest of the tree to uplink.
/// \param splice_result  Forwarded unchanged to the selected implementation.
/// \returns The result of the selected implementation.
template <bool kDoGlobalUplink>
template <bool kIsLocalGeneration>
    requires(kIsLocalGeneration)
auto LocalCAS<kDoGlobalUplink>::LocalUplinkTree(
    LocalGenerationCAS const& latest,
    ArtifactDigest const& digest,
    bool splice_result) const noexcept -> bool {
    if (ProtocolTraits::IsNative(hash_function_.GetType())) {
        return LocalUplinkGitTree(latest, digest, splice_result);
    }
    // Bazel directories: track the directories processed during this call.
    std::unordered_set<ArtifactDigest> visited{};
    return LocalUplinkBazelDirectory(latest, digest, &visited, splice_result);
}
/// \brief Uplink a git tree and all of its entries from this generation to
/// the latest generation.
/// If this generation does not hold the tree as a regular entry, a splice
/// from the large-object CAS is attempted. Entries are uplinked before the
/// tree itself.
/// \param latest         The latest generation to uplink to.
/// \param digest         The digest of the tree to uplink.
/// \param splice_result  Only relevant if the tree had to be spliced: if
///                       false, the spliced tree itself is not stored in the
///                       latest generation (entries and the large entry are
///                       still uplinked).
/// \returns True if the tree is already in the latest generation, if it was
/// spliced and splice_result is false (with all entries uplinked), or if
/// storing it succeeded. False if the tree cannot be found, read or parsed,
/// or if uplinking any entry fails.
template <bool kDoGlobalUplink>
template <bool kIsLocalGeneration>
    requires(kIsLocalGeneration)
auto LocalCAS<kDoGlobalUplink>::LocalUplinkGitTree(
    LocalGenerationCAS const& latest,
    ArtifactDigest const& digest,
    bool splice_result) const noexcept -> bool {
    // Determine tree path in latest generation.
    if (latest.cas_tree_.BlobPath(digest)) {
        return true;
    }

    // Determine tree path of given generation.
    auto tree_path = cas_tree_.BlobPath(digest);
    TmpFile::Ptr spliced;
    if (not tree_path) {
        spliced = TrySplice<ObjectType::Tree>(digest);
        tree_path = spliced != nullptr ? std::optional{spliced->GetPath()}
                                       : std::nullopt;
    }
    if (not tree_path) {
        return false;
    }

    // Determine tree entries. A failed read must be reported as a failure
    // instead of dereferencing an empty optional.
    auto const tree_content = FileSystemManager::ReadFile(*tree_path);
    if (not tree_content) {
        return false;
    }

    // Accept symlinks only if each of them can be found (or spliced) in this
    // generation and its content passes PathIsNonUpwards.
    auto check_symlinks =
        [this](std::vector<ArtifactDigest> const& ids) -> bool {
        for (auto const& id : ids) {
            auto link_path = cas_file_.BlobPath(id);
            TmpFile::Ptr spliced_link;
            if (not link_path) {
                spliced_link = TrySplice<ObjectType::File>(id);
                link_path = spliced_link != nullptr
                                ? std::optional{spliced_link->GetPath()}
                                : std::nullopt;
            }
            if (not link_path) {
                return false;
            }
            // in the local CAS we store as files
            auto const link_content = FileSystemManager::ReadFile(*link_path);
            if (not link_content or not PathIsNonUpwards(*link_content)) {
                return false;
            }
        }
        return true;
    };
    auto const tree_entries = GitRepo::ReadTreeData(*tree_content,
                                                    digest.hash(),
                                                    check_symlinks,
                                                    /*is_hex_id=*/true);
    if (not tree_entries) {
        return false;
    }

    // Uplink tree entries.
    for (auto const& [raw_id, entry_vector] : *tree_entries) {
        // Process only first entry from 'entry_vector' since all
        // entries represent the same blob, just with different
        // names.
        auto const entry_type = entry_vector.front().type;
        auto const entry_digest =
            ArtifactDigestFactory::Create(hash_function_.GetType(),
                                          ToHexString(raw_id),
                                          0,
                                          IsTreeObject(entry_type));
        if (not entry_digest) {
            return false;
        }
        if (entry_digest->IsTree()) {
            if (not LocalUplinkGitTree(latest, *entry_digest)) {
                return false;
            }
        }
        else {
            if (not LocalUplinkBlob(latest,
                                    *entry_digest,
                                    IsExecutableObject(entry_type))) {
                return false;
            }
        }
    }

    if (spliced != nullptr) {
        // Uplink the large entry afterwards:
        // The result of uplinking of a large object must not affect the
        // result of uplinking in general. In other case, two sequential calls
        // to TreePath might return different results: The first call splices
        // and uplinks the object, but fails at large entry uplinking. The
        // second call finds the tree in the youngest generation and returns.
        std::ignore = LocalUplinkLargeObject<ObjectType::Tree>(latest, digest);
        if (not splice_result) {
            return true;
        }
    }

    // Uplink tree from older generation to the latest generation.
    return latest.cas_tree_.StoreBlobFromFile(*tree_path, /*is owner=*/true)
        .has_value();
}
/// \brief Uplink a bazel directory and all of its entries from this
/// generation to the latest generation.
/// If this generation does not hold the directory as a regular entry, a
/// splice from the large-object CAS is attempted. Files and sub-directories
/// are uplinked before the directory itself.
/// \param latest         The latest generation to uplink to.
/// \param digest         The digest of the bazel directory to uplink.
/// \param seen           Digests of directories that have already been
///                       uplinked successfully; extended on success.
/// \param splice_result  Only relevant if the directory had to be spliced: if
///                       false, the spliced directory itself is not stored in
///                       the latest generation.
/// \returns True if the directory is contained in seen or was uplinked
/// successfully. False if it cannot be found, read or parsed, if uplinking
/// any entry fails, if storing fails, or if recording it in seen throws.
template <bool kDoGlobalUplink>
template <bool kIsLocalGeneration>
    requires(kIsLocalGeneration)
auto LocalCAS<kDoGlobalUplink>::LocalUplinkBazelDirectory(
    LocalGenerationCAS const& latest,
    ArtifactDigest const& digest,
    gsl::not_null<std::unordered_set<ArtifactDigest>*> const& seen,
    bool splice_result) const noexcept -> bool {
    // Skip already uplinked directories
    if (seen->contains(digest)) {
        return true;
    }

    // Determine bazel directory path of given generation.
    auto dir_path = cas_tree_.BlobPath(digest);
    TmpFile::Ptr spliced;
    if (not dir_path) {
        spliced = TrySplice<ObjectType::Tree>(digest);
        dir_path = spliced != nullptr ? std::optional{spliced->GetPath()}
                                      : std::nullopt;
    }
    if (not dir_path) {
        return false;
    }

    // Determine bazel directory entries. A failed read must be reported as a
    // failure instead of dereferencing an empty optional.
    auto const content = FileSystemManager::ReadFile(*dir_path);
    if (not content) {
        return false;
    }
    bazel_re::Directory dir{};
    if (not dir.ParseFromString(*content)) {
        return false;
    }

    // Uplink bazel directory entries.
    for (auto const& file : dir.files()) {
        auto const file_digest = ArtifactDigestFactory::FromBazel(
            hash_function_.GetType(), file.digest());
        if (not file_digest) {
            return false;
        }
        if (not LocalUplinkBlob(latest, *file_digest, file.is_executable())) {
            return false;
        }
    }
    for (auto const& directory : dir.directories()) {
        auto const subdir_digest = ArtifactDigestFactory::FromBazel(
            hash_function_.GetType(), directory.digest());
        if (not subdir_digest) {
            return false;
        }
        if (not LocalUplinkBazelDirectory(latest, *subdir_digest, seen)) {
            return false;
        }
    }

    // Determine bazel directory path in latest generation.
    auto const dir_path_latest = latest.cas_tree_.BlobPath(digest);

    if (spliced != nullptr) {
        // Uplink the large entry afterwards:
        // The result of uplinking of a large object must not affect the
        // result of uplinking in general. In other case, two sequential
        // calls to TreePath might return different results: The first call
        // splices and uplinks the object, but fails at large entry
        // uplinking. The second call finds the tree in the youngest
        // generation and returns.
        std::ignore = LocalUplinkLargeObject<ObjectType::Tree>(latest, digest);
    }

    bool const skip_store = spliced != nullptr and not splice_result;
    // Uplink bazel directory from older generation to the latest
    // generation.
    if (skip_store or dir_path_latest.has_value() or
        latest.cas_tree_.StoreBlobFromFile(*dir_path,
                                           /*is_owner=*/true)) {
        // Inserting into the set may throw; this function is noexcept, so
        // report that as a failure.
        try {
            seen->emplace(digest);
            return true;
        } catch (...) {
            return false;
        }
    }
    return false;
}
/// \brief Uplink the large-object entry of a digest to the latest generation.
/// Forwards to LocalUplink of the large-object CAS that matches kType (file
/// or tree), passing the corresponding large-object CAS of the latest
/// generation as target.
/// \tparam kType   Type of the large object.
/// \param latest   The latest generation to uplink to.
/// \param digest   The digest of the large object.
/// \returns The result of the underlying LocalUplink call.
template <bool kDoGlobalUplink>
template <ObjectType kType, bool kIsLocalGeneration>
    requires(kIsLocalGeneration)
auto LocalCAS<kDoGlobalUplink>::LocalUplinkLargeObject(
    LocalGenerationCAS const& latest,
    ArtifactDigest const& digest) const noexcept -> bool {
    if constexpr (not IsTreeObject(kType)) {
        auto const& target = latest.cas_file_large_;
        return cas_file_large_.LocalUplink(latest, target, digest);
    }
    else {
        auto const& target = latest.cas_tree_large_;
        return cas_tree_large_.LocalUplink(latest, target, digest);
    }
}
/// \brief Try to splice an object from the large-object CAS matching kType.
/// \tparam kType   Type of the object to splice (tree or file).
/// \param digest   The digest of the object to splice.
/// \returns The temporary file returned by the large-object CAS on success,
/// nullptr if the splice attempt did not produce a value.
template <bool kDoGlobalUplink>
template <ObjectType kType, bool kIsLocalGeneration>
    requires(kIsLocalGeneration)
auto LocalCAS<kDoGlobalUplink>::TrySplice(
    ArtifactDigest const& digest) const noexcept -> TmpFile::Ptr {
    auto result = IsTreeObject(kType) ? cas_tree_large_.TrySplice(digest)
                                      : cas_file_large_.TrySplice(digest);
    if (not result.has_value()) {
        return nullptr;
    }
    return result.value();
}
/// \brief Check that every entry of a tree is available in the storage.
/// For non-native hash function types no check is performed.
/// \param tree_digest  The digest of the tree to check.
/// \param tree_data    The content of the tree.
/// \returns std::nullopt if the check passed or was skipped; otherwise an
/// error describing why the entries could not be read or which entry is
/// invalid or missing.
template <bool kDoGlobalUplink>
auto LocalCAS<kDoGlobalUplink>::CheckTreeInvariant(
    ArtifactDigest const& tree_digest,
    std::string const& tree_data) const noexcept
    -> std::optional<LargeObjectError> {
    auto const hash_type = hash_function_.GetType();
    if (not ProtocolTraits::IsNative(hash_type)) {
        return std::nullopt;
    }

    // Symlinks are not validated here: the checker accepts everything.
    auto accept_all_symlinks = [](auto const& /*unused*/) { return true; };
    auto const tree_entries = GitRepo::ReadTreeData(tree_data,
                                                    tree_digest.hash(),
                                                    accept_all_symlinks,
                                                    /*is_hex_id=*/true);
    if (not tree_entries) {
        return LargeObjectError{
            LargeObjectErrorCode::Internal,
            fmt::format("could not read entries of the tree {}",
                        tree_digest.hash())};
    }

    // Ensure all entries are in the storage:
    for (auto const& [raw_id, items] : *tree_entries) {
        for (auto const& item : items) {
            bool const is_tree = IsTreeObject(item.type);
            auto const entry_digest =
                ArtifactDigestFactory::Create(hash_type,
                                              ToHexString(raw_id),
                                              0,  // size unknown
                                              is_tree);
            if (not entry_digest) {
                return LargeObjectError{
                    LargeObjectErrorCode::InvalidTree,
                    fmt::format("tree invariant violated {}:\n {}",
                                tree_digest.hash(),
                                entry_digest.error())};
            }

            // To avoid splicing during search, large CASes are inspected
            // first; the regular lookup is only the fallback.
            bool found = false;
            if (is_tree) {
                found = cas_tree_large_.GetEntryPath(*entry_digest) or
                        TreePath(*entry_digest);
            }
            else {
                found = cas_file_large_.GetEntryPath(*entry_digest) or
                        BlobPath(*entry_digest, IsExecutableObject(item.type));
            }
            if (found) {
                continue;
            }
            return LargeObjectError{
                LargeObjectErrorCode::InvalidTree,
                fmt::format("tree invariant violated {} : missing part {}",
                            tree_digest.hash(),
                            entry_digest->hash())};
        }
    }
    return std::nullopt;
}
/// \brief Check the tree invariant for a tree stored in a file.
/// Reads the file and forwards its content to the overload taking the tree
/// data as a string.
/// \param tree_digest  The digest of the tree to check.
/// \param file         Path of the file holding the tree content.
/// \returns An internal error if the file cannot be read; otherwise the
/// result of the string-based overload.
template <bool kDoGlobalUplink>
auto LocalCAS<kDoGlobalUplink>::CheckTreeInvariant(
    ArtifactDigest const& tree_digest,
    std::filesystem::path const& file) const noexcept
    -> std::optional<LargeObjectError> {
    if (auto const data = FileSystemManager::ReadFile(file)) {
        return CheckTreeInvariant(tree_digest, *data);
    }
    return LargeObjectError{
        LargeObjectErrorCode::Internal,
        fmt::format("could not read tree {}", tree_digest.hash())};
}
/// \brief Splice an object from its parts and store it in the CAS.
/// \tparam kType   Type of the object to splice (file, executable or tree).
/// \param digest   The expected digest of the resulting object.
/// \param parts    The digests of the parts to splice the object from.
/// \returns The digest of the stored object on success. An error if splicing
/// fails, if the digest of the spliced result cannot be computed or differs
/// from the expected one, if the tree invariant is violated (native trees
/// only), or if the result cannot be stored.
template <bool kDoGlobalUplink>
template <ObjectType kType>
auto LocalCAS<kDoGlobalUplink>::Splice(ArtifactDigest const& digest,
                                       std::vector<ArtifactDigest> const& parts)
    const noexcept -> expected<ArtifactDigest, LargeObjectError> {
    static constexpr bool kIsTree = IsTreeObject(kType);
    static constexpr bool kIsExec = IsExecutableObject(kType);

    // Check file is spliced already:
    if (kIsTree ? TreePath(digest) : BlobPath(digest, kIsExec)) {
        return digest;
    }

    // Splice the result from parts:
    auto splice_result = kIsTree ? cas_tree_large_.Splice(digest, parts)
                                 : cas_file_large_.Splice(digest, parts);
    if (not splice_result) {
        return unexpected{std::move(splice_result).error()};
    }
    auto const& large_object = *splice_result;

    // Check digest consistency:
    // Using Store{Tree, Blob} to calculate the resulting hash and later
    // decide whether the result is valid is unreasonable, because these
    // methods can refer to a file that existed before. The direct hash
    // calculation is done instead.
    auto const& file_path = large_object->GetPath();
    auto const spliced_digest =
        ArtifactDigestFactory::HashFileAs<kType>(hash_function_, file_path);
    if (not spliced_digest) {
        return unexpected{LargeObjectError{LargeObjectErrorCode::Internal,
                                           "could not calculate digest"}};
    }
    if (*spliced_digest != digest) {
        return unexpected{LargeObjectError{
            LargeObjectErrorCode::InvalidResult,
            fmt::format("actual result {} differs from the expected one {}",
                        spliced_digest->hash(),
                        digest.hash())}};
    }

    // Check tree invariants:
    if constexpr (kIsTree) {
        // Checked here as well, so that the spliced file is not read at all
        // for non-native hash function types.
        if (ProtocolTraits::IsNative(hash_function_.GetType())) {
            if (auto error = CheckTreeInvariant(digest, file_path)) {
                return unexpected{std::move(*error)};
            }
        }
    }

    static constexpr bool kOwner = true;
    // Intentionally non-const: a const object cannot be moved from, so the
    // std::move below would silently degrade to a copy.
    auto stored_digest = kIsTree ? StoreTree<kOwner>(file_path)
                                 : StoreBlob<kOwner>(file_path, kIsExec);
    if (not stored_digest) {
        return unexpected{LargeObjectError{
            LargeObjectErrorCode::Internal,
            fmt::format("could not splice {}", digest.hash())}};
    }
    return *std::move(stored_digest);
}
#endif // INCLUDED_SRC_BUILDTOOL_STORAGE_LOCAL_CAS_TPP
|