// Copyright 2024 Huawei Cloud Computing Technology Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "src/buildtool/storage/compactifier.hpp"
#include <algorithm>
#include <array>
#include <filesystem>
#include <functional>
#include <optional>
#include <variant>
#include <vector>
#include "src/buildtool/common/bazel_types.hpp"
#include "src/buildtool/crypto/hash_function.hpp"
#include "src/buildtool/crypto/hasher.hpp"
#include "src/buildtool/file_system/file_system_manager.hpp"
#include "src/buildtool/file_system/object_cas.hpp"
#include "src/buildtool/file_system/object_type.hpp"
#include "src/buildtool/logging/log_level.hpp"
#include "src/buildtool/logging/logger.hpp"
#include "src/buildtool/storage/compactification_task.hpp"
#include "src/buildtool/storage/local_cas.hpp"
#include "src/utils/cpp/hex_string.hpp"
namespace {
/// \brief Remove invalid entries from the key directory. The directory itself
/// can be removed too, if it has an invalid name.
/// A task is keyed by a two-letter directory name and the type of the storage
/// being checked.
/// \tparam kType Type of the storage to inspect.
/// \param task Owning compactification task.
/// \param key Key directory to inspect.
/// \return True if the key directory contains no invalid entries afterwards.
template <ObjectType kType>
[[nodiscard]] auto RemoveInvalid(CompactificationTask const& task,
std::filesystem::path const& key) noexcept
-> bool;

/// \brief Remove spliced entries from the kType... storages.
/// A task is keyed by a directory name consisting of two letters in the
/// kLargeType large storage, and the corresponding kType... object storages
/// are checked.
/// \tparam kLargeType Type of the large storage to scan.
/// \tparam kType Types of the object storages to inspect.
/// \param task Owning compactification task.
/// \param key Key directory to inspect.
/// \return True if the key directory contains no spliced entries afterwards.
template <ObjectType kLargeType, ObjectType... kType>
requires(sizeof...(kType) != 0)
[[nodiscard]] auto RemoveSpliced(CompactificationTask const& task,
std::filesystem::path const& key) noexcept
-> bool;

/// \brief Split and remove a key entry from the kType storage. Results of
/// splitting are added to the LocalCAS.
/// \tparam kType Type of the storage to inspect.
/// \param task Owning compactification task.
/// \param key Key file to split.
/// \return True if the file was split successfully.
template <ObjectType kType>
[[nodiscard]] auto SplitLarge(CompactificationTask const& task,
std::filesystem::path const& key) noexcept
-> bool;
} // namespace
auto Compactifier::RemoveInvalid(LocalCAS<false> const& cas) noexcept -> bool {
auto logger = [](LogLevel level, std::string const& msg) {
Logger::Log(
level, "Compactification: Removal of invalid files:\n{}", msg);
};
    // Collect the storage subdirectories and remove files and directories
    // with invalid names.
    // The number of files in the storage is in general unbounded, so
    // parallelization is done per subdirectory to avoid running out of
    // memory.
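    // The f/x/t subtasks process the subdirectories of the file, executable,
    // and tree storages, respectively.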
CompactificationTask task{.cas = cas,
.large = false,
.logger = logger,
.filter = FileSystemManager::IsDirectory,
.f_task = ::RemoveInvalid<ObjectType::File>,
.x_task = ::RemoveInvalid<ObjectType::Executable>,
.t_task = ::RemoveInvalid<ObjectType::Tree>};
return CompactifyConcurrently(task);
}
auto Compactifier::RemoveSpliced(LocalCAS<false> const& cas) noexcept -> bool {
auto logger = [](LogLevel level, std::string const& msg) {
Logger::Log(
level, "Compactification: Removal of spliced files:\n{}", msg);
};
    // Collect the large-storage subdirectories and remove object-storage
    // files that have corresponding large entries.
    // The number of files in the storage is in general unbounded, so
    // parallelization is done per subdirectory to avoid running out of
    // memory.
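    // The spliced result of a large file may reside in either the file or the
    // executable storage, so both are checked; spliced trees reside only in
    // the tree storage.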
CompactificationTask task{
.cas = cas,
.large = true,
.logger = logger,
.filter = FileSystemManager::IsDirectory,
.f_task = ::RemoveSpliced<ObjectType::File,
ObjectType::File,
ObjectType::Executable>,
.t_task = ::RemoveSpliced<ObjectType::Tree, ObjectType::Tree>};
return CompactifyConcurrently(task);
}
auto Compactifier::SplitLarge(LocalCAS<false> const& cas,
size_t threshold) noexcept -> bool {
auto logger = [](LogLevel level, std::string const& msg) {
Logger::Log(level, "Compactification: Splitting:\n{}", msg);
};
    // Collect files larger than the threshold and split them.
    // Concurrently scanning a directory and adding new entries to it may
    // cause the scan to fail. To avoid that, parallelization is done per
    // file, although this may result in running out of memory.
CompactificationTask task{
.cas = cas,
.large = false,
.logger = logger,
.filter =
[threshold](std::filesystem::path const& path) {
return not FileSystemManager::IsDirectory(path) and
std::filesystem::file_size(path) >= threshold;
},
.f_task = ::SplitLarge<ObjectType::File>,
.x_task = ::SplitLarge<ObjectType::Executable>,
.t_task = ::SplitLarge<ObjectType::Tree>};
return CompactifyConcurrently(task);
}
namespace {
template <ObjectType kType>
[[nodiscard]] auto RemoveInvalid(CompactificationTask const& task,
std::filesystem::path const& key) noexcept
-> bool {
auto const directory = task.cas.StorageRoot(kType) / key;
// Check there are entries to process:
if (not FileSystemManager::IsDirectory(directory)) {
return true;
}
// Calculate reference hash size:
auto const kHashSize = HashFunction::Hasher().GetHashLength();
static constexpr size_t kDirNameSize = 2;
auto const kFileNameSize = kHashSize - kDirNameSize;
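    // A valid entry name is the object hash without its first two
    // characters, which form the name of the key directory.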
// Check the directory itself is valid:
std::string const d_name = directory.filename();
if (d_name.size() != kDirNameSize or not FromHexString(d_name)) {
static constexpr bool kRecursively = true;
if (FileSystemManager::RemoveDirectory(directory, kRecursively)) {
return true;
}
task.Log(LogLevel::Error,
"Failed to remove invalid directory {}",
directory.string());
return false;
}
FileSystemManager::ReadDirEntryFunc callback =
[&task, &directory, kFileNameSize](std::filesystem::path const& file,
ObjectType type) -> bool {
// Directories are unexpected in storage subdirectories
if (IsTreeObject(type)) {
task.Log(LogLevel::Error,
"There is a directory in a storage subdirectory: {}",
(directory / file).string());
return false;
}
        // Check the file has a hexadecimal name of length kFileNameSize:
std::string const f_name = file.filename();
if (f_name.size() == kFileNameSize and FromHexString(f_name)) {
return true;
}
auto const path = directory / file;
if (not FileSystemManager::RemoveFile(path)) {
task.Log(LogLevel::Error,
"Failed to remove invalid entry {}",
path.string());
return false;
}
return true;
};
// Read the key storage directory:
if (not FileSystemManager::ReadDirectory(directory, callback)) {
task.Log(LogLevel::Error, "Failed to read {}", directory.string());
return false;
}
return true;
}
template <ObjectType kLargeType, ObjectType... kType>
requires(sizeof...(kType) != 0)
[[nodiscard]] auto RemoveSpliced(CompactificationTask const& task,
std::filesystem::path const& key) noexcept
-> bool {
static constexpr bool kLarge = true;
auto const directory = task.cas.StorageRoot(kLargeType, kLarge) / key;
// Check there are entries to process:
if (not FileSystemManager::IsDirectory(directory)) {
return true;
}
// Obtain paths to the corresponding key directories in the object storages.
std::array const storage_roots{task.cas.StorageRoot(kType) / key...};
FileSystemManager::ReadDirEntryFunc callback =
[&storage_roots, &task, &directory](
std::filesystem::path const& entry_large, ObjectType type) -> bool {
// Directories are unexpected in storage subdirectories
if (IsTreeObject(type)) {
task.Log(LogLevel::Error,
"There is a directory in a storage subdirectory: {}",
(directory / entry_large).string());
return false;
}
        // Paths to large entries and their spliced results are:
        //     large_storage / entry_large
        //          storage / entry_object
        //
        // Large objects are keyed by the hash of their spliced result, so for
        // spliceable objects entry_large and entry_object are the same.
        // Thus, to check the existence of the spliced result, it is enough to
        // check the existence of { storage / entry_large }:
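        // Remove the spliced result from the given object storage; a missing
        // entry is not an error.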
auto check = [&entry_large](std::filesystem::path const& storage) {
std::filesystem::path file_path = storage / entry_large;
return not FileSystemManager::IsFile(file_path) or
FileSystemManager::RemoveFile(file_path);
};
return std::all_of(storage_roots.begin(), storage_roots.end(), check);
};
// Read the key storage directory:
if (not FileSystemManager::ReadDirectory(directory, callback)) {
task.Log(LogLevel::Error, "Failed to read {}", directory.string());
return false;
}
return true;
}
template <ObjectType kType>
[[nodiscard]] auto SplitLarge(CompactificationTask const& task,
std::filesystem::path const& key) noexcept
-> bool {
auto const path = task.cas.StorageRoot(kType) / key;
// Check the entry exists:
if (not FileSystemManager::IsFile(path)) {
return true;
}
// Calculate the digest for the entry:
auto const digest = ObjectCAS<kType>::CreateDigest(path);
if (not digest) {
task.Log(LogLevel::Error,
"Failed to calculate digest for {}",
path.string());
return false;
}
// Split the entry:
auto split_result = IsTreeObject(kType) ? task.cas.SplitTree(*digest)
: task.cas.SplitBlob(*digest);
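    // Splitting yields either the digests of the resulting parts or an error.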
auto* parts = std::get_if<std::vector<bazel_re::Digest>>(&split_result);
if (parts == nullptr) {
auto* error = std::get_if<LargeObjectError>(&split_result);
auto const error_message = error ? std::move(*error).Message() : "";
task.Log(LogLevel::Error,
"Failed to split {}\nDigest: {}\nMessage: {}",
path.string(),
digest->hash(),
error_message);
return false;
}
// If the file cannot actually be split (the threshold is too low), the
// file must not be deleted.
if (parts->size() < 2) {
task.Log(LogLevel::Debug,
"{} cannot be compactified. The compactification "
"threshold is too low.",
digest->hash());
return true;
}
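    // The parts have been added to the CAS, so the original entry can now be
    // removed.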
if (not FileSystemManager::RemoveFile(path)) {
task.Log(LogLevel::Error, "Failed to remove {}", path.string());
return false;
}
return true;
}
} // namespace