summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKlaus Aehlig <klaus.aehlig@huawei.com>2024-04-12 10:29:11 +0200
committerKlaus Aehlig <klaus.aehlig@huawei.com>2024-04-12 11:59:01 +0200
commit7ee931b51e4f02313c86e9c5f7c64aadbc4b03de (patch)
tree4c79b16277c092f4dc2d74ec977ebd985aa19b91 /src
parent920dbcad30333ea91e34c8c5da07bb6f499c4925 (diff)
downloadjustbuild-7ee931b51e4f02313c86e9c5f7c64aadbc4b03de.tar.gz
file chunker: increase chunk sizes
As we use chunking also for reducing storage, we have to consider the overhead of block devices, which is on the order of kB per file. So our target chunk size should be at least 2 orders of magnitude above this. This suggests aiming for a chunk size of at least 128kB, a target size that also has the advantage that the maximal chunk size associated with this size is 1MB, which is still well below the maximal transmission size of gRPC, allowing us to avoid the streaming API. As we're scaling everything up by a factor of 16, we also have to increase the number of bits in the involved masks by 4. We use this to also extend the window size by using the 2 most significant octets. The advice of the paper proposing FastCDC, to spread out the ones roughly equally, suggests 0x4444 as a suitable value for the two most significant octets. We also change the suggested extension of the remote-execution API accordingly. As the precise parameters for FastCDC when announced over the remote-execution APIs are still under discussion upstream, we simplify the name to not mention the target size.
Diffstat (limited to 'src')
-rw-r--r--src/buildtool/execution_api/execution_service/capabilities_server.cpp3
-rw-r--r--src/buildtool/execution_api/execution_service/cas_server.cpp23
-rw-r--r--src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp4
-rw-r--r--src/buildtool/storage/file_chunker.cpp4
-rw-r--r--src/buildtool/storage/file_chunker.hpp2
5 files changed, 17 insertions, 19 deletions
diff --git a/src/buildtool/execution_api/execution_service/capabilities_server.cpp b/src/buildtool/execution_api/execution_service/capabilities_server.cpp
index 1f34b388..71a1361f 100644
--- a/src/buildtool/execution_api/execution_service/capabilities_server.cpp
+++ b/src/buildtool/execution_api/execution_service/capabilities_server.cpp
@@ -41,8 +41,7 @@ auto CapabilitiesServiceImpl::GetCapabilities(
static_assert(kMaxBatchTransferSize < GRPC_DEFAULT_MAX_RECV_MESSAGE_LENGTH,
"Max batch transfer size too large.");
cache.add_supported_chunking_algorithms(
- ::bazel_re::ChunkingAlgorithm_Value::
- ChunkingAlgorithm_Value_FASTCDC_MT0_8KB);
+ ::bazel_re::ChunkingAlgorithm_Value::ChunkingAlgorithm_Value_FASTCDC);
*(response->mutable_cache_capabilities()) = cache;
exec.set_digest_function(
diff --git a/src/buildtool/execution_api/execution_service/cas_server.cpp b/src/buildtool/execution_api/execution_service/cas_server.cpp
index da585851..b9143daa 100644
--- a/src/buildtool/execution_api/execution_service/cas_server.cpp
+++ b/src/buildtool/execution_api/execution_service/cas_server.cpp
@@ -51,8 +51,8 @@ static auto ChunkingAlgorithmToString(::bazel_re::ChunkingAlgorithm_Value type)
ChunkingAlgorithm_Value_RABINCDC_8KB:
return "RABINCDC_8KB";
case ::bazel_re::ChunkingAlgorithm_Value::
- ChunkingAlgorithm_Value_FASTCDC_MT0_8KB:
- return "FASTCDC_MT0_8KB";
+ ChunkingAlgorithm_Value_FASTCDC:
+ return "FASTCDC";
default:
return "[Unknown Chunking Algorithm Type]";
}
@@ -254,16 +254,15 @@ auto CASServiceImpl::SplitBlob(::grpc::ServerContext* /*context*/,
if (chunking_algorithm != ::bazel_re::ChunkingAlgorithm_Value::
ChunkingAlgorithm_Value_IDENTITY and
chunking_algorithm != ::bazel_re::ChunkingAlgorithm_Value::
- ChunkingAlgorithm_Value_FASTCDC_MT0_8KB) {
- logger_.Emit(
- LogLevel::Warning,
- fmt::format(
- "SplitBlob: unsupported chunking algorithm {}, will use "
- "default implementation {}",
- ChunkingAlgorithmToString(chunking_algorithm),
- ChunkingAlgorithmToString(
- ::bazel_re::ChunkingAlgorithm_Value::
- ChunkingAlgorithm_Value_FASTCDC_MT0_8KB)));
+ ChunkingAlgorithm_Value_FASTCDC) {
+ logger_.Emit(LogLevel::Warning,
+ fmt::format("SplitBlob: unsupported chunking algorithm "
+ "{}, will use "
+ "default implementation {}",
+ ChunkingAlgorithmToString(chunking_algorithm),
+ ChunkingAlgorithmToString(
+ ::bazel_re::ChunkingAlgorithm_Value::
+ ChunkingAlgorithm_Value_FASTCDC)));
}
// Acquire garbage collection lock.
diff --git a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp
index e7dea224..748dee6f 100644
--- a/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp
+++ b/src/buildtool/execution_api/remote/bazel/bazel_cas_client.cpp
@@ -388,8 +388,8 @@ auto BazelCasClient::SplitBlob(std::string const& instance_name,
bazel_re::SplitBlobRequest request{};
request.set_instance_name(instance_name);
request.mutable_blob_digest()->CopyFrom(blob_digest);
- request.set_chunking_algorithm(bazel_re::ChunkingAlgorithm_Value::
- ChunkingAlgorithm_Value_FASTCDC_MT0_8KB);
+ request.set_chunking_algorithm(
+ bazel_re::ChunkingAlgorithm_Value::ChunkingAlgorithm_Value_FASTCDC);
bazel_re::SplitBlobResponse response{};
auto [ok, status] = WithRetry(
[this, &response, &request]() {
diff --git a/src/buildtool/storage/file_chunker.cpp b/src/buildtool/storage/file_chunker.cpp
index 8e747900..b94f487a 100644
--- a/src/buildtool/storage/file_chunker.cpp
+++ b/src/buildtool/storage/file_chunker.cpp
@@ -23,8 +23,8 @@ namespace {
// Mask values taken from algorithm 2 of the paper
// https://ieeexplore.ieee.org/document/9055082.
-constexpr std::uint64_t kMaskS{0x0000d9f003530000ULL}; // 15 '1' bits
-constexpr std::uint64_t kMaskL{0x0000d90003530000ULL}; // 11 '1' bits
+constexpr std::uint64_t kMaskS{0x4444d9f003530000ULL}; // 19 '1' bits
+constexpr std::uint64_t kMaskL{0x4444d90003530000ULL}; // 15 '1' bits
// Predefined array of 256 random 64-bit integers, needs to be initialized.
constexpr std::uint32_t kRandomTableSize{256};
diff --git a/src/buildtool/storage/file_chunker.hpp b/src/buildtool/storage/file_chunker.hpp
index f2aea001..914de3f0 100644
--- a/src/buildtool/storage/file_chunker.hpp
+++ b/src/buildtool/storage/file_chunker.hpp
@@ -32,7 +32,7 @@
/// A read buffer is used to progressively process the file content instead of
/// reading the entire file content in memory.
class FileChunker {
- static constexpr std::uint32_t kAverageChunkSize{1024 * 8}; // 8 KB
+ static constexpr std::uint32_t kAverageChunkSize{1024 * 128}; // 128 KB
static constexpr std::uint32_t kDefaultSeed{0};
public: