Diffstat (limited to 'etc/patches')
-rw-r--r--  etc/patches/remote_execution.proto.diff  |  194
1 file changed, 182 insertions(+), 12 deletions(-)
diff --git a/etc/patches/remote_execution.proto.diff b/etc/patches/remote_execution.proto.diff
index 1de1a120..12e364e5 100644
--- a/etc/patches/remote_execution.proto.diff
+++ b/etc/patches/remote_execution.proto.diff
@@ -1,23 +1,53 @@
---- remote_execution.proto.orig 1970-01-01 01:00:00.000000000 +0100
-+++ remote_execution.proto 2023-11-03 13:32:37.502134562 +0100
-@@ -406,6 +406,29 @@
+--- remote_execution.proto.orig 2024-01-10 17:04:44.639543953 +0100
++++ remote_execution.proto 2024-01-31 11:47:50.253779055 +0100
+@@ -406,6 +406,103 @@
rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
}
+
+ // Split a blob into chunks.
+ //
++ // This splitting API aims to reduce download traffic between client and
++ // server, e.g., if a client needs to fetch a large blob that has been only
++ // slightly modified since the last build. In this case, there is no need to
++ // fetch the entire blob data, but only the binary differences between the
++ // two blob versions, which are typically determined by deduplication
++ // techniques such as content-defined chunking.
++ //
+ // Clients can use this API before downloading a blob to determine which parts
+ // of the blob are already present locally and do not need to be downloaded
-+ // again.
++ // again. The server splits the blob into chunks according to a specified
++ // content-defined chunking algorithm and returns a list of the chunk digests
++ // in the order in which the chunks have to be concatenated to assemble the
++ // requested blob.
++ //
++ // A client can expect the following guarantees from the server if a split
++ // request is answered successfully:
++ // 1. The blob chunks are stored in CAS.
++ // 2. Concatenating the blob chunks in the order of the digest list returned
++ // by the server results in the original blob.
++ //
++ // Using this API is optional for clients, but it allows them to download
++ // only the missing parts of a large blob instead of the entire blob data,
++ // which in turn can considerably reduce download network traffic.
+ //
-+ // The blob is split into chunks which are individually stored in the CAS. A
-+ // list of the chunk digests is returned in the order in which the chunks have
-+ // to be concatenated to assemble the requested blob.
++ // Since the generated chunks are stored as blobs, they are subject to the
++ // same lifetimes as other blobs. However, their lifetime is extended if they
++ // are part of the result of a split blob request.
+ //
-+ // Using this API is optional but it allows clients to download only the
-+ // missing parts of a blob instead of the entire blob data, which in turn can
-+ // considerably reduce network traffic.
++ // It is recommended that clients verify that the digest of the blob
++ // assembled from the fetched chunks matches the requested blob digest.
++ //
++ // If several clients use blob splitting, it is recommended that they request
++ // the same splitting algorithm to benefit from each other's chunking data. In
++ // combination with blob splicing, agreeing on the chunking algorithm is
++ // recommended, since both the client and the server side can then benefit
++ // from each other's chunking data.
++ //
++ // Servers are free to implement this functionality, but they need to declare
++ // whether they support it or not by setting the
++ // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
++ // field accordingly.
+ //
+ // Errors:
+ //
@@ -27,10 +57,54 @@
+ rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+ option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+ }
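To make the intended download flow concrete, here is a minimal client-side sketch, assuming C++ gRPC stubs regenerated from the patched proto; LocalCas and DownloadBlob are hypothetical helpers standing in for a local chunk store and a ByteStream read:

    // Sketch only: LocalCas and DownloadBlob are hypothetical helpers, not part
    // of the protocol; the stubs are assumed to be generated from this patch.
    #include <string>
    #include <grpcpp/grpcpp.h>
    #include "build/bazel/remote/execution/v2/remote_execution.grpc.pb.h"

    namespace rev2 = build::bazel::remote::execution::v2;

    // Hypothetical local chunk store of the client.
    struct LocalCas {
      bool HasBlob(const rev2::Digest&);
      std::string ReadBlob(const rev2::Digest&);
      void StoreBlob(const rev2::Digest&, std::string data);
    };

    // Hypothetical single-blob download, e.g., via the ByteStream Read API.
    std::string DownloadBlob(const rev2::Digest&);

    std::string FetchViaSplit(rev2::ContentAddressableStorage::Stub& cas,
                              LocalCas& local, const rev2::Digest& blob_digest) {
      rev2::SplitBlobRequest request;
      *request.mutable_blob_digest() = blob_digest;
      request.set_chunking_algorithm(rev2::ChunkingAlgorithm::FASTCDC_MT0_8KB);

      rev2::SplitBlobResponse response;
      grpc::ClientContext context;
      if (!cas.SplitBlob(&context, request, &response).ok()) {
        return DownloadBlob(blob_digest);  // fall back to a whole-blob download
      }

      // Concatenate the chunks in the order returned by the server, fetching
      // only those that are not yet available locally.
      std::string blob;
      for (const auto& chunk : response.chunk_digests()) {
        if (!local.HasBlob(chunk)) {
          local.StoreBlob(chunk, DownloadBlob(chunk));
        }
        blob += local.ReadBlob(chunk);
      }
      // Recommended: verify that hashing `blob` reproduces `blob_digest`.
      return blob;
    }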
++
++ // Splice a blob from chunks.
++ //
++ // This is the complementary operation to the
++ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
++ // function to handle the split upload of large blobs in order to save upload
++ // traffic.
++ //
++ // If a client needs to upload a large blob and is able to split it into
++ // chunks locally according to some content-defined chunking algorithm, it can
++ // first determine which parts of the blob are already available in the remote
++ // CAS and upload the missing chunks, and then use this API to instruct the
++ // server to splice the original blob from the remotely available blob chunks.
++ //
++ // In order to ensure data consistency of the CAS, the server will verify that
++ // the digest of the spliced result matches the digest provided in the request
++ // and will reject the splice request if this check fails.
++ //
++ // Using this API is optional for clients, but it allows them to upload only
++ // the missing parts of a large blob instead of the entire blob data, which in
++ // turn can considerably reduce upload network traffic.
++ //
++ // In order to split a blob into chunks, it is recommended for the client to
++ // use one of the chunking algorithms advertised by the server in the
++ // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
++ // field to benefit from each other's chunking data. If several clients use
++ // blob splicing, it is recommended that they use the same splitting algorithm
++ // to split their blobs into chunks.
++ //
++ // Servers are free to implement this functionality, but they need to declare
++ // whether they support it or not by setting the
++ // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
++ // field accordingly.
++ //
++ // Errors:
++ //
++ // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS.
++ // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
++ // spliced blob.
++ // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from the
++ // provided expected digest.
++ rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
++ option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
++ }
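Analogously, a minimal upload sketch under the same assumptions (Chunk and UploadBlob are hypothetical, representing the client-side chunker output and a ByteStream write):

    // Sketch only: Chunk and UploadBlob are hypothetical client-side helpers;
    // FindMissingBlobs is the existing CAS RPC from the unpatched proto.
    #include <string>
    #include <vector>
    #include <grpcpp/grpcpp.h>
    #include "build/bazel/remote/execution/v2/remote_execution.grpc.pb.h"

    namespace rev2 = build::bazel::remote::execution::v2;

    // Hypothetical result of splitting a blob locally with a content-defined
    // chunking algorithm (e.g., FASTCDC_MT0_8KB).
    struct Chunk {
      rev2::Digest digest;
      std::string data;
    };

    // Hypothetical single-blob upload, e.g., via the ByteStream Write API.
    void UploadBlob(const Chunk&);

    bool UploadViaSplice(rev2::ContentAddressableStorage::Stub& cas,
                         const rev2::Digest& blob_digest,
                         const std::vector<Chunk>& chunks) {
      // 1. Ask the server which chunks are not yet present in the remote CAS.
      rev2::FindMissingBlobsRequest find_request;
      for (const auto& chunk : chunks) {
        *find_request.add_blob_digests() = chunk.digest;
      }
      rev2::FindMissingBlobsResponse find_response;
      grpc::ClientContext find_context;
      if (!cas.FindMissingBlobs(&find_context, find_request, &find_response).ok()) {
        return false;
      }

      // 2. Upload only the missing chunks.
      for (const auto& missing : find_response.missing_blob_digests()) {
        for (const auto& chunk : chunks) {
          if (chunk.digest.hash() == missing.hash()) UploadBlob(chunk);
        }
      }

      // 3. Instruct the server to splice the original blob from the chunks.
      rev2::SpliceBlobRequest splice_request;
      *splice_request.mutable_blob_digest() = blob_digest;
      for (const auto& chunk : chunks) {
        *splice_request.add_chunk_digests() = chunk.digest;
      }
      rev2::SpliceBlobResponse splice_response;
      grpc::ClientContext splice_context;
      return cas.SpliceBlob(&splice_context, splice_request, &splice_response).ok();
    }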
}
// The Capabilities service may be used by remote execution clients to query
-@@ -1601,6 +1624,27 @@
+@@ -1601,6 +1698,60 @@
}
// A request message for
@@ -45,16 +119,112 @@
+
+ // The digest of the blob to be split.
+ Digest blob_digest = 2;
++
++ // The chunking algorithm to be used. Must be IDENTITY (no chunking) or one of
++ // the algorithms advertised by the
++ // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
++ // field.
++ ChunkingAlgorithm.Value chunking_algorithm = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
-+ // The digests of the chunks into in which the blob was splitted.
++ // The ordered list of digests of the chunks into which the blob was split.
++ // The original blob is assembled by concatenating the chunk data according to
++ // the order of the digests given by this list.
+ repeated Digest chunk_digests = 1;
+}
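As a sketch of the recommended client-side verification (assuming SHA-256 as the digest function in use; OpenSSL is used here only for illustration), the blob assembled from the fetched chunks can be checked against the requested digest:

    // Sketch only: assumes the instance uses SHA-256 digests.
    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <openssl/sha.h>
    #include "build/bazel/remote/execution/v2/remote_execution.pb.h"

    namespace rev2 = build::bazel::remote::execution::v2;

    // `blob` is the concatenation of all fetched chunks, in the order given by
    // SplitBlobResponse.chunk_digests; `expected` is the requested blob digest.
    bool VerifyAssembledBlob(const std::string& blob, const rev2::Digest& expected) {
      unsigned char hash[SHA256_DIGEST_LENGTH];
      SHA256(reinterpret_cast<const unsigned char*>(blob.data()), blob.size(), hash);
      char hex[2 * SHA256_DIGEST_LENGTH + 1];
      for (int i = 0; i < SHA256_DIGEST_LENGTH; ++i) {
        std::snprintf(&hex[2 * i], 3, "%02x", hash[i]);
      }
      return expected.hash() == hex &&
             expected.size_bytes() == static_cast<std::int64_t>(blob.size());
    }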
+
+// A request message for
++// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
++message SpliceBlobRequest {
++ // The instance of the execution system to operate against. A server may
++ // support multiple instances of the execution system (with their own workers,
++ // storage, caches, etc.). The server MAY require use of this field to select
++ // between them in an implementation-defined fashion, otherwise it can be
++ // omitted.
++ string instance_name = 1;
++
++ // Expected digest of the spliced blob.
++ Digest blob_digest = 2;
++
++ // The ordered list of digests of the chunks which need to be concatenated to
++ // assemble the original blob.
++ repeated Digest chunk_digests = 3;
++}
++
++// A response message for
++// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
++message SpliceBlobResponse {
++ // Computed digest of the spliced blob.
++ Digest blob_digest = 1;
++}
++
++// A request message for
// [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
message GetCapabilitiesRequest {
// The instance of the execution system to operate against. A server may
+@@ -1723,6 +1874,36 @@
+ }
+ }
+
++// Content-defined chunking algorithms used for splitting blobs into chunks.
++message ChunkingAlgorithm {
++ enum Value {
++ // No chunking. Servers MUST always support this, and do not need to
++ // advertise it.
++ IDENTITY = 0;
++
++ // Content-defined chunking algorithm based on Rabin fingerprinting. Details
++ // about the implementation can be found in algorithm 3 (RabinCDC8KB) of
++ // https://ieeexplore.ieee.org/document/9055082. This algorithm has the
++ // following properties:
++ // - minimum chunk size: 2 KB
++ // - maximum chunk size: 64 KB
++ // - average chunk size: 8 KB
++ RABINCDC_8KB = 1;
++
++ // Content-defined chunking algorithm based on Gear hashing. Details about
++ // the implementation can be found in algorithm 2 (FastCDC8KB) of
++ // https://ieeexplore.ieee.org/document/9055082. This algorithm has the
++ // following properties:
++ // - minimum chunk size: 2 KB
++ // - maximum chunk size: 64 KB
++ // - average chunk size: 8 KB
++ // The 256 64-bit random numbers in the Gear table are created with the
++ // Mersenne Twister pseudo-random generator for 64-bit numbers with a state
++ // size of 19937 bits and a seed of 0.
++ FASTCDC_MT0_8KB = 2;
++ }
++}
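For illustration, a Gear table matching the description above (64-bit Mersenne Twister, 19937-bit state, seed 0) can be generated with the C++ standard library; this is a sketch, not the reference implementation:

    // Sketch only: generates 256 64-bit random numbers as described above.
    #include <array>
    #include <cstdint>
    #include <random>

    std::array<std::uint64_t, 256> MakeGearTable() {
      std::mt19937_64 gen(0);  // 19937-bit state, seeded with 0
      std::array<std::uint64_t, 256> table{};
      for (auto& entry : table) {
        entry = gen();  // one table entry per byte value 0..255
      }
      return table;
    }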
++
+ // Capabilities of the remote cache system.
+ message CacheCapabilities {
+ // All the digest functions supported by the remote cache.
+@@ -1751,6 +1932,25 @@
+ // Note that this does not imply which if any compressors are supported by
+ // the server at the gRPC level.
+ repeated Compressor.Value supported_compressors = 6;
++
++ // All the chunking algorithms supported by the remote cache. The remote cache
++ // may support multiple chunking algorithms simultaneously. Servers MUST
++ // support IDENTITY (no chunking), even if it is not listed here.
++ repeated ChunkingAlgorithm.Value supported_chunking_algorithms = 7;
++
++ // Whether blob splitting is supported for the particular server/instance. If
++ // yes, the server/instance implements the specified behavior for blob
++ // splitting and a meaningful result can be expected from the
++ // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
++ // operation.
++ bool blob_split_support = 8;
++
++ // Whether blob splicing is supported for the particular server/instance. If
++ // yes, the server/instance implements the specified behavior for blob
++ // splicing and a meaningful result can be expected from the
++ // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
++ // operation.
++ bool blob_splice_support = 9;
+ }
+
+ // Capabilities of the remote execution system.
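Tying the capability fields together, a client could gate its use of blob splitting on the advertised capabilities roughly as follows (sketch only, again assuming stubs generated from the patched proto):

    // Sketch only: checks blob_split_support and the advertised algorithms.
    #include <string>
    #include <grpcpp/grpcpp.h>
    #include "build/bazel/remote/execution/v2/remote_execution.grpc.pb.h"

    namespace rev2 = build::bazel::remote::execution::v2;

    bool SplitWithFastCdcSupported(rev2::Capabilities::Stub& caps_stub,
                                   const std::string& instance_name) {
      rev2::GetCapabilitiesRequest request;
      request.set_instance_name(instance_name);
      rev2::ServerCapabilities response;
      grpc::ClientContext context;
      if (!caps_stub.GetCapabilities(&context, request, &response).ok()) {
        return false;
      }
      const auto& cache = response.cache_capabilities();
      if (!cache.blob_split_support()) {
        return false;
      }
      for (int algorithm : cache.supported_chunking_algorithms()) {
        if (algorithm == rev2::ChunkingAlgorithm::FASTCDC_MT0_8KB) {
          return true;
        }
      }
      return false;
    }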