path: root/etc/patches/remote_execution.proto.diff
--- remote_execution.proto.orig	2024-01-10 17:04:44.639543953 +0100
+++ remote_execution.proto	2024-01-31 11:47:50.253779055 +0100
@@ -406,6 +406,103 @@
   rpc GetTree(GetTreeRequest) returns (stream GetTreeResponse) {
     option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{root_digest.hash}/{root_digest.size_bytes}:getTree" };
   }
+
+  // Split a blob into chunks.
+  //
+  // This splitting API aims to reduce download traffic between client and
+  // server, e.g., if a client needs to fetch a large blob that has just been
+  // modified slightly since the last build. In this case, there is no need to
+  // fetch the entire blob data, but just the binary differences between the two
+  // blob versions, which are typically determined by deduplication techniques
+  // such as content-defined chunking.
+  //
+  // Clients can use this API before downloading a blob to determine which parts
+  // of the blob are already present locally and do not need to be downloaded
+  // again. The server splits the blob into chunks according to a specified
+  // content-defined chunking algorithm and returns a list of the chunk digests
+  // in the order in which the chunks have to be concatenated to assemble the
+  // requested blob.
+  //
+  // A client can expect the following guarantees from the server if a split
+  // request is answered successfully:
+  //  1. The blob chunks are stored in CAS.
+  //  2. Concatenating the blob chunks in the order of the digest list returned
+  //     by the server results in the original blob.
+  //
+  // The usage of this API is optional for clients, but it allows them to
+  // download only the missing parts of a large blob instead of the entire blob
+  // data, which in turn can considerably reduce download network traffic.
+  //
+  // Since the generated chunks are stored as blobs, they are subject to the
+  // same lifetime rules as other blobs. However, their lifetime is extended if
+  // they are part of the result of a split blob request.
+  //
+  // It is recommended that clients verify that the blob assembled from the
+  // fetched chunks has the requested blob digest.
+  //
+  // If several clients use blob splitting, it is recommended that they request
+  // the same splitting algorithm to benefit from each other's chunking data. In
+  // combination with blob splicing, an agreement about the chunking algorithm
+  // is recommended since both the client and the server side can benefit from
+  // each other's chunking data.
+  //
+  // Servers are free to implement this functionality, but they need to declare
+  // whether they support it or not by setting the
+  // [CacheCapabilities.blob_split_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_split_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: The requested blob is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the blob
+  //   chunks.
+  rpc SplitBlob(SplitBlobRequest) returns (SplitBlobResponse) {
+    option (google.api.http) = { get: "/v2/{instance_name=**}/blobs/{blob_digest.hash}/{blob_digest.size_bytes}:splitBlob" };
+  }
+
+  // Splice a blob from chunks.
+  //
+  // This is the complementary operation to the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // function to handle the split upload of large blobs to save upload
+  // traffic.
+  //
+  // If a client needs to upload a large blob and is able to split the blob into
+  // chunks locally according to some content-defined chunking algorithm, it can
+  // first determine which parts of the blob are already available in the remote
+  // CAS and upload the missing chunks, and then use this API to instruct the
+  // server to splice the original blob from the remotely available blob chunks.
+  //
+  // In order to ensure data consistency of the CAS, the server will verify
+  // that the digest of the spliced result matches the digest provided in the
+  // request, and will reject the splice request if this check fails.
+  //
+  // The usage of this API is optional for clients, but it allows them to upload
+  // only the missing parts of a large blob instead of the entire blob data,
+  // which in turn can considerably reduce upload network traffic.
+  //
+  // In order to split a blob into chunks, it is recommended for the client to
+  // use one of the chunking algorithms advertised by the server in the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field so that client and server can benefit from each other's chunking
+  // data. If several clients use blob splicing, it is recommended that they use
+  // the same splitting algorithm to split their blobs into chunks.
+  //
+  // Servers are free to implement this functionality, but they need to declare
+  // whether they support it or not by setting the
+  // [CacheCapabilities.blob_splice_support][build.bazel.remote.execution.v2.CacheCapabilities.blob_splice_support]
+  // field accordingly.
+  //
+  // Errors:
+  //
+  // * `NOT_FOUND`: At least one of the blob chunks is not present in the CAS.
+  // * `RESOURCE_EXHAUSTED`: There is insufficient disk quota to store the
+  //   spliced blob.
+  // * `INVALID_ARGUMENT`: The digest of the spliced blob is different from the
+  //   provided expected digest.
+  rpc SpliceBlob(SpliceBlobRequest) returns (SpliceBlobResponse) {
+    option (google.api.http) = { post: "/v2/{instance_name=**}/blobs:spliceBlob" body: "*" };
+  }
 }
 
 // The Capabilities service may be used by remote execution clients to query
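To make the download flow described for SplitBlob concrete, here is a minimal client-side sketch in Python. It assumes generated bindings of the patched proto (remote_execution_pb2), SHA-256 digests, and hypothetical local-CAS helpers (have_locally, fetch_blob, store_locally); the helpers are not part of the API, only the SplitBlob call and its request/response fields come from the patch.

# Minimal sketch of the split-then-download flow; helper functions are
# hypothetical placeholders, not part of the REAPI.
import hashlib

from build.bazel.remote.execution.v2 import remote_execution_pb2  # assumed bindings

def download_via_split(cas_stub, instance_name, blob_digest, algorithm,
                       have_locally, fetch_blob, store_locally):
    # Ask the server to split the blob; chunk digests are returned in
    # concatenation order.
    response = cas_stub.SplitBlob(remote_execution_pb2.SplitBlobRequest(
        instance_name=instance_name,
        blob_digest=blob_digest,
        chunking_algorithm=algorithm))

    parts = []
    for chunk_digest in response.chunk_digests:
        data = have_locally(chunk_digest)      # returns None if not cached
        if data is None:
            data = fetch_blob(chunk_digest)    # e.g. via ByteStream.Read
            store_locally(chunk_digest, data)
        parts.append(data)

    blob = b"".join(parts)
    # As recommended above, verify the reassembled blob against the requested
    # digest (assuming SHA-256 as the digest function).
    if hashlib.sha256(blob).hexdigest() != blob_digest.hash:
        raise ValueError("reassembled blob does not match the requested digest")
    return blob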
@@ -1601,6 +1698,60 @@
 }
 
 // A request message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own workers,
+  // storage, caches, etc.). The server MAY require use of this field to select
+  // between them in an implementation-defined fashion, otherwise it can be
+  // omitted.
+  string instance_name = 1;
+
+  // The digest of the blob to be split.
+  Digest blob_digest = 2;
+
+  // The chunking algorithm to be used. Must be IDENTITY (no chunking) or one of
+  // the algorithms advertised by the
+  // [CacheCapabilities.supported_chunking_algorithms][build.bazel.remote.execution.v2.CacheCapabilities.supported_chunking_algorithms]
+  // field.
+  ChunkingAlgorithm.Value chunking_algorithm = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob].
+message SplitBlobResponse {
+  // The ordered list of digests of the chunks into which the blob was split.
+  // The original blob is assembled by concatenating the chunk data according to
+  // the order of the digests given by this list.
+  repeated Digest chunk_digests = 1;
+}
+
+// A request message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobRequest {
+  // The instance of the execution system to operate against. A server may
+  // support multiple instances of the execution system (with their own workers,
+  // storage, caches, etc.). The server MAY require use of this field to select
+  // between them in an implementation-defined fashion, otherwise it can be
+  // omitted.
+  string instance_name = 1;
+
+  // Expected digest of the spliced blob.
+  Digest blob_digest = 2;
+
+  // The ordered list of digests of the chunks which need to be concatenated to
+  // assemble the original blob.
+  repeated Digest chunk_digests = 3;
+}
+
+// A response message for
+// [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob].
+message SpliceBlobResponse {
+  // Computed digest of the spliced blob.
+  Digest blob_digest = 1;
+}
+
+// A request message for
 // [Capabilities.GetCapabilities][build.bazel.remote.execution.v2.Capabilities.GetCapabilities].
 message GetCapabilitiesRequest {
   // The instance of the execution system to operate against. A server may
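The request and response messages above are enough to sketch the complementary upload flow. The following Python sketch again assumes generated bindings of the patched proto plus hypothetical helpers chunk_locally, digest_of, and upload_if_missing; in a real client the chunking would use one of the algorithms advertised by the server, and the missing chunks would typically be uploaded via FindMissingBlobs together with BatchUpdateBlobs or ByteStream.Write.

# Minimal sketch of the splice-based upload flow; helper functions are
# hypothetical placeholders, not part of the REAPI.
from build.bazel.remote.execution.v2 import remote_execution_pb2  # assumed bindings

def upload_via_splice(cas_stub, instance_name, blob, chunk_locally, digest_of,
                      upload_if_missing):
    chunks = chunk_locally(blob)                    # list of chunk byte strings
    chunk_digests = [digest_of(chunk) for chunk in chunks]

    # Upload only the chunks the remote CAS is missing.
    for digest, data in zip(chunk_digests, chunks):
        upload_if_missing(digest, data)

    request = remote_execution_pb2.SpliceBlobRequest(
        instance_name=instance_name,
        blob_digest=digest_of(blob),                # expected digest of the whole blob
        chunk_digests=chunk_digests)                # concatenation order matters
    response = cas_stub.SpliceBlob(request)

    # The server recomputes the digest of the spliced result and rejects a
    # mismatch with INVALID_ARGUMENT, so this check should always pass.
    if response.blob_digest != request.blob_digest:
        raise RuntimeError("server spliced a blob with an unexpected digest")
    return response.blob_digest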
@@ -1723,6 +1874,37 @@
   }
 }
 
+// Content-defined chunking algorithms used for splitting blobs into chunks.
+message ChunkingAlgorithm {
+  enum Value {
+    // No chunking. Servers MUST always support this, and do not need to
+    // advertise it.
+    IDENTITY = 0;
+
+    // Content-defined chunking algorithm based on Rabin fingerprinting. Details
+    // about the implementation can be found in algorithm 3 (RabinCDC8KB) of
+    // https://ieeexplore.ieee.org/document/9055082. This algorithm has the
+    // following properties:
+    //  - minimum chunk size: 2 KB
+    //  - maximum chunk size: 64 KB
+    //  - average chunk size: 8 KB
+    RABINCDC_8KB = 1;
+
+    // Content-defined chunking algorithm based on Gear hashing. Details about
+    // the implementation can be found in algorithm 2 (FastCDC8KB) of
+    // https://ieeexplore.ieee.org/document/9055082. This algorithm has the
+    // following properties:
+    //  - minimum chunk size: 32 KB
+    //  - maximum chunk size: 1024 KB
+    //  - average chunk size: 128 KB
+    // The 256 64-bit random numbers in the Gear table are created with the
+    // Mersenne Twister pseudo-random generator for 64-bit numbers with a state
+    // size of 19937 bits and a seed of 0.
+    // The masks are extended by setting the 2 most significant bytes to 0x4444.
+    FASTCDC = 2;
+  }
+}
+
 // Capabilities of the remote cache system.
 message CacheCapabilities {
   // All the digest functions supported by the remote cache.
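For intuition about the ChunkingAlgorithm enum above, the sketch below shows a simplified gear-hash chunking loop: a cut point is declared wherever the rolling hash over the most recent bytes has zeros in its low bits, so boundaries depend only on local content and shift together with insertions or deletions. This is an illustration only, not the RabinCDC8KB or FastCDC8KB variants referenced above (no normalized chunking, no 0x4444 mask extension); the 256-entry table of 64-bit gear values is assumed to be provided.

# Simplified gear-hash content-defined chunking (illustrative only; not the
# exact algorithms referenced in the enum above).
def gear_cdc_chunks(data: bytes, gear, min_size, avg_size, max_size):
    mask = avg_size - 1              # avg_size is assumed to be a power of two
    chunks = []
    start = 0
    while start < len(data):
        end = min(start + max_size, len(data))
        cut = end                    # fall back to a max_size (or final) chunk
        h = 0
        # Skip the first min_size bytes so no chunk is shorter than min_size.
        for i in range(start + min_size, end):
            h = ((h << 1) + gear[data[i]]) & 0xFFFFFFFFFFFFFFFF
            if (h & mask) == 0:      # low log2(avg_size) bits are zero: cut here
                cut = i + 1
                break
        chunks.append(data[start:cut])
        start = cut
    return chunks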
@@ -1751,6 +1933,25 @@
   // Note that this does not imply which if any compressors are supported by
   // the server at the gRPC level.
   repeated Compressor.Value supported_compressors = 6;
+
+  // All the chunking algorithms supported by the remote cache. A remote cache
+  // may support multiple chunking algorithms simultaneously. Servers MUST
+  // support IDENTITY (no chunking), even if it is not listed here.
+  repeated ChunkingAlgorithm.Value supported_chunking_algorithms = 7;
+
+  // Whether blob splitting is supported for the particular server/instance. If
+  // yes, the server/instance implements the specified behavior for blob
+  // splitting and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SplitBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SplitBlob]
+  // operation.
+  bool blob_split_support = 8;
+
+  // Whether blob splicing is supported for the particular server/instance. If
+  // yes, the server/instance implements the specified behavior for blob
+  // splicing and a meaningful result can be expected from the
+  // [ContentAddressableStorage.SpliceBlob][build.bazel.remote.execution.v2.ContentAddressableStorage.SpliceBlob]
+  // operation.
+  bool blob_splice_support = 9;
 }
 
 // Capabilities of the remote execution system.
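Finally, a short sketch of how a client might gate its use of the new RPCs on the capability fields introduced above, once more assuming generated Python bindings of the patched proto. GetCapabilities and cache_capabilities are part of the existing API; the preference order among algorithms is just an illustrative choice.

# Sketch of capability negotiation for blob splitting/splicing; the preferred
# algorithm order below is an arbitrary illustrative choice.
from build.bazel.remote.execution.v2 import remote_execution_pb2  # assumed bindings

def negotiate_chunking(capabilities_stub, instance_name):
    caps = capabilities_stub.GetCapabilities(
        remote_execution_pb2.GetCapabilitiesRequest(instance_name=instance_name))
    cache = caps.cache_capabilities

    # Fall back to plain uploads/downloads if the server does not implement
    # splitting or splicing.
    if not (cache.blob_split_support and cache.blob_splice_support):
        return None

    # Prefer FASTCDC if offered; IDENTITY (no chunking) is always supported.
    algorithms = set(cache.supported_chunking_algorithms)
    if remote_execution_pb2.ChunkingAlgorithm.FASTCDC in algorithms:
        return remote_execution_pb2.ChunkingAlgorithm.FASTCDC
    if remote_execution_pb2.ChunkingAlgorithm.RABINCDC_8KB in algorithms:
        return remote_execution_pb2.ChunkingAlgorithm.RABINCDC_8KB
    return remote_execution_pb2.ChunkingAlgorithm.IDENTITY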