Skip to content

Commit f8f4e22

Browse files
authored
feat: add @BetaApi BlobWriteSession#parallelCompositeUpload (#2239)
1 parent c9b82f6 commit f8f4e22

File tree

4 files changed

+657
-8
lines changed

4 files changed

+657
-8
lines changed

google-cloud-storage/src/main/java/com/google/cloud/storage/BlobWriteSessionConfigs.java

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818

1919
import com.google.api.core.BetaApi;
2020
import com.google.cloud.storage.GrpcStorageOptions.GrpcStorageDefaults;
21+
import com.google.cloud.storage.ParallelCompositeUploadBlobWriteSessionConfig.PartCleanupStrategy;
22+
import com.google.cloud.storage.Storage.BlobTargetOption;
2123
import com.google.cloud.storage.Storage.BlobWriteOption;
2224
import com.google.common.collect.ImmutableList;
2325
import java.io.IOException;
@@ -108,6 +110,10 @@
108110
* retryable error query the offset of the Resumable Upload Session, then open the recovery
109111
* file from the offset and transmit the bytes to Cloud Storage.
110112
* </td>
113+
* <td>
114+
 * Opening the stream for upload will be retried up to the limitations specified in {@link StorageOptions#getRetrySettings()}.
115+
* All bytes are buffered to disk and allow for recovery from any arbitrary offset.
116+
* </td>
111117
* <td>gRPC</td>
112118
* <td><a href="https://cloud.google.com/storage/docs/resumable-uploads">Resumable Upload</a></td>
113119
* <td>
@@ -128,6 +134,90 @@
128134
* </ol>
129135
* </td>
130136
* </tr>
137+
* <tr>
138+
* <td>Parallel Composite Upload</td>
139+
* <td>{@link #parallelCompositeUpload()}</td>
140+
* <td>
141+
* Break the stream of bytes into smaller part objects uploading each part in parallel. Then
142+
* composing the parts together to make the ultimate object.
143+
* </td>
144+
* <td>
145+
 * Automatic retries will be applied for the following:
146+
* <ol>
147+
* <li>Creation of each individual part</li>
148+
* <li>Performing an intermediary compose</li>
149+
* <li>Performing a delete to cleanup each part and intermediary compose object</li>
150+
* </ol>
151+
*
152+
 * Retrying the creation of the final object is contingent upon whether an appropriate precondition
153+
* is supplied when calling {@link Storage#blobWriteSession(BlobInfo, BlobWriteOption...)}.
154+
* Either {@link BlobTargetOption#doesNotExist()} or {@link Storage.BlobTargetOption#generationMatch(long)}
155+
* should be specified in order to make the final request idempotent.
156+
 * <p>Each operation will be retried up to the limitations specified in {@link StorageOptions#getRetrySettings()}.
157+
* </td>
158+
* <td>gRPC</td>
159+
* <td>
160+
* <ul>
161+
* <li><a href="https://cloud.google.com/storage/docs/parallel-composite-uploads">Parallel composite uploads</a></li>
162+
* <li><a href="https://cloud.google.com/storage/docs/uploading-objects-from-memory">Direct uploads</a></li>
163+
* <li><a href="https://cloud.google.com/storage/docs/composite-objects">Compose</a></li>
164+
* <li><a href="https://cloud.google.com/storage/docs/deleting-objects">Object delete</a></li>
165+
* </ul>
166+
* </td>
167+
* <td>
168+
* <ol>
169+
* <li>
170+
* Performing parallel composite uploads costs more money.
171+
* <a href="https://cloud.google.com/storage/pricing#operations-by-class">Class A</a>
172+
* operations are performed to create each part and to perform each compose. If a storage
173+
* tier other than
174+
* <a href="https://cloud.google.com/storage/docs/storage-classes"><code>STANDARD</code></a>
175+
* is used, early deletion fees apply to deletion of the parts.
176+
* <p>An illustrative example. Upload a 5GiB object using 64MiB as the max size per part.
177+
* <ol>
178+
* <li>80 Parts will be created (Class A)</li>
179+
* <li>3 compose calls will be performed (Class A)</li>
180+
* <li>Delete 80 Parts along with 2 intermediary Compose objects (Free tier as long as {@code STANDARD} class)</li>
181+
* </ol>
182+
*
183+
* Once the parts and intermediary compose objects are deleted, there will be no storage charges related to those temporary objects.
184+
* </li>
185+
* <li>
186+
* The service account/credentials used to perform the parallel composite upload require
187+
* <a href="https://cloud.google.com/storage/docs/access-control/iam-permissions#object_permissions">{@code storage.objects.delete}</a>
188+
* in order to cleanup the temporary part and intermediary compose objects.
189+
 * <p><i>To handle part and intermediary compose object deletion out of band</i>
190+
* passing {@link PartCleanupStrategy#never()} to {@link ParallelCompositeUploadBlobWriteSessionConfig#withPartCleanupStrategy(PartCleanupStrategy)}
191+
* will prevent automatic cleanup.
192+
* </li>
193+
* <li>
194+
* Please see the <a href="https://cloud.google.com/storage/docs/parallel-composite-uploads">
195+
* Parallel composite uploads</a> documentation for a more in depth explanation of the
196+
* limitations of Parallel composite uploads.
197+
* </li>
198+
* <li>
199+
* A failed upload can leave part and intermediary compose objects behind which will count
200+
* as storage usage, and you will be billed for it.
201+
* <p>By default if an upload fails, an attempt to cleanup the part and intermediary compose
202+
* will be made. However if the program were to crash there is no means for the client to
203+
* perform the cleanup.
204+
* <p>Every part and intermediary compose object will be created with a name which ends in
205+
* {@code .part}. An Object Lifecycle Management rule can be setup on your bucket to automatically
206+
* cleanup objects with the suffix after some period of time. See
207+
* <a href="https://cloud.google.com/storage/docs/lifecycle">Object Lifecycle Management</a>
208+
* for full details and a guide on how to setup a <a href="https://cloud.google.com/storage/docs/lifecycle#delete">Delete</a>
209+
* rule with a <a href="https://cloud.google.com/storage/docs/lifecycle#matchesprefix-suffix">suffix match</a> condition.
210+
* </li>
211+
* <li>
212+
 * Using parallel composite uploads is not a one-size-fits-all solution. They have very
213+
* real overhead until uploading a large enough object. The inflection point is dependent
214+
* upon many factors, and there is no one size fits all value. You will need to experiment
215+
* with your deployment and workload to determine if parallel composite uploads are useful
216+
* to you.
217+
* </li>
218+
* </ol>
219+
* </td>
220+
* </tr>
131221
* </table>
132222
*
133223
* @see BlobWriteSessionConfig
@@ -219,4 +309,19 @@ public static BufferToDiskThenUpload bufferToDiskThenUpload(Collection<Path> pat
219309
public static JournalingBlobWriteSessionConfig journaling(Collection<Path> paths) {
220310
return new JournalingBlobWriteSessionConfig(ImmutableList.copyOf(paths), false);
221311
}
312+
313+
/**
314+
* Create a new {@link BlobWriteSessionConfig} which will perform a <a
315+
* href="https://cloud.google.com/storage/docs/parallel-composite-uploads">Parallel Composite
316+
* Upload</a> by breaking the stream into parts and composing the parts together to make the
317+
* ultimate object.
318+
*
319+
* @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...)
320+
* @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig)
321+
* @since 2.28.0 This new api is in preview and is subject to breaking changes.
322+
*/
323+
@BetaApi
324+
public static ParallelCompositeUploadBlobWriteSessionConfig parallelCompositeUpload() {
325+
return ParallelCompositeUploadBlobWriteSessionConfig.withDefaults();
326+
}
222327
}

google-cloud-storage/src/main/java/com/google/cloud/storage/GrpcStorageImpl.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1815,7 +1815,8 @@ private UnbufferedReadableByteChannelSession<Object> unbufferedReadSession(
18151815
ReadObjectRequest readObjectRequest = getReadObjectRequest(blob, opts);
18161816
Set<StatusCode.Code> codes =
18171817
resultRetryAlgorithmToCodes(retryAlgorithmManager.getFor(readObjectRequest));
1818-
GrpcCallContext grpcCallContext = Retrying.newCallContext().withRetryableCodes(codes);
1818+
GrpcCallContext grpcCallContext =
1819+
opts.grpcMetadataMapper().apply(Retrying.newCallContext().withRetryableCodes(codes));
18191820
return ResumableMedia.gapic()
18201821
.read()
18211822
.byteChannel(storageClient.readObjectCallable().withDefaultCallContext(grpcCallContext))

0 commit comments

Comments
 (0)