|
18 | 18 |
|
19 | 19 | import com.google.api.core.BetaApi;
|
20 | 20 | import com.google.cloud.storage.GrpcStorageOptions.GrpcStorageDefaults;
|
| 21 | +import com.google.cloud.storage.ParallelCompositeUploadBlobWriteSessionConfig.PartCleanupStrategy; |
| 22 | +import com.google.cloud.storage.Storage.BlobTargetOption; |
21 | 23 | import com.google.cloud.storage.Storage.BlobWriteOption;
|
22 | 24 | import com.google.common.collect.ImmutableList;
|
23 | 25 | import java.io.IOException;
|
|
108 | 110 | * retryable error query the offset of the Resumable Upload Session, then open the recovery
|
109 | 111 | * file from the offset and transmit the bytes to Cloud Storage.
|
110 | 112 | * </td>
|
| 113 | + * <td> |
| 114 | + * Opening the stream for upload will be retried up to the limitations specified in {@link StorageOptions#getRetrySettings()}.
| 115 | + * All bytes are buffered to disk and allow for recovery from any arbitrary offset. |
| 116 | + * </td> |
111 | 117 | * <td>gRPC</td>
|
112 | 118 | * <td><a href="https://cloud.google.com/storage/docs/resumable-uploads">Resumable Upload</a></td>
|
113 | 119 | * <td>
|
|
128 | 134 | * </ol>
|
129 | 135 | * </td>
|
130 | 136 | * </tr>
|
| 137 | + * <tr> |
| 138 | + * <td>Parallel Composite Upload</td> |
| 139 | + * <td>{@link #parallelCompositeUpload()}</td> |
| 140 | + * <td> |
| 141 | + * Break the stream of bytes into smaller part objects, uploading each part in parallel, |
| 142 | + * then composing the parts together to make the ultimate object. |
| 143 | + * </td> |
| 144 | + * <td> |
| 145 | + * Automatic retries will be applied for the following: |
| 146 | + * <ol> |
| 147 | + * <li>Creation of each individual part</li> |
| 148 | + * <li>Performing an intermediary compose</li> |
| 149 | + * <li>Performing a delete to cleanup each part and intermediary compose object</li> |
| 150 | + * </ol> |
| 151 | + * |
| 152 | + * Retrying the creation of the final object is contingent upon whether an appropriate precondition |
| 153 | + * is supplied when calling {@link Storage#blobWriteSession(BlobInfo, BlobWriteOption...)}. |
| 154 | + * Either {@link BlobTargetOption#doesNotExist()} or {@link BlobTargetOption#generationMatch(long)} |
| 155 | + * should be specified in order to make the final request idempotent. |
| 156 | + * <p>Each operation will be retried up to the limitations specified in {@link StorageOptions#getRetrySettings()} |
| 157 | + * </td> |
| 158 | + * <td>gRPC</td> |
| 159 | + * <td> |
| 160 | + * <ul> |
| 161 | + * <li><a href="https://cloud.google.com/storage/docs/parallel-composite-uploads">Parallel composite uploads</a></li> |
| 162 | + * <li><a href="https://cloud.google.com/storage/docs/uploading-objects-from-memory">Direct uploads</a></li> |
| 163 | + * <li><a href="https://cloud.google.com/storage/docs/composite-objects">Compose</a></li> |
| 164 | + * <li><a href="https://cloud.google.com/storage/docs/deleting-objects">Object delete</a></li> |
| 165 | + * </ul> |
| 166 | + * </td> |
| 167 | + * <td> |
| 168 | + * <ol> |
| 169 | + * <li> |
| 170 | + * Performing parallel composite uploads costs more money. |
| 171 | + * <a href="https://cloud.google.com/storage/pricing#operations-by-class">Class A</a> |
| 172 | + * operations are performed to create each part and to perform each compose. If a storage |
| 173 | + * tier other than |
| 174 | + * <a href="https://cloud.google.com/storage/docs/storage-classes"><code>STANDARD</code></a> |
| 175 | + * is used, early deletion fees apply to deletion of the parts. |
| 176 | + * <p>An illustrative example. Upload a 5GiB object using 64MiB as the max size per part. |
| 177 | + * <ol> |
| 178 | + * <li>80 Parts will be created (Class A)</li> |
| 179 | + * <li>3 compose calls will be performed (Class A)</li> |
| 180 | + * <li>Delete 80 Parts along with 2 intermediary Compose objects (Free tier as long as {@code STANDARD} class)</li> |
| 181 | + * </ol> |
| 182 | + * |
| 183 | + * Once the parts and intermediary compose objects are deleted, there will be no storage charges related to those temporary objects. |
| 184 | + * </li> |
| 185 | + * <li> |
| 186 | + * The service account/credentials used to perform the parallel composite upload require |
| 187 | + * <a href="https://cloud.google.com/storage/docs/access-control/iam-permissions#object_permissions">{@code storage.objects.delete}</a> |
| 188 | + * in order to cleanup the temporary part and intermediary compose objects. |
| 189 | + * <p><i>To handle part and intermediary compose object deletion out of band</i>, |
| 190 | + * passing {@link PartCleanupStrategy#never()} to {@link ParallelCompositeUploadBlobWriteSessionConfig#withPartCleanupStrategy(PartCleanupStrategy)} |
| 191 | + * will prevent automatic cleanup. |
| 192 | + * </li> |
| 193 | + * <li> |
| 194 | + * Please see the <a href="https://cloud.google.com/storage/docs/parallel-composite-uploads"> |
| 195 | + * Parallel composite uploads</a> documentation for a more in depth explanation of the |
| 196 | + * limitations of Parallel composite uploads. |
| 197 | + * </li> |
| 198 | + * <li> |
| 199 | + * A failed upload can leave part and intermediary compose objects behind which will count |
| 200 | + * as storage usage, and you will be billed for it. |
| 201 | + * <p>By default if an upload fails, an attempt to cleanup the part and intermediary compose |
| 202 | + * will be made. However if the program were to crash there is no means for the client to |
| 203 | + * perform the cleanup. |
| 204 | + * <p>Every part and intermediary compose object will be created with a name which ends in |
| 205 | + * {@code .part}. An Object Lifecycle Management rule can be set up on your bucket to automatically |
| 206 | + * cleanup objects with the suffix after some period of time. See |
| 207 | + * <a href="https://cloud.google.com/storage/docs/lifecycle">Object Lifecycle Management</a> |
| 208 | + * for full details and a guide on how to set up a <a href="https://cloud.google.com/storage/docs/lifecycle#delete">Delete</a> |
| 209 | + * rule with a <a href="https://cloud.google.com/storage/docs/lifecycle#matchesprefix-suffix">suffix match</a> condition. |
| 210 | + * </li> |
| 211 | + * <li> |
| 212 | + * Using parallel composite uploads is not a one size fits all solution. They have very |
| 213 | + * real overhead until uploading a large enough object. The inflection point is dependent |
| 214 | + * upon many factors, and there is no one size fits all value. You will need to experiment |
| 215 | + * with your deployment and workload to determine if parallel composite uploads are useful |
| 216 | + * to you. |
| 217 | + * </li> |
| 218 | + * </ol> |
| 219 | + * </td> |
| 220 | + * </tr> |
131 | 221 | * </table>
|
132 | 222 | *
|
133 | 223 | * @see BlobWriteSessionConfig
|
@@ -219,4 +309,19 @@ public static BufferToDiskThenUpload bufferToDiskThenUpload(Collection<Path> pat
|
219 | 309 | public static JournalingBlobWriteSessionConfig journaling(Collection<Path> paths) {
|
220 | 310 | return new JournalingBlobWriteSessionConfig(ImmutableList.copyOf(paths), false);
|
221 | 311 | }
|
| 312 | + |
| 313 | + /** |
| 314 | + * Create a new {@link BlobWriteSessionConfig} which will perform a <a |
| 315 | + * href="https://cloud.google.com/storage/docs/parallel-composite-uploads">Parallel Composite |
| 316 | + * Upload</a> by breaking the stream into parts and composing the parts together to make the |
| 317 | + * ultimate object. |
| 318 | + * |
| 319 | + * @see Storage#blobWriteSession(BlobInfo, BlobWriteOption...) |
| 320 | + * @see GrpcStorageOptions.Builder#setBlobWriteSessionConfig(BlobWriteSessionConfig) |
| 321 | + * @since 2.28.0 This new API is in preview and is subject to breaking changes. |
| 322 | + */ |
| 323 | + @BetaApi |
| 324 | + public static ParallelCompositeUploadBlobWriteSessionConfig parallelCompositeUpload() { |
| 325 | + return ParallelCompositeUploadBlobWriteSessionConfig.withDefaults(); |
| 326 | + } |
222 | 327 | }
|
0 commit comments