@@ -177,6 +177,14 @@ @implementation GGMLMetalClass
177
177
#undef GGML_METAL_ADD_KERNEL
178
178
}
179
179
180
+ fprintf (stderr, " %s : recommendedMaxWorkingSetSize = %8.2f MB\n " , __func__, ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
181
+ fprintf (stderr, " %s : hasUnifiedMemory = %s \n " , __func__, ctx->device .hasUnifiedMemory ? " true" : " false" );
182
+ if (ctx->device .maxTransferRate != 0 ) {
183
+ fprintf (stderr, " %s : maxTransferRate = %8.2f MB/s\n " , __func__, ctx->device .maxTransferRate / 1024.0 / 1024.0 );
184
+ } else {
185
+ fprintf (stderr, " %s : maxTransferRate = built-in GPU\n " , __func__);
186
+ }
187
+
180
188
return ctx;
181
189
}
182
190
@@ -250,11 +258,11 @@ bool ggml_metal_add_buffer(
250
258
ctx->buffers [ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy: data length: size_aligned options: MTLResourceStorageModeShared deallocator: nil ];
251
259
252
260
if (ctx->buffers [ctx->n_buffers].metal == nil ) {
253
- fprintf (stderr, " %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_aligned / 1024.0 / 1024.0 );
261
+ // this branch returns immediately, so the shared trailer that ends the log line is never reached - terminate the message here
+ fprintf (stderr, " %s : failed to allocate '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_aligned / 1024.0 / 1024.0 );
254
262
return false ;
255
263
}
256
264
257
- fprintf (stderr, " %s : allocated '%-16s ' buffer, size = %8.2f MB\n " , __func__, name, size_aligned / 1024.0 / 1024.0 );
265
+ fprintf (stderr, " %s : allocated '%-16s ' buffer, size = %8.2f MB" , __func__, name, size_aligned / 1024.0 / 1024.0 );
258
266
259
267
++ctx->n_buffers ;
260
268
} else {
@@ -281,11 +289,21 @@ bool ggml_metal_add_buffer(
281
289
return false ;
282
290
}
283
291
284
- fprintf (stderr, " %s : allocated '%-16s ' buffer, size = %8.2f MB, offs = %12ld \n " , __func__, name, size_step_aligned / 1024.0 / 1024.0 , i);
292
+ fprintf (stderr, " %s : allocated '%-16s ' buffer, size = %8.2f MB, offs = %12ld " , __func__, name, size_step_aligned / 1024.0 / 1024.0 , i);
285
293
286
294
++ctx->n_buffers ;
287
295
}
288
296
}
297
+
298
+ fprintf (stderr, " , (%8.2f / %8.2f )" ,
299
+ ctx->device .currentAllocatedSize / 1024.0 / 1024.0 ,
300
+ ctx->device .recommendedMaxWorkingSetSize / 1024.0 / 1024.0 );
301
+
302
+ if (ctx->device .currentAllocatedSize > ctx->device .recommendedMaxWorkingSetSize ) {
303
+ fprintf (stderr, " , warning: current allocated size is greater than the recommended max working set size\n " );
304
+ } else {
305
+ fprintf (stderr, " \n " );
306
+ }
289
307
}
290
308
291
309
return true ;
@@ -862,4 +880,14 @@ void ggml_metal_graph_compute(
862
880
dispatch_barrier_sync (queue, ^{});
863
881
864
882
[command_buffers[n_cb - 1 ] waitUntilCompleted ];
883
+
884
+ // check status of command buffers
885
+ // needed to detect if the device ran out-of-memory for example (#1881)
886
+ for (int i = 0 ; i < n_cb; i++) {
887
+ MTLCommandBufferStatus status = (MTLCommandBufferStatus ) [command_buffers[i] status ];
888
+ if (status != MTLCommandBufferStatusCompleted ) {
889
+ fprintf (stderr, " %s : command buffer %d failed with status %lu \n " , __func__, i, status);
890
+ GGML_ASSERT (false );
891
+ }
892
+ }
865
893
}
0 commit comments