|
115 | 115 | },
|
116 | 116 | {
|
117 | 117 | "cell_type": "code",
|
118 |
| - "execution_count": 4, |
| 118 | + "execution_count": 2, |
119 | 119 | "metadata": {},
|
120 | 120 | "outputs": [
|
121 | 121 | {
|
|
143 | 143 | },
|
144 | 144 | {
|
145 | 145 | "cell_type": "code",
|
146 |
| - "execution_count": 5, |
| 146 | + "execution_count": 3, |
147 | 147 | "metadata": {},
|
148 | 148 | "outputs": [],
|
149 | 149 | "source": [
|
|
165 | 165 | },
|
166 | 166 | {
|
167 | 167 | "cell_type": "code",
|
168 |
| - "execution_count": 6, |
| 168 | + "execution_count": 4, |
169 | 169 | "metadata": {},
|
170 | 170 | "outputs": [],
|
171 | 171 | "source": [
|
|
185 | 185 | },
|
186 | 186 | {
|
187 | 187 | "cell_type": "code",
|
188 |
| - "execution_count": 7, |
| 188 | + "execution_count": 5, |
189 | 189 | "metadata": {},
|
190 | 190 | "outputs": [],
|
191 | 191 | "source": [
|
|
225 | 225 | },
|
226 | 226 | {
|
227 | 227 | "cell_type": "code",
|
228 |
| - "execution_count": 8, |
| 228 | + "execution_count": 6, |
229 | 229 | "metadata": {
|
230 | 230 | "scrolled": true
|
231 | 231 | },
|
|
349 | 349 | },
|
350 | 350 | {
|
351 | 351 | "cell_type": "code",
|
352 |
| - "execution_count": 14, |
| 352 | + "execution_count": 8, |
353 | 353 | "metadata": {},
|
354 | 354 | "outputs": [
|
355 | 355 | {
|
356 | 356 | "name": "stdout",
|
357 | 357 | "output_type": "stream",
|
358 | 358 | "text": [
|
359 |
| - "Tue Aug 11 05:17:40 2020 \r\n", |
360 |
| - "+-----------------------------------------------------------------------------+\r\n", |
361 |
| - "| NVIDIA-SMI 440.44 Driver Version: 440.44 CUDA Version: 10.2 |\r\n", |
362 |
| - "|-------------------------------+----------------------+----------------------+\r\n", |
363 |
| - "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n", |
364 |
| - "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n", |
365 |
| - "|===============================+======================+======================|\r\n", |
366 |
| - "| 0 Tesla V100-PCIE... Off | 00000000:02:00.0 Off | 0 |\r\n", |
367 |
| - "| N/A 38C P0 36W / 250W | 3329MiB / 32510MiB | 0% Default |\r\n", |
368 |
| - "+-------------------------------+----------------------+----------------------+\r\n", |
369 |
| - "| 1 Tesla V100-PCIE... Off | 00000000:03:00.0 Off | 0 |\r\n", |
370 |
| - "| N/A 40C P0 38W / 250W | 4722MiB / 32510MiB | 0% Default |\r\n", |
371 |
| - "+-------------------------------+----------------------+----------------------+\r\n", |
372 |
| - " \r\n", |
373 |
| - "+-----------------------------------------------------------------------------+\r\n", |
374 |
| - "| Processes: GPU Memory |\r\n", |
375 |
| - "| GPU PID Type Process name Usage |\r\n", |
376 |
| - "|=============================================================================|\r\n", |
377 |
| - "+-----------------------------------------------------------------------------+\r\n" |
| 359 | + "Tesla V100-PCIE-32GB\n", |
| 360 | + "|===========================================================================|\n", |
| 361 | + "| PyTorch CUDA memory summary, device ID 0 |\n", |
| 362 | + "|---------------------------------------------------------------------------|\n", |
| 363 | + "| CUDA OOMs: 0 | cudaMalloc retries: 0 |\n", |
| 364 | + "|===========================================================================|\n", |
| 365 | + "| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\n", |
| 366 | + "|---------------------------------------------------------------------------|\n", |
| 367 | + "| Allocated memory | 0 B | 1152 MB | 200906 MB | 200906 MB |\n", |
| 368 | + "|---------------------------------------------------------------------------|\n", |
| 369 | + "| Active memory | 0 B | 1152 MB | 200906 MB | 200906 MB |\n", |
| 370 | + "|---------------------------------------------------------------------------|\n", |
| 371 | + "| GPU reserved memory | 2112 MB | 2112 MB | 2112 MB | 0 B |\n", |
| 372 | + "|---------------------------------------------------------------------------|\n", |
| 373 | + "| Non-releasable memory | 0 B | 455384 KB | 100324 MB | 100324 MB |\n", |
| 374 | + "|---------------------------------------------------------------------------|\n", |
| 375 | + "| Allocations | 0 | 609 | 30778 | 30778 |\n", |
| 376 | + "|---------------------------------------------------------------------------|\n", |
| 377 | + "| Active allocs | 0 | 609 | 30778 | 30778 |\n", |
| 378 | + "|---------------------------------------------------------------------------|\n", |
| 379 | + "| GPU reserved segments | 58 | 58 | 58 | 0 |\n", |
| 380 | + "|---------------------------------------------------------------------------|\n", |
| 381 | + "| Non-releasable allocs | 0 | 73 | 20653 | 20653 |\n", |
| 382 | + "|===========================================================================|\n", |
| 383 | + "\n" |
378 | 384 | ]
|
379 | 385 | }
|
380 | 386 | ],
|
381 | 387 | "source": [
|
382 |
| - "! nvidia-smi" |
| 388 | + "print(torch.cuda.get_device_name(0))\n", |
| 389 | + "print(torch.cuda.memory_summary(0, abbreviated=True))" |
383 | 390 | ]
|
384 | 391 | },
|
385 | 392 | {
|
386 | 393 | "cell_type": "markdown",
|
387 | 394 | "metadata": {},
|
388 | 395 | "source": [
|
389 |
| - "## Enable deterministic and train without AMP" |
| 396 | + "## Enable deterministic and train without AMP\n", |
| 397 | + "To measure memory usage correctly, please restart the notebook and skip the AMP training section above; otherwise the memory statistics reported below will also include the earlier AMP run." |
390 | 398 | ]
|
391 | 399 | },
|
392 | 400 | {
|
|
411 | 419 | },
|
412 | 420 | {
|
413 | 421 | "cell_type": "code",
|
414 |
| - "execution_count": 16, |
| 422 | + "execution_count": 8, |
415 | 423 | "metadata": {},
|
416 | 424 | "outputs": [
|
417 | 425 | {
|
418 | 426 | "name": "stdout",
|
419 | 427 | "output_type": "stream",
|
420 | 428 | "text": [
|
421 |
| - "Tue Aug 11 05:20:12 2020 \r\n", |
422 |
| - "+-----------------------------------------------------------------------------+\r\n", |
423 |
| - "| NVIDIA-SMI 440.44 Driver Version: 440.44 CUDA Version: 10.2 |\r\n", |
424 |
| - "|-------------------------------+----------------------+----------------------+\r\n", |
425 |
| - "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\r\n", |
426 |
| - "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\r\n", |
427 |
| - "|===============================+======================+======================|\r\n", |
428 |
| - "| 0 Tesla V100-PCIE... Off | 00000000:02:00.0 Off | 0 |\r\n", |
429 |
| - "| N/A 41C P0 37W / 250W | 4579MiB / 32510MiB | 0% Default |\r\n", |
430 |
| - "+-------------------------------+----------------------+----------------------+\r\n", |
431 |
| - "| 1 Tesla V100-PCIE... Off | 00000000:03:00.0 Off | 0 |\r\n", |
432 |
| - "| N/A 40C P0 38W / 250W | 4722MiB / 32510MiB | 0% Default |\r\n", |
433 |
| - "+-------------------------------+----------------------+----------------------+\r\n", |
434 |
| - " \r\n", |
435 |
| - "+-----------------------------------------------------------------------------+\r\n", |
436 |
| - "| Processes: GPU Memory |\r\n", |
437 |
| - "| GPU PID Type Process name Usage |\r\n", |
438 |
| - "|=============================================================================|\r\n", |
439 |
| - "+-----------------------------------------------------------------------------+\r\n" |
| 429 | + "Tesla V100-PCIE-32GB\n", |
| 430 | + "|===========================================================================|\n", |
| 431 | + "| PyTorch CUDA memory summary, device ID 0 |\n", |
| 432 | + "|---------------------------------------------------------------------------|\n", |
| 433 | + "| CUDA OOMs: 0 | cudaMalloc retries: 0 |\n", |
| 434 | + "|===========================================================================|\n", |
| 435 | + "| Metric | Cur Usage | Peak Usage | Tot Alloc | Tot Freed |\n", |
| 436 | + "|---------------------------------------------------------------------------|\n", |
| 437 | + "| Allocated memory | 0 B | 1730 MB | 279807 MB | 279807 MB |\n", |
| 438 | + "|---------------------------------------------------------------------------|\n", |
| 439 | + "| Active memory | 0 B | 1730 MB | 279807 MB | 279807 MB |\n", |
| 440 | + "|---------------------------------------------------------------------------|\n", |
| 441 | + "| GPU reserved memory | 3266 MB | 3266 MB | 3266 MB | 0 B |\n", |
| 442 | + "|---------------------------------------------------------------------------|\n", |
| 443 | + "| Non-releasable memory | 0 B | 630502 KB | 111299 MB | 111299 MB |\n", |
| 444 | + "|---------------------------------------------------------------------------|\n", |
| 445 | + "| Allocations | 0 | 542 | 25170 | 25170 |\n", |
| 446 | + "|---------------------------------------------------------------------------|\n", |
| 447 | + "| Active allocs | 0 | 542 | 25170 | 25170 |\n", |
| 448 | + "|---------------------------------------------------------------------------|\n", |
| 449 | + "| GPU reserved segments | 58 | 58 | 58 | 0 |\n", |
| 450 | + "|---------------------------------------------------------------------------|\n", |
| 451 | + "| Non-releasable allocs | 0 | 59 | 15985 | 15985 |\n", |
| 452 | + "|===========================================================================|\n", |
| 453 | + "\n" |
440 | 454 | ]
|
441 | 455 | }
|
442 | 456 | ],
|
443 | 457 | "source": [
|
444 |
| - "! nvidia-smi" |
| 458 | + "print(torch.cuda.get_device_name(0))\n", |
| 459 | + "print(torch.cuda.memory_summary(0, abbreviated=True))" |
445 | 460 | ]
|
446 | 461 | },
|
447 | 462 | {
|
|
0 commit comments