@@ -373,7 +373,7 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
373
373
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
374
374
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
375
375
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
376
- ; GFX9-O0-NEXT: buffer_store_dword v4 , off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
376
+ ; GFX9-O0-NEXT: buffer_store_dword v1 , off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
377
377
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
378
378
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x400
379
379
; GFX9-O0-NEXT: v_writelane_b32 v2, s30, 0
@@ -407,17 +407,17 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
407
407
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47]
408
408
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
409
409
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43]
410
- ; GFX9-O0-NEXT: v_mov_b32_e32 v4 , v0
411
- ; GFX9-O0-NEXT: v_add_u32_e64 v3, v4 , v3
410
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v1 , v0
411
+ ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1 , v3
412
412
; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
413
- ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
413
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
414
414
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4
415
415
; GFX9-O0-NEXT: v_readlane_b32 s31, v2, 1
416
416
; GFX9-O0-NEXT: v_readlane_b32 s30, v2, 0
417
417
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
418
418
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
419
419
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
420
- ; GFX9-O0-NEXT: buffer_load_dword v4 , off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
420
+ ; GFX9-O0-NEXT: buffer_load_dword v1 , off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
421
421
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
422
422
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00
423
423
; GFX9-O0-NEXT: s_mov_b32 s33, s48
@@ -432,7 +432,7 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
432
432
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
433
433
; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
434
434
; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
435
- ; GFX9-O3-NEXT: buffer_store_dword v4 , off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
435
+ ; GFX9-O3-NEXT: buffer_store_dword v1 , off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
436
436
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
437
437
; GFX9-O3-NEXT: v_writelane_b32 v2, s30, 0
438
438
; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
@@ -447,17 +447,17 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
447
447
; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4
448
448
; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12
449
449
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
450
- ; GFX9-O3-NEXT: v_mov_b32_e32 v4 , v0
451
- ; GFX9-O3-NEXT: v_add_u32_e32 v3, v4 , v3
450
+ ; GFX9-O3-NEXT: v_mov_b32_e32 v1 , v0
451
+ ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1 , v3
452
452
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
453
- ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v3
453
+ ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1
454
454
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
455
455
; GFX9-O3-NEXT: v_readlane_b32 s31, v2, 1
456
456
; GFX9-O3-NEXT: v_readlane_b32 s30, v2, 0
457
457
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
458
458
; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
459
459
; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
460
- ; GFX9-O3-NEXT: buffer_load_dword v4 , off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
460
+ ; GFX9-O3-NEXT: buffer_load_dword v1 , off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
461
461
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
462
462
; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
463
463
; GFX9-O3-NEXT: s_mov_b32 s33, s38
@@ -567,16 +567,16 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
567
567
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
568
568
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
569
569
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
570
- ; GFX9-O0-NEXT: buffer_store_dword v12 , off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
571
- ; GFX9-O0-NEXT: buffer_store_dword v13 , off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
570
+ ; GFX9-O0-NEXT: buffer_store_dword v2 , off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
571
+ ; GFX9-O0-NEXT: buffer_store_dword v3 , off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
572
572
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
573
- ; GFX9-O0-NEXT: buffer_store_dword v14 , off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
574
- ; GFX9-O0-NEXT: buffer_store_dword v13 , off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
575
- ; GFX9-O0-NEXT: buffer_store_dword v12 , off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
573
+ ; GFX9-O0-NEXT: buffer_store_dword v4 , off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
574
+ ; GFX9-O0-NEXT: buffer_store_dword v3 , off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
575
+ ; GFX9-O0-NEXT: buffer_store_dword v2 , off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
576
576
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
577
- ; GFX9-O0-NEXT: buffer_store_dword v13 , off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
578
- ; GFX9-O0-NEXT: buffer_store_dword v9 , off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
579
- ; GFX9-O0-NEXT: buffer_store_dword v10 , off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
577
+ ; GFX9-O0-NEXT: buffer_store_dword v3 , off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
578
+ ; GFX9-O0-NEXT: buffer_store_dword v4 , off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
579
+ ; GFX9-O0-NEXT: buffer_store_dword v5 , off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
580
580
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
581
581
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000
582
582
; GFX9-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane
@@ -614,10 +614,10 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
614
614
; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
615
615
; GFX9-O0-NEXT: v_writelane_b32 v11, s34, 4
616
616
; GFX9-O0-NEXT: v_writelane_b32 v11, s35, 5
617
- ; GFX9-O0-NEXT: v_mov_b32_e32 v12 , v9
617
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v2 , v9
618
618
; GFX9-O0-NEXT: s_mov_b32 s34, 32
619
619
; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
620
- ; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14 ], s34, v[9:10]
620
+ ; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4 ], s34, v[9:10]
621
621
; GFX9-O0-NEXT: s_getpc_b64 s[34:35]
622
622
; GFX9-O0-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4
623
623
; GFX9-O0-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12
@@ -626,8 +626,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
626
626
; GFX9-O0-NEXT: s_mov_b64 s[36:37], s[0:1]
627
627
; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[36:37]
628
628
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[38:39]
629
- ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12
630
- ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13
629
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
630
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
631
631
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
632
632
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
633
633
; GFX9-O0-NEXT: v_readlane_b32 s34, v11, 4
@@ -636,15 +636,17 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
636
636
; GFX9-O0-NEXT: v_readlane_b32 s37, v11, 1
637
637
; GFX9-O0-NEXT: v_readlane_b32 s38, v11, 2
638
638
; GFX9-O0-NEXT: v_readlane_b32 s39, v11, 3
639
- ; GFX9-O0-NEXT: v_mov_b32_e32 v12 , v0
640
- ; GFX9-O0-NEXT: v_mov_b32_e32 v13 , v1
639
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v2 , v0
640
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v3 , v1
641
641
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
642
642
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
643
- ; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[40:41], v12, v9
644
- ; GFX9-O0-NEXT: v_addc_co_u32_e64 v10, s[40:41], v13, v10, s[40:41]
643
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v9
644
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
645
+ ; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4
646
+ ; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41]
645
647
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
646
- ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v9
647
- ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10
648
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
649
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
648
650
; GFX9-O0-NEXT: s_mov_b32 s34, 0
649
651
; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4
650
652
; GFX9-O0-NEXT: v_readlane_b32 s31, v8, 1
@@ -655,14 +657,14 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
655
657
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
656
658
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
657
659
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
658
- ; GFX9-O0-NEXT: buffer_load_dword v12 , off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
659
- ; GFX9-O0-NEXT: buffer_load_dword v13 , off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
660
- ; GFX9-O0-NEXT: buffer_load_dword v14 , off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
661
- ; GFX9-O0-NEXT: buffer_load_dword v13 , off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
662
- ; GFX9-O0-NEXT: buffer_load_dword v12 , off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
663
- ; GFX9-O0-NEXT: buffer_load_dword v13 , off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
664
- ; GFX9-O0-NEXT: buffer_load_dword v9 , off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
665
- ; GFX9-O0-NEXT: buffer_load_dword v10 , off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
660
+ ; GFX9-O0-NEXT: buffer_load_dword v2 , off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
661
+ ; GFX9-O0-NEXT: buffer_load_dword v3 , off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
662
+ ; GFX9-O0-NEXT: buffer_load_dword v4 , off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
663
+ ; GFX9-O0-NEXT: buffer_load_dword v3 , off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
664
+ ; GFX9-O0-NEXT: buffer_load_dword v2 , off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
665
+ ; GFX9-O0-NEXT: buffer_load_dword v3 , off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
666
+ ; GFX9-O0-NEXT: buffer_load_dword v4 , off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
667
+ ; GFX9-O0-NEXT: buffer_load_dword v5 , off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
666
668
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
667
669
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff000
668
670
; GFX9-O0-NEXT: s_mov_b32 s33, s46
@@ -679,8 +681,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
679
681
; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
680
682
; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
681
683
; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
682
- ; GFX9-O3-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
683
- ; GFX9-O3-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
684
+ ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
685
+ ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
686
+ ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
687
+ ; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
688
+ ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
684
689
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
685
690
; GFX9-O3-NEXT: v_writelane_b32 v6, s30, 0
686
691
; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
@@ -702,22 +707,24 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
702
707
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v8
703
708
; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0)
704
709
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
705
- ; GFX9-O3-NEXT: v_mov_b32_e32 v9 , v0
706
- ; GFX9-O3-NEXT: v_mov_b32_e32 v10 , v1
707
- ; GFX9-O3-NEXT: v_add_co_u32_e32 v7 , vcc, v9 , v7
708
- ; GFX9-O3-NEXT: v_addc_co_u32_e32 v8 , vcc, v10 , v8, vcc
710
+ ; GFX9-O3-NEXT: v_mov_b32_e32 v2 , v0
711
+ ; GFX9-O3-NEXT: v_mov_b32_e32 v3 , v1
712
+ ; GFX9-O3-NEXT: v_add_co_u32_e32 v2 , vcc, v2 , v7
713
+ ; GFX9-O3-NEXT: v_addc_co_u32_e32 v3 , vcc, v3 , v8, vcc
709
714
; GFX9-O3-NEXT: s_mov_b64 exec, s[38:39]
710
- ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v7
711
- ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v8
715
+ ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
716
+ ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3
712
717
; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4
713
718
; GFX9-O3-NEXT: v_readlane_b32 s31, v6, 1
714
719
; GFX9-O3-NEXT: v_readlane_b32 s30, v6, 0
715
720
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
716
721
; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
717
722
; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
718
723
; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
719
- ; GFX9-O3-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
720
- ; GFX9-O3-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
724
+ ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
725
+ ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
726
+ ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
727
+ ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
721
728
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
722
729
; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
723
730
; GFX9-O3-NEXT: s_mov_b32 s33, s40
0 commit comments