@@ -559,27 +559,28 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
559
559
; GFX9-O0-LABEL: strict_wwm_call_i64:
560
560
; GFX9-O0: ; %bb.0:
561
561
; GFX9-O0-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562
- ; GFX9-O0-NEXT: s_mov_b32 s46 , s33
562
+ ; GFX9-O0-NEXT: s_mov_b32 s48 , s33
563
563
; GFX9-O0-NEXT: s_mov_b32 s33, s32
564
564
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
565
565
; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s33 ; 4-byte Folded Spill
566
- ; GFX9-O0-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
567
- ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
568
- ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
569
- ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
570
- ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
571
- ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
566
+ ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
567
+ ; GFX9-O0-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
568
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
572
569
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
573
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
570
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
571
+ ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
574
572
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
575
- ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
576
573
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
574
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
577
575
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
578
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
579
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
576
+ ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
577
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
578
+ ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
579
+ ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill
580
+ ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill
580
581
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
581
582
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000
582
- ; GFX9-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane
583
+ ; GFX9-O0-NEXT: ; implicit-def: $vgpr0 : SGPR spill to VGPR lane
583
584
; GFX9-O0-NEXT: v_writelane_b32 v8, s30, 0
584
585
; GFX9-O0-NEXT: v_writelane_b32 v8, s31, 1
585
586
; GFX9-O0-NEXT: s_mov_b32 s34, s8
@@ -597,10 +598,10 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
597
598
; GFX9-O0-NEXT: s_mov_b32 s41, s45
598
599
; GFX9-O0-NEXT: s_mov_b32 s42, s44
599
600
; GFX9-O0-NEXT: s_mov_b32 s43, s35
600
- ; GFX9-O0-NEXT: v_writelane_b32 v11 , s40, 0
601
- ; GFX9-O0-NEXT: v_writelane_b32 v11 , s41, 1
602
- ; GFX9-O0-NEXT: v_writelane_b32 v11 , s42, 2
603
- ; GFX9-O0-NEXT: v_writelane_b32 v11 , s43, 3
601
+ ; GFX9-O0-NEXT: v_writelane_b32 v0 , s40, 0
602
+ ; GFX9-O0-NEXT: v_writelane_b32 v0 , s41, 1
603
+ ; GFX9-O0-NEXT: v_writelane_b32 v0 , s42, 2
604
+ ; GFX9-O0-NEXT: v_writelane_b32 v0 , s43, 3
604
605
; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35
605
606
; GFX9-O0-NEXT: s_mov_b32 s35, s9
606
607
; GFX9-O0-NEXT: ; kill: def $sgpr36_sgpr37 killed $sgpr34_sgpr35
@@ -612,8 +613,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
612
613
; GFX9-O0-NEXT: v_mov_b32_e32 v10, s37
613
614
; GFX9-O0-NEXT: s_not_b64 exec, exec
614
615
; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
615
- ; GFX9-O0-NEXT: v_writelane_b32 v11, s34, 4
616
- ; GFX9-O0-NEXT: v_writelane_b32 v11, s35, 5
616
+ ; GFX9-O0-NEXT: v_writelane_b32 v0, s34, 4
617
+ ; GFX9-O0-NEXT: v_writelane_b32 v0, s35, 5
618
+ ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
619
+ ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
620
+ ; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
617
621
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
618
622
; GFX9-O0-NEXT: s_mov_b32 s34, 32
619
623
; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
@@ -630,13 +634,20 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
630
634
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
631
635
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
632
636
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
633
- ; GFX9-O0-NEXT: v_readlane_b32 s34, v11, 4
634
- ; GFX9-O0-NEXT: v_readlane_b32 s35, v11, 5
635
- ; GFX9-O0-NEXT: v_readlane_b32 s36, v11, 0
636
- ; GFX9-O0-NEXT: v_readlane_b32 s37, v11, 1
637
- ; GFX9-O0-NEXT: v_readlane_b32 s38, v11, 2
638
- ; GFX9-O0-NEXT: v_readlane_b32 s39, v11, 3
637
+ ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
638
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
639
+ ; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
640
+ ; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
641
+ ; GFX9-O0-NEXT: v_readlane_b32 s34, v6, 4
642
+ ; GFX9-O0-NEXT: v_readlane_b32 s35, v6, 5
643
+ ; GFX9-O0-NEXT: v_readlane_b32 s36, v6, 0
644
+ ; GFX9-O0-NEXT: v_readlane_b32 s37, v6, 1
645
+ ; GFX9-O0-NEXT: v_readlane_b32 s38, v6, 2
646
+ ; GFX9-O0-NEXT: v_readlane_b32 s39, v6, 3
639
647
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
648
+ ; GFX9-O0-NEXT: s_or_saveexec_b64 s[46:47], -1
649
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
650
+ ; GFX9-O0-NEXT: s_mov_b64 exec, s[46:47]
640
651
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
641
652
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
642
653
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
@@ -645,29 +656,30 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
645
656
; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4
646
657
; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41]
647
658
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
648
- ; GFX9-O0-NEXT: v_mov_b32_e32 v0 , v2
649
- ; GFX9-O0-NEXT: v_mov_b32_e32 v1 , v3
659
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v6 , v2
660
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v7 , v3
650
661
; GFX9-O0-NEXT: s_mov_b32 s34, 0
651
- ; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1 ], off, s[36:39], s34 offset:4
662
+ ; GFX9-O0-NEXT: buffer_store_dwordx2 v[6:7 ], off, s[36:39], s34 offset:4
652
663
; GFX9-O0-NEXT: v_readlane_b32 s31, v8, 1
653
664
; GFX9-O0-NEXT: v_readlane_b32 s30, v8, 0
654
- ; GFX9-O0-NEXT: ; kill: killed $vgpr11
665
+ ; GFX9-O0-NEXT: ; kill: killed $vgpr0
655
666
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
656
667
; GFX9-O0-NEXT: buffer_load_dword v8, off, s[0:3], s33 ; 4-byte Folded Reload
657
- ; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
658
- ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
659
- ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
660
- ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
661
- ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
662
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
668
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
669
+ ; GFX9-O0-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
670
+ ; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
671
+ ; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
672
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
663
673
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
664
- ; GFX9-O0-NEXT: buffer_load_dword v2 , off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
674
+ ; GFX9-O0-NEXT: buffer_load_dword v4 , off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
665
675
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
666
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
667
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
676
+ ; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
677
+ ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
678
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload
679
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload
668
680
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
669
681
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff000
670
- ; GFX9-O0-NEXT: s_mov_b32 s33, s46
682
+ ; GFX9-O0-NEXT: s_mov_b32 s33, s48
671
683
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
672
684
; GFX9-O0-NEXT: s_setpc_b64 s[30:31]
673
685
;
0 commit comments