Skip to content

Commit 24a245e

Browse files
committed
Skip UsedPhysRegMask test when calling isPhysRegUsed.
We are not concerned about clobbers from calls, as these will be spilled correctly, and they were never considered before.
1 parent 04174a2 commit 24a245e

File tree

7 files changed

+2226
-2211
lines changed

7 files changed

+2226
-2211
lines changed

llvm/lib/Target/AMDGPU/SIPreAllocateWWMRegs.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
9999
LiveInterval &LI = LIS->getInterval(Reg);
100100

101101
for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
102-
if (!MRI->isPhysRegUsed(PhysReg) &&
102+
if (!MRI->isPhysRegUsed(PhysReg, true) &&
103103
Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
104104
Matrix->assign(LI, PhysReg);
105105
assert(PhysReg != 0);

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fadd.ll

Lines changed: 576 additions & 576 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmax.ll

Lines changed: 465 additions & 462 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fmin.ll

Lines changed: 465 additions & 462 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/global_atomics_scan_fsub.ll

Lines changed: 600 additions & 600 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -373,7 +373,7 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
373373
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
374374
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
375375
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
376-
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
376+
; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
377377
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
378378
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x400
379379
; GFX9-O0-NEXT: v_writelane_b32 v2, s30, 0
@@ -407,17 +407,17 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
407407
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47]
408408
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
409409
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43]
410-
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v0
411-
; GFX9-O0-NEXT: v_add_u32_e64 v3, v4, v3
410+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0
411+
; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v3
412412
; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41]
413-
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v3
413+
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
414414
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4
415415
; GFX9-O0-NEXT: v_readlane_b32 s31, v2, 1
416416
; GFX9-O0-NEXT: v_readlane_b32 s30, v2, 0
417417
; GFX9-O0-NEXT: s_xor_saveexec_b64 s[34:35], -1
418418
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
419419
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
420-
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
420+
; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
421421
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
422422
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00
423423
; GFX9-O0-NEXT: s_mov_b32 s33, s48
@@ -432,7 +432,7 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
432432
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
433433
; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
434434
; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
435-
; GFX9-O3-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
435+
; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
436436
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
437437
; GFX9-O3-NEXT: v_writelane_b32 v2, s30, 0
438438
; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
@@ -447,17 +447,17 @@ define amdgpu_gfx void @strict_wwm_call(ptr addrspace(8) inreg %tmp14, i32 inreg
447447
; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4
448448
; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12
449449
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
450-
; GFX9-O3-NEXT: v_mov_b32_e32 v4, v0
451-
; GFX9-O3-NEXT: v_add_u32_e32 v3, v4, v3
450+
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0
451+
; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v3
452452
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
453-
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v3
453+
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1
454454
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
455455
; GFX9-O3-NEXT: v_readlane_b32 s31, v2, 1
456456
; GFX9-O3-NEXT: v_readlane_b32 s30, v2, 0
457457
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
458458
; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
459459
; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
460-
; GFX9-O3-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
460+
; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
461461
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
462462
; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
463463
; GFX9-O3-NEXT: s_mov_b32 s33, s38
@@ -567,16 +567,16 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
567567
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
568568
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
569569
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
570-
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
571-
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
570+
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
571+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
572572
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
573-
; GFX9-O0-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
574-
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
575-
; GFX9-O0-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
573+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
574+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill
575+
; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill
576576
; GFX9-O0-NEXT: s_waitcnt vmcnt(0)
577-
; GFX9-O0-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
578-
; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
579-
; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
577+
; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill
578+
; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill
579+
; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill
580580
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
581581
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0x1000
582582
; GFX9-O0-NEXT: ; implicit-def: $vgpr11 : SGPR spill to VGPR lane
@@ -614,10 +614,10 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
614614
; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1
615615
; GFX9-O0-NEXT: v_writelane_b32 v11, s34, 4
616616
; GFX9-O0-NEXT: v_writelane_b32 v11, s35, 5
617-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v9
617+
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v9
618618
; GFX9-O0-NEXT: s_mov_b32 s34, 32
619619
; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37
620-
; GFX9-O0-NEXT: v_lshrrev_b64 v[13:14], s34, v[9:10]
620+
; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s34, v[9:10]
621621
; GFX9-O0-NEXT: s_getpc_b64 s[34:35]
622622
; GFX9-O0-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4
623623
; GFX9-O0-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12
@@ -626,8 +626,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
626626
; GFX9-O0-NEXT: s_mov_b64 s[36:37], s[0:1]
627627
; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[36:37]
628628
; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[38:39]
629-
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v12
630-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v13
629+
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
630+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
631631
; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0)
632632
; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35]
633633
; GFX9-O0-NEXT: v_readlane_b32 s34, v11, 4
@@ -636,15 +636,17 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
636636
; GFX9-O0-NEXT: v_readlane_b32 s37, v11, 1
637637
; GFX9-O0-NEXT: v_readlane_b32 s38, v11, 2
638638
; GFX9-O0-NEXT: v_readlane_b32 s39, v11, 3
639-
; GFX9-O0-NEXT: v_mov_b32_e32 v12, v0
640-
; GFX9-O0-NEXT: v_mov_b32_e32 v13, v1
639+
; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0
640+
; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1
641641
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
642642
; GFX9-O0-NEXT: ; implicit-def: $sgpr40
643-
; GFX9-O0-NEXT: v_add_co_u32_e64 v9, s[40:41], v12, v9
644-
; GFX9-O0-NEXT: v_addc_co_u32_e64 v10, s[40:41], v13, v10, s[40:41]
643+
; GFX9-O0-NEXT: v_mov_b32_e32 v4, v9
644+
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v10
645+
; GFX9-O0-NEXT: v_add_co_u32_e64 v2, s[40:41], v2, v4
646+
; GFX9-O0-NEXT: v_addc_co_u32_e64 v3, s[40:41], v3, v5, s[40:41]
645647
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
646-
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v9
647-
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v10
648+
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2
649+
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
648650
; GFX9-O0-NEXT: s_mov_b32 s34, 0
649651
; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4
650652
; GFX9-O0-NEXT: v_readlane_b32 s31, v8, 1
@@ -655,14 +657,14 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
655657
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
656658
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
657659
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
658-
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
659-
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
660-
; GFX9-O0-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
661-
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
662-
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
663-
; GFX9-O0-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
664-
; GFX9-O0-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
665-
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
660+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
661+
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
662+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
663+
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload
664+
; GFX9-O0-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload
665+
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload
666+
; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload
667+
; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload
666668
; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35]
667669
; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff000
668670
; GFX9-O0-NEXT: s_mov_b32 s33, s46
@@ -679,8 +681,11 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
679681
; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
680682
; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
681683
; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
682-
; GFX9-O3-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
683-
; GFX9-O3-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
684+
; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
685+
; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
686+
; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill
687+
; GFX9-O3-NEXT: s_waitcnt vmcnt(0)
688+
; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill
684689
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
685690
; GFX9-O3-NEXT: v_writelane_b32 v6, s30, 0
686691
; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
@@ -702,22 +707,24 @@ define amdgpu_gfx void @strict_wwm_call_i64(ptr addrspace(8) inreg %tmp14, i64 i
702707
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v8
703708
; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0)
704709
; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37]
705-
; GFX9-O3-NEXT: v_mov_b32_e32 v9, v0
706-
; GFX9-O3-NEXT: v_mov_b32_e32 v10, v1
707-
; GFX9-O3-NEXT: v_add_co_u32_e32 v7, vcc, v9, v7
708-
; GFX9-O3-NEXT: v_addc_co_u32_e32 v8, vcc, v10, v8, vcc
710+
; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0
711+
; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1
712+
; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
713+
; GFX9-O3-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v8, vcc
709714
; GFX9-O3-NEXT: s_mov_b64 exec, s[38:39]
710-
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v7
711-
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v8
715+
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
716+
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3
712717
; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4
713718
; GFX9-O3-NEXT: v_readlane_b32 s31, v6, 1
714719
; GFX9-O3-NEXT: v_readlane_b32 s30, v6, 0
715720
; GFX9-O3-NEXT: s_xor_saveexec_b64 s[34:35], -1
716721
; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s33 ; 4-byte Folded Reload
717722
; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
718723
; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
719-
; GFX9-O3-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
720-
; GFX9-O3-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
724+
; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
725+
; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
726+
; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload
727+
; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload
721728
; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35]
722729
; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
723730
; GFX9-O3-NEXT: s_mov_b32 s33, s40

0 commit comments

Comments
 (0)