Skip to content

Commit a4ced03

Browse files
committed
[AMDGPU] SIFoldOperands: eagerly delete dead copies
This is cheap to implement, means less work for future passes like MachineDCE, and slightly improves the folding in some cases.

Differential Revision: https://reviews.llvm.org/D100117
1 parent 5b23e38 commit a4ced03

12 files changed

+26
-27
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1783,6 +1783,11 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
17831783
continue;
17841784

17851785
foldInstOperand(MI, OpToFold);
1786+
1787+
// If we managed to fold all uses of this copy then we might as well
1788+
// delete it now.
1789+
if (MRI->use_nodbg_empty(MI.getOperand(0).getReg()))
1790+
MI.eraseFromParentAndMarkDBGValuesForRemoval();
17861791
}
17871792
}
17881793
return true;

llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1220,9 +1220,8 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
12201220
; SI-NEXT: v_ffbh_u32_e32 v3, 0
12211221
; SI-NEXT: v_cmp_eq_u32_e64 vcc, 0, 0
12221222
; SI-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
1223-
; SI-NEXT: v_mov_b32_e32 v3, 0xbe
12241223
; SI-NEXT: v_mov_b32_e32 v1, 0
1225-
; SI-NEXT: v_sub_i32_e32 v4, vcc, v3, v2
1224+
; SI-NEXT: v_sub_i32_e32 v4, vcc, 0xbe, v2
12261225
; SI-NEXT: v_lshl_b64 v[2:3], v[0:1], v2
12271226
; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
12281227
; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3
@@ -1251,9 +1250,8 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
12511250
; VI-NEXT: v_ffbh_u32_e32 v3, 0
12521251
; VI-NEXT: v_cmp_eq_u32_e64 vcc, 0, 0
12531252
; VI-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
1254-
; VI-NEXT: v_mov_b32_e32 v3, 0xbe
12551253
; VI-NEXT: v_mov_b32_e32 v1, 0
1256-
; VI-NEXT: v_sub_u32_e32 v4, vcc, v3, v2
1254+
; VI-NEXT: v_sub_u32_e32 v4, vcc, 0xbe, v2
12571255
; VI-NEXT: v_lshlrev_b64 v[2:3], v2, v[0:1]
12581256
; VI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
12591257
; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3

llvm/test/CodeGen/AMDGPU/flat-scratch-fold-fi.mir

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@ stack:
88
body: |
99
bb.0.entry:
1010
; GCN-LABEL: name: test_fold_fi_scratch_load_vgpr
11-
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
1211
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
1312
; GCN: S_ENDPGM 0
1413
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
@@ -24,7 +23,6 @@ stack:
2423
body: |
2524
bb.0.entry:
2625
; GCN-LABEL: name: test_fold_fi_scratch_load_sgpr
27-
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
2826
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (load 4 from %stack.0, addrspace 5)
2927
; GCN: S_ENDPGM 0
3028
%0:sgpr_32 = S_MOV_B32 %stack.0
@@ -40,7 +38,6 @@ stack:
4038
body: |
4139
bb.0.entry:
4240
; GCN-LABEL: name: test_fold_fi_scratch_store_vgpr
43-
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
4441
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
4542
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
4643
; GCN: S_ENDPGM 0
@@ -76,7 +73,6 @@ stack:
7673
body: |
7774
bb.0.entry:
7875
; GCN-LABEL: name: test_fold_fi_scratch_store_sgpr
79-
; GCN: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 %stack.0
8076
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
8177
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], %stack.0, 4, 0, implicit $exec, implicit $flat_scr :: (store 4 into %stack.0, addrspace 5)
8278
; GCN: S_ENDPGM 0

llvm/test/CodeGen/AMDGPU/fold-cndmask-wave32.mir

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ body: |
1111
; CHECK: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
1212
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1313
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
14-
; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
1514
%0:sreg_32_xm0_xexec = IMPLICIT_DEF
1615
%1:sreg_32 = S_MOV_B32 0
1716
%2:vgpr_32 = COPY %1:sreg_32

llvm/test/CodeGen/AMDGPU/fold-cndmask.mir

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
# CHECK: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
44
# CHECK: %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
55
# CHECK: %4:vgpr_32 = COPY %3
6-
# CHECK: %5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
76
# CHECK: %6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
87
# CHECK: %7:vgpr_32 = COPY %3
98

llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -375,7 +375,6 @@ body: |
375375
...
376376
---
377377
# CHECK-LABEL: name: add_i32_1_multi_f16_use
378-
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
379378
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $mode, implicit $exec
380379
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $mode, implicit $exec
381380

@@ -440,7 +439,6 @@ body: |
440439
---
441440

442441
# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
443-
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
444442
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $mode, implicit $exec
445443
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $mode, implicit $exec
446444
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $mode, implicit $exec

llvm/test/CodeGen/AMDGPU/fold-operands-order.mir

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,7 @@
66
# aren't made in users before the def is seen.
77

88
# GCN-LABEL: name: mov_in_use_list_2x{{$}}
9-
# GCN: %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
10-
# GCN-NEXT: %3:vgpr_32 = COPY undef %0
11-
12-
# GCN: %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
9+
# GCN: %3:vgpr_32 = COPY undef %0
1310

1411

1512
name: mov_in_use_list_2x

llvm/test/CodeGen/AMDGPU/fold-readlane.mir

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,10 +12,22 @@ body: |
1212
S_NOP 0, implicit %1
1313
...
1414

15+
# GCN-LABEL: name: fold-imm-readfirstlane-dbgvalue{{$}}
16+
# GCN: %1:sreg_32_xm0 = S_MOV_B32 123
17+
# GCN: DBG_VALUE $noreg, 0, 0
18+
---
19+
name: fold-imm-readfirstlane-dbgvalue
20+
tracksRegLiveness: true
21+
body: |
22+
bb.0:
23+
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
24+
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
25+
DBG_VALUE %0, 0, 0
26+
S_NOP 0, implicit %1
27+
...
28+
1529
# GCN-LABEL: name: fold-imm-readfirstlane-readfirstlane{{$}}
16-
# GCN: %0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
1730
# GCN: %1:sreg_32_xm0 = S_MOV_B32 123
18-
# GCN: %2:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
1931
# GCN: %3:sreg_32_xm0 = COPY %1
2032

2133
---
@@ -33,7 +45,6 @@ body: |
3345

3446
# GCN-LABEL: name: fold-copy-readfirstlane{{$}}
3547
# GCN: %0:sreg_32_xm0 = COPY $sgpr10
36-
# GCN: %1:vgpr_32 = COPY %0
3748
# GCN: %2:sreg_32_xm0 = COPY %0
3849
---
3950
name: fold-copy-readfirstlane

llvm/test/CodeGen/AMDGPU/huge-number-operand-folds.mir

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@ body: |
1313
liveins: $sgpr12_sgpr13_sgpr14_sgpr15
1414
1515
; GCN-LABEL: name: op_idx_overflows_uchar
16-
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
1716
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
1817
; GCN: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
1918
%0:sreg_32 = S_MOV_B32 0

llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -392,8 +392,8 @@ define amdgpu_kernel void @DiffBase(i8 addrspace(1)* %buffer1,
392392
; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
393393
; GFX8: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
394394
;
395+
; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
395396
; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
396-
; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:-4096
397397
; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}
398398
; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off offset:2048
399399
; GFX9: global_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], off{{$}}

llvm/test/CodeGen/AMDGPU/udiv64.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1205,11 +1205,10 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
12051205
; GCN-NEXT: v_lshrrev_b32_e32 v2, 17, v2
12061206
; GCN-NEXT: v_mul_lo_u32 v3, v1, v2
12071207
; GCN-NEXT: v_mul_hi_u32 v4, v0, v2
1208-
; GCN-NEXT: s_mov_b32 s4, 0x8000
12091208
; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3
12101209
; GCN-NEXT: v_mul_lo_u32 v4, v0, v2
12111210
; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v3
1212-
; GCN-NEXT: v_sub_i32_e32 v4, vcc, s4, v4
1211+
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0x8000, v4
12131212
; GCN-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v1, vcc
12141213
; GCN-NEXT: v_sub_i32_e64 v6, s[4:5], v4, v0
12151214
; GCN-NEXT: v_subbrev_u32_e64 v5, s[4:5], 0, v5, s[4:5]
@@ -1246,8 +1245,7 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
12461245
; GCN-IR-NEXT: v_ffbh_u32_e32 v3, v1
12471246
; GCN-IR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
12481247
; GCN-IR-NEXT: v_cndmask_b32_e32 v8, v3, v2, vcc
1249-
; GCN-IR-NEXT: s_movk_i32 s6, 0xffd0
1250-
; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, s6, v8
1248+
; GCN-IR-NEXT: v_add_i32_e32 v4, vcc, 0xffffffd0, v8
12511249
; GCN-IR-NEXT: v_addc_u32_e64 v5, s[6:7], 0, -1, vcc
12521250
; GCN-IR-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
12531251
; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, 63, v[4:5]

llvm/test/CodeGen/AMDGPU/urem64.ll

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1226,10 +1226,9 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
12261226
; GCN-NEXT: v_mul_lo_u32 v3, v1, v2
12271227
; GCN-NEXT: v_mul_hi_u32 v4, v0, v2
12281228
; GCN-NEXT: v_mul_lo_u32 v2, v0, v2
1229-
; GCN-NEXT: s_mov_b32 s4, 0x8000
12301229
; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3
12311230
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
1232-
; GCN-NEXT: v_sub_i32_e32 v2, vcc, s4, v2
1231+
; GCN-NEXT: v_sub_i32_e32 v2, vcc, 0x8000, v2
12331232
; GCN-NEXT: v_subb_u32_e64 v4, s[4:5], v4, v1, vcc
12341233
; GCN-NEXT: v_sub_i32_e64 v5, s[4:5], v2, v0
12351234
; GCN-NEXT: v_subbrev_u32_e64 v6, s[6:7], 0, v4, s[4:5]

0 commit comments

Comments
 (0)