Skip to content

Commit d273717

Browse files
committed
[AMDGPU] Remove s_delay_alu for VALU->SGPR->SALU
1 parent 58571c8 commit d273717

File tree

54 files changed

+200
-398
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+200
-398
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,10 @@ class AMDGPUInsertDelayAlu : public MachineFunctionPass {
371371
for (MCRegUnit Unit : TRI->regunits(Op.getReg())) {
372372
auto It = State.find(Unit);
373373
if (It != State.end()) {
374-
Delay.merge(It->second);
374+
if (!(SII->isSALU(MI.getOpcode())) ||
375+
!AMDGPU::isSGPR(Op.getReg(), TRI) ||
376+
It->second.VALUCycles == 0)
377+
Delay.merge(It->second);
375378
State.erase(Unit);
376379
}
377380
}

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -234,8 +234,8 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_vgpr_descr(i32 %node_ptr,
234234
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
235235
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
236236
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14]
237-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
238237
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
238+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
239239
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
240240
; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v20, v21, v[15:17], v[5:7], v[8:10]], s[4:7]
241241
; GFX11-NEXT: ; implicit-def: $vgpr18
@@ -360,8 +360,8 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
360360
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
361361
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[18:19]
362362
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[11:12]
363-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
364363
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
364+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
365365
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
366366
; GFX11-NEXT: image_bvh_intersect_ray v[0:3], [v16, v17, v[13:15], v[4:6]], s[4:7] a16
367367
; GFX11-NEXT: ; implicit-def: $vgpr18
@@ -476,8 +476,8 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_vgpr_descr(i64 %node_ptr
476476
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
477477
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
478478
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[14:15]
479-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
480479
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
480+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
481481
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
482482
; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[19:20], v21, v[16:18], v[6:8], v[9:11]], s[4:7]
483483
; GFX11-NEXT: ; implicit-def: $vgpr4
@@ -604,8 +604,8 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
604604
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
605605
; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5]
606606
; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13]
607-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
608607
; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0
608+
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
609609
; GFX11-NEXT: s_and_saveexec_b32 s0, s0
610610
; GFX11-NEXT: image_bvh64_intersect_ray v[0:3], [v[17:18], v19, v[14:16], v[20:22]], s[4:7] a16
611611
; GFX11-NEXT: ; implicit-def: $vgpr4

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1468,7 +1468,6 @@ define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
14681468
; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; encoding: [0x00,0x05,0x00,0x7e]
14691469
; GFX11-NEXT: ;;#ASMSTART
14701470
; GFX11-NEXT: ;;#ASMEND
1471-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf]
14721471
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 3), s0 ; encoding: [0x01,0x10,0x00,0xb9]
14731472
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
14741473
call void @llvm.amdgcn.s.setreg(i32 4097, i32 %var.mode)

llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -777,7 +777,6 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
777777
; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3
778778
; GFX11W64-NEXT: v_writelane_b32 v0, s2, s3
779779
; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
780-
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
781780
; GFX11W64-NEXT: s_add_i32 s2, s2, s8
782781
; GFX11W64-NEXT: s_cmp_lg_u64 s[0:1], 0
783782
; GFX11W64-NEXT: s_cbranch_scc1 .LBB2_1
@@ -822,7 +821,6 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
822821
; GFX11W32-NEXT: s_lshl_b32 s6, 1, s2
823822
; GFX11W32-NEXT: v_writelane_b32 v0, s0, s2
824823
; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s6
825-
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
826824
; GFX11W32-NEXT: s_add_i32 s0, s0, s3
827825
; GFX11W32-NEXT: s_cmp_lg_u32 s1, 0
828826
; GFX11W32-NEXT: s_cbranch_scc1 .LBB2_1
@@ -864,7 +862,6 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
864862
; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3
865863
; GFX12W64-NEXT: v_writelane_b32 v0, s2, s3
866864
; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
867-
; GFX12W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
868865
; GFX12W64-NEXT: s_add_co_i32 s2, s2, s8
869866
; GFX12W64-NEXT: s_cmp_lg_u64 s[0:1], 0
870867
; GFX12W64-NEXT: s_cbranch_scc1 .LBB2_1
@@ -910,7 +907,6 @@ define amdgpu_kernel void @add_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
910907
; GFX12W32-NEXT: s_lshl_b32 s6, 1, s2
911908
; GFX12W32-NEXT: v_writelane_b32 v0, s0, s2
912909
; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s6
913-
; GFX12W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
914910
; GFX12W32-NEXT: s_add_co_i32 s0, s0, s3
915911
; GFX12W32-NEXT: s_wait_alu 0xfffe
916912
; GFX12W32-NEXT: s_cmp_lg_u32 s1, 0
@@ -1178,7 +1174,6 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
11781174
; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3
11791175
; GFX11W64-NEXT: v_writelane_b32 v0, s2, s3
11801176
; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
1181-
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
11821177
; GFX11W64-NEXT: s_add_i32 s2, s2, s8
11831178
; GFX11W64-NEXT: s_cmp_lg_u64 s[0:1], 0
11841179
; GFX11W64-NEXT: s_cbranch_scc1 .LBB3_1
@@ -1226,7 +1221,6 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
12261221
; GFX11W32-NEXT: s_lshl_b32 s6, 1, s2
12271222
; GFX11W32-NEXT: v_writelane_b32 v0, s0, s2
12281223
; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s6
1229-
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
12301224
; GFX11W32-NEXT: s_add_i32 s0, s0, s3
12311225
; GFX11W32-NEXT: s_cmp_lg_u32 s1, 0
12321226
; GFX11W32-NEXT: s_cbranch_scc1 .LBB3_1
@@ -1270,7 +1264,6 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
12701264
; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3
12711265
; GFX12W64-NEXT: v_writelane_b32 v0, s2, s3
12721266
; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
1273-
; GFX12W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
12741267
; GFX12W64-NEXT: s_add_co_i32 s2, s2, s8
12751268
; GFX12W64-NEXT: s_cmp_lg_u64 s[0:1], 0
12761269
; GFX12W64-NEXT: s_cbranch_scc1 .LBB3_1
@@ -1319,7 +1312,6 @@ define amdgpu_kernel void @struct_add_i32_varying_vdata(ptr addrspace(1) %out, p
13191312
; GFX12W32-NEXT: s_lshl_b32 s6, 1, s2
13201313
; GFX12W32-NEXT: v_writelane_b32 v0, s0, s2
13211314
; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s6
1322-
; GFX12W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
13231315
; GFX12W32-NEXT: s_add_co_i32 s0, s0, s3
13241316
; GFX12W32-NEXT: s_wait_alu 0xfffe
13251317
; GFX12W32-NEXT: s_cmp_lg_u32 s1, 0
@@ -2246,7 +2238,6 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
22462238
; GFX11W64-NEXT: s_lshl_b64 s[6:7], 1, s3
22472239
; GFX11W64-NEXT: v_writelane_b32 v0, s2, s3
22482240
; GFX11W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
2249-
; GFX11W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
22502241
; GFX11W64-NEXT: s_add_i32 s2, s2, s8
22512242
; GFX11W64-NEXT: s_cmp_lg_u64 s[0:1], 0
22522243
; GFX11W64-NEXT: s_cbranch_scc1 .LBB7_1
@@ -2291,7 +2282,6 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
22912282
; GFX11W32-NEXT: s_lshl_b32 s6, 1, s2
22922283
; GFX11W32-NEXT: v_writelane_b32 v0, s0, s2
22932284
; GFX11W32-NEXT: s_and_not1_b32 s1, s1, s6
2294-
; GFX11W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
22952285
; GFX11W32-NEXT: s_add_i32 s0, s0, s3
22962286
; GFX11W32-NEXT: s_cmp_lg_u32 s1, 0
22972287
; GFX11W32-NEXT: s_cbranch_scc1 .LBB7_1
@@ -2334,7 +2324,6 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
23342324
; GFX12W64-NEXT: s_lshl_b64 s[6:7], 1, s3
23352325
; GFX12W64-NEXT: v_writelane_b32 v0, s2, s3
23362326
; GFX12W64-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[6:7]
2337-
; GFX12W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
23382327
; GFX12W64-NEXT: s_add_co_i32 s2, s2, s8
23392328
; GFX12W64-NEXT: s_cmp_lg_u64 s[0:1], 0
23402329
; GFX12W64-NEXT: s_cbranch_scc1 .LBB7_1
@@ -2380,7 +2369,6 @@ define amdgpu_kernel void @sub_i32_varying_vdata(ptr addrspace(1) %out, ptr addr
23802369
; GFX12W32-NEXT: s_lshl_b32 s6, 1, s2
23812370
; GFX12W32-NEXT: v_writelane_b32 v0, s0, s2
23822371
; GFX12W32-NEXT: s_and_not1_b32 s1, s1, s6
2383-
; GFX12W32-NEXT: s_delay_alu instid0(VALU_DEP_2)
23842372
; GFX12W32-NEXT: s_add_co_i32 s0, s0, s3
23852373
; GFX12W32-NEXT: s_wait_alu 0xfffe
23862374
; GFX12W32-NEXT: s_cmp_lg_u32 s1, 0

llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Lines changed: 4 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -899,7 +899,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
899899
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
900900
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
901901
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
902-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
903902
; GFX1164_ITERATIVE-NEXT: s_add_i32 s6, s6, s8
904903
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
905904
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB2_1
@@ -950,7 +949,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
950949
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
951950
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
952951
; GFX1132_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
953-
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
954952
; GFX1132_ITERATIVE-NEXT: s_add_i32 s6, s6, s2
955953
; GFX1132_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
956954
; GFX1132_ITERATIVE-NEXT: s_cbranch_scc1 .LBB2_1
@@ -999,7 +997,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
999997
; GFX1264_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
1000998
; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
1001999
; GFX1264_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
1002-
; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
10031000
; GFX1264_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s8
10041001
; GFX1264_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
10051002
; GFX1264_ITERATIVE-NEXT: s_cbranch_scc1 .LBB2_1
@@ -1049,7 +1046,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
10491046
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
10501047
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
10511048
; GFX1232_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
1052-
; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
10531049
; GFX1232_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s2
10541050
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
10551051
; GFX1232_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
@@ -2576,17 +2572,16 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
25762572
; GFX1164_ITERATIVE-NEXT: .LBB5_1: ; %ComputeLoop
25772573
; GFX1164_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
25782574
; GFX1164_ITERATIVE-NEXT: s_ctz_i32_b64 s2, s[0:1]
2579-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
2575+
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
25802576
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s3, v2, s2
25812577
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s8, v3, s2
25822578
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s2
25832579
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s2
25842580
; GFX1164_ITERATIVE-NEXT: s_add_u32 s6, s6, s3
2585-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
25862581
; GFX1164_ITERATIVE-NEXT: s_addc_u32 s7, s7, s8
25872582
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s2
2583+
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
25882584
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
2589-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
25902585
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
25912586
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB5_1
25922587
; GFX1164_ITERATIVE-NEXT: ; %bb.2: ; %ComputeEnd
@@ -2639,7 +2634,6 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
26392634
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
26402635
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
26412636
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
2642-
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
26432637
; GFX1132_ITERATIVE-NEXT: s_add_u32 s6, s6, s2
26442638
; GFX1132_ITERATIVE-NEXT: s_addc_u32 s7, s7, s3
26452639
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s1, 1, s1
@@ -4454,7 +4448,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
44544448
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
44554449
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
44564450
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
4457-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
44584451
; GFX1164_ITERATIVE-NEXT: s_add_i32 s6, s6, s8
44594452
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
44604453
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB8_1
@@ -4505,7 +4498,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
45054498
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
45064499
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
45074500
; GFX1132_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
4508-
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
45094501
; GFX1132_ITERATIVE-NEXT: s_add_i32 s6, s6, s2
45104502
; GFX1132_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
45114503
; GFX1132_ITERATIVE-NEXT: s_cbranch_scc1 .LBB8_1
@@ -4554,7 +4546,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
45544546
; GFX1264_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
45554547
; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
45564548
; GFX1264_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
4557-
; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
45584549
; GFX1264_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s8
45594550
; GFX1264_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
45604551
; GFX1264_ITERATIVE-NEXT: s_cbranch_scc1 .LBB8_1
@@ -4604,7 +4595,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
46044595
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
46054596
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
46064597
; GFX1232_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
4607-
; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
46084598
; GFX1232_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s2
46094599
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
46104600
; GFX1232_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
@@ -6164,17 +6154,16 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
61646154
; GFX1164_ITERATIVE-NEXT: .LBB11_1: ; %ComputeLoop
61656155
; GFX1164_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
61666156
; GFX1164_ITERATIVE-NEXT: s_ctz_i32_b64 s2, s[0:1]
6167-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
6157+
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
61686158
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s3, v2, s2
61696159
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s8, v3, s2
61706160
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s2
61716161
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s2
61726162
; GFX1164_ITERATIVE-NEXT: s_add_u32 s6, s6, s3
6173-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
61746163
; GFX1164_ITERATIVE-NEXT: s_addc_u32 s7, s7, s8
61756164
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s2
6165+
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
61766166
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
6177-
; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
61786167
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
61796168
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB11_1
61806169
; GFX1164_ITERATIVE-NEXT: ; %bb.2: ; %ComputeEnd
@@ -6227,7 +6216,6 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
62276216
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
62286217
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
62296218
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
6230-
; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
62316219
; GFX1132_ITERATIVE-NEXT: s_add_u32 s6, s6, s2
62326220
; GFX1132_ITERATIVE-NEXT: s_addc_u32 s7, s7, s3
62336221
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s1, 1, s1

0 commit comments

Comments
 (0)