@@ -899,7 +899,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
899
899
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
900
900
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
901
901
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
902
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
903
902
; GFX1164_ITERATIVE-NEXT: s_add_i32 s6, s6, s8
904
903
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
905
904
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB2_1
@@ -950,7 +949,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
950
949
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
951
950
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
952
951
; GFX1132_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
953
- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
954
952
; GFX1132_ITERATIVE-NEXT: s_add_i32 s6, s6, s2
955
953
; GFX1132_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
956
954
; GFX1132_ITERATIVE-NEXT: s_cbranch_scc1 .LBB2_1
@@ -999,7 +997,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
999
997
; GFX1264_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
1000
998
; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
1001
999
; GFX1264_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
1002
- ; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
1003
1000
; GFX1264_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s8
1004
1001
; GFX1264_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
1005
1002
; GFX1264_ITERATIVE-NEXT: s_cbranch_scc1 .LBB2_1
@@ -1049,7 +1046,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
1049
1046
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
1050
1047
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
1051
1048
; GFX1232_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
1052
- ; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
1053
1049
; GFX1232_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s2
1054
1050
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
1055
1051
; GFX1232_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
@@ -2576,17 +2572,16 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
2576
2572
; GFX1164_ITERATIVE-NEXT: .LBB5_1: ; %ComputeLoop
2577
2573
; GFX1164_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
2578
2574
; GFX1164_ITERATIVE-NEXT: s_ctz_i32_b64 s2, s[0:1]
2579
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
2575
+ ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2580
2576
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s3, v2, s2
2581
2577
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s8, v3, s2
2582
2578
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s2
2583
2579
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s2
2584
2580
; GFX1164_ITERATIVE-NEXT: s_add_u32 s6, s6, s3
2585
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
2586
2581
; GFX1164_ITERATIVE-NEXT: s_addc_u32 s7, s7, s8
2587
2582
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s2
2583
+ ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2588
2584
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
2589
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2590
2585
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
2591
2586
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB5_1
2592
2587
; GFX1164_ITERATIVE-NEXT: ; %bb.2: ; %ComputeEnd
@@ -2639,7 +2634,6 @@ define amdgpu_kernel void @add_i64_varying(ptr addrspace(1) %out, ptr addrspace(
2639
2634
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
2640
2635
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
2641
2636
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
2642
- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2643
2637
; GFX1132_ITERATIVE-NEXT: s_add_u32 s6, s6, s2
2644
2638
; GFX1132_ITERATIVE-NEXT: s_addc_u32 s7, s7, s3
2645
2639
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s1, 1, s1
@@ -4454,7 +4448,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4454
4448
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
4455
4449
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
4456
4450
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
4457
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
4458
4451
; GFX1164_ITERATIVE-NEXT: s_add_i32 s6, s6, s8
4459
4452
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
4460
4453
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB8_1
@@ -4505,7 +4498,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4505
4498
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
4506
4499
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
4507
4500
; GFX1132_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
4508
- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
4509
4501
; GFX1132_ITERATIVE-NEXT: s_add_i32 s6, s6, s2
4510
4502
; GFX1132_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
4511
4503
; GFX1132_ITERATIVE-NEXT: s_cbranch_scc1 .LBB8_1
@@ -4554,7 +4546,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4554
4546
; GFX1264_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s7
4555
4547
; GFX1264_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s7
4556
4548
; GFX1264_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
4557
- ; GFX1264_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
4558
4549
; GFX1264_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s8
4559
4550
; GFX1264_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
4560
4551
; GFX1264_ITERATIVE-NEXT: s_cbranch_scc1 .LBB8_1
@@ -4604,7 +4595,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
4604
4595
; GFX1232_ITERATIVE-NEXT: s_lshl_b32 s3, 1, s1
4605
4596
; GFX1232_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
4606
4597
; GFX1232_ITERATIVE-NEXT: s_and_not1_b32 s0, s0, s3
4607
- ; GFX1232_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_2)
4608
4598
; GFX1232_ITERATIVE-NEXT: s_add_co_i32 s6, s6, s2
4609
4599
; GFX1232_ITERATIVE-NEXT: s_wait_alu 0xfffe
4610
4600
; GFX1232_ITERATIVE-NEXT: s_cmp_lg_u32 s0, 0
@@ -6164,17 +6154,16 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
6164
6154
; GFX1164_ITERATIVE-NEXT: .LBB11_1: ; %ComputeLoop
6165
6155
; GFX1164_ITERATIVE-NEXT: ; =>This Inner Loop Header: Depth=1
6166
6156
; GFX1164_ITERATIVE-NEXT: s_ctz_i32_b64 s2, s[0:1]
6167
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
6157
+ ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6168
6158
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s3, v2, s2
6169
6159
; GFX1164_ITERATIVE-NEXT: v_readlane_b32 s8, v3, s2
6170
6160
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s2
6171
6161
; GFX1164_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s2
6172
6162
; GFX1164_ITERATIVE-NEXT: s_add_u32 s6, s6, s3
6173
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
6174
6163
; GFX1164_ITERATIVE-NEXT: s_addc_u32 s7, s7, s8
6175
6164
; GFX1164_ITERATIVE-NEXT: s_lshl_b64 s[2:3], 1, s2
6165
+ ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
6176
6166
; GFX1164_ITERATIVE-NEXT: s_and_not1_b64 s[0:1], s[0:1], s[2:3]
6177
- ; GFX1164_ITERATIVE-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6178
6167
; GFX1164_ITERATIVE-NEXT: s_cmp_lg_u64 s[0:1], 0
6179
6168
; GFX1164_ITERATIVE-NEXT: s_cbranch_scc1 .LBB11_1
6180
6169
; GFX1164_ITERATIVE-NEXT: ; %bb.2: ; %ComputeEnd
@@ -6227,7 +6216,6 @@ define amdgpu_kernel void @sub_i64_varying(ptr addrspace(1) %out, ptr addrspace(
6227
6216
; GFX1132_ITERATIVE-NEXT: v_readlane_b32 s3, v3, s1
6228
6217
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v0, s6, s1
6229
6218
; GFX1132_ITERATIVE-NEXT: v_writelane_b32 v1, s7, s1
6230
- ; GFX1132_ITERATIVE-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
6231
6219
; GFX1132_ITERATIVE-NEXT: s_add_u32 s6, s6, s2
6232
6220
; GFX1132_ITERATIVE-NEXT: s_addc_u32 s7, s7, s3
6233
6221
; GFX1132_ITERATIVE-NEXT: s_lshl_b32 s1, 1, s1
0 commit comments