@@ -516,10 +516,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
516
516
;
517
517
; GFX8-LABEL: add_i32_varying:
518
518
; GFX8: ; %bb.0: ; %entry
519
- ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
520
519
; GFX8-NEXT: s_mov_b64 s[2:3], exec
521
520
; GFX8-NEXT: s_mov_b32 s6, 0
522
- ; GFX8-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
523
521
; GFX8-NEXT: ; implicit-def: $vgpr1
524
522
; GFX8-NEXT: .LBB2_1: ; %ComputeLoop
525
523
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -537,7 +535,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
537
535
; GFX8-NEXT: s_cbranch_scc1 .LBB2_1
538
536
; GFX8-NEXT: ; %bb.2: ; %ComputeEnd
539
537
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
540
- ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
538
+ ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
539
+ ; GFX8-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
540
+ ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
541
541
; GFX8-NEXT: ; implicit-def: $vgpr0
542
542
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
543
543
; GFX8-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@@ -565,10 +565,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
565
565
;
566
566
; GFX9-LABEL: add_i32_varying:
567
567
; GFX9: ; %bb.0: ; %entry
568
- ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
569
568
; GFX9-NEXT: s_mov_b64 s[2:3], exec
570
569
; GFX9-NEXT: s_mov_b32 s6, 0
571
- ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
572
570
; GFX9-NEXT: ; implicit-def: $vgpr1
573
571
; GFX9-NEXT: .LBB2_1: ; %ComputeLoop
574
572
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -586,7 +584,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
586
584
; GFX9-NEXT: s_cbranch_scc1 .LBB2_1
587
585
; GFX9-NEXT: ; %bb.2: ; %ComputeEnd
588
586
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
589
- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
587
+ ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
588
+ ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
589
+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
590
590
; GFX9-NEXT: ; implicit-def: $vgpr0
591
591
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
592
592
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@@ -614,10 +614,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
614
614
;
615
615
; GFX1064-LABEL: add_i32_varying:
616
616
; GFX1064: ; %bb.0: ; %entry
617
- ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
618
617
; GFX1064-NEXT: s_mov_b64 s[2:3], exec
619
618
; GFX1064-NEXT: s_mov_b32 s6, 0
620
- ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
621
619
; GFX1064-NEXT: ; implicit-def: $vgpr1
622
620
; GFX1064-NEXT: .LBB2_1: ; %ComputeLoop
623
621
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -634,7 +632,9 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
634
632
; GFX1064-NEXT: s_cbranch_scc1 .LBB2_1
635
633
; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd
636
634
; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
637
- ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
635
+ ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
636
+ ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
637
+ ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
638
638
; GFX1064-NEXT: ; implicit-def: $vgpr0
639
639
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
640
640
; GFX1064-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@@ -665,7 +665,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
665
665
;
666
666
; GFX1032-LABEL: add_i32_varying:
667
667
; GFX1032: ; %bb.0: ; %entry
668
- ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0
669
668
; GFX1032-NEXT: s_mov_b32 s2, exec_lo
670
669
; GFX1032-NEXT: s_mov_b32 s4, 0
671
670
; GFX1032-NEXT: ; implicit-def: $vgpr1
@@ -681,7 +680,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
681
680
; GFX1032-NEXT: s_cbranch_scc1 .LBB2_1
682
681
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
683
682
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
684
- ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
683
+ ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
684
+ ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
685
685
; GFX1032-NEXT: ; implicit-def: $vgpr0
686
686
; GFX1032-NEXT: s_and_saveexec_b32 s5, vcc_lo
687
687
; GFX1032-NEXT: s_xor_b32 s5, exec_lo, s5
@@ -712,11 +712,8 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
712
712
;
713
713
; GFX1164-LABEL: add_i32_varying:
714
714
; GFX1164: ; %bb.0: ; %entry
715
- ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
716
715
; GFX1164-NEXT: s_mov_b64 s[2:3], exec
717
716
; GFX1164-NEXT: s_mov_b32 s6, 0
718
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
719
- ; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
720
717
; GFX1164-NEXT: ; implicit-def: $vgpr1
721
718
; GFX1164-NEXT: .LBB2_1: ; %ComputeLoop
722
719
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -735,9 +732,13 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
735
732
; GFX1164-NEXT: s_cbranch_scc1 .LBB2_1
736
733
; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd
737
734
; GFX1164-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
738
- ; GFX1164-NEXT: s_mov_b64 s[4:5], exec
735
+ ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
736
+ ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
737
+ ; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
738
+ ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
739
739
; GFX1164-NEXT: ; implicit-def: $vgpr0
740
- ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2
740
+ ; GFX1164-NEXT: s_and_saveexec_b64 s[4:5], vcc
741
+ ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
741
742
; GFX1164-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
742
743
; GFX1164-NEXT: s_cbranch_execz .LBB2_4
743
744
; GFX1164-NEXT: ; %bb.3:
@@ -767,7 +768,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
767
768
;
768
769
; GFX1132-LABEL: add_i32_varying:
769
770
; GFX1132: ; %bb.0: ; %entry
770
- ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0
771
771
; GFX1132-NEXT: s_mov_b32 s2, exec_lo
772
772
; GFX1132-NEXT: s_mov_b32 s4, 0
773
773
; GFX1132-NEXT: ; implicit-def: $vgpr1
@@ -784,9 +784,11 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out, ptr addrspace(
784
784
; GFX1132-NEXT: s_cbranch_scc1 .LBB2_1
785
785
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
786
786
; GFX1132-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
787
- ; GFX1132-NEXT: s_mov_b32 s5, exec_lo
787
+ ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
788
+ ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
789
+ ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
788
790
; GFX1132-NEXT: ; implicit-def: $vgpr0
789
- ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2
791
+ ; GFX1132-NEXT: s_and_saveexec_b32 s5, vcc_lo
790
792
; GFX1132-NEXT: s_xor_b32 s5, exec_lo, s5
791
793
; GFX1132-NEXT: s_cbranch_execz .LBB2_4
792
794
; GFX1132-NEXT: ; %bb.3:
@@ -2016,10 +2018,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2016
2018
;
2017
2019
; GFX8-LABEL: sub_i32_varying:
2018
2020
; GFX8: ; %bb.0: ; %entry
2019
- ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
2020
2021
; GFX8-NEXT: s_mov_b64 s[2:3], exec
2021
2022
; GFX8-NEXT: s_mov_b32 s6, 0
2022
- ; GFX8-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
2023
2023
; GFX8-NEXT: ; implicit-def: $vgpr1
2024
2024
; GFX8-NEXT: .LBB8_1: ; %ComputeLoop
2025
2025
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2037,7 +2037,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2037
2037
; GFX8-NEXT: s_cbranch_scc1 .LBB8_1
2038
2038
; GFX8-NEXT: ; %bb.2: ; %ComputeEnd
2039
2039
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2040
- ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
2040
+ ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2041
+ ; GFX8-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2042
+ ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2041
2043
; GFX8-NEXT: ; implicit-def: $vgpr0
2042
2044
; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
2043
2045
; GFX8-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@@ -2065,10 +2067,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2065
2067
;
2066
2068
; GFX9-LABEL: sub_i32_varying:
2067
2069
; GFX9: ; %bb.0: ; %entry
2068
- ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
2069
2070
; GFX9-NEXT: s_mov_b64 s[2:3], exec
2070
2071
; GFX9-NEXT: s_mov_b32 s6, 0
2071
- ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
2072
2072
; GFX9-NEXT: ; implicit-def: $vgpr1
2073
2073
; GFX9-NEXT: .LBB8_1: ; %ComputeLoop
2074
2074
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2086,7 +2086,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2086
2086
; GFX9-NEXT: s_cbranch_scc1 .LBB8_1
2087
2087
; GFX9-NEXT: ; %bb.2: ; %ComputeEnd
2088
2088
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2089
- ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
2089
+ ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2090
+ ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2091
+ ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2090
2092
; GFX9-NEXT: ; implicit-def: $vgpr0
2091
2093
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
2092
2094
; GFX9-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@@ -2114,10 +2116,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2114
2116
;
2115
2117
; GFX1064-LABEL: sub_i32_varying:
2116
2118
; GFX1064: ; %bb.0: ; %entry
2117
- ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
2118
2119
; GFX1064-NEXT: s_mov_b64 s[2:3], exec
2119
2120
; GFX1064-NEXT: s_mov_b32 s6, 0
2120
- ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
2121
2121
; GFX1064-NEXT: ; implicit-def: $vgpr1
2122
2122
; GFX1064-NEXT: .LBB8_1: ; %ComputeLoop
2123
2123
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2134,7 +2134,9 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2134
2134
; GFX1064-NEXT: s_cbranch_scc1 .LBB8_1
2135
2135
; GFX1064-NEXT: ; %bb.2: ; %ComputeEnd
2136
2136
; GFX1064-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2137
- ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
2137
+ ; GFX1064-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2138
+ ; GFX1064-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2139
+ ; GFX1064-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2138
2140
; GFX1064-NEXT: ; implicit-def: $vgpr0
2139
2141
; GFX1064-NEXT: s_and_saveexec_b64 s[4:5], vcc
2140
2142
; GFX1064-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
@@ -2165,7 +2167,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2165
2167
;
2166
2168
; GFX1032-LABEL: sub_i32_varying:
2167
2169
; GFX1032: ; %bb.0: ; %entry
2168
- ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0
2169
2170
; GFX1032-NEXT: s_mov_b32 s2, exec_lo
2170
2171
; GFX1032-NEXT: s_mov_b32 s4, 0
2171
2172
; GFX1032-NEXT: ; implicit-def: $vgpr1
@@ -2181,7 +2182,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2181
2182
; GFX1032-NEXT: s_cbranch_scc1 .LBB8_1
2182
2183
; GFX1032-NEXT: ; %bb.2: ; %ComputeEnd
2183
2184
; GFX1032-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
2184
- ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
2185
+ ; GFX1032-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2186
+ ; GFX1032-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2185
2187
; GFX1032-NEXT: ; implicit-def: $vgpr0
2186
2188
; GFX1032-NEXT: s_and_saveexec_b32 s5, vcc_lo
2187
2189
; GFX1032-NEXT: s_xor_b32 s5, exec_lo, s5
@@ -2212,11 +2214,8 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2212
2214
;
2213
2215
; GFX1164-LABEL: sub_i32_varying:
2214
2216
; GFX1164: ; %bb.0: ; %entry
2215
- ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v1, exec_lo, 0
2216
2217
; GFX1164-NEXT: s_mov_b64 s[2:3], exec
2217
2218
; GFX1164-NEXT: s_mov_b32 s6, 0
2218
- ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1)
2219
- ; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v2, exec_hi, v1
2220
2219
; GFX1164-NEXT: ; implicit-def: $vgpr1
2221
2220
; GFX1164-NEXT: .LBB8_1: ; %ComputeLoop
2222
2221
; GFX1164-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -2235,9 +2234,13 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2235
2234
; GFX1164-NEXT: s_cbranch_scc1 .LBB8_1
2236
2235
; GFX1164-NEXT: ; %bb.2: ; %ComputeEnd
2237
2236
; GFX1164-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
2238
- ; GFX1164-NEXT: s_mov_b64 s[4:5], exec
2237
+ ; GFX1164-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2238
+ ; GFX1164-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2239
+ ; GFX1164-NEXT: v_mbcnt_hi_u32_b32 v0, exec_hi, v0
2240
+ ; GFX1164-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
2239
2241
; GFX1164-NEXT: ; implicit-def: $vgpr0
2240
- ; GFX1164-NEXT: v_cmpx_eq_u32_e32 0, v2
2242
+ ; GFX1164-NEXT: s_and_saveexec_b64 s[4:5], vcc
2243
+ ; GFX1164-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2241
2244
; GFX1164-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
2242
2245
; GFX1164-NEXT: s_cbranch_execz .LBB8_4
2243
2246
; GFX1164-NEXT: ; %bb.3:
@@ -2267,7 +2270,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2267
2270
;
2268
2271
; GFX1132-LABEL: sub_i32_varying:
2269
2272
; GFX1132: ; %bb.0: ; %entry
2270
- ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v2, exec_lo, 0
2271
2273
; GFX1132-NEXT: s_mov_b32 s2, exec_lo
2272
2274
; GFX1132-NEXT: s_mov_b32 s4, 0
2273
2275
; GFX1132-NEXT: ; implicit-def: $vgpr1
@@ -2284,9 +2286,11 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out, ptr addrspace(
2284
2286
; GFX1132-NEXT: s_cbranch_scc1 .LBB8_1
2285
2287
; GFX1132-NEXT: ; %bb.2: ; %ComputeEnd
2286
2288
; GFX1132-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
2287
- ; GFX1132-NEXT: s_mov_b32 s5, exec_lo
2289
+ ; GFX1132-NEXT: v_mbcnt_lo_u32_b32 v0, exec_lo, 0
2290
+ ; GFX1132-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
2291
+ ; GFX1132-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
2288
2292
; GFX1132-NEXT: ; implicit-def: $vgpr0
2289
- ; GFX1132-NEXT: v_cmpx_eq_u32_e32 0, v2
2293
+ ; GFX1132-NEXT: s_and_saveexec_b32 s5, vcc_lo
2290
2294
; GFX1132-NEXT: s_xor_b32 s5, exec_lo, s5
2291
2295
; GFX1132-NEXT: s_cbranch_execz .LBB8_4
2292
2296
; GFX1132-NEXT: ; %bb.3:
0 commit comments