@@ -52,44 +52,44 @@ define <2 x i64> @f1() #0 {
52
52
define amdgpu_kernel void @f2 (i32 %arg , i32 %arg1 , i32 %arg2 , i1 %arg3 , i32 %arg4 , i1 %arg5 , ptr %arg6 , i32 %arg7 , i32 %arg8 , i32 %arg9 , i32 %arg10 , i1 %arg11 ) {
53
53
; GFX11-LABEL: f2:
54
54
; GFX11: ; %bb.0: ; %bb
55
- ; GFX11-NEXT: s_mov_b64 s[16:17] , s[4:5]
55
+ ; GFX11-NEXT: s_load_b32 s21 , s[2:3], 0x24
56
56
; GFX11-NEXT: v_mov_b32_e32 v31, v0
57
- ; GFX11-NEXT: s_load_b32 s24, s[16:17], 0x24
58
57
; GFX11-NEXT: s_mov_b32 s12, s13
59
- ; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7]
60
- ; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
61
- ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
58
+ ; GFX11-NEXT: s_mov_b64 s[10:11], s[4:5]
62
59
; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1]
63
- ; GFX11-NEXT: s_mov_b32 s3, 0
60
+ ; GFX11-NEXT: s_mov_b32 s6, 0
61
+ ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31
64
62
; GFX11-NEXT: s_mov_b32 s0, -1
65
- ; GFX11-NEXT: s_mov_b32 s18 , exec_lo
63
+ ; GFX11-NEXT: s_mov_b32 s20 , exec_lo
66
64
; GFX11-NEXT: s_mov_b32 s32, 0
67
65
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
68
- ; GFX11-NEXT: v_mul_lo_u32 v0, s24 , v0
66
+ ; GFX11-NEXT: v_mul_lo_u32 v0, s21 , v0
69
67
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
70
68
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
71
69
; GFX11-NEXT: s_cbranch_execz .LBB2_13
72
70
; GFX11-NEXT: ; %bb.1: ; %bb14
73
- ; GFX11-NEXT: s_load_b128 s[20:23], s[16:17], 0x2c
74
- ; GFX11-NEXT: s_mov_b32 s19, 0
71
+ ; GFX11-NEXT: s_load_b128 s[16:19], s[2:3], 0x2c
75
72
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
76
- ; GFX11-NEXT: s_bitcmp1_b32 s21, 0
77
- ; GFX11-NEXT: s_cselect_b32 s25, -1, 0
78
- ; GFX11-NEXT: s_bitcmp0_b32 s21, 0
73
+ ; GFX11-NEXT: s_bitcmp1_b32 s17, 0
74
+ ; GFX11-NEXT: s_cselect_b32 s22, -1, 0
75
+ ; GFX11-NEXT: s_bitcmp0_b32 s17, 0
76
+ ; GFX11-NEXT: s_mov_b32 s17, 0
79
77
; GFX11-NEXT: s_cbranch_scc0 .LBB2_3
80
78
; GFX11-NEXT: ; %bb.2: ; %bb15
81
- ; GFX11-NEXT: s_add_u32 s8, s16 , 0x58
82
- ; GFX11-NEXT: s_addc_u32 s9, s17 , 0
79
+ ; GFX11-NEXT: s_add_u32 s8, s2 , 0x58
80
+ ; GFX11-NEXT: s_addc_u32 s9, s3 , 0
83
81
; GFX11-NEXT: s_getpc_b64 s[0:1]
84
82
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
85
83
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
86
84
; GFX11-NEXT: s_mov_b32 s13, s14
87
85
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
88
- ; GFX11-NEXT: s_mov_b32 s3 , s14
86
+ ; GFX11-NEXT: s_mov_b32 s23 , s14
89
87
; GFX11-NEXT: s_mov_b32 s14, s15
88
+ ; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3]
90
89
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
91
90
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
92
- ; GFX11-NEXT: s_mov_b32 s14, s3
91
+ ; GFX11-NEXT: s_mov_b32 s14, s23
92
+ ; GFX11-NEXT: s_mov_b64 s[2:3], s[6:7]
93
93
; GFX11-NEXT: s_mov_b32 s1, -1
94
94
; GFX11-NEXT: s_cbranch_execz .LBB2_4
95
95
; GFX11-NEXT: s_branch .LBB2_12
@@ -98,66 +98,66 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
98
98
; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s0
99
99
; GFX11-NEXT: s_cbranch_vccnz .LBB2_12
100
100
; GFX11-NEXT: .LBB2_4: ; %bb16
101
- ; GFX11-NEXT: s_load_b32 s2 , s[16:17 ], 0x54
102
- ; GFX11-NEXT: s_bitcmp1_b32 s23 , 0
101
+ ; GFX11-NEXT: s_load_b32 s6 , s[2:3 ], 0x54
102
+ ; GFX11-NEXT: s_bitcmp1_b32 s19 , 0
103
103
; GFX11-NEXT: s_cselect_b32 s0, -1, 0
104
- ; GFX11-NEXT: s_and_b32 s3, s23 , 1
104
+ ; GFX11-NEXT: s_and_b32 s7, s19 , 1
105
105
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
106
- ; GFX11-NEXT: s_bitcmp1_b32 s2 , 0
107
- ; GFX11-NEXT: s_mov_b32 s2 , -1
106
+ ; GFX11-NEXT: s_bitcmp1_b32 s6 , 0
107
+ ; GFX11-NEXT: s_mov_b32 s6 , -1
108
108
; GFX11-NEXT: s_cselect_b32 s8, -1, 0
109
- ; GFX11-NEXT: s_cmp_eq_u32 s3 , 0
109
+ ; GFX11-NEXT: s_cmp_eq_u32 s7 , 0
110
110
; GFX11-NEXT: s_cbranch_scc0 .LBB2_8
111
111
; GFX11-NEXT: ; %bb.5: ; %bb18.preheader
112
- ; GFX11-NEXT: s_load_b128 s[28:31 ], s[16:17 ], 0x44
112
+ ; GFX11-NEXT: s_load_b128 s[24:27 ], s[2:3 ], 0x44
113
113
; GFX11-NEXT: v_mov_b32_e32 v2, 0
114
114
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
115
- ; GFX11-NEXT: s_mul_hi_u32 s2, s29, s28
116
- ; GFX11-NEXT: s_mul_i32 s3, s29, s28
115
+ ; GFX11-NEXT: s_mul_hi_u32 s6, s25, s24
116
+ ; GFX11-NEXT: s_mul_i32 s7, s25, s24
117
117
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
118
- ; GFX11-NEXT: v_alignbit_b32 v0, s2, s3 , 1
119
- ; GFX11-NEXT: s_mov_b32 s3 , 0
120
- ; GFX11-NEXT: v_readfirstlane_b32 s2 , v0
121
- ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s25
118
+ ; GFX11-NEXT: v_alignbit_b32 v0, s6, s7 , 1
119
+ ; GFX11-NEXT: s_mov_b32 s7 , 0
120
+ ; GFX11-NEXT: v_readfirstlane_b32 s6 , v0
121
+ ; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s22
122
122
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
123
- ; GFX11-NEXT: s_or_b32 s2, s2 , 1
124
- ; GFX11-NEXT: s_lshr_b32 s2, s2, s30
123
+ ; GFX11-NEXT: s_or_b32 s6, s6 , 1
124
+ ; GFX11-NEXT: s_lshr_b32 s6, s6, s26
125
125
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
126
- ; GFX11-NEXT: s_mul_i32 s2, s2, s22
127
- ; GFX11-NEXT: s_mul_i32 s2, s2, s20
126
+ ; GFX11-NEXT: s_mul_i32 s6, s6, s18
127
+ ; GFX11-NEXT: s_mul_i32 s6, s6, s16
128
128
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
129
- ; GFX11-NEXT: s_or_b32 s2, s24, s2
130
- ; GFX11-NEXT: s_lshl_b64 s[20:21 ], s[2:3 ], 1
131
- ; GFX11-NEXT: global_load_u16 v1, v2, s[20:21 ]
129
+ ; GFX11-NEXT: s_or_b32 s6, s21, s6
130
+ ; GFX11-NEXT: s_lshl_b64 s[18:19 ], s[6:7 ], 1
131
+ ; GFX11-NEXT: global_load_u16 v1, v2, s[18:19 ]
132
132
; GFX11-NEXT: s_waitcnt vmcnt(0)
133
133
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v1
134
134
; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
135
135
; GFX11-NEXT: .p2align 6
136
136
; GFX11-NEXT: .LBB2_6: ; %bb18
137
137
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
138
- ; GFX11-NEXT: v_cmp_ne_u16_e64 s2, s3 , 0
138
+ ; GFX11-NEXT: v_cmp_ne_u16_e64 s6, s7 , 0
139
139
; GFX11-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v2
140
140
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
141
- ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s2
141
+ ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s6
142
142
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
143
143
; GFX11-NEXT: s_and_b32 vcc_lo, s8, vcc_lo
144
144
; GFX11-NEXT: v_cndmask_b32_e64 v3, v1, v3, s0
145
145
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
146
146
; GFX11-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo
147
147
; GFX11-NEXT: s_mov_b32 vcc_lo, 0
148
- ; GFX11-NEXT: v_readfirstlane_b32 s2 , v3
148
+ ; GFX11-NEXT: v_readfirstlane_b32 s6 , v3
149
149
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
150
150
; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
151
- ; GFX11-NEXT: s_bitcmp1_b32 s2 , 0
152
- ; GFX11-NEXT: s_cselect_b32 s2 , 0x100, 0
151
+ ; GFX11-NEXT: s_bitcmp1_b32 s6 , 0
152
+ ; GFX11-NEXT: s_cselect_b32 s6 , 0x100, 0
153
153
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
154
- ; GFX11-NEXT: s_or_b32 s3, s2, s3
154
+ ; GFX11-NEXT: s_or_b32 s7, s6, s7
155
155
; GFX11-NEXT: s_cbranch_vccz .LBB2_6
156
156
; GFX11-NEXT: ; %bb.7: ; %Flow
157
- ; GFX11-NEXT: s_mov_b32 s2 , 0
157
+ ; GFX11-NEXT: s_mov_b32 s6 , 0
158
158
; GFX11-NEXT: .LBB2_8: ; %Flow12
159
159
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
160
- ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
160
+ ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s6
161
161
; GFX11-NEXT: s_cbranch_vccz .LBB2_12
162
162
; GFX11-NEXT: ; %bb.9:
163
163
; GFX11-NEXT: s_xor_b32 s0, s8, -1
@@ -167,17 +167,17 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
167
167
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0
168
168
; GFX11-NEXT: s_cbranch_vccz .LBB2_10
169
169
; GFX11-NEXT: ; %bb.11: ; %Flow6
170
- ; GFX11-NEXT: s_mov_b32 s19 , -1
170
+ ; GFX11-NEXT: s_mov_b32 s17 , -1
171
171
; GFX11-NEXT: .LBB2_12: ; %Flow11
172
- ; GFX11-NEXT: s_and_b32 s3 , s1, exec_lo
173
- ; GFX11-NEXT: s_or_not1_b32 s0, s19 , exec_lo
172
+ ; GFX11-NEXT: s_and_b32 s6 , s1, exec_lo
173
+ ; GFX11-NEXT: s_or_not1_b32 s0, s17 , exec_lo
174
174
; GFX11-NEXT: .LBB2_13: ; %Flow9
175
- ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18
176
- ; GFX11-NEXT: s_and_saveexec_b32 s18 , s0
175
+ ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s20
176
+ ; GFX11-NEXT: s_and_saveexec_b32 s7 , s0
177
177
; GFX11-NEXT: s_cbranch_execz .LBB2_15
178
178
; GFX11-NEXT: ; %bb.14: ; %bb43
179
- ; GFX11-NEXT: s_add_u32 s8, s16 , 0x58
180
- ; GFX11-NEXT: s_addc_u32 s9, s17 , 0
179
+ ; GFX11-NEXT: s_add_u32 s8, s2 , 0x58
180
+ ; GFX11-NEXT: s_addc_u32 s9, s3 , 0
181
181
; GFX11-NEXT: s_getpc_b64 s[0:1]
182
182
; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4
183
183
; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
@@ -186,10 +186,10 @@ define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg
186
186
; GFX11-NEXT: s_mov_b32 s14, s15
187
187
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
188
188
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
189
- ; GFX11-NEXT: s_or_b32 s3, s3 , exec_lo
189
+ ; GFX11-NEXT: s_or_b32 s6, s6 , exec_lo
190
190
; GFX11-NEXT: .LBB2_15: ; %Flow14
191
- ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18
192
- ; GFX11-NEXT: s_and_saveexec_b32 s0, s3
191
+ ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s7
192
+ ; GFX11-NEXT: s_and_saveexec_b32 s0, s6
193
193
; GFX11-NEXT: ; %bb.16: ; %UnifiedUnreachableBlock
194
194
; GFX11-NEXT: ; divergent unreachable
195
195
; GFX11-NEXT: ; %bb.17: ; %UnifiedReturnBlock
@@ -246,3 +246,6 @@ bb43:
246
246
}
247
247
248
248
attributes #0 = { noinline optnone }
249
+
250
+ !llvm.module.flags = !{!0 }
251
+ !0 = !{i32 1 , !"amdgpu_code_object_version" , i32 500 }
0 commit comments