@@ -182,17 +182,14 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
182
182
define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2 (ptr %ptr , bfloat %value ) #0 {
183
183
; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
184
184
; NOLSE: // %bb.0:
185
- ; NOLSE-NEXT: // kill: def $h0 killed $h0 def $s0
186
- ; NOLSE-NEXT: fmov w9, s0
185
+ ; NOLSE-NEXT: // kill: def $h0 killed $h0 def $d0
186
+ ; NOLSE-NEXT: shll v1.4s, v0.4h, #16
187
187
; NOLSE-NEXT: mov w8, #32767 // =0x7fff
188
- ; NOLSE-NEXT: lsl w9, w9, #16
189
- ; NOLSE-NEXT: fmov s1, w9
190
188
; NOLSE-NEXT: .LBB2_1: // %atomicrmw.start
191
189
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
192
190
; NOLSE-NEXT: ldaxrh w9, [x0]
193
191
; NOLSE-NEXT: fmov s0, w9
194
- ; NOLSE-NEXT: lsl w9, w9, #16
195
- ; NOLSE-NEXT: fmov s2, w9
192
+ ; NOLSE-NEXT: shll v2.4s, v0.4h, #16
196
193
; NOLSE-NEXT: fadd s2, s2, s1
197
194
; NOLSE-NEXT: fmov w9, s2
198
195
; NOLSE-NEXT: ubfx w10, w9, #16, #1
@@ -202,36 +199,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
202
199
; NOLSE-NEXT: stlxrh w10, w9, [x0]
203
200
; NOLSE-NEXT: cbnz w10, .LBB2_1
204
201
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
205
- ; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0
202
+ ; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $d0
206
203
; NOLSE-NEXT: ret
207
204
;
208
205
; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
209
206
; LSE: // %bb.0:
210
- ; LSE-NEXT: // kill: def $h0 killed $h0 def $s0
211
- ; LSE-NEXT: fmov w9, s0
207
+ ; LSE-NEXT: // kill: def $h0 killed $h0 def $d0
208
+ ; LSE-NEXT: shll v1.4s, v0.4h, #16
212
209
; LSE-NEXT: mov w8, #32767 // =0x7fff
213
210
; LSE-NEXT: ldr h0, [x0]
214
- ; LSE-NEXT: lsl w9, w9, #16
215
- ; LSE-NEXT: fmov s1, w9
216
211
; LSE-NEXT: .LBB2_1: // %atomicrmw.start
217
212
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
218
- ; LSE-NEXT: fmov w9, s0
219
- ; LSE-NEXT: lsl w9, w9, #16
220
- ; LSE-NEXT: fmov s2, w9
213
+ ; LSE-NEXT: shll v2.4s, v0.4h, #16
221
214
; LSE-NEXT: fadd s2, s2, s1
222
215
; LSE-NEXT: fmov w9, s2
223
216
; LSE-NEXT: ubfx w10, w9, #16, #1
224
217
; LSE-NEXT: add w9, w9, w8
225
218
; LSE-NEXT: add w9, w10, w9
226
- ; LSE-NEXT: fmov w10, s0
227
219
; LSE-NEXT: lsr w9, w9, #16
228
- ; LSE-NEXT: mov w11, w10
229
- ; LSE-NEXT: casalh w11, w9, [x0]
220
+ ; LSE-NEXT: fmov s2, w9
221
+ ; LSE-NEXT: fmov w9, s0
222
+ ; LSE-NEXT: fmov w10, s2
223
+ ; LSE-NEXT: mov w11, w9
224
+ ; LSE-NEXT: casalh w11, w10, [x0]
230
225
; LSE-NEXT: fmov s0, w11
231
- ; LSE-NEXT: cmp w11, w10 , uxth
226
+ ; LSE-NEXT: cmp w11, w9 , uxth
232
227
; LSE-NEXT: b.ne .LBB2_1
233
228
; LSE-NEXT: // %bb.2: // %atomicrmw.end
234
- ; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0
229
+ ; LSE-NEXT: // kill: def $h0 killed $h0 killed $d0
235
230
; LSE-NEXT: ret
236
231
;
237
232
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align2:
@@ -281,17 +276,14 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align2(ptr %ptr, bfloat %value)
281
276
define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4 (ptr %ptr , bfloat %value ) #0 {
282
277
; NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
283
278
; NOLSE: // %bb.0:
284
- ; NOLSE-NEXT: // kill: def $h0 killed $h0 def $s0
285
- ; NOLSE-NEXT: fmov w9, s0
279
+ ; NOLSE-NEXT: // kill: def $h0 killed $h0 def $d0
280
+ ; NOLSE-NEXT: shll v1.4s, v0.4h, #16
286
281
; NOLSE-NEXT: mov w8, #32767 // =0x7fff
287
- ; NOLSE-NEXT: lsl w9, w9, #16
288
- ; NOLSE-NEXT: fmov s1, w9
289
282
; NOLSE-NEXT: .LBB3_1: // %atomicrmw.start
290
283
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
291
284
; NOLSE-NEXT: ldaxrh w9, [x0]
292
285
; NOLSE-NEXT: fmov s0, w9
293
- ; NOLSE-NEXT: lsl w9, w9, #16
294
- ; NOLSE-NEXT: fmov s2, w9
286
+ ; NOLSE-NEXT: shll v2.4s, v0.4h, #16
295
287
; NOLSE-NEXT: fadd s2, s2, s1
296
288
; NOLSE-NEXT: fmov w9, s2
297
289
; NOLSE-NEXT: ubfx w10, w9, #16, #1
@@ -301,36 +293,34 @@ define bfloat @test_atomicrmw_fadd_bf16_seq_cst_align4(ptr %ptr, bfloat %value)
301
293
; NOLSE-NEXT: stlxrh w10, w9, [x0]
302
294
; NOLSE-NEXT: cbnz w10, .LBB3_1
303
295
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
304
- ; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $s0
296
+ ; NOLSE-NEXT: // kill: def $h0 killed $h0 killed $d0
305
297
; NOLSE-NEXT: ret
306
298
;
307
299
; LSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
308
300
; LSE: // %bb.0:
309
- ; LSE-NEXT: // kill: def $h0 killed $h0 def $s0
310
- ; LSE-NEXT: fmov w9, s0
301
+ ; LSE-NEXT: // kill: def $h0 killed $h0 def $d0
302
+ ; LSE-NEXT: shll v1.4s, v0.4h, #16
311
303
; LSE-NEXT: mov w8, #32767 // =0x7fff
312
304
; LSE-NEXT: ldr h0, [x0]
313
- ; LSE-NEXT: lsl w9, w9, #16
314
- ; LSE-NEXT: fmov s1, w9
315
305
; LSE-NEXT: .LBB3_1: // %atomicrmw.start
316
306
; LSE-NEXT: // =>This Inner Loop Header: Depth=1
317
- ; LSE-NEXT: fmov w9, s0
318
- ; LSE-NEXT: lsl w9, w9, #16
319
- ; LSE-NEXT: fmov s2, w9
307
+ ; LSE-NEXT: shll v2.4s, v0.4h, #16
320
308
; LSE-NEXT: fadd s2, s2, s1
321
309
; LSE-NEXT: fmov w9, s2
322
310
; LSE-NEXT: ubfx w10, w9, #16, #1
323
311
; LSE-NEXT: add w9, w9, w8
324
312
; LSE-NEXT: add w9, w10, w9
325
- ; LSE-NEXT: fmov w10, s0
326
313
; LSE-NEXT: lsr w9, w9, #16
327
- ; LSE-NEXT: mov w11, w10
328
- ; LSE-NEXT: casalh w11, w9, [x0]
314
+ ; LSE-NEXT: fmov s2, w9
315
+ ; LSE-NEXT: fmov w9, s0
316
+ ; LSE-NEXT: fmov w10, s2
317
+ ; LSE-NEXT: mov w11, w9
318
+ ; LSE-NEXT: casalh w11, w10, [x0]
329
319
; LSE-NEXT: fmov s0, w11
330
- ; LSE-NEXT: cmp w11, w10 , uxth
320
+ ; LSE-NEXT: cmp w11, w9 , uxth
331
321
; LSE-NEXT: b.ne .LBB3_1
332
322
; LSE-NEXT: // %bb.2: // %atomicrmw.end
333
- ; LSE-NEXT: // kill: def $h0 killed $h0 killed $s0
323
+ ; LSE-NEXT: // kill: def $h0 killed $h0 killed $d0
334
324
; LSE-NEXT: ret
335
325
;
336
326
; SOFTFP-NOLSE-LABEL: test_atomicrmw_fadd_bf16_seq_cst_align4:
0 commit comments