@@ -21,10 +21,10 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
21
21
; LA64-NEXT: andi $a7, $a5, 255
22
22
; LA64-NEXT: sltu $a7, $a7, $a4
23
23
; LA64-NEXT: xori $a7, $a7, 1
24
- ; LA64-NEXT: sub.d $a5 , $a5, $a1
25
- ; LA64-NEXT: maskeqz $a5, $a5, $a7
26
- ; LA64-NEXT: masknez $a7, $a1 , $a7
27
- ; LA64-NEXT: or $a5, $a5 , $a7
24
+ ; LA64-NEXT: sub.d $t0 , $a5, $a1
25
+ ; LA64-NEXT: masknez $a5, $a5, $a7
26
+ ; LA64-NEXT: maskeqz $a7, $t0 , $a7
27
+ ; LA64-NEXT: or $a5, $a7 , $a5
28
28
; LA64-NEXT: andi $a5, $a5, 255
29
29
; LA64-NEXT: sll.w $a5, $a5, $a2
30
30
; LA64-NEXT: and $a7, $a6, $a3
@@ -74,10 +74,10 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
74
74
; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
75
75
; LA64-NEXT: sltu $a7, $a7, $a4
76
76
; LA64-NEXT: xori $a7, $a7, 1
77
- ; LA64-NEXT: sub.d $a5 , $a5, $a1
78
- ; LA64-NEXT: maskeqz $a5, $a5, $a7
79
- ; LA64-NEXT: masknez $a7, $a1 , $a7
80
- ; LA64-NEXT: or $a5, $a5 , $a7
77
+ ; LA64-NEXT: sub.d $t0 , $a5, $a1
78
+ ; LA64-NEXT: masknez $a5, $a5, $a7
79
+ ; LA64-NEXT: maskeqz $a7, $t0 , $a7
80
+ ; LA64-NEXT: or $a5, $a7 , $a5
81
81
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
82
82
; LA64-NEXT: sll.w $a5, $a5, $a2
83
83
; LA64-NEXT: and $a7, $a6, $a3
@@ -120,7 +120,7 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
120
120
; LA64-NEXT: xori $a2, $a2, 1
121
121
; LA64-NEXT: sub.w $a5, $a4, $a1
122
122
; LA64-NEXT: maskeqz $a5, $a5, $a2
123
- ; LA64-NEXT: masknez $a2, $a1 , $a2
123
+ ; LA64-NEXT: masknez $a2, $a4 , $a2
124
124
; LA64-NEXT: or $a5, $a5, $a2
125
125
; LA64-NEXT: .LBB2_3: # %atomicrmw.start
126
126
; LA64-NEXT: # Parent Loop BB2_1 Depth=1
@@ -159,7 +159,7 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
159
159
; LA64-NEXT: xori $a2, $a2, 1
160
160
; LA64-NEXT: sub.d $a4, $a3, $a1
161
161
; LA64-NEXT: maskeqz $a4, $a4, $a2
162
- ; LA64-NEXT: masknez $a2, $a1 , $a2
162
+ ; LA64-NEXT: masknez $a2, $a3 , $a2
163
163
; LA64-NEXT: or $a4, $a4, $a2
164
164
; LA64-NEXT: .LBB3_3: # %atomicrmw.start
165
165
; LA64-NEXT: # Parent Loop BB3_1 Depth=1
@@ -191,45 +191,43 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
191
191
; LA64-NEXT: slli.d $a3, $a0, 3
192
192
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
193
193
; LA64-NEXT: andi $a2, $a3, 24
194
- ; LA64-NEXT: ori $a4 , $zero, 255
195
- ; LA64-NEXT: ld.w $a5 , $a0, 0
196
- ; LA64-NEXT: sll.w $a3, $a4 , $a3
194
+ ; LA64-NEXT: ori $a5 , $zero, 255
195
+ ; LA64-NEXT: ld.w $a4 , $a0, 0
196
+ ; LA64-NEXT: sll.w $a3, $a5 , $a3
197
197
; LA64-NEXT: nor $a3, $a3, $zero
198
- ; LA64-NEXT: andi $a4 , $a1, 255
198
+ ; LA64-NEXT: andi $a1 , $a1, 255
199
199
; LA64-NEXT: .p2align 4, , 16
200
200
; LA64-NEXT: .LBB4_1: # %atomicrmw.start
201
201
; LA64-NEXT: # =>This Loop Header: Depth=1
202
202
; LA64-NEXT: # Child Loop BB4_3 Depth 2
203
- ; LA64-NEXT: move $a6, $a5
204
- ; LA64-NEXT: srl.w $a5, $a5, $a2
205
- ; LA64-NEXT: andi $a7, $a5, 255
206
- ; LA64-NEXT: sltu $a7, $a7, $a4
207
- ; LA64-NEXT: xori $a7, $a7, 1
208
- ; LA64-NEXT: sub.d $a5, $a5, $a1
209
- ; LA64-NEXT: maskeqz $a5, $a5, $a7
210
- ; LA64-NEXT: andi $a5, $a5, 255
211
- ; LA64-NEXT: sll.w $a5, $a5, $a2
212
- ; LA64-NEXT: and $a7, $a6, $a3
213
- ; LA64-NEXT: or $a7, $a7, $a5
203
+ ; LA64-NEXT: move $a5, $a4
204
+ ; LA64-NEXT: srl.w $a4, $a4, $a2
205
+ ; LA64-NEXT: andi $a4, $a4, 255
206
+ ; LA64-NEXT: sub.d $a6, $a4, $a1
207
+ ; LA64-NEXT: sltu $a4, $a4, $a6
208
+ ; LA64-NEXT: masknez $a4, $a6, $a4
209
+ ; LA64-NEXT: sll.w $a4, $a4, $a2
210
+ ; LA64-NEXT: and $a6, $a5, $a3
211
+ ; LA64-NEXT: or $a6, $a6, $a4
214
212
; LA64-NEXT: .LBB4_3: # %atomicrmw.start
215
213
; LA64-NEXT: # Parent Loop BB4_1 Depth=1
216
214
; LA64-NEXT: # => This Inner Loop Header: Depth=2
217
- ; LA64-NEXT: ll.w $a5 , $a0, 0
218
- ; LA64-NEXT: bne $a5 , $a6 , .LBB4_5
215
+ ; LA64-NEXT: ll.w $a4 , $a0, 0
216
+ ; LA64-NEXT: bne $a4 , $a5 , .LBB4_5
219
217
; LA64-NEXT: # %bb.4: # %atomicrmw.start
220
218
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
221
- ; LA64-NEXT: move $t0 , $a7
222
- ; LA64-NEXT: sc.w $t0 , $a0, 0
223
- ; LA64-NEXT: beqz $t0 , .LBB4_3
219
+ ; LA64-NEXT: move $a7 , $a6
220
+ ; LA64-NEXT: sc.w $a7 , $a0, 0
221
+ ; LA64-NEXT: beqz $a7 , .LBB4_3
224
222
; LA64-NEXT: b .LBB4_6
225
223
; LA64-NEXT: .LBB4_5: # %atomicrmw.start
226
224
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
227
225
; LA64-NEXT: dbar 20
228
226
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
229
227
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
230
- ; LA64-NEXT: bne $a5 , $a6 , .LBB4_1
228
+ ; LA64-NEXT: bne $a4 , $a5 , .LBB4_1
231
229
; LA64-NEXT: # %bb.2: # %atomicrmw.end
232
- ; LA64-NEXT: srl.w $a0, $a5 , $a2
230
+ ; LA64-NEXT: srl.w $a0, $a4 , $a2
233
231
; LA64-NEXT: ret
234
232
%result = atomicrmw usub_sat ptr %ptr , i8 %val seq_cst
235
233
ret i8 %result
@@ -242,45 +240,43 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
242
240
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
243
241
; LA64-NEXT: andi $a2, $a3, 24
244
242
; LA64-NEXT: lu12i.w $a4, 15
245
- ; LA64-NEXT: ori $a4 , $a4, 4095
246
- ; LA64-NEXT: ld.w $a5 , $a0, 0
247
- ; LA64-NEXT: sll.w $a3, $a4 , $a3
243
+ ; LA64-NEXT: ori $a5 , $a4, 4095
244
+ ; LA64-NEXT: ld.w $a4 , $a0, 0
245
+ ; LA64-NEXT: sll.w $a3, $a5 , $a3
248
246
; LA64-NEXT: nor $a3, $a3, $zero
249
- ; LA64-NEXT: bstrpick.d $a4 , $a1, 15, 0
247
+ ; LA64-NEXT: bstrpick.d $a1 , $a1, 15, 0
250
248
; LA64-NEXT: .p2align 4, , 16
251
249
; LA64-NEXT: .LBB5_1: # %atomicrmw.start
252
250
; LA64-NEXT: # =>This Loop Header: Depth=1
253
251
; LA64-NEXT: # Child Loop BB5_3 Depth 2
254
- ; LA64-NEXT: move $a6, $a5
255
- ; LA64-NEXT: srl.w $a5, $a5, $a2
256
- ; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
257
- ; LA64-NEXT: sltu $a7, $a7, $a4
258
- ; LA64-NEXT: xori $a7, $a7, 1
259
- ; LA64-NEXT: sub.d $a5, $a5, $a1
260
- ; LA64-NEXT: maskeqz $a5, $a5, $a7
261
- ; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
262
- ; LA64-NEXT: sll.w $a5, $a5, $a2
263
- ; LA64-NEXT: and $a7, $a6, $a3
264
- ; LA64-NEXT: or $a7, $a7, $a5
252
+ ; LA64-NEXT: move $a5, $a4
253
+ ; LA64-NEXT: srl.w $a4, $a4, $a2
254
+ ; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
255
+ ; LA64-NEXT: sub.d $a6, $a4, $a1
256
+ ; LA64-NEXT: sltu $a4, $a4, $a6
257
+ ; LA64-NEXT: masknez $a4, $a6, $a4
258
+ ; LA64-NEXT: sll.w $a4, $a4, $a2
259
+ ; LA64-NEXT: and $a6, $a5, $a3
260
+ ; LA64-NEXT: or $a6, $a6, $a4
265
261
; LA64-NEXT: .LBB5_3: # %atomicrmw.start
266
262
; LA64-NEXT: # Parent Loop BB5_1 Depth=1
267
263
; LA64-NEXT: # => This Inner Loop Header: Depth=2
268
- ; LA64-NEXT: ll.w $a5 , $a0, 0
269
- ; LA64-NEXT: bne $a5 , $a6 , .LBB5_5
264
+ ; LA64-NEXT: ll.w $a4 , $a0, 0
265
+ ; LA64-NEXT: bne $a4 , $a5 , .LBB5_5
270
266
; LA64-NEXT: # %bb.4: # %atomicrmw.start
271
267
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
272
- ; LA64-NEXT: move $t0 , $a7
273
- ; LA64-NEXT: sc.w $t0 , $a0, 0
274
- ; LA64-NEXT: beqz $t0 , .LBB5_3
268
+ ; LA64-NEXT: move $a7 , $a6
269
+ ; LA64-NEXT: sc.w $a7 , $a0, 0
270
+ ; LA64-NEXT: beqz $a7 , .LBB5_3
275
271
; LA64-NEXT: b .LBB5_6
276
272
; LA64-NEXT: .LBB5_5: # %atomicrmw.start
277
273
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
278
274
; LA64-NEXT: dbar 20
279
275
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
280
276
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
281
- ; LA64-NEXT: bne $a5 , $a6 , .LBB5_1
277
+ ; LA64-NEXT: bne $a4 , $a5 , .LBB5_1
282
278
; LA64-NEXT: # %bb.2: # %atomicrmw.end
283
- ; LA64-NEXT: srl.w $a0, $a5 , $a2
279
+ ; LA64-NEXT: srl.w $a0, $a4 , $a2
284
280
; LA64-NEXT: ret
285
281
%result = atomicrmw usub_sat ptr %ptr , i16 %val seq_cst
286
282
ret i16 %result
@@ -290,33 +286,32 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
290
286
; LA64-LABEL: atomicrmw_usub_sat_i32:
291
287
; LA64: # %bb.0:
292
288
; LA64-NEXT: ld.w $a2, $a0, 0
293
- ; LA64-NEXT: addi.w $a3 , $a1, 0
289
+ ; LA64-NEXT: addi.w $a1 , $a1, 0
294
290
; LA64-NEXT: .p2align 4, , 16
295
291
; LA64-NEXT: .LBB6_1: # %atomicrmw.start
296
292
; LA64-NEXT: # =>This Loop Header: Depth=1
297
293
; LA64-NEXT: # Child Loop BB6_3 Depth 2
298
- ; LA64-NEXT: move $a4, $a2
299
- ; LA64-NEXT: sltu $a2, $a2, $a3
300
- ; LA64-NEXT: xori $a2, $a2, 1
301
- ; LA64-NEXT: sub.w $a5, $a4, $a1
302
- ; LA64-NEXT: maskeqz $a5, $a5, $a2
294
+ ; LA64-NEXT: move $a3, $a2
295
+ ; LA64-NEXT: sub.d $a2, $a2, $a1
296
+ ; LA64-NEXT: sltu $a4, $a3, $a2
297
+ ; LA64-NEXT: masknez $a4, $a2, $a4
303
298
; LA64-NEXT: .LBB6_3: # %atomicrmw.start
304
299
; LA64-NEXT: # Parent Loop BB6_1 Depth=1
305
300
; LA64-NEXT: # => This Inner Loop Header: Depth=2
306
301
; LA64-NEXT: ll.w $a2, $a0, 0
307
- ; LA64-NEXT: bne $a2, $a4 , .LBB6_5
302
+ ; LA64-NEXT: bne $a2, $a3 , .LBB6_5
308
303
; LA64-NEXT: # %bb.4: # %atomicrmw.start
309
304
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
310
- ; LA64-NEXT: move $a6 , $a5
311
- ; LA64-NEXT: sc.w $a6 , $a0, 0
312
- ; LA64-NEXT: beqz $a6 , .LBB6_3
305
+ ; LA64-NEXT: move $a5 , $a4
306
+ ; LA64-NEXT: sc.w $a5 , $a0, 0
307
+ ; LA64-NEXT: beqz $a5 , .LBB6_3
313
308
; LA64-NEXT: b .LBB6_6
314
309
; LA64-NEXT: .LBB6_5: # %atomicrmw.start
315
310
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
316
311
; LA64-NEXT: dbar 20
317
312
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
318
313
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
319
- ; LA64-NEXT: bne $a2, $a4 , .LBB6_1
314
+ ; LA64-NEXT: bne $a2, $a3 , .LBB6_1
320
315
; LA64-NEXT: # %bb.2: # %atomicrmw.end
321
316
; LA64-NEXT: move $a0, $a2
322
317
; LA64-NEXT: ret
@@ -333,10 +328,9 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
333
328
; LA64-NEXT: # =>This Loop Header: Depth=1
334
329
; LA64-NEXT: # Child Loop BB7_3 Depth 2
335
330
; LA64-NEXT: move $a3, $a2
336
- ; LA64-NEXT: sltu $a2, $a2, $a1
337
- ; LA64-NEXT: xori $a2, $a2, 1
338
- ; LA64-NEXT: sub.d $a4, $a3, $a1
339
- ; LA64-NEXT: maskeqz $a4, $a4, $a2
331
+ ; LA64-NEXT: sub.d $a2, $a2, $a1
332
+ ; LA64-NEXT: sltu $a4, $a3, $a2
333
+ ; LA64-NEXT: masknez $a4, $a2, $a4
340
334
; LA64-NEXT: .LBB7_3: # %atomicrmw.start
341
335
; LA64-NEXT: # Parent Loop BB7_1 Depth=1
342
336
; LA64-NEXT: # => This Inner Loop Header: Depth=2
0 commit comments