Skip to content

Commit f66e124

Browse files
committed
Feedback from pull request.
1 parent c65ce4e commit f66e124

File tree

8 files changed

+594
-726
lines changed

8 files changed

+594
-726
lines changed

llvm/docs/LangRef.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11294,7 +11294,7 @@ operation argument:
1129411294
- uinc_wrap: ``*ptr = (*ptr u>= val) ? 0 : (*ptr + 1)`` (increment value with wraparound to zero when incremented above input value)
1129511295
- udec_wrap: ``*ptr = ((*ptr == 0) || (*ptr u> val)) ? val : (*ptr - 1)`` (decrement with wraparound to input value when decremented below zero).
1129611296
- usub_cond: ``*ptr = (*ptr u>= val) ? *ptr - val : *ptr`` (subtract only if no unsigned overflow).
11297-
- usub_sat: ``*ptr = (*ptr u>= val) ? *ptr - val : 0`` (subtract with clamping to zero).
11297+
- usub_sat: ``*ptr = (*ptr u>= val) ? *ptr - val : 0`` (subtract with unsigned clamping to zero).
1129811298

1129911299

1130011300
Example:

llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,8 +1657,8 @@ class MachineIRBuilder {
16571657
/// Build and insert `OldValRes<def> = G_ATOMICRMW_USUB_SAT Addr, Val, MMO`.
16581658
///
16591659
/// Atomically replace the value at \p Addr with the original value minus \p
1660-
/// Val if the original value is greater than or equal to \p Val, or with zero
1661-
/// otherwise. Puts the original value from \p Addr in \p OldValRes.
1660+
/// Val, with clamping to zero if the unsigned subtraction would overflow.
1661+
/// Puts the original value from \p Addr in \p OldValRes.
16621662
///
16631663
/// \pre setBasicBlock or setMI must have been called.
16641664
/// \pre \p OldValRes must be a generic virtual register.

llvm/lib/Transforms/Utils/LowerAtomic.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,15 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
9898
case AtomicRMWInst::USubCond: {
9999
Value *Cmp = Builder.CreateICmpUGE(Loaded, Val);
100100
Value *Sub = Builder.CreateSub(Loaded, Val);
101-
return Builder.CreateSelect(Cmp, Sub, Val, "new");
101+
return Builder.CreateSelect(Cmp, Sub, Loaded, "new");
102102
}
103103
case AtomicRMWInst::USubSat: {
104-
Constant *Zero = ConstantInt::get(Loaded->getType(), 0);
105-
Value *Cmp = Builder.CreateICmpUGE(Loaded, Val);
106-
Value *Sub = Builder.CreateSub(Loaded, Val);
107-
return Builder.CreateSelect(Cmp, Sub, Zero, "new");
104+
return Builder.CreateIntrinsic(Intrinsic::usub_sat, Loaded->getType(),
105+
{Loaded, Val}, nullptr, "new");
106+
// Constant *Zero = ConstantInt::get(Loaded->getType(), 0);
107+
// Value *Cmp = Builder.CreateICmpUGE(Loaded, Val);
108+
// Value *Sub = Builder.CreateSub(Loaded, Val);
109+
// return Builder.CreateSelect(Cmp, Sub, Zero, "new");
108110
}
109111
default:
110112
llvm_unreachable("Unknown atomic op");

llvm/test/CodeGen/AArch64/atomicrmw-cond-sub-clamp.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
99
; CHECK-NEXT: ldaxrb w8, [x0]
1010
; CHECK-NEXT: sub w9, w8, w1
1111
; CHECK-NEXT: cmp w8, w1, uxtb
12-
; CHECK-NEXT: csel w9, w9, w1, hs
12+
; CHECK-NEXT: csel w9, w9, w8, hs
1313
; CHECK-NEXT: stlxrb w10, w9, [x0]
1414
; CHECK-NEXT: cbnz w10, .LBB0_1
1515
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -27,7 +27,7 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
2727
; CHECK-NEXT: ldaxrh w8, [x0]
2828
; CHECK-NEXT: sub w9, w8, w1
2929
; CHECK-NEXT: cmp w8, w1, uxth
30-
; CHECK-NEXT: csel w9, w9, w1, hs
30+
; CHECK-NEXT: csel w9, w9, w8, hs
3131
; CHECK-NEXT: stlxrh w10, w9, [x0]
3232
; CHECK-NEXT: cbnz w10, .LBB1_1
3333
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -44,7 +44,7 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
4444
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
4545
; CHECK-NEXT: ldaxr w8, [x0]
4646
; CHECK-NEXT: subs w9, w8, w1
47-
; CHECK-NEXT: csel w9, w9, w1, hs
47+
; CHECK-NEXT: csel w9, w9, w8, hs
4848
; CHECK-NEXT: stlxr w10, w9, [x0]
4949
; CHECK-NEXT: cbnz w10, .LBB2_1
5050
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -62,7 +62,7 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
6262
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
6363
; CHECK-NEXT: ldaxr x0, [x8]
6464
; CHECK-NEXT: subs x9, x0, x1
65-
; CHECK-NEXT: csel x9, x9, x1, hs
65+
; CHECK-NEXT: csel x9, x9, x0, hs
6666
; CHECK-NEXT: stlxr w10, x9, [x8]
6767
; CHECK-NEXT: cbnz w10, .LBB3_1
6868
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -77,9 +77,8 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
7777
; CHECK-NEXT: .LBB4_1: // %atomicrmw.start
7878
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
7979
; CHECK-NEXT: ldaxrb w8, [x0]
80-
; CHECK-NEXT: sub w9, w8, w1
81-
; CHECK-NEXT: cmp w8, w1, uxtb
82-
; CHECK-NEXT: csel w9, w9, wzr, hs
80+
; CHECK-NEXT: subs w9, w8, w1, uxtb
81+
; CHECK-NEXT: csel w9, wzr, w9, lo
8382
; CHECK-NEXT: stlxrb w10, w9, [x0]
8483
; CHECK-NEXT: cbnz w10, .LBB4_1
8584
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -95,9 +94,8 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
9594
; CHECK-NEXT: .LBB5_1: // %atomicrmw.start
9695
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
9796
; CHECK-NEXT: ldaxrh w8, [x0]
98-
; CHECK-NEXT: sub w9, w8, w1
99-
; CHECK-NEXT: cmp w8, w1, uxth
100-
; CHECK-NEXT: csel w9, w9, wzr, hs
97+
; CHECK-NEXT: subs w9, w8, w1, uxth
98+
; CHECK-NEXT: csel w9, wzr, w9, lo
10199
; CHECK-NEXT: stlxrh w10, w9, [x0]
102100
; CHECK-NEXT: cbnz w10, .LBB5_1
103101
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -114,7 +112,7 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
114112
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
115113
; CHECK-NEXT: ldaxr w8, [x0]
116114
; CHECK-NEXT: subs w9, w8, w1
117-
; CHECK-NEXT: csel w9, w9, wzr, hs
115+
; CHECK-NEXT: csel w9, wzr, w9, lo
118116
; CHECK-NEXT: stlxr w10, w9, [x0]
119117
; CHECK-NEXT: cbnz w10, .LBB6_1
120118
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -131,7 +129,7 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
131129
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
132130
; CHECK-NEXT: ldaxr x8, [x0]
133131
; CHECK-NEXT: subs x9, x8, x1
134-
; CHECK-NEXT: csel x9, x9, xzr, hs
132+
; CHECK-NEXT: csel x9, xzr, x9, lo
135133
; CHECK-NEXT: stlxr w10, x9, [x0]
136134
; CHECK-NEXT: cbnz w10, .LBB7_1
137135
; CHECK-NEXT: // %bb.2: // %atomicrmw.end

llvm/test/CodeGen/LoongArch/atomicrmw-cond-sub-clamp.ll

Lines changed: 63 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,10 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) {
2121
; LA64-NEXT: andi $a7, $a5, 255
2222
; LA64-NEXT: sltu $a7, $a7, $a4
2323
; LA64-NEXT: xori $a7, $a7, 1
24-
; LA64-NEXT: sub.d $a5, $a5, $a1
25-
; LA64-NEXT: maskeqz $a5, $a5, $a7
26-
; LA64-NEXT: masknez $a7, $a1, $a7
27-
; LA64-NEXT: or $a5, $a5, $a7
24+
; LA64-NEXT: sub.d $t0, $a5, $a1
25+
; LA64-NEXT: masknez $a5, $a5, $a7
26+
; LA64-NEXT: maskeqz $a7, $t0, $a7
27+
; LA64-NEXT: or $a5, $a7, $a5
2828
; LA64-NEXT: andi $a5, $a5, 255
2929
; LA64-NEXT: sll.w $a5, $a5, $a2
3030
; LA64-NEXT: and $a7, $a6, $a3
@@ -74,10 +74,10 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) {
7474
; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
7575
; LA64-NEXT: sltu $a7, $a7, $a4
7676
; LA64-NEXT: xori $a7, $a7, 1
77-
; LA64-NEXT: sub.d $a5, $a5, $a1
78-
; LA64-NEXT: maskeqz $a5, $a5, $a7
79-
; LA64-NEXT: masknez $a7, $a1, $a7
80-
; LA64-NEXT: or $a5, $a5, $a7
77+
; LA64-NEXT: sub.d $t0, $a5, $a1
78+
; LA64-NEXT: masknez $a5, $a5, $a7
79+
; LA64-NEXT: maskeqz $a7, $t0, $a7
80+
; LA64-NEXT: or $a5, $a7, $a5
8181
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
8282
; LA64-NEXT: sll.w $a5, $a5, $a2
8383
; LA64-NEXT: and $a7, $a6, $a3
@@ -120,7 +120,7 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) {
120120
; LA64-NEXT: xori $a2, $a2, 1
121121
; LA64-NEXT: sub.w $a5, $a4, $a1
122122
; LA64-NEXT: maskeqz $a5, $a5, $a2
123-
; LA64-NEXT: masknez $a2, $a1, $a2
123+
; LA64-NEXT: masknez $a2, $a4, $a2
124124
; LA64-NEXT: or $a5, $a5, $a2
125125
; LA64-NEXT: .LBB2_3: # %atomicrmw.start
126126
; LA64-NEXT: # Parent Loop BB2_1 Depth=1
@@ -159,7 +159,7 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) {
159159
; LA64-NEXT: xori $a2, $a2, 1
160160
; LA64-NEXT: sub.d $a4, $a3, $a1
161161
; LA64-NEXT: maskeqz $a4, $a4, $a2
162-
; LA64-NEXT: masknez $a2, $a1, $a2
162+
; LA64-NEXT: masknez $a2, $a3, $a2
163163
; LA64-NEXT: or $a4, $a4, $a2
164164
; LA64-NEXT: .LBB3_3: # %atomicrmw.start
165165
; LA64-NEXT: # Parent Loop BB3_1 Depth=1
@@ -191,45 +191,43 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) {
191191
; LA64-NEXT: slli.d $a3, $a0, 3
192192
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
193193
; LA64-NEXT: andi $a2, $a3, 24
194-
; LA64-NEXT: ori $a4, $zero, 255
195-
; LA64-NEXT: ld.w $a5, $a0, 0
196-
; LA64-NEXT: sll.w $a3, $a4, $a3
194+
; LA64-NEXT: ori $a5, $zero, 255
195+
; LA64-NEXT: ld.w $a4, $a0, 0
196+
; LA64-NEXT: sll.w $a3, $a5, $a3
197197
; LA64-NEXT: nor $a3, $a3, $zero
198-
; LA64-NEXT: andi $a4, $a1, 255
198+
; LA64-NEXT: andi $a1, $a1, 255
199199
; LA64-NEXT: .p2align 4, , 16
200200
; LA64-NEXT: .LBB4_1: # %atomicrmw.start
201201
; LA64-NEXT: # =>This Loop Header: Depth=1
202202
; LA64-NEXT: # Child Loop BB4_3 Depth 2
203-
; LA64-NEXT: move $a6, $a5
204-
; LA64-NEXT: srl.w $a5, $a5, $a2
205-
; LA64-NEXT: andi $a7, $a5, 255
206-
; LA64-NEXT: sltu $a7, $a7, $a4
207-
; LA64-NEXT: xori $a7, $a7, 1
208-
; LA64-NEXT: sub.d $a5, $a5, $a1
209-
; LA64-NEXT: maskeqz $a5, $a5, $a7
210-
; LA64-NEXT: andi $a5, $a5, 255
211-
; LA64-NEXT: sll.w $a5, $a5, $a2
212-
; LA64-NEXT: and $a7, $a6, $a3
213-
; LA64-NEXT: or $a7, $a7, $a5
203+
; LA64-NEXT: move $a5, $a4
204+
; LA64-NEXT: srl.w $a4, $a4, $a2
205+
; LA64-NEXT: andi $a4, $a4, 255
206+
; LA64-NEXT: sub.d $a6, $a4, $a1
207+
; LA64-NEXT: sltu $a4, $a4, $a6
208+
; LA64-NEXT: masknez $a4, $a6, $a4
209+
; LA64-NEXT: sll.w $a4, $a4, $a2
210+
; LA64-NEXT: and $a6, $a5, $a3
211+
; LA64-NEXT: or $a6, $a6, $a4
214212
; LA64-NEXT: .LBB4_3: # %atomicrmw.start
215213
; LA64-NEXT: # Parent Loop BB4_1 Depth=1
216214
; LA64-NEXT: # => This Inner Loop Header: Depth=2
217-
; LA64-NEXT: ll.w $a5, $a0, 0
218-
; LA64-NEXT: bne $a5, $a6, .LBB4_5
215+
; LA64-NEXT: ll.w $a4, $a0, 0
216+
; LA64-NEXT: bne $a4, $a5, .LBB4_5
219217
; LA64-NEXT: # %bb.4: # %atomicrmw.start
220218
; LA64-NEXT: # in Loop: Header=BB4_3 Depth=2
221-
; LA64-NEXT: move $t0, $a7
222-
; LA64-NEXT: sc.w $t0, $a0, 0
223-
; LA64-NEXT: beqz $t0, .LBB4_3
219+
; LA64-NEXT: move $a7, $a6
220+
; LA64-NEXT: sc.w $a7, $a0, 0
221+
; LA64-NEXT: beqz $a7, .LBB4_3
224222
; LA64-NEXT: b .LBB4_6
225223
; LA64-NEXT: .LBB4_5: # %atomicrmw.start
226224
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
227225
; LA64-NEXT: dbar 20
228226
; LA64-NEXT: .LBB4_6: # %atomicrmw.start
229227
; LA64-NEXT: # in Loop: Header=BB4_1 Depth=1
230-
; LA64-NEXT: bne $a5, $a6, .LBB4_1
228+
; LA64-NEXT: bne $a4, $a5, .LBB4_1
231229
; LA64-NEXT: # %bb.2: # %atomicrmw.end
232-
; LA64-NEXT: srl.w $a0, $a5, $a2
230+
; LA64-NEXT: srl.w $a0, $a4, $a2
233231
; LA64-NEXT: ret
234232
%result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst
235233
ret i8 %result
@@ -242,45 +240,43 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) {
242240
; LA64-NEXT: bstrins.d $a0, $zero, 1, 0
243241
; LA64-NEXT: andi $a2, $a3, 24
244242
; LA64-NEXT: lu12i.w $a4, 15
245-
; LA64-NEXT: ori $a4, $a4, 4095
246-
; LA64-NEXT: ld.w $a5, $a0, 0
247-
; LA64-NEXT: sll.w $a3, $a4, $a3
243+
; LA64-NEXT: ori $a5, $a4, 4095
244+
; LA64-NEXT: ld.w $a4, $a0, 0
245+
; LA64-NEXT: sll.w $a3, $a5, $a3
248246
; LA64-NEXT: nor $a3, $a3, $zero
249-
; LA64-NEXT: bstrpick.d $a4, $a1, 15, 0
247+
; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0
250248
; LA64-NEXT: .p2align 4, , 16
251249
; LA64-NEXT: .LBB5_1: # %atomicrmw.start
252250
; LA64-NEXT: # =>This Loop Header: Depth=1
253251
; LA64-NEXT: # Child Loop BB5_3 Depth 2
254-
; LA64-NEXT: move $a6, $a5
255-
; LA64-NEXT: srl.w $a5, $a5, $a2
256-
; LA64-NEXT: bstrpick.d $a7, $a5, 15, 0
257-
; LA64-NEXT: sltu $a7, $a7, $a4
258-
; LA64-NEXT: xori $a7, $a7, 1
259-
; LA64-NEXT: sub.d $a5, $a5, $a1
260-
; LA64-NEXT: maskeqz $a5, $a5, $a7
261-
; LA64-NEXT: bstrpick.d $a5, $a5, 15, 0
262-
; LA64-NEXT: sll.w $a5, $a5, $a2
263-
; LA64-NEXT: and $a7, $a6, $a3
264-
; LA64-NEXT: or $a7, $a7, $a5
252+
; LA64-NEXT: move $a5, $a4
253+
; LA64-NEXT: srl.w $a4, $a4, $a2
254+
; LA64-NEXT: bstrpick.d $a4, $a4, 15, 0
255+
; LA64-NEXT: sub.d $a6, $a4, $a1
256+
; LA64-NEXT: sltu $a4, $a4, $a6
257+
; LA64-NEXT: masknez $a4, $a6, $a4
258+
; LA64-NEXT: sll.w $a4, $a4, $a2
259+
; LA64-NEXT: and $a6, $a5, $a3
260+
; LA64-NEXT: or $a6, $a6, $a4
265261
; LA64-NEXT: .LBB5_3: # %atomicrmw.start
266262
; LA64-NEXT: # Parent Loop BB5_1 Depth=1
267263
; LA64-NEXT: # => This Inner Loop Header: Depth=2
268-
; LA64-NEXT: ll.w $a5, $a0, 0
269-
; LA64-NEXT: bne $a5, $a6, .LBB5_5
264+
; LA64-NEXT: ll.w $a4, $a0, 0
265+
; LA64-NEXT: bne $a4, $a5, .LBB5_5
270266
; LA64-NEXT: # %bb.4: # %atomicrmw.start
271267
; LA64-NEXT: # in Loop: Header=BB5_3 Depth=2
272-
; LA64-NEXT: move $t0, $a7
273-
; LA64-NEXT: sc.w $t0, $a0, 0
274-
; LA64-NEXT: beqz $t0, .LBB5_3
268+
; LA64-NEXT: move $a7, $a6
269+
; LA64-NEXT: sc.w $a7, $a0, 0
270+
; LA64-NEXT: beqz $a7, .LBB5_3
275271
; LA64-NEXT: b .LBB5_6
276272
; LA64-NEXT: .LBB5_5: # %atomicrmw.start
277273
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
278274
; LA64-NEXT: dbar 20
279275
; LA64-NEXT: .LBB5_6: # %atomicrmw.start
280276
; LA64-NEXT: # in Loop: Header=BB5_1 Depth=1
281-
; LA64-NEXT: bne $a5, $a6, .LBB5_1
277+
; LA64-NEXT: bne $a4, $a5, .LBB5_1
282278
; LA64-NEXT: # %bb.2: # %atomicrmw.end
283-
; LA64-NEXT: srl.w $a0, $a5, $a2
279+
; LA64-NEXT: srl.w $a0, $a4, $a2
284280
; LA64-NEXT: ret
285281
%result = atomicrmw usub_sat ptr %ptr, i16 %val seq_cst
286282
ret i16 %result
@@ -290,33 +286,32 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) {
290286
; LA64-LABEL: atomicrmw_usub_sat_i32:
291287
; LA64: # %bb.0:
292288
; LA64-NEXT: ld.w $a2, $a0, 0
293-
; LA64-NEXT: addi.w $a3, $a1, 0
289+
; LA64-NEXT: addi.w $a1, $a1, 0
294290
; LA64-NEXT: .p2align 4, , 16
295291
; LA64-NEXT: .LBB6_1: # %atomicrmw.start
296292
; LA64-NEXT: # =>This Loop Header: Depth=1
297293
; LA64-NEXT: # Child Loop BB6_3 Depth 2
298-
; LA64-NEXT: move $a4, $a2
299-
; LA64-NEXT: sltu $a2, $a2, $a3
300-
; LA64-NEXT: xori $a2, $a2, 1
301-
; LA64-NEXT: sub.w $a5, $a4, $a1
302-
; LA64-NEXT: maskeqz $a5, $a5, $a2
294+
; LA64-NEXT: move $a3, $a2
295+
; LA64-NEXT: sub.d $a2, $a2, $a1
296+
; LA64-NEXT: sltu $a4, $a3, $a2
297+
; LA64-NEXT: masknez $a4, $a2, $a4
303298
; LA64-NEXT: .LBB6_3: # %atomicrmw.start
304299
; LA64-NEXT: # Parent Loop BB6_1 Depth=1
305300
; LA64-NEXT: # => This Inner Loop Header: Depth=2
306301
; LA64-NEXT: ll.w $a2, $a0, 0
307-
; LA64-NEXT: bne $a2, $a4, .LBB6_5
302+
; LA64-NEXT: bne $a2, $a3, .LBB6_5
308303
; LA64-NEXT: # %bb.4: # %atomicrmw.start
309304
; LA64-NEXT: # in Loop: Header=BB6_3 Depth=2
310-
; LA64-NEXT: move $a6, $a5
311-
; LA64-NEXT: sc.w $a6, $a0, 0
312-
; LA64-NEXT: beqz $a6, .LBB6_3
305+
; LA64-NEXT: move $a5, $a4
306+
; LA64-NEXT: sc.w $a5, $a0, 0
307+
; LA64-NEXT: beqz $a5, .LBB6_3
313308
; LA64-NEXT: b .LBB6_6
314309
; LA64-NEXT: .LBB6_5: # %atomicrmw.start
315310
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
316311
; LA64-NEXT: dbar 20
317312
; LA64-NEXT: .LBB6_6: # %atomicrmw.start
318313
; LA64-NEXT: # in Loop: Header=BB6_1 Depth=1
319-
; LA64-NEXT: bne $a2, $a4, .LBB6_1
314+
; LA64-NEXT: bne $a2, $a3, .LBB6_1
320315
; LA64-NEXT: # %bb.2: # %atomicrmw.end
321316
; LA64-NEXT: move $a0, $a2
322317
; LA64-NEXT: ret
@@ -333,10 +328,9 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) {
333328
; LA64-NEXT: # =>This Loop Header: Depth=1
334329
; LA64-NEXT: # Child Loop BB7_3 Depth 2
335330
; LA64-NEXT: move $a3, $a2
336-
; LA64-NEXT: sltu $a2, $a2, $a1
337-
; LA64-NEXT: xori $a2, $a2, 1
338-
; LA64-NEXT: sub.d $a4, $a3, $a1
339-
; LA64-NEXT: maskeqz $a4, $a4, $a2
331+
; LA64-NEXT: sub.d $a2, $a2, $a1
332+
; LA64-NEXT: sltu $a4, $a3, $a2
333+
; LA64-NEXT: masknez $a4, $a2, $a4
340334
; LA64-NEXT: .LBB7_3: # %atomicrmw.start
341335
; LA64-NEXT: # Parent Loop BB7_1 Depth=1
342336
; LA64-NEXT: # => This Inner Loop Header: Depth=2

0 commit comments

Comments
 (0)