Skip to content

Commit d29fc6e

Browse files
committed
[AArch64] Replace performANDSCombine with performFlagSettingCombine.
`performFlagSettingCombine` is a generalised version of `performANDSCombine` which also works on `ADCS` and `SBCS`. Differential revision: https://reviews.llvm.org/D124464
1 parent c5f8b98 commit d29fc6e

File tree

13 files changed

+93
-89
lines changed

13 files changed

+93
-89
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 19 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -17684,27 +17684,27 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
1768417684
return SDValue();
1768517685
}
1768617686

17687-
// Combines for S forms of generic opcodes (AArch64ISD::ANDS into ISD::AND for
17688-
// example). NOTE: This could be used for ADDS and SUBS too, if we can find test
17689-
// cases.
17690-
static SDValue performANDSCombine(SDNode *N,
17691-
TargetLowering::DAGCombinerInfo &DCI) {
17687+
// Replace a flag-setting operator (e.g. ANDS) with the generic version
17688+
// (e.g. AND) if the flag result is unused.
17689+
static SDValue performFlagSettingCombine(SDNode *N,
17690+
TargetLowering::DAGCombinerInfo &DCI,
17691+
unsigned GenericOpcode) {
1769217692
SDLoc DL(N);
1769317693
SDValue LHS = N->getOperand(0);
1769417694
SDValue RHS = N->getOperand(1);
1769517695
EVT VT = N->getValueType(0);
1769617696

1769717697
// If the flag result isn't used, convert back to a generic opcode.
1769817698
if (!N->hasAnyUseOfValue(1)) {
17699-
SDValue Res = DCI.DAG.getNode(ISD::AND, DL, VT, LHS, RHS);
17699+
SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops());
1770017700
return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)},
1770117701
DL);
1770217702
}
1770317703

1770417704
// Combine identical generic nodes into this node, re-using the result.
17705-
if (SDNode *GenericAddSub =
17706-
DCI.DAG.getNodeIfExists(ISD::AND, DCI.DAG.getVTList(VT), {LHS, RHS}))
17707-
DCI.CombineTo(GenericAddSub, SDValue(N, 0));
17705+
if (SDNode *Generic = DCI.DAG.getNodeIfExists(
17706+
GenericOpcode, DCI.DAG.getVTList(VT), {LHS, RHS}))
17707+
DCI.CombineTo(Generic, SDValue(N, 0));
1770817708

1770917709
return SDValue();
1771017710
}
@@ -18718,12 +18718,20 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1871818718
case ISD::ADD:
1871918719
case ISD::SUB:
1872018720
return performAddSubCombine(N, DCI, DAG);
18721+
case AArch64ISD::ANDS:
18722+
return performFlagSettingCombine(N, DCI, ISD::AND);
1872118723
case AArch64ISD::ADC:
18722-
case AArch64ISD::ADCS:
1872318724
return foldOverflowCheck(N, DAG, /* IsAdd */ true);
1872418725
case AArch64ISD::SBC:
18725-
case AArch64ISD::SBCS:
1872618726
return foldOverflowCheck(N, DAG, /* IsAdd */ false);
18727+
case AArch64ISD::ADCS:
18728+
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ true))
18729+
return R;
18730+
return performFlagSettingCombine(N, DCI, AArch64ISD::ADC);
18731+
case AArch64ISD::SBCS:
18732+
if (auto R = foldOverflowCheck(N, DAG, /* IsAdd */ false))
18733+
return R;
18734+
return performFlagSettingCombine(N, DCI, AArch64ISD::SBC);
1872718735
case ISD::XOR:
1872818736
return performXorCombine(N, DAG, DCI, Subtarget);
1872918737
case ISD::MUL:
@@ -18782,8 +18790,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
1878218790
return performTBZCombine(N, DCI, DAG);
1878318791
case AArch64ISD::CSEL:
1878418792
return performCSELCombine(N, DCI, DAG);
18785-
case AArch64ISD::ANDS:
18786-
return performANDSCombine(N, DCI);
1878718793
case AArch64ISD::DUP:
1878818794
return performPostLD1Combine(N, DCI, false);
1878918795
case AArch64ISD::NVCAST:

llvm/test/CodeGen/AArch64/adc.ll

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -6,17 +6,17 @@ define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
66
; CHECK-LE-LABEL: test_simple:
77
; CHECK-LE: ; %bb.0:
88
; CHECK-LE-NEXT: adds x8, x0, x2
9-
; CHECK-LE-NEXT: adcs x9, x1, x3
9+
; CHECK-LE-NEXT: adc x9, x1, x3
1010
; CHECK-LE-NEXT: subs x0, x8, x4
11-
; CHECK-LE-NEXT: sbcs x1, x9, x5
11+
; CHECK-LE-NEXT: sbc x1, x9, x5
1212
; CHECK-LE-NEXT: ret
1313
;
1414
; CHECK-BE-LABEL: test_simple:
1515
; CHECK-BE: // %bb.0:
1616
; CHECK-BE-NEXT: adds x8, x1, x3
17-
; CHECK-BE-NEXT: adcs x9, x0, x2
17+
; CHECK-BE-NEXT: adc x9, x0, x2
1818
; CHECK-BE-NEXT: subs x1, x8, x5
19-
; CHECK-BE-NEXT: sbcs x0, x9, x4
19+
; CHECK-BE-NEXT: sbc x0, x9, x4
2020
; CHECK-BE-NEXT: ret
2121

2222
%valadd = add i128 %a, %b
@@ -30,13 +30,13 @@ define i128 @test_imm(i128 %a) {
3030
; CHECK-LE-LABEL: test_imm:
3131
; CHECK-LE: ; %bb.0:
3232
; CHECK-LE-NEXT: adds x0, x0, #12
33-
; CHECK-LE-NEXT: adcs x1, x1, xzr
33+
; CHECK-LE-NEXT: adc x1, x1, xzr
3434
; CHECK-LE-NEXT: ret
3535
;
3636
; CHECK-BE-LABEL: test_imm:
3737
; CHECK-BE: // %bb.0:
3838
; CHECK-BE-NEXT: adds x1, x1, #12
39-
; CHECK-BE-NEXT: adcs x0, x0, xzr
39+
; CHECK-BE-NEXT: adc x0, x0, xzr
4040
; CHECK-BE-NEXT: ret
4141

4242
%val = add i128 %a, 12
@@ -49,14 +49,14 @@ define i128 @test_shifted(i128 %a, i128 %b) {
4949
; CHECK-LE: ; %bb.0:
5050
; CHECK-LE-NEXT: extr x8, x3, x2, #19
5151
; CHECK-LE-NEXT: adds x0, x0, x2, lsl #45
52-
; CHECK-LE-NEXT: adcs x1, x1, x8
52+
; CHECK-LE-NEXT: adc x1, x1, x8
5353
; CHECK-LE-NEXT: ret
5454
;
5555
; CHECK-BE-LABEL: test_shifted:
5656
; CHECK-BE: // %bb.0:
5757
; CHECK-BE-NEXT: extr x8, x2, x3, #19
5858
; CHECK-BE-NEXT: adds x1, x1, x3, lsl #45
59-
; CHECK-BE-NEXT: adcs x0, x0, x8
59+
; CHECK-BE-NEXT: adc x0, x0, x8
6060
; CHECK-BE-NEXT: ret
6161

6262
%rhs = shl i128 %b, 45
@@ -74,7 +74,7 @@ define i128 @test_extended(i128 %a, i16 %b) {
7474
; CHECK-LE-NEXT: adds x0, x0, w2, sxth #3
7575
; CHECK-LE-NEXT: asr x9, x8, #63
7676
; CHECK-LE-NEXT: extr x8, x9, x8, #61
77-
; CHECK-LE-NEXT: adcs x1, x1, x8
77+
; CHECK-LE-NEXT: adc x1, x1, x8
7878
; CHECK-LE-NEXT: ret
7979
;
8080
; CHECK-BE-LABEL: test_extended:
@@ -84,7 +84,7 @@ define i128 @test_extended(i128 %a, i16 %b) {
8484
; CHECK-BE-NEXT: adds x1, x1, w2, sxth #3
8585
; CHECK-BE-NEXT: asr x9, x8, #63
8686
; CHECK-BE-NEXT: extr x8, x9, x8, #61
87-
; CHECK-BE-NEXT: adcs x0, x0, x8
87+
; CHECK-BE-NEXT: adc x0, x0, x8
8888
; CHECK-BE-NEXT: ret
8989

9090
%ext = sext i16 %b to i128

llvm/test/CodeGen/AArch64/addcarry-crash.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ define i64 @foo(i64* nocapture readonly %ptr, i64 %a, i64 %b, i64 %c) local_unna
99
; CHECK-NEXT: lsr x9, x1, #32
1010
; CHECK-NEXT: cmn x3, x2
1111
; CHECK-NEXT: mul x8, x8, x9
12-
; CHECK-NEXT: adcs x0, x8, xzr
12+
; CHECK-NEXT: adc x0, x8, xzr
1313
; CHECK-NEXT: ret
1414
entry:
1515
%0 = lshr i64 %a, 32

llvm/test/CodeGen/AArch64/arm64-atomic-128.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -260,7 +260,7 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
260260
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
261261
; CHECK-NEXT: ldaxp x9, x8, [x0]
262262
; CHECK-NEXT: adds x10, x9, x2
263-
; CHECK-NEXT: adcs x11, x8, x3
263+
; CHECK-NEXT: adc x11, x8, x3
264264
; CHECK-NEXT: stlxp w12, x10, x11, [x0]
265265
; CHECK-NEXT: cbnz w12, .LBB6_1
266266
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -281,7 +281,7 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
281281
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
282282
; CHECK-NEXT: ldaxp x9, x8, [x0]
283283
; CHECK-NEXT: subs x10, x9, x2
284-
; CHECK-NEXT: sbcs x11, x8, x3
284+
; CHECK-NEXT: sbc x11, x8, x3
285285
; CHECK-NEXT: stlxp w12, x10, x11, [x0]
286286
; CHECK-NEXT: cbnz w12, .LBB7_1
287287
; CHECK-NEXT: // %bb.2: // %atomicrmw.end

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1748,28 +1748,28 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
17481748
define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
17491749
; CHECK-LABEL: uabd_i64:
17501750
; CHECK: // %bb.0:
1751-
; CHECK-NEXT: fmov x9, d0
1752-
; CHECK-NEXT: fmov x11, d1
17531751
; CHECK-NEXT: mov.d x8, v0[1]
1752+
; CHECK-NEXT: fmov x9, d0
17541753
; CHECK-NEXT: mov.d x10, v1[1]
1754+
; CHECK-NEXT: fmov x11, d1
17551755
; CHECK-NEXT: asr x12, x9, #63
17561756
; CHECK-NEXT: asr x13, x11, #63
17571757
; CHECK-NEXT: subs x9, x9, x11
1758-
; CHECK-NEXT: sbcs x11, x12, x13
1758+
; CHECK-NEXT: sbc x11, x12, x13
17591759
; CHECK-NEXT: asr x12, x8, #63
17601760
; CHECK-NEXT: asr x13, x10, #63
17611761
; CHECK-NEXT: subs x8, x8, x10
1762-
; CHECK-NEXT: sbcs x10, x12, x13
1763-
; CHECK-NEXT: asr x13, x11, #63
1764-
; CHECK-NEXT: asr x12, x10, #63
1765-
; CHECK-NEXT: eor x8, x8, x12
1766-
; CHECK-NEXT: eor x10, x10, x12
1767-
; CHECK-NEXT: subs x2, x8, x12
1768-
; CHECK-NEXT: eor x8, x9, x13
1769-
; CHECK-NEXT: sbcs x3, x10, x12
1770-
; CHECK-NEXT: eor x9, x11, x13
1771-
; CHECK-NEXT: subs x8, x8, x13
1772-
; CHECK-NEXT: sbcs x1, x9, x13
1762+
; CHECK-NEXT: sbc x10, x12, x13
1763+
; CHECK-NEXT: asr x12, x11, #63
1764+
; CHECK-NEXT: asr x13, x10, #63
1765+
; CHECK-NEXT: eor x9, x9, x12
1766+
; CHECK-NEXT: eor x8, x8, x13
1767+
; CHECK-NEXT: eor x10, x10, x13
1768+
; CHECK-NEXT: subs x2, x8, x13
1769+
; CHECK-NEXT: sbc x3, x10, x13
1770+
; CHECK-NEXT: subs x8, x9, x12
1771+
; CHECK-NEXT: eor x9, x11, x12
1772+
; CHECK-NEXT: sbc x1, x9, x12
17731773
; CHECK-NEXT: fmov d0, x8
17741774
; CHECK-NEXT: mov.d v0[1], x1
17751775
; CHECK-NEXT: fmov x0, d0

llvm/test/CodeGen/AArch64/atomicrmw-O0.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,7 @@ define i128 @test_rmw_add_128(i128* %dst) {
220220
; NOLSE-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload
221221
; NOLSE-NEXT: adds x14, x8, #1
222222
; NOLSE-NEXT: mov x9, xzr
223-
; NOLSE-NEXT: adcs x15, x11, x9
223+
; NOLSE-NEXT: adc x15, x11, x9
224224
; NOLSE-NEXT: .LBB4_2: // %atomicrmw.start
225225
; NOLSE-NEXT: // Parent Loop BB4_1 Depth=1
226226
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
@@ -273,7 +273,7 @@ define i128 @test_rmw_add_128(i128* %dst) {
273273
; LSE-NEXT: ldr x9, [sp, #56] // 8-byte Folded Reload
274274
; LSE-NEXT: adds x2, x8, #1
275275
; LSE-NEXT: mov x11, xzr
276-
; LSE-NEXT: adcs x11, x10, x11
276+
; LSE-NEXT: adc x11, x10, x11
277277
; LSE-NEXT: // kill: def $x2 killed $x2 def $x2_x3
278278
; LSE-NEXT: mov x3, x11
279279
; LSE-NEXT: mov x0, x8

llvm/test/CodeGen/AArch64/i128-math.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ define i128 @u128_add(i128 %x, i128 %y) {
2323
; CHECK-LABEL: u128_add:
2424
; CHECK: // %bb.0:
2525
; CHECK-NEXT: adds x0, x0, x2
26-
; CHECK-NEXT: adcs x1, x1, x3
26+
; CHECK-NEXT: adc x1, x1, x3
2727
; CHECK-NEXT: ret
2828
%1 = add i128 %x, %y
2929
ret i128 %1
@@ -81,7 +81,7 @@ define i128 @u128_sub(i128 %x, i128 %y) {
8181
; CHECK-LABEL: u128_sub:
8282
; CHECK: // %bb.0:
8383
; CHECK-NEXT: subs x0, x0, x2
84-
; CHECK-NEXT: sbcs x1, x1, x3
84+
; CHECK-NEXT: sbc x1, x1, x3
8585
; CHECK-NEXT: ret
8686
%1 = sub i128 %x, %y
8787
ret i128 %1
@@ -139,7 +139,7 @@ define i128 @i128_add(i128 %x, i128 %y) {
139139
; CHECK-LABEL: i128_add:
140140
; CHECK: // %bb.0:
141141
; CHECK-NEXT: adds x0, x0, x2
142-
; CHECK-NEXT: adcs x1, x1, x3
142+
; CHECK-NEXT: adc x1, x1, x3
143143
; CHECK-NEXT: ret
144144
%1 = add i128 %x, %y
145145
ret i128 %1
@@ -199,7 +199,7 @@ define i128 @i128_sub(i128 %x, i128 %y) {
199199
; CHECK-LABEL: i128_sub:
200200
; CHECK: // %bb.0:
201201
; CHECK-NEXT: subs x0, x0, x2
202-
; CHECK-NEXT: sbcs x1, x1, x3
202+
; CHECK-NEXT: sbc x1, x1, x3
203203
; CHECK-NEXT: ret
204204
%1 = sub i128 %x, %y
205205
ret i128 %1

llvm/test/CodeGen/AArch64/i256-math.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ define i256 @u256_add(i256 %x, i256 %y) {
1919
; CHECK-NEXT: adds x0, x0, x4
2020
; CHECK-NEXT: adcs x1, x1, x5
2121
; CHECK-NEXT: adcs x2, x2, x6
22-
; CHECK-NEXT: adcs x3, x3, x7
22+
; CHECK-NEXT: adc x3, x3, x7
2323
; CHECK-NEXT: ret
2424
%1 = add i256 %x, %y
2525
ret i256 %1
@@ -87,7 +87,7 @@ define i256 @u256_sub(i256 %x, i256 %y) {
8787
; CHECK-NEXT: subs x0, x0, x4
8888
; CHECK-NEXT: sbcs x1, x1, x5
8989
; CHECK-NEXT: sbcs x2, x2, x6
90-
; CHECK-NEXT: sbcs x3, x3, x7
90+
; CHECK-NEXT: sbc x3, x3, x7
9191
; CHECK-NEXT: ret
9292
%1 = sub i256 %x, %y
9393
ret i256 %1
@@ -163,7 +163,7 @@ define i256 @i256_add(i256 %x, i256 %y) {
163163
; CHECK-NEXT: adds x0, x0, x4
164164
; CHECK-NEXT: adcs x1, x1, x5
165165
; CHECK-NEXT: adcs x2, x2, x6
166-
; CHECK-NEXT: adcs x3, x3, x7
166+
; CHECK-NEXT: adc x3, x3, x7
167167
; CHECK-NEXT: ret
168168
%1 = add i256 %x, %y
169169
ret i256 %1
@@ -233,7 +233,7 @@ define i256 @i256_sub(i256 %x, i256 %y) {
233233
; CHECK-NEXT: subs x0, x0, x4
234234
; CHECK-NEXT: sbcs x1, x1, x5
235235
; CHECK-NEXT: sbcs x2, x2, x6
236-
; CHECK-NEXT: sbcs x3, x3, x7
236+
; CHECK-NEXT: sbc x3, x3, x7
237237
; CHECK-NEXT: ret
238238
%1 = sub i256 %x, %y
239239
ret i256 %1

llvm/test/CodeGen/AArch64/icmp-shift-opt.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
1111
; CHECK-NEXT: .LBB0_1: // %loop
1212
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
1313
; CHECK-NEXT: adds x0, x0, #1
14-
; CHECK-NEXT: adcs x1, x1, xzr
14+
; CHECK-NEXT: adc x1, x1, xzr
1515
; CHECK-NEXT: orr x8, x1, x0, lsr #60
1616
; CHECK-NEXT: cbnz x8, .LBB0_1
1717
; CHECK-NEXT: // %bb.2: // %exit

llvm/test/CodeGen/AArch64/neg-abs.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ define i128 @neg_abs128(i128 %x) {
5252
; CHECK-NEXT: eor x9, x0, x8
5353
; CHECK-NEXT: eor x10, x1, x8
5454
; CHECK-NEXT: subs x0, x8, x9
55-
; CHECK-NEXT: sbcs x1, x8, x10
55+
; CHECK-NEXT: sbc x1, x8, x10
5656
; CHECK-NEXT: ret
5757
%abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true)
5858
%neg = sub nsw i128 0, %abs
@@ -99,7 +99,7 @@ define i128 @abs128(i128 %x) {
9999
; CHECK-NEXT: eor x9, x0, x8
100100
; CHECK-NEXT: eor x10, x1, x8
101101
; CHECK-NEXT: subs x0, x9, x8
102-
; CHECK-NEXT: sbcs x1, x10, x8
102+
; CHECK-NEXT: sbc x1, x10, x8
103103
; CHECK-NEXT: ret
104104
%abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true)
105105
ret i128 %abs

llvm/test/CodeGen/AArch64/neon-abd.ll

Lines changed: 25 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -147,23 +147,23 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
147147
; CHECK-NEXT: mov x8, v0.d[1]
148148
; CHECK-NEXT: fmov x10, d0
149149
; CHECK-NEXT: mov x9, v1.d[1]
150-
; CHECK-NEXT: asr x11, x10, #63
151-
; CHECK-NEXT: asr x12, x8, #63
152-
; CHECK-NEXT: asr x13, x9, #63
150+
; CHECK-NEXT: fmov x11, d1
151+
; CHECK-NEXT: asr x12, x10, #63
152+
; CHECK-NEXT: asr x14, x8, #63
153+
; CHECK-NEXT: asr x15, x9, #63
153154
; CHECK-NEXT: subs x8, x8, x9
154-
; CHECK-NEXT: fmov x9, d1
155-
; CHECK-NEXT: sbcs x12, x12, x13
156-
; CHECK-NEXT: asr x13, x9, #63
157-
; CHECK-NEXT: subs x9, x10, x9
158-
; CHECK-NEXT: sbcs x10, x11, x13
159-
; CHECK-NEXT: asr x11, x12, #63
160-
; CHECK-NEXT: asr x10, x10, #63
161-
; CHECK-NEXT: eor x8, x8, x11
162-
; CHECK-NEXT: eor x9, x9, x10
163-
; CHECK-NEXT: sub x8, x8, x11
164-
; CHECK-NEXT: sub x9, x9, x10
155+
; CHECK-NEXT: asr x13, x11, #63
156+
; CHECK-NEXT: sbc x9, x14, x15
157+
; CHECK-NEXT: subs x10, x10, x11
158+
; CHECK-NEXT: asr x9, x9, #63
159+
; CHECK-NEXT: sbc x11, x12, x13
160+
; CHECK-NEXT: eor x8, x8, x9
161+
; CHECK-NEXT: asr x11, x11, #63
162+
; CHECK-NEXT: sub x8, x8, x9
163+
; CHECK-NEXT: eor x10, x10, x11
164+
; CHECK-NEXT: sub x10, x10, x11
165165
; CHECK-NEXT: fmov d1, x8
166-
; CHECK-NEXT: fmov d0, x9
166+
; CHECK-NEXT: fmov d0, x10
167167
; CHECK-NEXT: mov v0.d[1], v1.d[0]
168168
; CHECK-NEXT: ret
169169
%a.sext = sext <2 x i64> %a to <2 x i128>
@@ -327,19 +327,19 @@ define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
327327
; CHECK-NEXT: mov x8, v0.d[1]
328328
; CHECK-NEXT: fmov x10, d0
329329
; CHECK-NEXT: mov x9, v1.d[1]
330+
; CHECK-NEXT: fmov x11, d1
330331
; CHECK-NEXT: subs x8, x8, x9
331-
; CHECK-NEXT: fmov x9, d1
332-
; CHECK-NEXT: ngcs x11, xzr
332+
; CHECK-NEXT: ngc x9, xzr
333+
; CHECK-NEXT: subs x10, x10, x11
334+
; CHECK-NEXT: ngc x11, xzr
335+
; CHECK-NEXT: asr x9, x9, #63
333336
; CHECK-NEXT: asr x11, x11, #63
334-
; CHECK-NEXT: subs x9, x10, x9
335-
; CHECK-NEXT: eor x8, x8, x11
336-
; CHECK-NEXT: ngcs x10, xzr
337-
; CHECK-NEXT: sub x8, x8, x11
338-
; CHECK-NEXT: asr x10, x10, #63
339-
; CHECK-NEXT: eor x9, x9, x10
340-
; CHECK-NEXT: sub x9, x9, x10
337+
; CHECK-NEXT: eor x8, x8, x9
338+
; CHECK-NEXT: eor x10, x10, x11
339+
; CHECK-NEXT: sub x8, x8, x9
340+
; CHECK-NEXT: sub x10, x10, x11
341341
; CHECK-NEXT: fmov d1, x8
342-
; CHECK-NEXT: fmov d0, x9
342+
; CHECK-NEXT: fmov d0, x10
343343
; CHECK-NEXT: mov v0.d[1], v1.d[0]
344344
; CHECK-NEXT: ret
345345
%a.zext = zext <2 x i64> %a to <2 x i128>

0 commit comments

Comments
 (0)