Skip to content

Commit 3acbd38

Browse files
authored
[AArch64] Optimise MOVI + CMGT to CMGE (#74499)
This fixes a regression that occurred for a pattern of MOVI + CMGT instructions, which can be optimised to CMGE. That is, when a signed greater-than compare has -1 as an operand, it can be rewritten as a greater-than-or-equal compare against 0, which is what CMGE does. Fixes #61836
1 parent c439913 commit 3acbd38

File tree

5 files changed

+120
-119
lines changed

5 files changed

+120
-119
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13705,11 +13705,17 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1370513705
assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
1370613706
"function only supposed to emit natural comparisons");
1370713707

13708+
APInt SplatValue;
13709+
APInt SplatUndef;
13710+
unsigned SplatBitSize;
13711+
bool HasAnyUndefs;
13712+
1370813713
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
13709-
APInt CnstBits(VT.getSizeInBits(), 0);
13710-
APInt UndefBits(VT.getSizeInBits(), 0);
13711-
bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
13712-
bool IsZero = IsCnst && (CnstBits == 0);
13714+
bool IsCnst = BVN && BVN->isConstantSplat(SplatValue, SplatUndef,
13715+
SplatBitSize, HasAnyUndefs);
13716+
bool IsZero = IsCnst && SplatValue == 0;
13717+
bool IsOne = IsCnst && SplatValue == 1;
13718+
bool IsMinusOne = IsCnst && SplatValue.isAllOnes();
1371313719

1371413720
if (SrcVT.getVectorElementType().isFloatingPoint()) {
1371513721
switch (CC) {
@@ -13778,6 +13784,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1377813784
case AArch64CC::GT:
1377913785
if (IsZero)
1378013786
return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
13787+
if (IsMinusOne)
13788+
return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS, RHS);
1378113789
return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
1378213790
case AArch64CC::LE:
1378313791
if (IsZero)
@@ -13790,6 +13798,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
1379013798
case AArch64CC::LT:
1379113799
if (IsZero)
1379213800
return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
13801+
if (IsOne)
13802+
return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
1379313803
return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
1379413804
case AArch64CC::HI:
1379513805
return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);

llvm/test/CodeGen/AArch64/cmp-select-sign.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,11 @@ define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
176176
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
177177
; CHECK-NEXT: .cfi_def_cfa_offset 32
178178
; CHECK-NEXT: .cfi_offset w30, -16
179-
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
180-
; CHECK-NEXT: cmlt v2.4s, v0.4s, #0
181-
; CHECK-NEXT: orr v2.4s, #1
182-
; CHECK-NEXT: cmgt v1.4s, v0.4s, v1.4s
183-
; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill
184-
; CHECK-NEXT: xtn v0.4h, v1.4s
179+
; CHECK-NEXT: cmlt v1.4s, v0.4s, #0
180+
; CHECK-NEXT: cmge v0.4s, v0.4s, #0
181+
; CHECK-NEXT: xtn v0.4h, v0.4s
182+
; CHECK-NEXT: orr v1.4s, #1
183+
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
185184
; CHECK-NEXT: bl use_4xi1
186185
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
187186
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -227,9 +226,8 @@ define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) {
227226
define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
228227
; CHECK-LABEL: not_sign_4xi32_3:
229228
; CHECK: // %bb.0:
230-
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
231229
; CHECK-NEXT: adrp x8, .LCPI18_0
232-
; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
230+
; CHECK-NEXT: cmge v0.4s, v0.4s, #0
233231
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
234232
; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b
235233
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s

llvm/test/CodeGen/AArch64/dag-numsignbits.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,12 @@ define void @signbits_vXi1(<4 x i16> %a1) {
88
; CHECK: // %bb.0:
99
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
1010
; CHECK-NEXT: adrp x8, .LCPI0_0
11-
; CHECK-NEXT: movi v2.4h, #1
1211
; CHECK-NEXT: mov w1, wzr
12+
; CHECK-NEXT: mov w2, wzr
1313
; CHECK-NEXT: dup v0.4h, v0.h[0]
1414
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
15-
; CHECK-NEXT: mov w2, wzr
1615
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
17-
; CHECK-NEXT: cmgt v0.4h, v2.4h, v0.4h
16+
; CHECK-NEXT: cmle v0.4h, v0.4h, #0
1817
; CHECK-NEXT: umov w0, v0.h[0]
1918
; CHECK-NEXT: umov w3, v0.h[3]
2019
; CHECK-NEXT: b foo

llvm/test/CodeGen/AArch64/signbit-shift.ll

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,9 @@ define i32 @add_zext_ifpos(i32 %x) {
2929
define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
3030
; CHECK-LABEL: add_zext_ifpos_vec_splat:
3131
; CHECK: // %bb.0:
32-
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
33-
; CHECK-NEXT: movi v2.4s, #41
34-
; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
35-
; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
32+
; CHECK-NEXT: movi v1.4s, #41
33+
; CHECK-NEXT: cmge v0.4s, v0.4s, #0
34+
; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
3635
; CHECK-NEXT: ret
3736
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
3837
%e = zext <4 x i1> %c to <4 x i32>
@@ -43,7 +42,7 @@ define <4 x i32> @add_zext_ifpos_vec_splat(<4 x i32> %x) {
4342
define i32 @sel_ifpos_tval_bigger(i32 %x) {
4443
; CHECK-LABEL: sel_ifpos_tval_bigger:
4544
; CHECK: // %bb.0:
46-
; CHECK-NEXT: mov w8, #41
45+
; CHECK-NEXT: mov w8, #41 // =0x29
4746
; CHECK-NEXT: cmp w0, #0
4847
; CHECK-NEXT: cinc w0, w8, ge
4948
; CHECK-NEXT: ret
@@ -78,10 +77,9 @@ define i32 @add_sext_ifpos(i32 %x) {
7877
define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
7978
; CHECK-LABEL: add_sext_ifpos_vec_splat:
8079
; CHECK: // %bb.0:
81-
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
82-
; CHECK-NEXT: movi v2.4s, #42
83-
; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s
84-
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
80+
; CHECK-NEXT: movi v1.4s, #42
81+
; CHECK-NEXT: cmge v0.4s, v0.4s, #0
82+
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
8583
; CHECK-NEXT: ret
8684
%c = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
8785
%e = sext <4 x i1> %c to <4 x i32>
@@ -92,7 +90,7 @@ define <4 x i32> @add_sext_ifpos_vec_splat(<4 x i32> %x) {
9290
define i32 @sel_ifpos_fval_bigger(i32 %x) {
9391
; CHECK-LABEL: sel_ifpos_fval_bigger:
9492
; CHECK: // %bb.0:
95-
; CHECK-NEXT: mov w8, #41
93+
; CHECK-NEXT: mov w8, #41 // =0x29
9694
; CHECK-NEXT: cmp w0, #0
9795
; CHECK-NEXT: cinc w0, w8, lt
9896
; CHECK-NEXT: ret
@@ -128,7 +126,7 @@ define i32 @add_zext_ifneg(i32 %x) {
128126
define i32 @sel_ifneg_tval_bigger(i32 %x) {
129127
; CHECK-LABEL: sel_ifneg_tval_bigger:
130128
; CHECK: // %bb.0:
131-
; CHECK-NEXT: mov w8, #41
129+
; CHECK-NEXT: mov w8, #41 // =0x29
132130
; CHECK-NEXT: cmp w0, #0
133131
; CHECK-NEXT: cinc w0, w8, lt
134132
; CHECK-NEXT: ret
@@ -162,7 +160,7 @@ define i32 @add_sext_ifneg(i32 %x) {
162160
define i32 @sel_ifneg_fval_bigger(i32 %x) {
163161
; CHECK-LABEL: sel_ifneg_fval_bigger:
164162
; CHECK: // %bb.0:
165-
; CHECK-NEXT: mov w8, #41
163+
; CHECK-NEXT: mov w8, #41 // =0x29
166164
; CHECK-NEXT: cmp w0, #0
167165
; CHECK-NEXT: cinc w0, w8, ge
168166
; CHECK-NEXT: ret
@@ -199,7 +197,7 @@ define <4 x i32> @add_lshr_not_vec_splat(<4 x i32> %x) {
199197
define i32 @sub_lshr_not(i32 %x) {
200198
; CHECK-LABEL: sub_lshr_not:
201199
; CHECK: // %bb.0:
202-
; CHECK-NEXT: mov w8, #42
200+
; CHECK-NEXT: mov w8, #42 // =0x2a
203201
; CHECK-NEXT: bfxil w8, w0, #31, #1
204202
; CHECK-NEXT: mov w0, w8
205203
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)