Skip to content

Commit 07a8ff4

Browse files
committed
ARM64: handle v1i1 types arising from setcc properly.
There were several overlapping problems here, and this solution is closely inspired by the one adopted in AArch64 in r201381. Firstly, scalarisation of v1i1 setcc operations simply fails if the input types are legal. This is fixed in LegalizeVectorTypes.cpp this time, and allows AArch64 code to be simplified slightly. Second, vselect with such a setcc feeding into it ends up in ScalarizeVectorOperand, where it's not handled. I experimented with an implementation, but found that whatever DAG came out was rather horrific. I think Hao's DAG combine approach is a good one for quality, though there are edge cases it won't catch (to be fixed separately). Should fix PR19335. llvm-svn: 205625
1 parent 92a4744 commit 07a8ff4

File tree

4 files changed

+114
-31
lines changed

4 files changed

+114
-31
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -331,12 +331,24 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
331331
assert(N->getValueType(0).isVector() &&
332332
N->getOperand(0).getValueType().isVector() &&
333333
"Operand types must be vectors");
334-
335-
SDValue LHS = GetScalarizedVector(N->getOperand(0));
336-
SDValue RHS = GetScalarizedVector(N->getOperand(1));
334+
SDValue LHS = N->getOperand(0);
335+
SDValue RHS = N->getOperand(1);
336+
EVT OpVT = LHS.getValueType();
337337
EVT NVT = N->getValueType(0).getVectorElementType();
338338
SDLoc DL(N);
339339

340+
// The result needs scalarizing, but it's not a given that the source does.
341+
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
342+
LHS = GetScalarizedVector(LHS);
343+
RHS = GetScalarizedVector(RHS);
344+
} else {
345+
EVT VT = OpVT.getVectorElementType();
346+
LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
347+
DAG.getConstant(0, TLI.getVectorIdxTy()));
348+
RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
349+
DAG.getConstant(0, TLI.getVectorIdxTy()));
350+
}
351+
340352
// Turn it into a scalar SETCC.
341353
SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
342354
N->getOperand(2));

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -538,7 +538,6 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
538538
setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
539539
}
540540

541-
setTargetDAGCombine(ISD::SETCC);
542541
setTargetDAGCombine(ISD::SIGN_EXTEND);
543542
setTargetDAGCombine(ISD::VSELECT);
544543
}
@@ -4284,32 +4283,6 @@ static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
42844283
return SDValue(N, 0);
42854284
}
42864285

4287-
// v1i1 setcc ->
4288-
// v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt))
4289-
// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result.
4290-
// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC.
4291-
static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
4292-
EVT ResVT = N->getValueType(0);
4293-
4294-
if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||
4295-
ResVT.getVectorElementType() != MVT::i1)
4296-
return SDValue();
4297-
4298-
SDValue LHS = N->getOperand(0);
4299-
SDValue RHS = N->getOperand(1);
4300-
EVT CmpVT = LHS.getValueType();
4301-
LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
4302-
CmpVT.getVectorElementType(), LHS,
4303-
DAG.getConstant(0, MVT::i64));
4304-
RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
4305-
CmpVT.getVectorElementType(), RHS,
4306-
DAG.getConstant(0, MVT::i64));
4307-
SDValue SetCC =
4308-
DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,
4309-
cast<CondCodeSDNode>(N->getOperand(2))->get());
4310-
return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);
4311-
}
4312-
43134286
// vselect (v1i1 setcc) ->
43144287
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
43154288
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
@@ -4378,7 +4351,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
43784351
case ISD::SRA:
43794352
case ISD::SRL:
43804353
return PerformShiftCombine(N, DCI, getSubtarget());
4381-
case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);
43824354
case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
43834355
case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
43844356
case ISD::INTRINSIC_WO_CHAIN:

llvm/lib/Target/ARM64/ARM64ISelLowering.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,8 @@ ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
434434

435435
setTargetDAGCombine(ISD::MUL);
436436

437+
setTargetDAGCombine(ISD::VSELECT);
438+
437439
MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
438440
MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
439441
MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
@@ -7227,6 +7229,36 @@ static SDValue performBRCONDCombine(SDNode *N,
72277229
return SDValue();
72287230
}
72297231

7232+
// vselect (v1i1 setcc) ->
7233+
// vselect (v1iXX setcc) (XX is the size of the compared operand type)
7234+
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
7235+
// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
7236+
// such VSELECT.
7237+
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
7238+
SDValue N0 = N->getOperand(0);
7239+
EVT CCVT = N0.getValueType();
7240+
7241+
if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
7242+
CCVT.getVectorElementType() != MVT::i1)
7243+
return SDValue();
7244+
7245+
EVT ResVT = N->getValueType(0);
7246+
EVT CmpVT = N0.getOperand(0).getValueType();
7247+
// Only combine when the result type is of the same size as the compared
7248+
// operands.
7249+
if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
7250+
return SDValue();
7251+
7252+
SDValue IfTrue = N->getOperand(1);
7253+
SDValue IfFalse = N->getOperand(2);
7254+
SDValue SetCC =
7255+
DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
7256+
N0.getOperand(0), N0.getOperand(1),
7257+
cast<CondCodeSDNode>(N0.getOperand(2))->get());
7258+
return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
7259+
IfTrue, IfFalse);
7260+
}
7261+
72307262
SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
72317263
DAGCombinerInfo &DCI) const {
72327264
SelectionDAG &DAG = DCI.DAG;
@@ -7255,6 +7287,8 @@ SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
72557287
return performBitcastCombine(N, DCI, DAG);
72567288
case ISD::CONCAT_VECTORS:
72577289
return performConcatVectorsCombine(N, DCI, DAG);
7290+
case ISD::VSELECT:
7291+
return performVSelectCombine(N, DCI.DAG);
72587292
case ISD::STORE:
72597293
return performSTORECombine(N, DCI, DAG, Subtarget);
72607294
case ARM64ISD::BRCOND:
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; RUN: llc %s -o - -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s
2+
3+
; This is the analogue of AArch64's file of the same name. It's mostly testing
4+
; some form of correct lowering occurs, the tests are a little artificial but I
5+
; strongly suspect there's room for improved CodeGen (FIXME).
6+
7+
define i64 @test_sext_extr_cmp_0(<1 x i64> %v1, <1 x i64> %v2) {
8+
; CHECK-LABEL: test_sext_extr_cmp_0:
9+
; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
10+
; CHECK: csinc
11+
%1 = icmp sge <1 x i64> %v1, %v2
12+
%2 = extractelement <1 x i1> %1, i32 0
13+
%vget_lane = sext i1 %2 to i64
14+
ret i64 %vget_lane
15+
}
16+
17+
define i64 @test_sext_extr_cmp_1(<1 x double> %v1, <1 x double> %v2) {
18+
; CHECK-LABEL: test_sext_extr_cmp_1:
19+
; CHECK: fcmp {{d[0-9]+}}, {{d[0-9]+}}
20+
%1 = fcmp oeq <1 x double> %v1, %v2
21+
%2 = extractelement <1 x i1> %1, i32 0
22+
%vget_lane = sext i1 %2 to i64
23+
ret i64 %vget_lane
24+
}
25+
26+
define <1 x i64> @test_select_v1i1_0(<1 x i64> %v1, <1 x i64> %v2, <1 x i64> %v3) {
27+
; CHECK-LABEL: test_select_v1i1_0:
28+
; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
29+
; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
30+
%1 = icmp eq <1 x i64> %v1, %v2
31+
%res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
32+
ret <1 x i64> %res
33+
}
34+
35+
define <1 x i64> @test_select_v1i1_1(<1 x double> %v1, <1 x double> %v2, <1 x i64> %v3) {
36+
; CHECK-LABEL: test_select_v1i1_1:
37+
; CHECK: fcmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
38+
; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
39+
%1 = fcmp oeq <1 x double> %v1, %v2
40+
%res = select <1 x i1> %1, <1 x i64> zeroinitializer, <1 x i64> %v3
41+
ret <1 x i64> %res
42+
}
43+
44+
define <1 x double> @test_select_v1i1_2(<1 x i64> %v1, <1 x i64> %v2, <1 x double> %v3) {
45+
; CHECK-LABEL: test_select_v1i1_2:
46+
; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
47+
; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
48+
%1 = icmp eq <1 x i64> %v1, %v2
49+
%res = select <1 x i1> %1, <1 x double> zeroinitializer, <1 x double> %v3
50+
ret <1 x double> %res
51+
}
52+
53+
define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
54+
; CHECK-LABEL: test_br_extr_cmp:
55+
; CHECK: cmp x{{[0-9]+}}, x{{[0-9]+}}
56+
%1 = icmp eq <1 x i64> %v1, %v2
57+
%2 = extractelement <1 x i1> %1, i32 0
58+
br i1 %2, label %if.end, label %if.then
59+
60+
if.then:
61+
ret i32 0;
62+
63+
if.end:
64+
ret i32 1;
65+
}

0 commit comments

Comments
 (0)