
Commit 4675db5

[DAGCombiner] Add support for scalarising extracts of a vector setcc (#117566)
For IR like this:

  %icmp = icmp ult <4 x i32> %a, splat (i32 5)
  %res = extractelement <4 x i1> %icmp, i32 1

where there is only one use of %icmp, we can take a similar approach to what we already do for binary ops such as add, sub, etc., and convert this into:

  %ext = extractelement <4 x i32> %a, i32 1
  %res = icmp ult i32 %ext, 5

For AArch64 targets at least, the scalar boolean result will almost certainly need to be in a GPR anyway, since it will probably be used by branches for control flow. I've tried to reuse the existing code in scalarizeExtractedBinop to also work for setcc.

NOTE: The optimisation does not apply to tests such as extract_icmp_v4i32_splat_rhs in the file CodeGen/AArch64/extract-vector-cmp.ll because scalarizeExtractedBinOp only works if one of the input operands is a constant.
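For the splat example above, the new AArch64 test added in this patch (extract_icmp_v4i32_const_splat_rhs in CodeGen/AArch64/extract-vector-cmp.ll) expects codegen along these lines, i.e. a scalar compare against the extracted lane instead of a vector compare followed by a lane move:

  mov w8, v0.s[1]
  cmp w8, #5
  cset w0, lo
  ret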
1 parent 4b5e7fa commit 4675db5

8 files changed: +270 −41 lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 27 additions & 16 deletions

@@ -22749,16 +22749,22 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 
 /// Transform a vector binary operation into a scalar binary operation by moving
 /// the math/logic after an extract element of a vector.
-static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
-                                       const SDLoc &DL, bool LegalOperations) {
+static SDValue scalarizeExtractedBinOp(SDNode *ExtElt, SelectionDAG &DAG,
+                                       const SDLoc &DL, bool LegalTypes) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Vec = ExtElt->getOperand(0);
   SDValue Index = ExtElt->getOperand(1);
   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
-  if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+  unsigned Opc = Vec.getOpcode();
+  if (!IndexC || !Vec.hasOneUse() || (!TLI.isBinOp(Opc) && Opc != ISD::SETCC) ||
       Vec->getNumValues() != 1)
     return SDValue();
 
+  EVT ResVT = ExtElt->getValueType(0);
+  if (Opc == ISD::SETCC &&
+      (ResVT != Vec.getValueType().getVectorElementType() || LegalTypes))
+    return SDValue();
+
   // Targets may want to avoid this to prevent an expensive register transfer.
   if (!TLI.shouldScalarizeBinop(Vec))
     return SDValue();
@@ -22769,19 +22775,24 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
   SDValue Op0 = Vec.getOperand(0);
   SDValue Op1 = Vec.getOperand(1);
   APInt SplatVal;
-  if (isAnyConstantBuildVector(Op0, true) ||
-      ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
-      isAnyConstantBuildVector(Op1, true) ||
-      ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
-    // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
-    // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
-    EVT VT = ExtElt->getValueType(0);
-    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
-    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
-    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
-  }
+  if (!isAnyConstantBuildVector(Op0, true) &&
+      !ISD::isConstantSplatVector(Op0.getNode(), SplatVal) &&
+      !isAnyConstantBuildVector(Op1, true) &&
+      !ISD::isConstantSplatVector(Op1.getNode(), SplatVal))
+    return SDValue();
 
-  return SDValue();
+  // extractelt (op X, C), IndexC --> op (extractelt X, IndexC), C'
+  // extractelt (op C, X), IndexC --> op C', (extractelt X, IndexC)
+  if (Opc == ISD::SETCC) {
+    EVT OpVT = Op0.getValueType().getVectorElementType();
+    Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op0, Index);
+    Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, Op1, Index);
+    return DAG.getSetCC(DL, ResVT, Op0, Op1,
+                        cast<CondCodeSDNode>(Vec->getOperand(2))->get());
+  }
+  Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op0, Index);
+  Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op1, Index);
+  return DAG.getNode(Opc, DL, ResVT, Op0, Op1);
 }
 
 // Given a ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
@@ -23014,7 +23025,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
   }
 
-  if (SDValue BO = scalarizeExtractedBinop(N, DAG, DL, LegalOperations))
+  if (SDValue BO = scalarizeExtractedBinOp(N, DAG, DL, LegalTypes))
     return BO;
 
   if (VecVT.isScalableVector())

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 4 additions & 0 deletions

@@ -1348,6 +1348,10 @@ class AArch64TargetLowering : public TargetLowering {
   unsigned getMinimumJumpTableEntries() const override;
 
   bool softPromoteHalfType() const override { return true; }
+
+  bool shouldScalarizeBinop(SDValue VecOp) const override {
+    return VecOp.getOpcode() == ISD::SETCC;
+  }
 };
 
 namespace AArch64 {

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 1 deletion

@@ -2093,7 +2093,7 @@ bool RISCVTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
 
   // Assume target opcodes can't be scalarized.
   // TODO - do we have any exceptions?
-  if (Opc >= ISD::BUILTIN_OP_END)
+  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
     return false;
 
   // If the vector op is not supported, try to convert to scalar.

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 1 addition & 1 deletion

@@ -429,7 +429,7 @@ bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
 
   // Assume target opcodes can't be scalarized.
   // TODO - do we have any exceptions?
-  if (Opc >= ISD::BUILTIN_OP_END)
+  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
     return false;
 
   // If the vector op is not supported, try to convert to scalar.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion

@@ -3306,7 +3306,7 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
 
   // Assume target opcodes can't be scalarized.
   // TODO - do we have any exceptions?
-  if (Opc >= ISD::BUILTIN_OP_END)
+  if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opc))
     return false;
 
   // If the vector op is not supported, try to convert to scalar.

llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll

Lines changed: 24 additions & 22 deletions

@@ -5,7 +5,7 @@
 
 declare void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8>, <vscale x 16 x ptr>, i32 immarg, <vscale x 16 x i1>)
 
-define fastcc i8 @allocno_reload_assign() {
+define fastcc i8 @allocno_reload_assign(ptr %p) {
 ; CHECK-LABEL: allocno_reload_assign:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    fmov d0, xzr
@@ -14,8 +14,8 @@ define fastcc i8 @allocno_reload_assign() {
 ; CHECK-NEXT:    cmpeq p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    uzp1 p0.s, p0.s, p0.s
 ; CHECK-NEXT:    uzp1 p0.h, p0.h, p0.h
-; CHECK-NEXT:    uzp1 p0.b, p0.b, p0.b
-; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    uzp1 p8.b, p0.b, p0.b
+; CHECK-NEXT:    mov z0.b, p8/z, #1 // =0x1
 ; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    mov z0.b, #0 // =0x0
 ; CHECK-NEXT:    uunpklo z1.h, z0.b
@@ -30,34 +30,35 @@ define fastcc i8 @allocno_reload_assign() {
 ; CHECK-NEXT:    punpklo p1.h, p0.b
 ; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    punpklo p2.h, p1.b
-; CHECK-NEXT:    punpkhi p3.h, p1.b
+; CHECK-NEXT:    punpkhi p4.h, p1.b
 ; CHECK-NEXT:    uunpklo z0.d, z2.s
 ; CHECK-NEXT:    uunpkhi z1.d, z2.s
-; CHECK-NEXT:    punpklo p5.h, p0.b
+; CHECK-NEXT:    punpklo p6.h, p0.b
 ; CHECK-NEXT:    uunpklo z2.d, z3.s
 ; CHECK-NEXT:    uunpkhi z3.d, z3.s
-; CHECK-NEXT:    punpkhi p7.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:    uunpklo z4.d, z5.s
 ; CHECK-NEXT:    uunpkhi z5.d, z5.s
 ; CHECK-NEXT:    uunpklo z6.d, z7.s
 ; CHECK-NEXT:    uunpkhi z7.d, z7.s
-; CHECK-NEXT:    punpklo p0.h, p2.b
-; CHECK-NEXT:    punpkhi p1.h, p2.b
-; CHECK-NEXT:    punpklo p2.h, p3.b
-; CHECK-NEXT:    punpkhi p3.h, p3.b
-; CHECK-NEXT:    punpklo p4.h, p5.b
-; CHECK-NEXT:    punpkhi p5.h, p5.b
-; CHECK-NEXT:    punpklo p6.h, p7.b
-; CHECK-NEXT:    punpkhi p7.h, p7.b
+; CHECK-NEXT:    punpklo p1.h, p2.b
+; CHECK-NEXT:    punpkhi p2.h, p2.b
+; CHECK-NEXT:    punpklo p3.h, p4.b
+; CHECK-NEXT:    punpkhi p4.h, p4.b
+; CHECK-NEXT:    punpklo p5.h, p6.b
+; CHECK-NEXT:    punpkhi p6.h, p6.b
+; CHECK-NEXT:    punpklo p7.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
 ; CHECK-NEXT:  .LBB0_1: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    st1b { z0.d }, p0, [z16.d]
-; CHECK-NEXT:    st1b { z1.d }, p1, [z16.d]
-; CHECK-NEXT:    st1b { z2.d }, p2, [z16.d]
-; CHECK-NEXT:    st1b { z3.d }, p3, [z16.d]
-; CHECK-NEXT:    st1b { z4.d }, p4, [z16.d]
-; CHECK-NEXT:    st1b { z5.d }, p5, [z16.d]
-; CHECK-NEXT:    st1b { z6.d }, p6, [z16.d]
-; CHECK-NEXT:    st1b { z7.d }, p7, [z16.d]
+; CHECK-NEXT:    st1b { z0.d }, p1, [z16.d]
+; CHECK-NEXT:    st1b { z1.d }, p2, [z16.d]
+; CHECK-NEXT:    st1b { z2.d }, p3, [z16.d]
+; CHECK-NEXT:    st1b { z3.d }, p4, [z16.d]
+; CHECK-NEXT:    st1b { z4.d }, p5, [z16.d]
+; CHECK-NEXT:    st1b { z5.d }, p6, [z16.d]
+; CHECK-NEXT:    st1b { z6.d }, p7, [z16.d]
+; CHECK-NEXT:    st1b { z7.d }, p0, [z16.d]
+; CHECK-NEXT:    str p8, [x0]
 ; CHECK-NEXT:    b .LBB0_1
   br label %1
 
@@ -66,6 +67,7 @@ define fastcc i8 @allocno_reload_assign() {
   %constexpr1 = shufflevector <vscale x 16 x i1> %constexpr, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
   %constexpr2 = xor <vscale x 16 x i1> %constexpr1, shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
   call void @llvm.masked.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x ptr> zeroinitializer, i32 0, <vscale x 16 x i1> %constexpr2)
+  store <vscale x 16 x i1> %constexpr, ptr %p, align 16
   br label %1
 }
 

llvm/test/CodeGen/AArch64/extract-vector-cmp.ll

Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+
+define i1 @extract_icmp_v4i32_const_splat_rhs(<4 x i32> %a) {
+; CHECK-LABEL: extract_icmp_v4i32_const_splat_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    cmp w8, #5
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, splat (i32 5)
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_const_splat_lhs(<4 x i32> %a) {
+; CHECK-LABEL: extract_icmp_v4i32_const_splat_lhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    cmp w8, #7
+; CHECK-NEXT:    cset w0, hi
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> splat(i32 7), %a
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_const_vec_rhs(<4 x i32> %a) {
+; CHECK-LABEL: extract_icmp_v4i32_const_vec_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, v0.s[1]
+; CHECK-NEXT:    cmp w8, #234
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, <i32 5, i32 234, i32 -1, i32 7>
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_fcmp_v4f32_const_splat_rhs(<4 x float> %a) {
+; CHECK-LABEL: extract_fcmp_v4f32_const_splat_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    fmov s1, #4.00000000
+; CHECK-NEXT:    fcmp s0, s1
+; CHECK-NEXT:    cset w0, lt
+; CHECK-NEXT:    ret
+  %fcmp = fcmp ult <4 x float> %a, splat(float 4.0e+0)
+  %ext = extractelement <4 x i1> %fcmp, i32 1
+  ret i1 %ext
+}
+
+define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
+; CHECK-LABEL: vector_loop_with_icmp:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    mov w9, #16 // =0x10
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    add x8, x0, #4
+; CHECK-NEXT:    mov w10, #1 // =0x1
+; CHECK-NEXT:    b .LBB4_2
+; CHECK-NEXT:  .LBB4_1: // %pred.store.continue6
+; CHECK-NEXT:    // in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    subs x9, x9, #2
+; CHECK-NEXT:    add x8, x8, #8
+; CHECK-NEXT:    b.eq .LBB4_6
+; CHECK-NEXT:  .LBB4_2: // %vector.body
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    fmov x11, d0
+; CHECK-NEXT:    cmp x11, #14
+; CHECK-NEXT:    b.hi .LBB4_4
+; CHECK-NEXT:  // %bb.3: // %pred.store.if
+; CHECK-NEXT:    // in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    stur w10, [x8, #-4]
+; CHECK-NEXT:  .LBB4_4: // %pred.store.continue
+; CHECK-NEXT:    // in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    mov x11, v0.d[1]
+; CHECK-NEXT:    cmp x11, #14
+; CHECK-NEXT:    b.hi .LBB4_1
+; CHECK-NEXT:  // %bb.5: // %pred.store.if5
+; CHECK-NEXT:    // in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    str w10, [x8]
+; CHECK-NEXT:    b .LBB4_1
+; CHECK-NEXT:  .LBB4_6: // %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  br label %vector.body
+
+vector.body:
+  %index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue6 ]
+  %vec.ind = phi <2 x i64> [ <i64 0, i64 1>, %entry ], [ %vec.ind.next, %pred.store.continue6 ]
+  %vec.cmp = icmp ult <2 x i64> %vec.ind, <i64 15, i64 15>
+  %c0 = extractelement <2 x i1> %vec.cmp, i64 0
+  br i1 %c0, label %pred.store.if, label %pred.store.continue
+
+pred.store.if:
+  %arrayidx = getelementptr inbounds i32, ptr %dest, i64 %index
+  store i32 1, ptr %arrayidx, align 4
+  br label %pred.store.continue
+
+pred.store.continue:
+  %c1 = extractelement <2 x i1> %vec.cmp, i64 1
+  br i1 %c1, label %pred.store.if5, label %pred.store.continue6
+
+pred.store.if5:
+  %indexp1 = or disjoint i64 %index, 1
+  %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %indexp1
+  store i32 1, ptr %arrayidx2, align 4
+  br label %pred.store.continue6
+
+pred.store.continue6:
+  %index.next = add i64 %index, 2
+  %vec.ind.next = add <2 x i64> %vec.ind, <i64 2, i64 2>
+  %index.cmp = icmp eq i64 %index.next, 16
+  br i1 %index.cmp, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:
+  ret void
+}
+
+
+; Negative tests
+
+define i1 @extract_icmp_v4i32_splat_rhs(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: extract_icmp_v4i32_splat_rhs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    dup v1.4s, w0
+; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    umov w8, v0.h[1]
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    ret
+  %ins = insertelement <4 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
+  %icmp = icmp ult <4 x i32> %a, %splat
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_splat_rhs_mul_use(<4 x i32> %a, ptr %p) {
+; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_mul_use:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #235
+; CHECK-NEXT:    adrp x9, .LCPI6_0
+; CHECK-NEXT:    mov x8, x0
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI6_0]
+; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v1.4h, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    umov w9, v1.h[1]
+; CHECK-NEXT:    fmov w10, s0
+; CHECK-NEXT:    and w0, w9, #0x1
+; CHECK-NEXT:    strb w10, [x8]
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, splat(i32 235)
+  %ext = extractelement <4 x i1> %icmp, i32 1
+  store <4 x i1> %icmp, ptr %p, align 4
+  ret i1 %ext
+}
+
+define i1 @extract_icmp_v4i32_splat_rhs_unknown_idx(<4 x i32> %a, i32 %c) {
+; CHECK-LABEL: extract_icmp_v4i32_splat_rhs_unknown_idx:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movi v1.4s, #127
+; CHECK-NEXT:    add x8, sp, #8
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    bfi x8, x0, #1, #2
+; CHECK-NEXT:    cmhi v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    str d0, [sp, #8]
+; CHECK-NEXT:    ldrh w8, [x8]
+; CHECK-NEXT:    and w0, w8, #0x1
+; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    ret
+  %icmp = icmp ult <4 x i32> %a, splat(i32 127)
+  %ext = extractelement <4 x i1> %icmp, i32 %c
+  ret i1 %ext
+}
