
[LLVM][SelectionDAG] Don't legalise splat constants until required. #143571

Draft pull request: wants to merge 2 commits into base: main
136 changes: 68 additions & 68 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1679,81 +1679,81 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
Elt = ConstantInt::get(*getContext(), Elt->getValue());

// In some cases the vector type is legal but the element type is illegal and
// needs to be promoted, for example v8i8 on ARM. In this case, promote the
// inserted value (the type does not need to match the vector element type).
// Any extra bits introduced will be truncated away.
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
APInt NewVal;
if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
else
NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
// example v2i64 on MIPS32. In this case, find the nearest legal type, split
// the value into n parts and use a vector type with n-times the elements.
// Then bitcast to the type requested.
// Legalizing constants too early makes the DAGCombiner's job harder so we
// only legalize if the DAG tells us we must produce legal types.
else if (NewNodesMustHaveLegalTypes && VT.isVector() &&
TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypeExpandInteger) {
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();

// For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
if (VT.isScalableVector() ||
TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
"Can only handle an even split!");
unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;

SmallVector<SDValue, 2> ScalarParts;
for (unsigned i = 0; i != Parts; ++i)
ScalarParts.push_back(getConstant(
NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));

return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts);
}
// thus when necessary we "legalise" the constant here so as to simplify the
// job of calling this function. NOTE: Only legalize when necessary so that
// we don't make DAGCombiner's job harder.
if (NewNodesMustHaveLegalTypes && VT.isVector()) {
// Promote the inserted value (the type does not need to match the vector
// element type). Any extra bits introduced will be truncated away.
if (TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
APInt NewVal;
if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
else
NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// For expansion we find the nearest legal type, split the value into n
// parts and use a vector type with n-times the elements. Then bitcast to
// the type requested.
else if (TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypeExpandInteger) {
const APInt &NewVal = Elt->getValue();
EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits();

// For scalable vectors, try to use a SPLAT_VECTOR_PARTS node.
if (VT.isScalableVector() ||
TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) {
assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 &&
"Can only handle an even split!");
unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits;

SmallVector<SDValue, 2> ScalarParts;
for (unsigned i = 0; i != Parts; ++i)
ScalarParts.push_back(getConstant(
NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));

return getNode(ISD::SPLAT_VECTOR_PARTS, DL, VT, ScalarParts);
}

unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);
unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits;
EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts);

// Check the temporary vector is the correct size. If this fails then
// getTypeToTransformTo() probably returned a type whose size (in bits)
// isn't a power-of-2 factor of the requested type size.
assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());
// Check the temporary vector is the correct size. If this fails then
// getTypeToTransformTo() probably returned a type whose size (in bits)
// isn't a power-of-2 factor of the requested type size.
assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits());

SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i)
EltParts.push_back(getConstant(
NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));
SmallVector<SDValue, 2> EltParts;
for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i)
EltParts.push_back(getConstant(
NewVal.extractBits(ViaEltSizeInBits, i * ViaEltSizeInBits), DL,
ViaEltVT, isT, isO));

// EltParts is currently in little endian order. If we actually want
// big-endian order then reverse it now.
if (getDataLayout().isBigEndian())
std::reverse(EltParts.begin(), EltParts.end());
// EltParts is currently in little endian order. If we actually want
// big-endian order then reverse it now.
if (getDataLayout().isBigEndian())
std::reverse(EltParts.begin(), EltParts.end());

// The elements must be reversed when the element order is different
// to the endianness of the elements (because the BITCAST is itself a
// vector shuffle in this situation). However, we do not need any code to
// perform this reversal because getConstant() is producing a vector
// splat.
// This situation occurs in MIPS MSA.
// The elements must be reversed when the element order is different
// to the endianness of the elements (because the BITCAST is itself a
// vector shuffle in this situation). However, we do not need any code to
// perform this reversal because getConstant() is producing a vector
// splat.
// This situation occurs in MIPS MSA.

SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
llvm::append_range(Ops, EltParts);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
llvm::append_range(Ops, EltParts);

SDValue V =
getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
return V;
SDValue V =
getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
return V;
}
}

assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
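For context, here is a rough sketch of the behavioural difference the restructuring above makes. It is not part of the patch and assumes an AArch64-style target where v8i8 is a legal vector type but the i8 element type is promoted to i32, with DAG being a SelectionDAG for such a target:

```cpp
// Hypothetical illustration only.
SDLoc DL;
SDValue Splat = DAG.getConstant(1, DL, MVT::v8i8);
// Previously the element type was promoted unconditionally, so even before
// type legalisation the splat was built as a BUILD_VECTOR<v8i8> whose
// operands were Constant:i32<1> (the extra bits being truncated away later).
// With this change the operands remain Constant:i8<1> until
// NewNodesMustHaveLegalTypes is set, at which point the promote/expand
// handling above runs as before; DAGCombiner sees the un-legalised form in
// the meantime.
```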
22 changes: 2 additions & 20 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1791,26 +1791,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
SDLoc DL = getCurSDLoc();

// DAG.getConstant() may attempt to legalise the vector constant which can
// significantly change the combines applied to the DAG. To reduce the
// divergence when enabling ConstantInt based vectors we try to construct
// the DAG in the same way as shufflevector based splats. TODO: The
// divergence sometimes leads to better optimisations. Ideally we should
// prevent DAG.getConstant() from legalising too early but there are some
// degradations preventing this.
if (VT.isScalableVector())
return DAG.getNode(
ISD::SPLAT_VECTOR, DL, VT,
DAG.getConstant(CI->getValue(), DL, VT.getVectorElementType()));
if (VT.isFixedLengthVector())
return DAG.getSplatBuildVector(
VT, DL,
DAG.getConstant(CI->getValue(), DL, VT.getVectorElementType()));
return DAG.getConstant(*CI, DL, VT);
}
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
return DAG.getConstant(*CI, getCurSDLoc(), VT);

if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
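The block removed here was itself a workaround for that early legalisation, so it becomes redundant once getConstant() defers the work. A rough sketch of the intended equivalence, assuming VT is a scalable vector type and CI is the ConstantInt being visited (hypothetical, not part of the patch):

```cpp
SDLoc DL = getCurSDLoc();
// What the removed workaround built by hand for scalable vector types:
SDValue Workaround = DAG.getNode(
    ISD::SPLAT_VECTOR, DL, VT,
    DAG.getConstant(CI->getValue(), DL, VT.getVectorElementType()));
// What the simplified code now relies on: getConstant() itself produces an
// equivalent un-legalised splat, since it no longer legalises the element
// type until the DAG requires legal types.
SDValue Direct = DAG.getConstant(*CI, DL, VT);
```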
9 changes: 7 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45540,6 +45540,10 @@ static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
if (!sd_match(Op, m_OneUse(m_BitwiseLogic(m_Value(LHS), m_Value(RHS)))))
return SDValue();

// WIP: Fixes one of the failures but triggers more.
//if (isBitwiseNot(Op))
// return SDValue();

// If either operand was bitcast from DstVT, then perform logic with DstVT (at
// least one of the getBitcast() will fold away).
if (sd_match(LHS, m_OneUse(m_BitCast(m_SpecificVT(DstVT)))) ||
@@ -48138,8 +48142,9 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Check if the first operand is all zeros and Cond type is vXi1.
// If this an avx512 target we can improve the use of zero masking by
// swapping the operands and inverting the condition.
if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
if (!DCI.isBeforeLegalize() && N->getOpcode() == ISD::VSELECT &&
Cond.hasOneUse() && Subtarget.hasAVX512() &&
CondVT.getVectorElementType() == MVT::i1 &&
ISD::isBuildVectorAllZeros(LHS.getNode()) &&
!ISD::isBuildVectorAllZeros(RHS.getNode())) {
// Invert the cond to not(cond) : xor(op,allones)=not(op)
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AArch64/arm64-neon-mul-div-cte.ll
@@ -8,9 +8,9 @@ define <16 x i8> @div16xi8(<16 x i8> %x) {
; CHECK-SD-NEXT: movi v1.16b, #41
; CHECK-SD-NEXT: smull2 v2.8h, v0.16b, v1.16b
; CHECK-SD-NEXT: smull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT: uzp2 v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT: sshr v0.16b, v0.16b, #2
; CHECK-SD-NEXT: usra v0.16b, v0.16b, #7
; CHECK-SD-NEXT: uzp2 v1.16b, v0.16b, v2.16b
; CHECK-SD-NEXT: sshr v0.16b, v1.16b, #2
; CHECK-SD-NEXT: usra v0.16b, v1.16b, #7
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: div16xi8:
@@ -78,9 +78,9 @@ define <8 x i16> @div8xi16(<8 x i16> %x) {
; CHECK-SD-NEXT: smull2 v2.4s, v0.8h, v1.8h
; CHECK-SD-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT: add v0.8h, v1.8h, v0.8h
; CHECK-SD-NEXT: sshr v0.8h, v0.8h, #12
; CHECK-SD-NEXT: usra v0.8h, v0.8h, #15
; CHECK-SD-NEXT: add v1.8h, v1.8h, v0.8h
; CHECK-SD-NEXT: sshr v0.8h, v1.8h, #12
; CHECK-SD-NEXT: usra v0.8h, v1.8h, #15
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: div8xi16:
66 changes: 33 additions & 33 deletions llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
@@ -14,10 +14,10 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2]
; CHECK-NEXT: adrp x8, .LCPI0_3
; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3]
; CHECK-NEXT: usra v1.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: sshl v2.4h, v1.4h, v2.4h
; CHECK-NEXT: usra v2.4h, v1.4h, #15
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_3]
; CHECK-NEXT: mls v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 -124, i16 98, i16 -1003>
ret <4 x i16> %1
@@ -27,14 +27,14 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) {
; CHECK-LABEL: fold_srem_vec_2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #44151 // =0xac77
; CHECK-NEXT: movi v2.4h, #95
; CHECK-NEXT: movi v3.4h, #95
; CHECK-NEXT: dup v1.4h, w8
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v0.4h
; CHECK-NEXT: sshr v1.4h, v1.4h, #6
; CHECK-NEXT: usra v1.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: sshr v2.4h, v1.4h, #6
; CHECK-NEXT: usra v2.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v2.4h, v3.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
ret <4 x i16> %1
@@ -46,15 +46,15 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
; CHECK-LABEL: combine_srem_sdiv:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #44151 // =0xac77
; CHECK-NEXT: movi v2.4h, #95
; CHECK-NEXT: movi v3.4h, #95
; CHECK-NEXT: dup v1.4h, w8
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v0.4h
; CHECK-NEXT: sshr v1.4h, v1.4h, #6
; CHECK-NEXT: usra v1.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: sshr v2.4h, v1.4h, #6
; CHECK-NEXT: usra v2.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v2.4h, v3.4h
; CHECK-NEXT: add v0.4h, v0.4h, v2.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
%2 = sdiv <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -74,10 +74,10 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v0.4h
; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2]
; CHECK-NEXT: usra v1.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: sshl v2.4h, v1.4h, v2.4h
; CHECK-NEXT: usra v2.4h, v1.4h, #15
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_2]
; CHECK-NEXT: mls v0.4h, v2.4h, v1.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
ret <4 x i16> %1
@@ -91,14 +91,14 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: adrp x8, .LCPI4_1
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: and v2.8b, v0.8b, v2.8b
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ushr v2.4h, v1.4h, #15
; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h
; CHECK-NEXT: mov v2.h[0], wzr
; CHECK-NEXT: add v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2]
@@ -118,12 +118,12 @@ define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) {
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: adrp x8, .LCPI5_2
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI5_2]
; CHECK-NEXT: adrp x8, .LCPI5_3
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2]
; CHECK-NEXT: adrp x8, .LCPI5_3
; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ushr v2.4h, v1.4h, #15
; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h
; CHECK-NEXT: mov v2.h[0], wzr
; CHECK-NEXT: add v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_3]
@@ -181,13 +181,13 @@ define <16 x i8> @fold_srem_v16i8(<16 x i8> %x) {
; CHECK-LABEL: fold_srem_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.16b, #103
; CHECK-NEXT: movi v3.16b, #10
; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
; CHECK-NEXT: smull v1.8h, v0.8b, v1.8b
; CHECK-NEXT: uzp2 v1.16b, v1.16b, v2.16b
; CHECK-NEXT: movi v2.16b, #10
; CHECK-NEXT: sshr v1.16b, v1.16b, #2
; CHECK-NEXT: usra v1.16b, v1.16b, #7
; CHECK-NEXT: mls v0.16b, v1.16b, v2.16b
; CHECK-NEXT: sshr v2.16b, v1.16b, #2
; CHECK-NEXT: usra v2.16b, v1.16b, #7
; CHECK-NEXT: mls v0.16b, v2.16b, v3.16b
; CHECK-NEXT: ret
%1 = srem <16 x i8> %x, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
ret <16 x i8> %1
@@ -199,8 +199,8 @@ define <8 x i8> @fold_srem_v8i8(<8 x i8> %x) {
; CHECK-NEXT: movi v1.8b, #103
; CHECK-NEXT: movi v2.8b, #10
; CHECK-NEXT: smull v1.8h, v0.8b, v1.8b
; CHECK-NEXT: shrn v1.8b, v1.8h, #8
; CHECK-NEXT: sshr v1.8b, v1.8b, #2
; CHECK-NEXT: sshr v1.8h, v1.8h, #10
; CHECK-NEXT: xtn v1.8b, v1.8h
; CHECK-NEXT: usra v1.8b, v1.8b, #7
; CHECK-NEXT: mls v0.8b, v1.8b, v2.8b
; CHECK-NEXT: ret
@@ -212,14 +212,14 @@ define <8 x i16> @fold_srem_v8i16(<8 x i16> %x) {
; CHECK-LABEL: fold_srem_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #26215 // =0x6667
; CHECK-NEXT: movi v3.8h, #10
; CHECK-NEXT: dup v1.8h, w8
; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
; CHECK-NEXT: movi v2.8h, #10
; CHECK-NEXT: sshr v1.8h, v1.8h, #2
; CHECK-NEXT: usra v1.8h, v1.8h, #15
; CHECK-NEXT: mls v0.8h, v1.8h, v2.8h
; CHECK-NEXT: sshr v2.8h, v1.8h, #2
; CHECK-NEXT: usra v2.8h, v1.8h, #15
; CHECK-NEXT: mls v0.8h, v2.8h, v3.8h
; CHECK-NEXT: ret
%1 = srem <8 x i16> %x, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
ret <8 x i16> %1
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -356,9 +356,7 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
define <16 x i1> @v16i1(<16 x i1> %x, <16 x i1> %y) nounwind {
; CHECK-LABEL: v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v2.16b, #1
; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
%z = call <16 x i1> @llvm.ssub.sat.v16i1(<16 x i1> %x, <16 x i1> %y)
ret <16 x i1> %z