Skip to content

Commit ef964aa

Browse files
committed
WIP: Attempt vector truncstore
1 parent e5cb18b commit ef964aa

31 files changed

+530
-423
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1394,6 +1394,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13941394
}
13951395
}
13961396

1397+
setTruncStoreAction(MVT::v1i64, MVT::v1i8, Legal);
1398+
13971399
for (auto Op :
13981400
{ISD::FFLOOR, ISD::FNEARBYINT, ISD::FCEIL, ISD::FRINT, ISD::FTRUNC,
13991401
ISD::FROUND, ISD::FROUNDEVEN, ISD::FMAXNUM_IEEE, ISD::FMINNUM_IEEE,
@@ -23989,6 +23991,22 @@ static unsigned getFPSubregForVT(EVT VT) {
2398923991
}
2399023992
}
2399123993

23994+
// Maps an integer element type to the 64-bit-wide fixed vector type made of
// that element: i8 -> v8i8, i16 -> v4i16, i32 -> v2i32, i64 -> v1i64.
// ElVT must be a simple VT (asserted below).
static EVT get64BitVector(EVT ElVT) {
23995+
assert(ElVT.isSimple() && "Expected simple VT");
23996+
switch (ElVT.getSimpleVT().SimpleTy) {
23997+
case MVT::i8:
23998+
return MVT::v8i8;
23999+
case MVT::i16:
24000+
return MVT::v4i16;
24001+
case MVT::i32:
24002+
return MVT::v2i32;
24003+
case MVT::i64:
24004+
return MVT::v1i64;
24005+
default:
24006+
// Only the four integer element types above are handled.
llvm_unreachable("Unexpected VT!");
24007+
}
24008+
}
24009+
2399224010
static SDValue performSTORECombine(SDNode *N,
2399324011
TargetLowering::DAGCombinerInfo &DCI,
2399424012
SelectionDAG &DAG,
@@ -24067,11 +24085,27 @@ static SDValue performSTORECombine(SDNode *N,
2406724085
SDValue ExtIdx = Value.getOperand(1);
2406824086
EVT VectorVT = Vector.getValueType();
2406924087
EVT ElemVT = VectorVT.getVectorElementType();
24070-
if (!ValueVT.isInteger() || ElemVT == MVT::i8 || MemVT == MVT::i8)
24088+
if (!ValueVT.isInteger())
2407124089
return SDValue();
2407224090
if (ValueVT != MemVT && !ST->isTruncatingStore())
2407324091
return SDValue();
2407424092

24093+
if (MemVT == MVT::i8) {
24094+
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
24095+
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
24096+
Value.getValueType(), Vector, ExtIdx);
24097+
EVT VecVT64 = get64BitVector(ElemVT);
24098+
SDValue ExtVector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT64,
24099+
DAG.getUNDEF(VecVT64), Ext, Zero);
24100+
SDValue Cast = DAG.getNode(AArch64ISD::NVCAST, DL, MVT::v1i64, ExtVector);
24101+
return DAG.getTruncStore(ST->getChain(), DL, Cast, ST->getBasePtr(),
24102+
MVT::v1i8, ST->getMemOperand());
24103+
}
24104+
24105+
// TODO: Handle storing i8s to wider types.
24106+
if (ElemVT == MVT::i8)
24107+
return SDValue();
24108+
2407524109
// Heuristic: If there are other users of integer scalars extracted from
2407624110
// this vector that won't fold into the store -- abandon folding. Applying
2407724111
// this fold may extend the vector lifetime and disrupt paired stores.
@@ -28826,6 +28860,10 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
2882628860
auto Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
2882728861
auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
2882828862

28863+
// Can be lowered to a bsub store in ISEL.
28864+
if (VT == MVT::v1i64 && MemVT == MVT::v1i8)
28865+
return SDValue();
28866+
2882928867
if (VT.isFloatingPoint() && Store->isTruncatingStore()) {
2883028868
EVT TruncVT = ContainerVT.changeVectorElementType(
2883128869
Store->getMemoryVT().getVectorElementType());

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4451,8 +4451,6 @@ multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop
44514451
}
44524452

44534453
let AddedComplexity = 19 in {
4454-
defm : VecStoreLane0Pat<am_indexed8, truncstorei8, v16i8, i32, vi8, bsub, uimm12s2, STRBui>;
4455-
defm : VecStoreLane0Pat<am_indexed8, truncstorei8, v4i32, i32, vi8, bsub, uimm12s2, STRBui>;
44564454
defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
44574455
defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
44584456
defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
@@ -4591,6 +4589,18 @@ def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
45914589
def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
45924590
(STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
45934591

4592+
// v1i64 -> bsub truncating stores
4593+
// Supporting patterns: insert an FPR32/FPR64 scalar into lane 0 of an undef v8i8
4594+
def : Pat<(v8i8 (vector_insert (v8i8 (undef)), (i32 FPR32:$src), 0)),
4595+
(INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
4596+
def : Pat<(v8i8 (vector_insert (v8i8 (undef)), (i64 FPR64:$src), 0)),
4597+
(v8i8 (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub), dsub))>;
4598+
// Lower v1i64 -> v1i8 truncstore to bsub store
4599+
def : Pat<(truncstorevi8 v1i64:$VT, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
4600+
(STURBi (vi8 (EXTRACT_SUBREG v1i64:$VT, bsub)), GPR64sp:$Rn, simm9:$offset)>;
4601+
// Unsigned scaled-offset form of the v1i64 -> v1i8 truncating store: the low
// byte (bsub) of the source register is stored with STRBui. am_indexed8
// addresses a single byte, so the 12-bit unsigned offset uses the byte-scaled
// operand uimm12s1 rather than the word-scaled uimm12s4.
def : Pat<(truncstorevi8 v1i64:$VT, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
4601+
(STRBui (vi8 (EXTRACT_SUBREG v1i64:$VT, bsub)), GPR64sp:$Rn, uimm12s1:$offset)>;
4603+
45944604
// Match stores from lane 0 to the appropriate subreg's store.
45954605
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
45964606
ValueType VTy, ValueType STy,
@@ -4600,7 +4610,6 @@ multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
46004610
}
46014611

46024612
let AddedComplexity = 19 in {
4603-
defm : VecStoreULane0Pat<truncstorei8, v16i8, i32, vi8, bsub, STURBi>;
46044613
defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
46054614
defm : VecStoreULane0Pat<store, v8f16, f16, f16, hsub, STURHi>;
46064615
defm : VecStoreULane0Pat<store, v4i32, i32, i32, ssub, STURSi>;
@@ -7242,6 +7251,11 @@ multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64, ValueType VTSVE
72427251
(INS V128:$src, imm:$Immd,
72437252
(SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
72447253

7254+
// Lane 0 of a 128-bit source inserted into lane 0 of an undef 64-bit vector:
// selected as a plain dsub subregister extract of the source, with no INS.
def : Pat<(VT64 (vector_insert (VT64 (undef)),
7255+
(VTScal (vector_extract (VT128 V128:$Rn), (i64 0))),
7256+
(i64 0))),
7257+
(EXTRACT_SUBREG $Rn, dsub)>;
7258+
72457259
def : Pat<(VT64 (vector_insert V64:$src,
72467260
(VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
72477261
(i64 imm:$Immd))),

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,43 +1827,6 @@ let Predicates = [HasSVE] in {
18271827
defm : adrXtwShiftPat<nxv2i64, nxv2i1, 3>;
18281828
} // End HasSVE
18291829

1830-
multiclass SVEVecStoreLanePat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
1831-
ValueType VTy, ValueType STy,
1832-
ValueType SubRegTy,
1833-
SubRegIndex SubRegIdx, Operand IndexType,
1834-
Instruction STR,
1835-
Instruction DUP, AsmVectorIndexOpnd DUPIdxTy> {
1836-
let Predicates = [HasSVE_or_SME] in {
1837-
// Same as Neon VecStoreLane0Pat but without matching VecListOne128.
1838-
def : Pat<(storeop (STy (vector_extract VTy:$Vt, (i64 0))),
1839-
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
1840-
(STR (SubRegTy (EXTRACT_SUBREG $Vt, SubRegIdx)),
1841-
GPR64sp:$Rn, IndexType:$offset)>;
1842-
}
1843-
1844-
// Non-zero immediate index:
1845-
def : Pat<(storeop (STy (vector_extract VTy:$Vt, DUPIdxTy:$idx)),
1846-
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
1847-
(STR (SubRegTy (EXTRACT_SUBREG (DUP $Vt, DUPIdxTy:$idx), SubRegIdx)),
1848-
GPR64sp:$Rn, IndexType:$offset)>;
1849-
}
1850-
1851-
// Note: Types other than i8 are handled in performSTORECombine -- i8 is tricky
1852-
// to handle before ISEL as it is not really a legal type in many places, nor
1853-
// is its equivalently sized FP variant.
1854-
let AddedComplexity = 19 in {
1855-
// Lane 0 truncating stores
1856-
// i32 -> i8
1857-
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv4i32, i32, vi8, bsub, uimm12s4, STRBui, DUP_ZZI_S, sve_elm_idx_extdup_s>;
1858-
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv4i32, i32, vi8, bsub, simm9, STURBi, DUP_ZZI_S, sve_elm_idx_extdup_s>;
1859-
// i64 -> i8
1860-
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv2i64, i64, vi8, bsub, uimm12s4, STRBui, DUP_ZZI_D, sve_elm_idx_extdup_d>;
1861-
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv2i64, i64, vi8, bsub, simm9, STURBi, DUP_ZZI_D, sve_elm_idx_extdup_d>;
1862-
// i8 -> i8 (technically a truncate as the extracted type is i32)
1863-
defm : SVEVecStoreLanePat<am_indexed8, truncstorei8, nxv16i8, i32, vi8, bsub, uimm12s4, STRBui, DUP_ZZI_B, sve_elm_idx_extdup_b>;
1864-
defm : SVEVecStoreLanePat<am_unscaled8, truncstorei8, nxv16i8, i32, vi8, bsub, simm9, STURBi, DUP_ZZI_B, sve_elm_idx_extdup_b>;
1865-
}
1866-
18671830
let Predicates = [HasSVE_or_SME] in {
18681831
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
18691832

@@ -3245,6 +3208,8 @@ let Predicates = [HasSVE_or_SME] in {
32453208
// Insert scalar into undef[0]
32463209
def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i32 FPR32:$src), 0)),
32473210
(INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
3211+
def : Pat<(nxv16i8 (vector_insert (nxv16i8 (undef)), (i64 FPR64:$src), 0)),
3212+
(INSERT_SUBREG (nxv16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
32483213
def : Pat<(nxv8i16 (vector_insert (nxv8i16 (undef)), (i32 FPR32:$src), 0)),
32493214
(INSERT_SUBREG (nxv8i16 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
32503215
def : Pat<(nxv4i32 (vector_insert (nxv4i32 (undef)), (i32 FPR32:$src), 0)),

0 commit comments

Comments
 (0)