Skip to content

Commit d8752c7

Browse files
authored
Merge pull request #3177 from apple/eng/revert-1528a4d40022925dcc3e8cb6b8af7dd109ad7075-0726
Revert "[llvm][sve] Lowering for VLS truncating stores"
2 parents afdb120 + 0b6bbe9 commit d8752c7

File tree

9 files changed

+52
-315
lines changed

9 files changed

+52
-315
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1271,14 +1271,6 @@ class TargetLoweringBase {
12711271
getTruncStoreAction(ValVT, MemVT) == Custom);
12721272
}
12731273

1274-
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
1275-
bool LegalOnly) const {
1276-
if (LegalOnly)
1277-
return isTruncStoreLegal(ValVT, MemVT);
1278-
1279-
return isTruncStoreLegalOrCustom(ValVT, MemVT);
1280-
}
1281-
12821274
/// Return how the indexed load should be treated: either it is legal, needs
12831275
/// to be promoted to a larger size, needs to be expanded to some other code
12841276
/// sequence, or the target has a custom expander for it.

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -18089,11 +18089,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
1808918089

1809018090
// If this is an FP_ROUND or TRUNC followed by a store, fold this into a
1809118091
// truncating store. We can do this even if this is already a truncstore.
18092-
if ((Value.getOpcode() == ISD::FP_ROUND ||
18093-
Value.getOpcode() == ISD::TRUNCATE) &&
18094-
Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18095-
TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
18096-
ST->getMemoryVT(), LegalOperations)) {
18092+
if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
18093+
&& Value.getNode()->hasOneUse() && ST->isUnindexed() &&
18094+
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
18095+
ST->getMemoryVT())) {
1809718096
return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
1809818097
Ptr, ST->getMemoryVT(), ST->getMemOperand());
1809918098
}

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -1249,13 +1249,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
12491249
}
12501250
}
12511251

1252-
// SVE supports truncating stores of 64 and 128-bit vectors
1253-
setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1254-
setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1255-
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1256-
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1257-
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1258-
12591252
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
12601253
MVT::nxv4f32, MVT::nxv2f64}) {
12611254
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
@@ -1503,16 +1496,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
15031496
setCondCodeAction(ISD::SETUNE, VT, Expand);
15041497
}
15051498

1506-
// Mark integer truncating stores as having custom lowering
1507-
if (VT.isInteger()) {
1508-
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1509-
while (InnerVT != VT) {
1510-
setTruncStoreAction(VT, InnerVT, Custom);
1511-
InnerVT = InnerVT.changeVectorElementType(
1512-
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1513-
}
1514-
}
1515-
15161499
// Lower fixed length vector operations to scalable equivalents.
15171500
setOperationAction(ISD::ABS, VT, Custom);
15181501
setOperationAction(ISD::ADD, VT, Custom);
@@ -4559,7 +4542,7 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
45594542
EVT MemVT = StoreNode->getMemoryVT();
45604543

45614544
if (VT.isVector()) {
4562-
if (useSVEForFixedLengthVectorVT(VT, true))
4545+
if (useSVEForFixedLengthVectorVT(VT))
45634546
return LowerFixedLengthVectorStoreToSVE(Op, DAG);
45644547

45654548
unsigned AS = StoreNode->getAddressSpace();
@@ -4571,8 +4554,7 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
45714554
return scalarizeVectorStore(StoreNode, DAG);
45724555
}
45734556

4574-
if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4575-
MemVT == MVT::v4i8) {
4557+
if (StoreNode->isTruncatingStore()) {
45764558
return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
45774559
}
45784560
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of
@@ -15354,29 +15336,6 @@ static bool performTBISimplification(SDValue Addr,
1535415336
return false;
1535515337
}
1535615338

15357-
static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
15358-
assert(N->getOpcode() == ISD::STORE ||
15359-
N->getOpcode() == ISD::MSTORE && "Expected STORE dag node in input!");
15360-
15361-
if (auto Store = dyn_cast<StoreSDNode>(N)) {
15362-
if (!Store->isTruncatingStore() || Store->isIndexed())
15363-
return SDValue();
15364-
SDValue Ext = Store->getValue();
15365-
auto ExtOpCode = Ext.getOpcode();
15366-
if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
15367-
ExtOpCode != ISD::ANY_EXTEND)
15368-
return SDValue();
15369-
SDValue Orig = Ext->getOperand(0);
15370-
if (Store->getMemoryVT() != Orig->getValueType(0))
15371-
return SDValue();
15372-
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
15373-
Store->getBasePtr(), Store->getPointerInfo(),
15374-
Store->getAlign());
15375-
}
15376-
15377-
return SDValue();
15378-
}
15379-
1538015339
static SDValue performSTORECombine(SDNode *N,
1538115340
TargetLowering::DAGCombinerInfo &DCI,
1538215341
SelectionDAG &DAG,
@@ -15388,9 +15347,6 @@ static SDValue performSTORECombine(SDNode *N,
1538815347
performTBISimplification(N->getOperand(2), DCI, DAG))
1538915348
return SDValue(N, 0);
1539015349

15391-
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
15392-
return Store;
15393-
1539415350
return SDValue();
1539515351
}
1539615352

llvm/lib/Target/AMDGPU/R600ISelLowering.h

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -54,15 +54,6 @@ class R600TargetLowering final : public AMDGPUTargetLowering {
5454
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
5555
bool *IsFast = nullptr) const override;
5656

57-
virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT,
58-
bool LegalOperations) const override {
59-
// R600 has "custom" lowering for truncating stores despite not supporting
60-
// those instructions. If we allow that custom lowering in the DAG combiner
61-
// then all truncates are merged into truncating stores, giving worse code
62-
// generation. This hook prevents the DAG combiner performing that combine.
63-
return isTruncStoreLegal(ValVT, MemVT);
64-
}
65-
6657
private:
6758
unsigned Gen;
6859
/// Each OpenCL kernel has nine implicit parameters that are stored in the

llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,10 @@ define void @masked_gather_v2i8(<2 x i8>* %a, <2 x i8*>* %b) #0 {
3636
; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0
3737
; CHECK-NEXT: ld1sb { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d]
3838
; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d
39-
; CHECK-NEXT: st1b { z[[XTN]].s }, [[PG0]], [x0]
39+
; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1]
40+
; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]]
41+
; CHECK-NEXT: strb [[RES_LO]], [x0]
42+
; CHECK-NEXT: strb [[RES_HI]], [x0, #1]
4043
; CHECK-NEXT: ret
4144
%cval = load <2 x i8>, <2 x i8>* %a
4245
%ptrs = load <2 x i8*>, <2 x i8*>* %b
@@ -58,7 +61,8 @@ define void @masked_gather_v4i8(<4 x i8>* %a, <4 x i8*>* %b) #0 {
5861
; CHECK-NEXT: ld1sb { [[RES:z[0-9]+]].d }, [[MASK]]/z, {{\[}}[[PTRS]].d]
5962
; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].s, [[RES]].s, [[RES]].s
6063
; CHECK-NEXT: uzp1 z[[UZP2:[0-9]+]].h, [[UZP1]].h, [[UZP1]].h
61-
; CHECK-NEXT: st1b { z[[UZP2]].h }, [[PG0]], [x0]
64+
; CHECK-NEXT: uzp1 v[[UZP3:[0-9]+]].8b, v[[UZP2]].8b, v[[UZP2]].8b
65+
; CHECK-NEXT: str s[[UZP3]], [x0]
6266
; CHECK-NEXT: ret
6367
%cval = load <4 x i8>, <4 x i8>* %a
6468
%ptrs = load <4 x i8*>, <4 x i8*>* %b
@@ -175,7 +179,10 @@ define void @masked_gather_v2i16(<2 x i16>* %a, <2 x i16*>* %b) #0 {
175179
; CHECK-NEXT: cmpne [[MASK:p[0-9]+]].s, [[PG0]]/z, z[[CMP]].s, #0
176180
; CHECK-NEXT: ld1sh { z[[RES:[0-9]+]].d }, [[MASK]]/z, [z[[PTRS]].d]
177181
; CHECK-NEXT: xtn v[[XTN:[0-9]+]].2s, v[[RES]].2d
178-
; CHECK-NEXT: st1h { z[[RES]].s }, [[PG0]], [x0]
182+
; CHECK-NEXT: mov [[RES_HI:w[0-9]+]], v[[XTN]].s[1]
183+
; CHECK-NEXT: fmov [[RES_LO:w[0-9]+]], s[[XTN]]
184+
; CHECK-NEXT: strh [[RES_LO]], [x0]
185+
; CHECK-NEXT: strh [[RES_HI]], [x0, #2]
179186
; CHECK-NEXT: ret
180187
%cval = load <2 x i16>, <2 x i16>* %a
181188
%ptrs = load <2 x i16*>, <2 x i16*>* %b

llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll

Lines changed: 0 additions & 223 deletions
This file was deleted.

0 commit comments

Comments (0)