Skip to content

Commit 85d159b

Browse files
committed
Unify the logic for ld2 and ld4d ld4
add negative tests Change-Id: Id323139f11ddc4d3a22a72af84a01e98dadfe46d
1 parent 94d537b commit 85d159b

File tree

6 files changed

+567
-397
lines changed

6 files changed

+567
-397
lines changed

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,7 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
509509
return false;
510510

511511
LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
512+
512513
// Try and match this with target specific intrinsics.
513514
if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
514515
return false;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 124 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -16906,71 +16906,120 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
1690616906
return true;
1690716907
}
1690816908

16909-
bool getDeinterleavedValues(
16909+
bool getDeinterleave2Values(
1691016910
Value *DI, SmallVectorImpl<Instruction *> &DeinterleavedValues) {
16911-
if (!DI->hasNUsesOrMore(2))
16911+
if (!DI->hasNUses(2))
1691216912
return false;
1691316913
auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
1691416914
auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
1691516915
if (!Extr1 || !Extr2)
1691616916
return false;
1691716917

16918-
if (!Extr1->hasNUsesOrMore(1) || !Extr2->hasNUsesOrMore(1))
16918+
DeinterleavedValues.resize(2);
16919+
// Place the values into the vector in the order of extraction:
16920+
DeinterleavedValues[0x1 & (Extr1->getIndices()[0])] = Extr1;
16921+
DeinterleavedValues[0x1 & (Extr2->getIndices()[0])] = Extr2;
16922+
if (!DeinterleavedValues[0] || !DeinterleavedValues[1])
16923+
return false;
16924+
16925+
// Make sure that the extracted values match the deinterleave tree pattern
16926+
if (!match(DeinterleavedValues[0], m_ExtractValue<0>((m_Specific(DI)))) ||
16927+
!match(DeinterleavedValues[1], m_ExtractValue<1>((m_Specific(DI))))) {
16928+
LLVM_DEBUG(dbgs() << "matching deinterleave2 failed\n");
16929+
return false;
16930+
}
16931+
return true;
16932+
}
16933+
16934+
/*
16935+
Diagram for DI tree.
16936+
[LOAD]
16937+
|
16938+
[DI]
16939+
/ \
16940+
[Extr<0>] [Extr<1>]
16941+
| |
16942+
[DI] [DI]
16943+
/ \ / \
16944+
[Extr<0>][Extr<1>] [Extr<0>][Extr<1>]
16945+
| | | |
16946+
roots: A C B D
16947+
roots in correct order of DI4: A B C D.
16948+
If a pattern matches the deinterleave tree above, then we can construct
16949+
DI4 out of that pattern. This function tries to match the deinterleave tree
16950+
pattern, and fetch the tree roots, so that in further steps they can be replaced
16951+
by the output of DI4.
16952+
*/
16953+
bool getDeinterleave4Values(Value *DI,
16954+
SmallVectorImpl<Instruction *> &DeinterleavedValues,
16955+
SmallVectorImpl<Instruction *> &DeadInstructions) {
16956+
if (!DI->hasNUses(2))
16957+
return false;
16958+
auto *Extr1 = dyn_cast<ExtractValueInst>(*(DI->user_begin()));
16959+
auto *Extr2 = dyn_cast<ExtractValueInst>(*(++DI->user_begin()));
16960+
if (!Extr1 || !Extr2)
16961+
return false;
16962+
16963+
if (!Extr1->hasNUses(1) || !Extr2->hasNUses(1))
1691916964
return false;
1692016965
auto *DI1 = *(Extr1->user_begin());
1692116966
auto *DI2 = *(Extr2->user_begin());
1692216967

16923-
if (!DI1->hasNUsesOrMore(2) || !DI2->hasNUsesOrMore(2))
16968+
if (!DI1->hasNUses(2) || !DI2->hasNUses(2))
1692416969
return false;
1692516970
// Leaf nodes of the deinterleave tree:
1692616971
auto *A = dyn_cast<ExtractValueInst>(*(DI1->user_begin()));
16927-
auto *B = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
16928-
auto *C = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
16972+
auto *C = dyn_cast<ExtractValueInst>(*(++DI1->user_begin()));
16973+
auto *B = dyn_cast<ExtractValueInst>(*(DI2->user_begin()));
1692916974
auto *D = dyn_cast<ExtractValueInst>(*(++DI2->user_begin()));
1693016975
// Make sure that A, B, C and D are all ExtractValue instructions
1693116976
// before getting the extract index
1693216977
if (!A || !B || !C || !D)
1693316978
return false;
1693416979

1693516980
DeinterleavedValues.resize(4);
16936-
// Place the values into the vector in the order of extraction:
16937-
DeinterleavedValues[A->getIndices()[0] + (Extr1->getIndices()[0] * 2)] = A;
16938-
DeinterleavedValues[B->getIndices()[0] + (Extr1->getIndices()[0] * 2)] = B;
16939-
DeinterleavedValues[C->getIndices()[0] + (Extr2->getIndices()[0] * 2)] = C;
16940-
DeinterleavedValues[D->getIndices()[0] + (Extr2->getIndices()[0] * 2)] = D;
16981+
// Place the values into the vector in the order of deinterleave4:
16982+
DeinterleavedValues[0x3 &
16983+
((A->getIndices()[0] * 2) + Extr1->getIndices()[0])] = A;
16984+
DeinterleavedValues[0x3 &
16985+
((B->getIndices()[0] * 2) + Extr2->getIndices()[0])] = B;
16986+
DeinterleavedValues[0x3 &
16987+
((C->getIndices()[0] * 2) + Extr1->getIndices()[0])] = C;
16988+
DeinterleavedValues[0x3 &
16989+
((D->getIndices()[0] * 2) + Extr2->getIndices()[0])] = D;
16990+
if (!DeinterleavedValues[0] || !DeinterleavedValues[1] ||
16991+
!DeinterleavedValues[2] || !DeinterleavedValues[3])
16992+
return false;
1694116993

1694216994
// Make sure that A,B,C,D match the deinterleave tree pattern
16943-
if (!match(DeinterleavedValues[0],
16944-
m_ExtractValue<0>(m_Deinterleave2(
16945-
m_ExtractValue<0>(m_Deinterleave2(m_Value()))))) ||
16946-
!match(DeinterleavedValues[1],
16947-
m_ExtractValue<1>(m_Deinterleave2(
16948-
m_ExtractValue<0>(m_Deinterleave2(m_Value()))))) ||
16949-
!match(DeinterleavedValues[2],
16950-
m_ExtractValue<0>(m_Deinterleave2(
16951-
m_ExtractValue<1>(m_Deinterleave2(m_Value()))))) ||
16952-
!match(DeinterleavedValues[3],
16953-
m_ExtractValue<1>(m_Deinterleave2(
16954-
m_ExtractValue<1>(m_Deinterleave2(m_Value())))))) {
16995+
if (!match(DeinterleavedValues[0], m_ExtractValue<0>(m_Deinterleave2(
16996+
m_ExtractValue<0>(m_Specific(DI))))) ||
16997+
!match(DeinterleavedValues[1], m_ExtractValue<0>(m_Deinterleave2(
16998+
m_ExtractValue<1>(m_Specific(DI))))) ||
16999+
!match(DeinterleavedValues[2], m_ExtractValue<1>(m_Deinterleave2(
17000+
m_ExtractValue<0>(m_Specific(DI))))) ||
17001+
!match(DeinterleavedValues[3], m_ExtractValue<1>(m_Deinterleave2(
17002+
m_ExtractValue<1>(m_Specific(DI)))))) {
1695517003
LLVM_DEBUG(dbgs() << "matching deinterleave4 failed\n");
1695617004
return false;
1695717005
}
16958-
// Order the values according to the deinterleaving order.
16959-
std::swap(DeinterleavedValues[1], DeinterleavedValues[2]);
17006+
17007+
// These values will not be used anymore;
17008+
// DI4 will be created instead of nested DI1 and DI2
17009+
DeadInstructions.push_back(cast<Instruction>(DI1));
17010+
DeadInstructions.push_back(cast<Instruction>(Extr1));
17011+
DeadInstructions.push_back(cast<Instruction>(DI2));
17012+
DeadInstructions.push_back(cast<Instruction>(Extr2));
17013+
1696017014
return true;
1696117015
}
1696217016

16963-
void deleteDeadDeinterleaveInstructions(Instruction *DeadRoot) {
16964-
Value *DeadDeinterleave = nullptr, *DeadExtract = nullptr;
16965-
match(DeadRoot, m_ExtractValue(m_Value(DeadDeinterleave)));
16966-
assert(DeadDeinterleave != nullptr && "Match is expected to succeed");
16967-
match(DeadDeinterleave, m_Deinterleave2(m_Value(DeadExtract)));
16968-
assert(DeadExtract != nullptr && "Match is expected to succeed");
16969-
DeadRoot->eraseFromParent();
16970-
if (DeadDeinterleave->getNumUses() == 0)
16971-
cast<Instruction>(DeadDeinterleave)->eraseFromParent();
16972-
if (DeadExtract->getNumUses() == 0)
16973-
cast<Instruction>(DeadExtract)->eraseFromParent();
17017+
bool getDeinterleavedValues(Value *DI,
17018+
SmallVectorImpl<Instruction *> &DeinterleavedValues,
17019+
SmallVectorImpl<Instruction *> &DeadInstructions) {
17020+
if (getDeinterleave4Values(DI, DeinterleavedValues, DeadInstructions))
17021+
return true;
17022+
return getDeinterleave2Values(DI, DeinterleavedValues);
1697417023
}
1697517024

1697617025
bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
@@ -16980,16 +17029,17 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1698017029
return false;
1698117030

1698217031
SmallVector<Instruction *, 4> DeinterleavedValues;
17032+
SmallVector<Instruction *, 4> DeadInstructions;
1698317033
const DataLayout &DL = DI->getModule()->getDataLayout();
16984-
unsigned Factor = 2;
16985-
VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
1698617034

16987-
if (getDeinterleavedValues(DI, DeinterleavedValues)) {
16988-
Factor = DeinterleavedValues.size();
16989-
VTy = cast<VectorType>(DeinterleavedValues[0]->getType());
17035+
if (!getDeinterleavedValues(DI, DeinterleavedValues, DeadInstructions)) {
17036+
LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n");
17037+
return false;
1699017038
}
17039+
unsigned Factor = DeinterleavedValues.size();
1699117040
assert((Factor == 2 || Factor == 4) &&
16992-
"Currently supported Factors are 2 or 4");
17041+
"Currently supported Factor is 2 or 4 only");
17042+
VectorType *VTy = cast<VectorType>(DeinterleavedValues[0]->getType());
1699317043

1699417044
bool UseScalable;
1699517045
if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
@@ -17050,23 +17100,35 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
1705017100
else
1705117101
Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
1705217102
}
17053-
if (Factor > 2) {
17054-
// Itereate over old deinterleaved values to replace it by
17055-
// the new deinterleaved values.
17056-
for (unsigned I = 0; I < DeinterleavedValues.size(); I++) {
17057-
Value *NewExtract = Builder.CreateExtractValue(Result, I);
17058-
DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
17059-
}
17060-
for (unsigned I = 0; I < DeinterleavedValues.size(); I++)
17061-
deleteDeadDeinterleaveInstructions(DeinterleavedValues[I]);
17062-
return true;
17103+
// Iterate over the old deinterleaved values to replace them with
17104+
// the new values.
17105+
for (unsigned I = 0; I < DeinterleavedValues.size(); I++) {
17106+
Value *NewExtract = Builder.CreateExtractValue(Result, I);
17107+
DeinterleavedValues[I]->replaceAllUsesWith(NewExtract);
17108+
cast<Instruction>(DeinterleavedValues[I])->eraseFromParent();
1706317109
}
17064-
DI->replaceAllUsesWith(Result);
17110+
for (auto &dead : DeadInstructions)
17111+
dead->eraseFromParent();
1706517112
return true;
1706617113
}
1706717114

17068-
bool getValuesToInterleaved(Value *II,
17069-
SmallVectorImpl<Value *> &ValuesToInterleave) {
17115+
/*
17116+
Diagram for Interleave tree.
17117+
A C B D
17118+
\ / \ /
17119+
[Interleave] [Interleave]
17120+
\ /
17121+
[Interleave]
17122+
|
17123+
[Store]
17124+
values in correct order of interleave4: A B C D.
17125+
If a pattern matches the interleave tree above, then we can construct
17126+
Interleave4 out of that pattern. This function tries to match the interleave
17127+
tree pattern, and fetch the values that we want to interleave, so that in
17128+
further steps they can be replaced by the output of Interleave4.
17129+
*/
17130+
bool getValuesToInterleave(Value *II,
17131+
SmallVectorImpl<Value *> &ValuesToInterleave) {
1707017132
Value *A, *B, *C, *D;
1707117133
// Try to match interleave of Factor 4
1707217134
if (match(II, m_Interleave2(m_Interleave2(m_Value(A), m_Value(C)),
@@ -17090,14 +17152,18 @@ bool getValuesToInterleaved(Value *II,
1709017152

1709117153
bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
1709217154
IntrinsicInst *II, StoreInst *SI) const {
17093-
LLVM_DEBUG(dbgs() << "lowerInterleaveIntrinsicToStore\n");
17155+
// Only interleave2 supported at present.
17156+
if (II->getIntrinsicID() != Intrinsic::vector_interleave2)
17157+
return false;
1709417158

1709517159
SmallVector<Value *, 4> ValuesToInterleave;
17096-
if (!getValuesToInterleaved(II, ValuesToInterleave))
17160+
if (!getValuesToInterleave(II, ValuesToInterleave)) {
17161+
LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
1709717162
return false;
17163+
}
1709817164
unsigned Factor = ValuesToInterleave.size();
1709917165
assert((Factor == 2 || Factor == 4) &&
17100-
"Currently supported Factors are 2 or 4");
17166+
"Currently supported Factor is 2 or 4 only");
1710117167
VectorType *VTy = cast<VectorType>(ValuesToInterleave[0]->getType());
1710217168
const DataLayout &DL = II->getModule()->getDataLayout();
1710317169

llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,35 @@
66

77
target triple = "aarch64-linux-gnu"
88

9-
define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2(ptr %ptr) {
10-
; NEON-LABEL: define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2
9+
define void @deinterleave_i8_factor2(ptr %ptr) {
10+
; NEON-LABEL: define void @deinterleave_i8_factor2
1111
; NEON-SAME: (ptr [[PTR:%.*]]) {
1212
; NEON-NEXT: [[LDN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[PTR]])
13-
; NEON-NEXT: ret { <16 x i8>, <16 x i8> } [[LDN]]
13+
; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 0
14+
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 1
15+
; NEON-NEXT: ret void
1416
;
15-
; SVE-FIXED-LABEL: define { <16 x i8>, <16 x i8> } @deinterleave_i8_factor2
17+
; SVE-FIXED-LABEL: define void @deinterleave_i8_factor2
1618
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
1719
; SVE-FIXED-NEXT: [[LOAD:%.*]] = load <32 x i8>, ptr [[PTR]], align 1
1820
; SVE-FIXED-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> [[LOAD]])
19-
; SVE-FIXED-NEXT: ret { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]]
21+
; SVE-FIXED-NEXT: [[EXTRACT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]], 0
22+
; SVE-FIXED-NEXT: [[EXTRACT2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[DEINTERLEAVE]], 1
23+
; SVE-FIXED-NEXT: ret void
2024
;
2125
%load = load <32 x i8>, ptr %ptr, align 1
2226
%deinterleave = tail call { <16 x i8>, <16 x i8> } @llvm.vector.deinterleave2.v32i8(<32 x i8> %load)
23-
ret { <16 x i8>, <16 x i8> } %deinterleave
27+
%extract1 = extractvalue { <16 x i8>, <16 x i8> } %deinterleave, 0
28+
%extract2 = extractvalue { <16 x i8>, <16 x i8> } %deinterleave, 1
29+
ret void
2430
}
2531

2632
define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2(ptr %ptr) {
2733
; NEON-LABEL: define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2
2834
; NEON-SAME: (ptr [[PTR:%.*]]) {
29-
; NEON-NEXT: [[LDN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[PTR]])
30-
; NEON-NEXT: ret { <8 x i16>, <8 x i16> } [[LDN]]
35+
; NEON-NEXT: [[LOAD:%.*]] = load <16 x i16>, ptr [[PTR]], align 2
36+
; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <8 x i16>, <8 x i16> } @llvm.vector.deinterleave2.v16i16(<16 x i16> [[LOAD]])
37+
; NEON-NEXT: ret { <8 x i16>, <8 x i16> } [[DEINTERLEAVE]]
3138
;
3239
; SVE-FIXED-LABEL: define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2
3340
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
@@ -43,8 +50,9 @@ define { <8 x i16>, <8 x i16> } @deinterleave_i16_factor2(ptr %ptr) {
4350
define { <4 x i32>, <4 x i32> } @deinterleave_8xi32_factor2(ptr %ptr) {
4451
; NEON-LABEL: define { <4 x i32>, <4 x i32> } @deinterleave_8xi32_factor2
4552
; NEON-SAME: (ptr [[PTR:%.*]]) {
46-
; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[PTR]])
47-
; NEON-NEXT: ret { <4 x i32>, <4 x i32> } [[LDN]]
53+
; NEON-NEXT: [[LOAD:%.*]] = load <8 x i32>, ptr [[PTR]], align 4
54+
; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <4 x i32>, <4 x i32> } @llvm.vector.deinterleave2.v8i32(<8 x i32> [[LOAD]])
55+
; NEON-NEXT: ret { <4 x i32>, <4 x i32> } [[DEINTERLEAVE]]
4856
;
4957
; SVE-FIXED-LABEL: define { <4 x i32>, <4 x i32> } @deinterleave_8xi32_factor2
5058
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
@@ -60,8 +68,9 @@ define { <4 x i32>, <4 x i32> } @deinterleave_8xi32_factor2(ptr %ptr) {
6068
define { <2 x i64>, <2 x i64> } @deinterleave_i64_factor2(ptr %ptr) {
6169
; NEON-LABEL: define { <2 x i64>, <2 x i64> } @deinterleave_i64_factor2
6270
; NEON-SAME: (ptr [[PTR:%.*]]) {
63-
; NEON-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[PTR]])
64-
; NEON-NEXT: ret { <2 x i64>, <2 x i64> } [[LDN]]
71+
; NEON-NEXT: [[LOAD:%.*]] = load <4 x i64>, ptr [[PTR]], align 8
72+
; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <2 x i64>, <2 x i64> } @llvm.vector.deinterleave2.v4i64(<4 x i64> [[LOAD]])
73+
; NEON-NEXT: ret { <2 x i64>, <2 x i64> } [[DEINTERLEAVE]]
6574
;
6675
; SVE-FIXED-LABEL: define { <2 x i64>, <2 x i64> } @deinterleave_i64_factor2
6776
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
@@ -77,8 +86,9 @@ define { <2 x i64>, <2 x i64> } @deinterleave_i64_factor2(ptr %ptr) {
7786
define { <4 x float>, <4 x float> } @deinterleave_float_factor2(ptr %ptr) {
7887
; NEON-LABEL: define { <4 x float>, <4 x float> } @deinterleave_float_factor2
7988
; NEON-SAME: (ptr [[PTR:%.*]]) {
80-
; NEON-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PTR]])
81-
; NEON-NEXT: ret { <4 x float>, <4 x float> } [[LDN]]
89+
; NEON-NEXT: [[LOAD:%.*]] = load <8 x float>, ptr [[PTR]], align 4
90+
; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <4 x float>, <4 x float> } @llvm.vector.deinterleave2.v8f32(<8 x float> [[LOAD]])
91+
; NEON-NEXT: ret { <4 x float>, <4 x float> } [[DEINTERLEAVE]]
8292
;
8393
; SVE-FIXED-LABEL: define { <4 x float>, <4 x float> } @deinterleave_float_factor2
8494
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
@@ -94,8 +104,9 @@ define { <4 x float>, <4 x float> } @deinterleave_float_factor2(ptr %ptr) {
94104
define { <2 x double>, <2 x double> } @deinterleave_double_factor2(ptr %ptr) {
95105
; NEON-LABEL: define { <2 x double>, <2 x double> } @deinterleave_double_factor2
96106
; NEON-SAME: (ptr [[PTR:%.*]]) {
97-
; NEON-NEXT: [[LDN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr [[PTR]])
98-
; NEON-NEXT: ret { <2 x double>, <2 x double> } [[LDN]]
107+
; NEON-NEXT: [[LOAD:%.*]] = load <4 x double>, ptr [[PTR]], align 8
108+
; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <2 x double>, <2 x double> } @llvm.vector.deinterleave2.v4f64(<4 x double> [[LOAD]])
109+
; NEON-NEXT: ret { <2 x double>, <2 x double> } [[DEINTERLEAVE]]
99110
;
100111
; SVE-FIXED-LABEL: define { <2 x double>, <2 x double> } @deinterleave_double_factor2
101112
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
@@ -111,8 +122,9 @@ define { <2 x double>, <2 x double> } @deinterleave_double_factor2(ptr %ptr) {
111122
define { <2 x ptr>, <2 x ptr> } @deinterleave_ptr_factor2(ptr %ptr) {
112123
; NEON-LABEL: define { <2 x ptr>, <2 x ptr> } @deinterleave_ptr_factor2
113124
; NEON-SAME: (ptr [[PTR:%.*]]) {
114-
; NEON-NEXT: [[LDN:%.*]] = call { <2 x ptr>, <2 x ptr> } @llvm.aarch64.neon.ld2.v2p0.p0(ptr [[PTR]])
115-
; NEON-NEXT: ret { <2 x ptr>, <2 x ptr> } [[LDN]]
125+
; NEON-NEXT: [[LOAD:%.*]] = load <4 x ptr>, ptr [[PTR]], align 8
126+
; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <2 x ptr>, <2 x ptr> } @llvm.vector.deinterleave2.v4p0(<4 x ptr> [[LOAD]])
127+
; NEON-NEXT: ret { <2 x ptr>, <2 x ptr> } [[DEINTERLEAVE]]
116128
;
117129
; SVE-FIXED-LABEL: define { <2 x ptr>, <2 x ptr> } @deinterleave_ptr_factor2
118130
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
@@ -247,21 +259,9 @@ define void @interleave_ptr_factor2(ptr %ptr, <2 x ptr> %l, <2 x ptr> %r) {
247259
define { <16 x i16>, <16 x i16> } @deinterleave_wide_i16_factor2(ptr %ptr) #0 {
248260
; NEON-LABEL: define { <16 x i16>, <16 x i16> } @deinterleave_wide_i16_factor2
249261
; NEON-SAME: (ptr [[PTR:%.*]]) {
250-
; NEON-NEXT: [[TMP1:%.*]] = getelementptr <8 x i16>, ptr [[PTR]], i64 0
251-
; NEON-NEXT: [[LDN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[TMP1]])
252-
; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 0
253-
; NEON-NEXT: [[TMP3:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> [[TMP2]], i64 0)
254-
; NEON-NEXT: [[TMP4:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 1
255-
; NEON-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> [[TMP4]], i64 0)
256-
; NEON-NEXT: [[TMP6:%.*]] = getelementptr <8 x i16>, ptr [[PTR]], i64 2
257-
; NEON-NEXT: [[LDN1:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[TMP6]])
258-
; NEON-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN1]], 0
259-
; NEON-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP3]], <8 x i16> [[TMP7]], i64 8)
260-
; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN1]], 1
261-
; NEON-NEXT: [[TMP10:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP5]], <8 x i16> [[TMP9]], i64 8)
262-
; NEON-NEXT: [[TMP11:%.*]] = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> [[TMP8]], 0
263-
; NEON-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i16>, <16 x i16> } [[TMP11]], <16 x i16> [[TMP10]], 1
264-
; NEON-NEXT: ret { <16 x i16>, <16 x i16> } [[TMP12]]
262+
; NEON-NEXT: [[LOAD:%.*]] = load <32 x i16>, ptr [[PTR]], align 2
263+
; NEON-NEXT: [[DEINTERLEAVE:%.*]] = tail call { <16 x i16>, <16 x i16> } @llvm.vector.deinterleave2.v32i16(<32 x i16> [[LOAD]])
264+
; NEON-NEXT: ret { <16 x i16>, <16 x i16> } [[DEINTERLEAVE]]
265265
;
266266
; SVE-FIXED-LABEL: define { <16 x i16>, <16 x i16> } @deinterleave_wide_i16_factor2
267267
; SVE-FIXED-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {

0 commit comments

Comments
 (0)