Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 1a080ce

Browse files
Hao LiuHao Liu
authored and committed
[AArch64] Add support to spill/fill D tuples such as DPair/DTriple/DQuad. There are no test cases for D tuples because the original test cases that trigger such spills are too large. As the spill/fill of D tuples is implemented in the same way as for Q tuples, which are covered by tests, the correctness can be reasonably assured.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@198684 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent bba676f commit 1a080ce

File tree

4 files changed

+77
-11
lines changed

4 files changed

+77
-11
lines changed

lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -477,12 +477,18 @@ AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
477477
default:
478478
llvm_unreachable("Unknown size for regclass");
479479
}
480-
} else { // The spill of D tuples is implemented by Q tuples
481-
if (RC == &AArch64::QPairRegClass)
480+
} else { // For a super register class has more than one sub registers
481+
if (AArch64::DPairRegClass.hasSubClassEq(RC))
482+
StoreOp = AArch64::ST1x2_8B;
483+
else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
484+
StoreOp = AArch64::ST1x3_8B;
485+
else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
486+
StoreOp = AArch64::ST1x4_8B;
487+
else if (AArch64::QPairRegClass.hasSubClassEq(RC))
482488
StoreOp = AArch64::ST1x2_16B;
483-
else if (RC == &AArch64::QTripleRegClass)
489+
else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
484490
StoreOp = AArch64::ST1x3_16B;
485-
else if (RC == &AArch64::QQuadRegClass)
491+
else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
486492
StoreOp = AArch64::ST1x4_16B;
487493
else
488494
llvm_unreachable("Unknown reg class");
@@ -537,12 +543,18 @@ AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
537543
default:
538544
llvm_unreachable("Unknown size for regclass");
539545
}
540-
} else { // The spill of D tuples is implemented by Q tuples
541-
if (RC == &AArch64::QPairRegClass)
546+
} else { // For a super register class has more than one sub registers
547+
if (AArch64::DPairRegClass.hasSubClassEq(RC))
548+
LoadOp = AArch64::LD1x2_8B;
549+
else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
550+
LoadOp = AArch64::LD1x3_8B;
551+
else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
552+
LoadOp = AArch64::LD1x4_8B;
553+
else if (AArch64::QPairRegClass.hasSubClassEq(RC))
542554
LoadOp = AArch64::LD1x2_16B;
543-
else if (RC == &AArch64::QTripleRegClass)
555+
else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
544556
LoadOp = AArch64::LD1x3_16B;
545-
else if (RC == &AArch64::QQuadRegClass)
557+
else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
546558
LoadOp = AArch64::LD1x4_16B;
547559
else
548560
llvm_unreachable("Unknown reg class");
@@ -649,6 +661,17 @@ void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
649661
MinOffset = -0x40 * AccessScale;
650662
MaxOffset = 0x3f * AccessScale;
651663
return;
664+
case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
665+
AccessScale = 16;
666+
MinOffset = 0;
667+
MaxOffset = 0xfff * AccessScale;
668+
return;
669+
case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
670+
AccessScale = 24;
671+
MinOffset = 0;
672+
MaxOffset = 0xfff * AccessScale;
673+
return;
674+
case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
652675
case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
653676
AccessScale = 32;
654677
MinOffset = 0;

lib/Target/AArch64/AArch64RegisterInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,10 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
7777
}
7878

7979
static bool hasFrameOffset(int opcode) {
80-
return opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
80+
return opcode != AArch64::LD1x2_8B && opcode != AArch64::LD1x3_8B &&
81+
opcode != AArch64::LD1x4_8B && opcode != AArch64::ST1x2_8B &&
82+
opcode != AArch64::ST1x3_8B && opcode != AArch64::ST1x4_8B &&
83+
opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
8184
opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B &&
8285
opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B;
8386
}

lib/Target/AArch64/AArch64RegisterInfo.td

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@ def dsub_0 : SubRegIndex<64>;
3030
def dsub_1 : SubRegIndex<64, 64>;
3131
def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
3232
def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
33-
def dsub_4 : ComposedSubRegIndex<qsub_2, dsub_0>;
3433
}
3534

3635
// Registers are identified with 5-bit ID numbers.
@@ -206,7 +205,7 @@ def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
206205
//===----------------------------------------------------------------------===//
207206
// Consecutive vector registers
208207
//===----------------------------------------------------------------------===//
209-
// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D30_D31
208+
// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D31_D0
210209
def Tuples2D : RegisterTuples<[dsub_0, dsub_1],
211210
[(rotl FPR64, 0), (rotl FPR64, 1)]>;
212211

test/CodeGen/AArch64/neon-vector-list-spill.ll

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -132,3 +132,44 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.arm.neon.vld3.v4f32(i8*,
132132
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld4.v16i8(i8*, i32)
133133

134134
declare void @foo()
135+
136+
; FIXME: We should not generate ld/st for such register spill/fill, because the
137+
; test case seems very simple and the register pressure is not high. If the
138+
; spill/fill algorithm is optimized, this test case may not be triggered. And
139+
; then we can delete it.
140+
; check the spill for Register Class QPair_with_qsub_0_in_FPR128Lo
141+
define <8 x i16> @test_2xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
142+
tail call void @llvm.arm.neon.vst2lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
143+
tail call void @foo()
144+
%sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
145+
%1 = bitcast <2 x i64> %sv to <8 x i16>
146+
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
147+
%3 = mul <8 x i16> %2, %2
148+
ret <8 x i16> %3
149+
}
150+
151+
; check the spill for Register Class QTriple_with_qsub_0_in_FPR128Lo
152+
define <8 x i16> @test_3xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
153+
tail call void @llvm.arm.neon.vst3lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
154+
tail call void @foo()
155+
%sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
156+
%1 = bitcast <2 x i64> %sv to <8 x i16>
157+
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
158+
%3 = mul <8 x i16> %2, %2
159+
ret <8 x i16> %3
160+
}
161+
162+
; check the spill for Register Class QQuad_with_qsub_0_in_FPR128Lo
163+
define <8 x i16> @test_4xFPR128Lo(i64 %got, i8* %ptr, <1 x i64> %a) {
164+
tail call void @llvm.arm.neon.vst4lane.v1i64(i8* %ptr, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, i32 0, i32 8)
165+
tail call void @foo()
166+
%sv = shufflevector <1 x i64> zeroinitializer, <1 x i64> %a, <2 x i32> <i32 0, i32 1>
167+
%1 = bitcast <2 x i64> %sv to <8 x i16>
168+
%2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
169+
%3 = mul <8 x i16> %2, %2
170+
ret <8 x i16> %3
171+
}
172+
173+
declare void @llvm.arm.neon.vst2lane.v1i64(i8*, <1 x i64>, <1 x i64>, i32, i32)
174+
declare void @llvm.arm.neon.vst3lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)
175+
declare void @llvm.arm.neon.vst4lane.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32, i32)

0 commit comments

Comments
 (0)