Skip to content

Commit f2704d2

Browse files
committed
Require SVE2, support inc/dec immediates, adjust doxygen comment
1 parent 4ef1e53 commit f2704d2

File tree

4 files changed

+77
-16
lines changed

4 files changed

+77
-16
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -696,10 +696,10 @@ class TargetTransformInfo {
696696
/// immediate without having to materialize the immediate into a register.
697697
bool isLegalAddImmediate(int64_t Imm) const;
698698

699-
/// Return true if the specified immediate is legal add of a scalable
700-
/// immediate, that is the target has add instructions which can add a
701-
/// register with the immediate (multiplied by vscale) without having to
702-
/// materialize the immediate into a register.
699+
/// Return true if adding the specified scalable immediate is legal, that is
700+
/// the target has add instructions which can add a register with the
701+
/// immediate (multiplied by vscale) without having to materialize the
702+
/// immediate into a register.
703703
bool isLegalAddScalableImmediate(int64_t Imm) const;
704704

705705
/// Return true if the specified immediate is legal icmp immediate,

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2770,10 +2770,10 @@ class TargetLoweringBase {
27702770
return true;
27712771
}
27722772

2773-
/// Return true if the specified immediate is legal add of a scalable
2774-
/// immediate, that is the target has add instructions which can add a
2775-
/// register with the immediate (multiplied by vscale) without having to
2776-
/// materialize the immediate into a register.
2773+
/// Return true if adding the specified scalable immediate is legal, that is
2774+
/// the target has add instructions which can add a register with the
2775+
/// immediate (multiplied by vscale) without having to materialize the
2776+
/// immediate into a register.
27772777
virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
27782778

27792779
/// Return true if the specified immediate is legal for the value input of a

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16596,18 +16596,35 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
1659616596
}
1659716597

1659816598
bool AArch64TargetLowering::isLegalAddScalableImmediate(int64_t Imm) const {
16599-
// Scalable immediates require SVE support.
16600-
if (!Subtarget->hasSVE())
16599+
// We will only emit addvl/inc* instructions for SVE2
16600+
if (!Subtarget->hasSVE2())
1660116601
return false;
1660216602

1660316603
// addvl's immediates are in terms of the number of bytes in a register.
1660416604
// Since there are 16 in the base supported size (128bits), we need to
1660516605
// divide the immediate by that much to give us a useful immediate to
1660616606
// multiply by vscale. We can't have a remainder as a result of this.
16607-
if (Imm % 16 != 0)
16608-
return false;
16607+
if (Imm % 16 == 0)
16608+
return isInt<6>(Imm / 16);
16609+
16610+
// Inc[b|h|w|d] instructions take a pattern and a positive immediate
16611+
// multiplier. For now, assume a pattern of 'all'. Incb would be a subset
16612+
// of addvl as a result, so only take h|w|d into account.
16613+
// Dec[h|w|d] will cover subtractions.
16614+
// Immediates are in the range [1,16], so we can't do a 2's complement check.
16615+
// FIXME: Can we make use of other patterns to cover other immediates?
16616+
16617+
// inch|dech
16618+
if (Imm % 8 == 0)
16619+
return std::labs(Imm / 8) <= 16;
16620+
// incw|decw
16621+
if (Imm % 4 == 0)
16622+
return std::labs(Imm / 4) <= 16;
16623+
// incd|decd
16624+
if (Imm % 2 == 0)
16625+
return std::labs(Imm / 2) <= 16;
1660916626

16610-
return isInt<6>(Imm / 16);
16627+
return false;
1661116628
}
1661216629

1661316630
// Return false to prevent folding

llvm/unittests/Target/AArch64/Immediates.cpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ const std::initializer_list<TestCase> Tests = {
2525
// addvl increments by whole registers, range [-32,31]
2626
// +(16 * vscale), one register's worth
2727
{16, true},
28-
// +(8 * vscale), half a register's worth
29-
{8, false},
3028
// -(32 * 16 * vscale)
3129
{-512, true},
3230
// -(33 * 16 * vscale)
@@ -35,6 +33,52 @@ const std::initializer_list<TestCase> Tests = {
3533
{496, true},
3634
// +(32 * 16 * vscale)
3735
{512, false},
36+
37+
// inc[h|w|d] increments by the number of 16/32/64bit elements in a
38+
// register. mult_imm is in the range [1,16]
39+
// +(mult_imm * num_elts * vscale)
40+
// +(1 * 8 * vscale), 16 bit
41+
{8, true},
42+
// +(15 * 8 * vscale), 16 bit
43+
{120, true},
44+
// +(1 * 4 * vscale), 32 bit
45+
{4, true},
46+
// +(7 * 4 * vscale), 32 bit
47+
{28, true},
48+
// +(1 * 2 * vscale), 64 bit
49+
{2, true},
50+
// +(13 * 2 * vscale), 64 bit
51+
{26, true},
52+
// +(17 * 8 * vscale), 16 bit, out of range.
53+
{136, false},
54+
// +(19 * 2 * vscale), 64 bit, out of range.
55+
{38, false},
56+
// +(21 * 4 * vscale), 32 bit, out of range.
57+
{84, false},
58+
59+
// dec[h|w|d] -- Same as above, but negative.
60+
// -(mult_imm * num_elts * vscale)
61+
// -(1 * 8 * vscale), 16 bit
62+
{-8, true},
63+
// -(15 * 8 * vscale), 16 bit
64+
{-120, true},
65+
// -(1 * 4 * vscale), 32 bit
66+
{-4, true},
67+
// -(7 * 4 * vscale), 32 bit
68+
{-28, true},
69+
// -(1 * 2 * vscale), 64 bit
70+
{-2, true},
71+
// -(13 * 2 * vscale), 64 bit
72+
{-26, true},
73+
// -(17 * 8 * vscale), 16 bit, out of range.
74+
{-136, false},
75+
// -(19 * 2 * vscale), 64 bit, out of range.
76+
{-38, false},
77+
// -(21 * 4 * vscale), 32 bit, out of range.
78+
{-84, false},
79+
80+
// Invalid; not divisible by the above powers of 2.
81+
{5, false},
3882
};
3983
} // namespace
4084

@@ -48,7 +92,7 @@ TEST(Immediates, Immediates) {
4892
const Target *T = TargetRegistry::lookupTarget(TT, Error);
4993

5094
std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
51-
TT, "generic", "+sve", TargetOptions(), std::nullopt, std::nullopt,
95+
TT, "generic", "+sve2", TargetOptions(), std::nullopt, std::nullopt,
5296
CodeGenOptLevel::Default));
5397
AArch64Subtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
5498
TM->getTargetCPU(), TM->getTargetFeatureString(), *TM,

0 commit comments

Comments
 (0)