Require SVE2, support inc/dec immediates, adjust doxygen comment

huntergr-arm · huntergr-arm · commit f2704d2138d8 · 2024-03-14T12:05:30.000Z
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -696,10 +696,10 @@ class TargetTransformInfo {
   /// immediate without having to materialize the immediate into a register.
   bool isLegalAddImmediate(int64_t Imm) const;
 
-  /// Return true if the specified immediate is legal add of a scalable
-  /// immediate, that is the target has add instructions which can add a
-  /// register with the immediate (multiplied by vscale) without having to
-  /// materialize the immediate into a register.
+  /// Return true if adding the specified scalable immediate is legal, that is
+  /// the target has add instructions which can add a register with the
+  /// immediate (multiplied by vscale) without having to materialize the
+  /// immediate into a register.
   bool isLegalAddScalableImmediate(int64_t Imm) const;
 
   /// Return true if the specified immediate is legal icmp immediate,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2770,10 +2770,10 @@ class TargetLoweringBase {
     return true;
   }
 
-  /// Return true if the specified immediate is legal add of a scalable
-  /// immediate, that is the target has add instructions which can add a
-  /// register with the immediate (multiplied by vscale) without having to
-  /// materialize the immediate into a register.
+  /// Return true if adding the specified scalable immediate is legal, that is
+  /// the target has add instructions which can add a register with the
+  /// immediate (multiplied by vscale) without having to materialize the
+  /// immediate into a register.
   virtual bool isLegalAddScalableImmediate(int64_t) const { return false; }
 
   /// Return true if the specified immediate is legal for the value input of a
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16596,18 +16596,35 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
 }
 
 bool AArch64TargetLowering::isLegalAddScalableImmediate(int64_t Imm) const {
-  // Scalable immediates require SVE support.
-  if (!Subtarget->hasSVE())
+  // We will only emit addvl/inc* instructions for SVE2.
+  if (!Subtarget->hasSVE2())
     return false;
 
   // addvl's immediates are in terms of the number of bytes in a register.
   // Since there are 16 in the base supported size (128bits), we need to
   // divide the immediate by that much to give us a useful immediate to
   // multiply by vscale. We can't have a remainder as a result of this.
-  if (Imm % 16 != 0)
-    return false;
+  if (Imm % 16 == 0)
+    return isInt<6>(Imm / 16);
+
+  // Inc[b|h|w|d] instructions take a pattern and a positive immediate
+  // multiplier. For now, assume a pattern of 'all'. Incb would be a subset
+  // of addvl as a result, so only take h|w|d into account. Dec[h|w|d] will
+  // cover subtractions. Multipliers are in the range [1,16], so we can't do
+  // a 2's complement check; std::abs (not labs) keeps the full 64-bit value.
+  // FIXME: Can we make use of other patterns to cover other immediates?
+
+  // inch|dech
+  if (Imm % 8 == 0)
+    return std::abs(Imm / 8) <= 16;
+  // incw|decw
+  if (Imm % 4 == 0)
+    return std::abs(Imm / 4) <= 16;
+  // incd|decd
+  if (Imm % 2 == 0)
+    return std::abs(Imm / 2) <= 16;
 
-  return isInt<6>(Imm / 16);
+  return false;
 }
 
 // Return false to prevent folding
diff --git a/llvm/unittests/Target/AArch64/Immediates.cpp b/llvm/unittests/Target/AArch64/Immediates.cpp
@@ -25,8 +25,6 @@ const std::initializer_list<TestCase> Tests = {
     // addvl increments by whole registers, range [-32,31]
     // +(16 * vscale), one register's worth
     {16, true},
-    // +(8 * vscale), half a register's worth
-    {8, false},
     // -(32 * 16 * vscale)
     {-512, true},
     // -(33 * 16 * vscale)
@@ -35,6 +33,52 @@ const std::initializer_list<TestCase> Tests = {
     {496, true},
     // +(32 * 16 * vscale)
     {512, false},
+
+    // inc[h|w|d] increments by the number of 16/32/64bit elements in a
+    // register. mult_imm is in the range [1,16]
+    // +(mult_imm * num_elts * vscale)
+    // +(1 * 8 * vscale), 16 bit
+    {8, true},
+    // +(15 * 8 * vscale), 16 bit
+    {120, true},
+    // +(1 * 4 * vscale), 32 bit
+    {4, true},
+    // +(7 * 4 * vscale), 32 bit
+    {28, true},
+    // +(1 * 2 * vscale), 64 bit
+    {2, true},
+    // +(13 * 2 * vscale), 64 bit
+    {26, true},
+    // +(17 * 8 * vscale), 16 bit, out of range.
+    {136, false},
+    // +(19 * 2 * vscale), 64 bit, out of range.
+    {38, false},
+    // +(21 * 4 * vscale), 32 bit, out of range.
+    {84, false},
+
+    // dec[h|w|d] -- Same as above, but negative.
+    // -(mult_imm * num_elts * vscale)
+    // -(1 * 8 * vscale), 16 bit
+    {-8, true},
+    // -(15 * 8 * vscale), 16 bit
+    {-120, true},
+    // -(1 * 4 * vscale), 32 bit
+    {-4, true},
+    // -(7 * 4 * vscale), 32 bit
+    {-28, true},
+    // -(1 * 2 * vscale), 64 bit
+    {-2, true},
+    // -(13 * 2 * vscale), 64 bit
+    {-26, true},
+    // -(17 * 8 * vscale), 16 bit, out of range.
+    {-136, false},
+    // -(19 * 2 * vscale), 64 bit, out of range.
+    {-38, false},
+    // -(21 * 4 * vscale), 32 bit, out of range.
+    {-84, false},
+
+    // Invalid; not divisible by the above powers of 2.
+    {5, false},
 };
 } // namespace
 
@@ -48,7 +92,7 @@ TEST(Immediates, Immediates) {
   const Target *T = TargetRegistry::lookupTarget(TT, Error);
 
   std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
-      TT, "generic", "+sve", TargetOptions(), std::nullopt, std::nullopt,
+      TT, "generic", "+sve2", TargetOptions(), std::nullopt, std::nullopt,
       CodeGenOptLevel::Default));
   AArch64Subtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
                       TM->getTargetCPU(), TM->getTargetFeatureString(), *TM,