
[AArch64][SVE2] Lower OR to SLI/SRI #77555


Merged: 3 commits into llvm:main on Jan 12, 2024

Conversation

UsmanNadeem (Contributor)

The code builds on the existing NEON lowering, and the tests are adapted from the NEON tests, minus the tests for illegal types.

Change-Id: I11325949700fb7433f948bbe3e82dbc71696aecc

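For readers less familiar with these instructions, here is a minimal scalar sketch of the per-lane semantics the lowering relies on (the helper names and sample values are illustrative, not taken from the patch): SLI shifts the source element left and inserts it into the destination while preserving the destination's low bits, and SRI shifts right and inserts while preserving the destination's high bits. That is why an or (and x, mask), (shl/lshr y, #imm) with a matching mask can collapse into a single instruction.

#include <cassert>
#include <cstdint>

// Scalar model of one 8-bit vector lane (illustrative names, not from the patch).

// SLI: shift Src left by Shift and insert into Dst, keeping Dst's low Shift bits.
uint8_t sliLane(uint8_t Dst, uint8_t Src, unsigned Shift) {
  uint8_t LowMask = static_cast<uint8_t>((1u << Shift) - 1);
  return static_cast<uint8_t>((Dst & LowMask) | (Src << Shift));
}

// SRI: shift Src right by Shift and insert into Dst, keeping Dst's high Shift bits.
uint8_t sriLane(uint8_t Dst, uint8_t Src, unsigned Shift) {
  uint8_t HighMask = static_cast<uint8_t>(~0u << (8 - Shift));
  return static_cast<uint8_t>((Dst & HighMask) | (Src >> Shift));
}

int main() {
  // Mirrors testLeftGood16x8 below: (src1 & 0x7) | (src2 << 3) per byte lane.
  assert(sliLane(0xAB, 0x15, 3) == ((0xAB & 0x7) | ((0x15 << 3) & 0xFF)));
  // Mirrors testRightGood16x8 below: (src1 & 0xE0) | (src2 >> 3) per byte lane.
  assert(sriLane(0xAB, 0x95, 3) == ((0xAB & 0xE0) | (0x95 >> 3)));
  return 0;
}

The new sve2-sli-sri.ll test exercises exactly this shape: with +sve the and/lsl (or lsr)/orr sequence remains, while with +sve2 a single sli/sri is emitted.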
@llvmbot (Member) commented Jan 10, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Usman Nadeem (UsmanNadeem)

Changes

The code builds on the existing NEON lowering, and the tests are adapted from the NEON tests, minus the tests for illegal types.

Change-Id: I11325949700fb7433f948bbe3e82dbc71696aecc


Patch is 20.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77555.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+91-61)
  • (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+2-2)
  • (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+1)
  • (added) llvm/test/CodeGen/AArch64/sve2-sli-sri.ll (+263)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 102fd0c3dae2ab..269dde004bea78 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1358,6 +1358,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
       if (!Subtarget->isLittleEndian())
         setOperationAction(ISD::BITCAST, VT, Expand);
+
+      if (Subtarget->hasSVE2orSME())
+        // For SLI/SRI.
+        setOperationAction(ISD::OR, VT, Custom);
     }
 
     // Illegal unpacked integer vector types.
@@ -5411,7 +5415,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   }
 
   case Intrinsic::aarch64_neon_vsri:
-  case Intrinsic::aarch64_neon_vsli: {
+  case Intrinsic::aarch64_neon_vsli:
+  case Intrinsic::aarch64_sve_sri:
+  case Intrinsic::aarch64_sve_sli: {
     EVT Ty = Op.getValueType();
 
     if (!Ty.isVector())
@@ -5419,7 +5425,8 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
 
     assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
 
-    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
+    bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
+                        IntNo == Intrinsic::aarch64_sve_sri;
     unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
     return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
                        Op.getOperand(3));
@@ -12544,6 +12551,53 @@ static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
   return true;
 }
 
+static bool isAllInactivePredicate(SDValue N) {
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
+    N = N.getOperand(0);
+
+  return ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
+  unsigned NumElts = N.getValueType().getVectorMinNumElements();
+
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
+    N = N.getOperand(0);
+    // When reinterpreting from a type with fewer elements the "new" elements
+    // are not active, so bail if they're likely to be used.
+    if (N.getValueType().getVectorMinNumElements() < NumElts)
+      return false;
+  }
+
+  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
+    return true;
+
+  // "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
+  // or smaller than the implicit element type represented by N.
+  // NOTE: A larger element count implies a smaller element type.
+  if (N.getOpcode() == AArch64ISD::PTRUE &&
+      N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
+    return N.getValueType().getVectorMinNumElements() >= NumElts;
+
+  // If we're compiling for a specific vector-length, we can check if the
+  // pattern's VL equals that of the scalable vector at runtime.
+  if (N.getOpcode() == AArch64ISD::PTRUE) {
+    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+    unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+    unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+    if (MaxSVESize && MinSVESize == MaxSVESize) {
+      unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
+      unsigned PatNumElts =
+          getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
+      return PatNumElts == (NumElts * VScale);
+    }
+  }
+
+  return false;
+}
+
 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
 // BUILD_VECTORs with constant element C1, C2 is a constant, and:
@@ -12569,32 +12623,52 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
   // Is one of the operands an AND or a BICi? The AND may have been optimised to
   // a BICi in order to use an immediate instead of a register.
   // Is the other operand an shl or lshr? This will have been turned into:
-  // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
+  // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift
+  // or (AArch64ISD::SHL_PRED || AArch64ISD::SRL_PRED) mask, vector, #shiftVec.
   if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
-      (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
+      (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR ||
+       SecondOpc == AArch64ISD::SHL_PRED ||
+       SecondOpc == AArch64ISD::SRL_PRED)) {
     And = FirstOp;
     Shift = SecondOp;
 
   } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
-             (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
+             (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR ||
+              FirstOpc == AArch64ISD::SHL_PRED ||
+              FirstOpc == AArch64ISD::SRL_PRED)) {
     And = SecondOp;
     Shift = FirstOp;
   } else
     return SDValue();
 
   bool IsAnd = And.getOpcode() == ISD::AND;
-  bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
-
-  // Is the shift amount constant?
-  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
-  if (!C2node)
+  bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR ||
+                      Shift.getOpcode() == AArch64ISD::SRL_PRED;
+  bool ShiftHasPredOp = Shift.getOpcode() == AArch64ISD::SHL_PRED ||
+                        Shift.getOpcode() == AArch64ISD::SRL_PRED;
+
+  // Is the shift amount constant and are all lanes active?
+  uint64_t C2;
+  if (ShiftHasPredOp) {
+    if (!isAllActivePredicate(DAG, Shift.getOperand(0)))
+      return SDValue();
+    APInt C;
+    if (!ISD::isConstantSplatVector(Shift.getOperand(2).getNode(), C))
+      return SDValue();
+    C2 = C.getZExtValue();
+  } else if (ConstantSDNode *C2node =
+                 dyn_cast<ConstantSDNode>(Shift.getOperand(1)))
+    C2 = C2node->getZExtValue();
+  else
     return SDValue();
 
   uint64_t C1;
   if (IsAnd) {
     // Is the and mask vector all constant?
-    if (!isAllConstantBuildVector(And.getOperand(1), C1))
+    APInt C;
+    if (!ISD::isConstantSplatVector(And.getOperand(1).getNode(), C))
       return SDValue();
+    C1 = C.getZExtValue();
   } else {
     // Reconstruct the corresponding AND immediate from the two BICi immediates.
     ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
@@ -12606,7 +12680,6 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
   // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
   // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
   // how much one can shift elements of a particular size?
-  uint64_t C2 = C2node->getZExtValue();
   unsigned ElemSizeInBits = VT.getScalarSizeInBits();
   if (C2 > ElemSizeInBits)
     return SDValue();
@@ -12618,10 +12691,12 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
     return SDValue();
 
   SDValue X = And.getOperand(0);
-  SDValue Y = Shift.getOperand(0);
+  SDValue Y = (ShiftHasPredOp) ? Shift.getOperand(1) : Shift.getOperand(0);
+  SDValue Imm = (ShiftHasPredOp) ? DAG.getTargetConstant(C2, DL, MVT::i32)
+                                 : Shift.getOperand(1);
 
   unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
-  SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
+  SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Imm);
 
   LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
   LLVM_DEBUG(N->dump(&DAG));
@@ -12643,6 +12718,8 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
     return Res;
 
   EVT VT = Op.getValueType();
+  if (VT.isScalableVector())
+    return Op;
 
   SDValue LHS = Op.getOperand(0);
   BuildVectorSDNode *BVN =
@@ -17434,53 +17511,6 @@ static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
   return false;
 }
 
-static bool isAllInactivePredicate(SDValue N) {
-  // Look through cast.
-  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
-    N = N.getOperand(0);
-
-  return ISD::isConstantSplatVectorAllZeros(N.getNode());
-}
-
-static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
-  unsigned NumElts = N.getValueType().getVectorMinNumElements();
-
-  // Look through cast.
-  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
-    N = N.getOperand(0);
-    // When reinterpreting from a type with fewer elements the "new" elements
-    // are not active, so bail if they're likely to be used.
-    if (N.getValueType().getVectorMinNumElements() < NumElts)
-      return false;
-  }
-
-  if (ISD::isConstantSplatVectorAllOnes(N.getNode()))
-    return true;
-
-  // "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
-  // or smaller than the implicit element type represented by N.
-  // NOTE: A larger element count implies a smaller element type.
-  if (N.getOpcode() == AArch64ISD::PTRUE &&
-      N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
-    return N.getValueType().getVectorMinNumElements() >= NumElts;
-
-  // If we're compiling for a specific vector-length, we can check if the
-  // pattern's VL equals that of the scalable vector at runtime.
-  if (N.getOpcode() == AArch64ISD::PTRUE) {
-    const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
-    unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
-    unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
-    if (MaxSVESize && MinSVESize == MaxSVESize) {
-      unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
-      unsigned PatNumElts =
-          getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
-      return PatNumElts == (NumElts * VScale);
-    }
-  }
-
-  return false;
-}
-
 static SDValue performReinterpretCastCombine(SDNode *N) {
   SDValue LeafOp = SDValue(N, 0);
   SDValue Op = N->getOperand(0);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 344a153890631e..da9021f6e0feb5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3574,8 +3574,8 @@ let Predicates = [HasSVE2orSME] in {
   defm PMULLT_ZZZ   : sve2_pmul_long<0b1, "pmullt", int_aarch64_sve_pmullt_pair>;
 
   // SVE2 bitwise shift and insert
-  defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", int_aarch64_sve_sri>;
-  defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>;
+  defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", AArch64vsri>;
+  defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", AArch64vsli>;
 
   // SVE2 bitwise shift right and accumulate
   defm SSRA_ZZI  : sve2_int_bin_accum_shift_imm_right<0b00, "ssra",  AArch64ssra>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b17e215e200dea..a131cf8a6f5402 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -394,6 +394,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   void mirFileLoaded(MachineFunction &MF) const override;
 
   bool hasSVEorSME() const { return hasSVE() || hasSME(); }
+  bool hasSVE2orSME() const { return hasSVE2() || hasSME(); }
 
   // Return the known range for the bit length of SVE data registers. A value
   // of 0 means nothing is known about that particular limit beyong what's
diff --git a/llvm/test/CodeGen/AArch64/sve2-sli-sri.ll b/llvm/test/CodeGen/AArch64/sve2-sli-sri.ll
new file mode 100644
index 00000000000000..80999fb1f4864b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-sli-sri.ll
@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefixes=CHECK,SVE %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefixes=CHECK,SVE2 %s
+
+define <vscale x 16 x i8> @testLeftGood16x8(<vscale x 16 x i8> %src1, <vscale x 16 x i8> %src2) {
+; SVE-LABEL: testLeftGood16x8:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.b, z0.b, #0x7
+; SVE-NEXT:    lsl z1.b, z1.b, #3
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: testLeftGood16x8:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    sli z0.b, z1.b, #3
+; SVE2-NEXT:    ret
+  %and.i = and <vscale x 16 x i8> %src1, splat(i8 7)
+  %vshl_n = shl <vscale x 16 x i8> %src2, splat(i8 3)
+  %result = or <vscale x 16 x i8> %and.i, %vshl_n
+  ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @testLeftBad16x8(<vscale x 16 x i8> %src1, <vscale x 16 x i8> %src2) {
+; CHECK-LABEL: testLeftBad16x8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.b, #-91 // =0xffffffffffffffa5
+; CHECK-NEXT:    lsl z1.b, z1.b, #1
+; CHECK-NEXT:    and z0.d, z0.d, z2.d
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %and.i = and <vscale x 16 x i8> %src1, splat(i8 165)
+  %vshl_n = shl <vscale x 16 x i8> %src2, splat(i8 1)
+  %result = or <vscale x 16 x i8> %and.i, %vshl_n
+  ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @testRightGood16x8(<vscale x 16 x i8> %src1, <vscale x 16 x i8> %src2) {
+; SVE-LABEL: testRightGood16x8:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.b, z0.b, #0xe0
+; SVE-NEXT:    lsr z1.b, z1.b, #3
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: testRightGood16x8:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    sri z0.b, z1.b, #3
+; SVE2-NEXT:    ret
+  %and.i = and <vscale x 16 x i8> %src1, splat(i8 224)
+  %vshl_n = lshr <vscale x 16 x i8> %src2, splat(i8 3)
+  %result = or <vscale x 16 x i8> %and.i, %vshl_n
+  ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 16 x i8> @testRightBad16x8(<vscale x 16 x i8> %src1, <vscale x 16 x i8> %src2) {
+; CHECK-LABEL: testRightBad16x8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z2.b, #-91 // =0xffffffffffffffa5
+; CHECK-NEXT:    lsr z1.b, z1.b, #1
+; CHECK-NEXT:    and z0.d, z0.d, z2.d
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %and.i = and <vscale x 16 x i8> %src1, splat(i8 165)
+  %vshl_n = lshr <vscale x 16 x i8> %src2, splat(i8 1)
+  %result = or <vscale x 16 x i8> %and.i, %vshl_n
+  ret <vscale x 16 x i8> %result
+}
+
+define <vscale x 8 x i16> @testLeftGood8x16(<vscale x 8 x i16> %src1, <vscale x 8 x i16> %src2) {
+; SVE-LABEL: testLeftGood8x16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.h, z0.h, #0x3fff
+; SVE-NEXT:    lsl z1.h, z1.h, #14
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: testLeftGood8x16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    sli z0.h, z1.h, #14
+; SVE2-NEXT:    ret
+  %and.i = and <vscale x 8 x i16> %src1, splat(i16 16383)
+  %vshl_n = shl <vscale x 8 x i16> %src2, splat(i16 14)
+  %result = or <vscale x 8 x i16> %and.i, %vshl_n
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @testLeftBad8x16(<vscale x 8 x i16> %src1, <vscale x 8 x i16> %src2) {
+; CHECK-LABEL: testLeftBad8x16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #16500 // =0x4074
+; CHECK-NEXT:    lsl z1.h, z1.h, #14
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    and z0.d, z0.d, z2.d
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %and.i = and <vscale x 8 x i16> %src1, splat(i16 16500)
+  %vshl_n = shl <vscale x 8 x i16> %src2, splat(i16 14)
+  %result = or <vscale x 8 x i16> %and.i, %vshl_n
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @testRightGood8x16(<vscale x 8 x i16> %src1, <vscale x 8 x i16> %src2) {
+; SVE-LABEL: testRightGood8x16:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.h, z0.h, #0xfffc
+; SVE-NEXT:    lsr z1.h, z1.h, #14
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: testRightGood8x16:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    sri z0.h, z1.h, #14
+; SVE2-NEXT:    ret
+  %and.i = and <vscale x 8 x i16> %src1, splat(i16 65532)
+  %vshl_n = lshr <vscale x 8 x i16> %src2, splat(i16 14)
+  %result = or <vscale x 8 x i16> %and.i, %vshl_n
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 8 x i16> @testRightBad8x16(<vscale x 8 x i16> %src1, <vscale x 8 x i16> %src2) {
+; CHECK-LABEL: testRightBad8x16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #16500 // =0x4074
+; CHECK-NEXT:    lsr z1.h, z1.h, #14
+; CHECK-NEXT:    mov z2.h, w8
+; CHECK-NEXT:    and z0.d, z0.d, z2.d
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %and.i = and <vscale x 8 x i16> %src1, splat(i16 16500)
+  %vshl_n = lshr <vscale x 8 x i16> %src2, splat(i16 14)
+  %result = or <vscale x 8 x i16> %and.i, %vshl_n
+  ret <vscale x 8 x i16> %result
+}
+
+define <vscale x 4 x i32> @testLeftGood4x32(<vscale x 4 x i32> %src1, <vscale x 4 x i32> %src2) {
+; SVE-LABEL: testLeftGood4x32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.s, z0.s, #0x3fffff
+; SVE-NEXT:    lsl z1.s, z1.s, #22
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: testLeftGood4x32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    sli z0.s, z1.s, #22
+; SVE2-NEXT:    ret
+  %and.i = and <vscale x 4 x i32> %src1, splat(i32 4194303)
+  %vshl_n = shl <vscale x 4 x i32> %src2, splat(i32 22)
+  %result = or <vscale x 4 x i32> %and.i, %vshl_n
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @testLeftBad4x32(<vscale x 4 x i32> %src1, <vscale x 4 x i32> %src2) {
+; CHECK-LABEL: testLeftBad4x32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0x3ffffc
+; CHECK-NEXT:    lsl z1.s, z1.s, #22
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %and.i = and <vscale x 4 x i32> %src1, splat(i32 4194300)
+  %vshl_n = shl <vscale x 4 x i32> %src2, splat(i32 22)
+  %result = or <vscale x 4 x i32> %and.i, %vshl_n
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @testRightGood4x32(<vscale x 4 x i32> %src1, <vscale x 4 x i32> %src2) {
+; SVE-LABEL: testRightGood4x32:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.s, z0.s, #0xfffffc00
+; SVE-NEXT:    lsr z1.s, z1.s, #22
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: testRightGood4x32:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    sri z0.s, z1.s, #22
+; SVE2-NEXT:    ret
+  %and.i = and <vscale x 4 x i32> %src1, splat(i32 4294966272)
+  %vshl_n = lshr <vscale x 4 x i32> %src2, splat(i32 22)
+  %result = or <vscale x 4 x i32> %and.i, %vshl_n
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 4 x i32> @testRightBad4x32(<vscale x 4 x i32> %src1, <vscale x 4 x i32> %src2) {
+; CHECK-LABEL: testRightBad4x32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0x3ffffc
+; CHECK-NEXT:    lsr z1.s, z1.s, #22
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %and.i = and <vscale x 4 x i32> %src1, splat(i32 4194300)
+  %vshl_n = lshr <vscale x 4 x i32> %src2, splat(i32 22)
+  %result = or <vscale x 4 x i32> %and.i, %vshl_n
+  ret <vscale x 4 x i32> %result
+}
+
+define <vscale x 2 x i64> @testLeftGood2x64(<vscale x 2 x i64> %src1, <vscale x 2 x i64> %src2) {
+; SVE-LABEL: testLeftGood2x64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.d, z0.d, #0xffffffffffff
+; SVE-NEXT:    lsl z1.d, z1.d, #48
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT:    ret
+;
+; SVE2-LABEL: testLeftGood2x64:
+; SVE2:       // %bb.0:
+; SVE2-NEXT:    sli z0.d, z1.d, #48
+; SVE2-NEXT:    ret
+  %and.i = and <vscale x 2 x i64> %src1, splat(i64 281474976710655)
+  %vshl_n = shl <vscale x 2 x i64> %src2, splat(i64 48)
+  %result = or <vscale x 2 x i64> %and.i, %vshl_n
+  ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i64> @testLeftBad2x64(<vscale x 2 x i64> %src1, <vscale x 2 x i64> %src2) {
+; CHECK-LABEL: testLeftBad2x64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #10 // =0xa
+; CHECK-NEXT:    lsl z1.d, z1.d, #48
+; CHECK-NEXT:    movk x8, #1, lsl #48
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    and z0.d, z0.d, z2.d
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %and.i = and <vscale x 2 x i64> %src1, splat(i64 281474976710666)
+  %vshl_n = shl <vscale x 2 x i64> %src2, splat(i64 48)
+  %result = or <vscale x 2 x i64> %and.i, %vshl_n
+  ret <vscale x 2 x i64> %result
+}
+
+define <vscale x 2 x i64> @testRightGood2x64(<vscale x 2 x i64> %src1, <vscale x 2 x i64> %src2) {
+; SVE-LABEL: testRightGood2x64:
+; SVE:       // %bb.0:
+; SVE-NEXT:    and z0.d, z0.d, #0xffffffffffff0000
+; SVE-NEXT:    lsr z1.d, z1.d, #48
+; SVE-NEXT:    orr z0.d, z0.d, z1.d
+; SVE-NEXT...
[truncated]

@davemgreen (Collaborator) left a comment


I happened to have a patch a while back that changed the And in tryLowerToSLI to use KnownBits. It was causing some issues in the tests, though, and it should be mutually exclusive with this one; it just might help it trigger in more cases.

The patch here looks good, from what I can tell.
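To make the KnownBits idea concrete, a hypothetical scalar sketch (the helper name and values are assumptions, not the DAG-level patch): instead of matching an explicit AND/BICi with the complementary mask, one could ask whether the element bits that the shifted operand will occupy are already known to be zero in the other operand.

#include <cassert>
#include <cstdint>

// Hypothetical scalar form of the check: x | (y << Shift) behaves like
// "sli x, y, #Shift" whenever every bit position that (y << Shift) can write
// is already known zero in x. Assumes Shift < 64 and ElemBits <= 64.
bool canFoldOrIntoSliWithKnownBits(uint64_t KnownZeroOfX, unsigned Shift,
                                   unsigned ElemBits) {
  uint64_t ElemMask = (ElemBits >= 64) ? ~0ULL : ((1ULL << ElemBits) - 1);
  uint64_t WrittenBits = (~0ULL << Shift) & ElemMask;
  return (WrittenBits & ~KnownZeroOfX) == 0;
}

int main() {
  // An explicit "and x, #0x7" makes the top five bits of an 8-bit lane known
  // zero, so shifting the other operand left by 3 and inserting is safe.
  assert(canFoldOrIntoSliWithKnownBits(~0x7ULL, 3, 8));
  // Mirrors testLeftBad16x8: a 0xA5 mask leaves some written bits possibly set.
  assert(!canFoldOrIntoSliWithKnownBits(~0xA5ULL, 1, 8));
  return 0;
}

In DAG terms this would mean querying computeKnownBits on the non-shift operand rather than requiring an ISD::AND or BICi node, which presumably lets the combine fire in more cases but also perturbs existing tests, as noted above.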

@davemgreen (Collaborator) left a comment


LGTM

@UsmanNadeem merged commit 792fa23 into llvm:main on Jan 12, 2024
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
Code builds on NEON code and the tests are adapted from NEON tests
minus the tests for illegal types.