Merge commit 'ea2d2d10ee1e' from llvm.org/main into experimental/cas/main

git apple-llvm automerger · git apple-llvm automerger · commit 5061218e10ca · 2023-04-12T13:04:56.000+09:00
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1151,6 +1151,12 @@ bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
       return false;
     }
 
+    // FIXME: We temporarily disable post increment load from program memory,
+    //        due to bug https://github.com/llvm/llvm-project/issues/59914.
+    if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+      if (AVR::isProgramMemoryAccess(LD))
+        return false;
+
     Base = Op->getOperand(0);
     Offset = DAG.getConstant(RHSC, DL, MVT::i8);
     AM = ISD::POST_INC;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -3165,6 +3165,27 @@ bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
       return true;
+    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
+    // in which the immediate has two set bits. Or break (MUL x, imm)
+    // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
+    // equals (1 << s0) - (1 << s1).
+    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
+      unsigned Shifts = Imm.countr_zero();
+      // Reject immediates which can be composed via a single LU12I.W.
+      if (Shifts >= 12)
+        return false;
+      // Reject multiplications that can be optimized to
+      // (SLLI (ALSL x, x, 1/2/3/4), s).
+      APInt ImmPop = Imm.ashr(Shifts);
+      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
+        return false;
+      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
+      // since it needs one more instruction than the other 3 cases.
+      APInt ImmSmall = APInt(Imm.getBitWidth(), 1 << Shifts, true);
+      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
+          (ImmSmall - Imm).isPowerOf2())
+        return true;
+    }
   }
 
   return false;
diff --git a/llvm/test/CodeGen/AVR/load.ll b/llvm/test/CodeGen/AVR/load.ll
@@ -140,3 +140,18 @@ while.end:                                        ; preds = %while.body, %entry
   %r.0.lcssa = phi i16 [ 0, %entry ], [ %add, %while.body ]
   ret i16 %r.0.lcssa
 }
+
+define ptr addrspace(1) @load16_postinc_progmem(ptr addrspace(1) readonly %0) {
+; CHECK-LABEL: load16_postinc_progmem:
+; CHECK:         movw r30, [[REG0:r[0-9]+]]
+; CHECK:         lpm  [[REG1:r[0-9]+]], Z+
+; CHECK:         lpm  [[REG1:r[0-9]+]], Z
+; CHECK:         call foo
+; CHECK:         adiw [[REG0:r[0-9]+]], 2
+  %2 = load i16, ptr addrspace(1) %0, align 1
+  tail call addrspace(1) void @foo(i16 %2)
+  %3 = getelementptr inbounds i16, ptr addrspace(1) %0, i16 1
+  ret ptr addrspace(1) %3
+}
+
+declare void @foo(i16)
diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/mul.ll
@@ -1304,3 +1304,218 @@ define i64 @mul_i64_4352(i64 %a) {
   %b = mul i64 %a, 4352
   ret i64 %b
 }
+
+define signext i32 @mul_i32_65792(i32 %a) {
+; LA32-LABEL: mul_i32_65792:
+; LA32:       # %bb.0:
+; LA32-NEXT:    slli.w $a1, $a0, 8
+; LA32-NEXT:    slli.w $a0, $a0, 16
+; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i32_65792:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a1, $a0, 8
+; LA64-NEXT:    slli.d $a0, $a0, 16
+; LA64-NEXT:    add.w $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %b = mul i32 %a, 65792
+  ret i32 %b
+}
+
+define signext i32 @mul_i32_65280(i32 %a) {
+; LA32-LABEL: mul_i32_65280:
+; LA32:       # %bb.0:
+; LA32-NEXT:    slli.w $a1, $a0, 8
+; LA32-NEXT:    slli.w $a0, $a0, 16
+; LA32-NEXT:    sub.w $a0, $a0, $a1
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i32_65280:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a1, $a0, 8
+; LA64-NEXT:    slli.d $a0, $a0, 16
+; LA64-NEXT:    sub.w $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %b = mul i32 %a, 65280
+  ret i32 %b
+}
+
+define signext i32 @mul_i32_minus_65280(i32 %a) {
+; LA32-LABEL: mul_i32_minus_65280:
+; LA32:       # %bb.0:
+; LA32-NEXT:    slli.w $a1, $a0, 16
+; LA32-NEXT:    slli.w $a0, $a0, 8
+; LA32-NEXT:    sub.w $a0, $a0, $a1
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i32_minus_65280:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a1, $a0, 16
+; LA64-NEXT:    slli.d $a0, $a0, 8
+; LA64-NEXT:    sub.w $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %b = mul i32 %a, -65280
+  ret i32 %b
+}
+
+define i64 @mul_i64_65792(i64 %a) {
+; LA32-LABEL: mul_i64_65792:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a2, 16
+; LA32-NEXT:    ori $a2, $a2, 256
+; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    mulh.wu $a3, $a0, $a2
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i64_65792:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a1, $a0, 8
+; LA64-NEXT:    slli.d $a0, $a0, 16
+; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %b = mul i64 %a, 65792
+  ret i64 %b
+}
+
+define i64 @mul_i64_65280(i64 %a) {
+; LA32-LABEL: mul_i64_65280:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a2, 15
+; LA32-NEXT:    ori $a2, $a2, 3840
+; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    mulh.wu $a3, $a0, $a2
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i64_65280:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a1, $a0, 8
+; LA64-NEXT:    slli.d $a0, $a0, 16
+; LA64-NEXT:    sub.d $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %b = mul i64 %a, 65280
+  ret i64 %b
+}
+
+define i64 @mul_i64_minus_65280(i64 %a) {
+; LA32-LABEL: mul_i64_minus_65280:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a2, -16
+; LA32-NEXT:    ori $a2, $a2, 256
+; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    mulh.wu $a3, $a0, $a2
+; LA32-NEXT:    sub.w $a3, $a3, $a0
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i64_minus_65280:
+; LA64:       # %bb.0:
+; LA64-NEXT:    slli.d $a1, $a0, 16
+; LA64-NEXT:    slli.d $a0, $a0, 8
+; LA64-NEXT:    sub.d $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %b = mul i64 %a, -65280
+  ret i64 %b
+}
+
+;; This multiplication is not transformed, because
+;; 1088 can be composed via a single ORI.
+define i64 @mul_i64_1088(i64 %a) {
+; LA32-LABEL: mul_i64_1088:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ori $a2, $zero, 1088
+; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    mulh.wu $a3, $a0, $a2
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i64_1088:
+; LA64:       # %bb.0:
+; LA64-NEXT:    alsl.d $a0, $a0, $a0, 4
+; LA64-NEXT:    slli.d $a0, $a0, 6
+; LA64-NEXT:    ret
+  %b = mul i64 %a, 1088
+  ret i64 %b
+}
+
+;; This multiplication is not transformed, because
+;; -992 can be composed via a single ADDI.
+define i64 @mul_i64_minus_992(i64 %a) {
+; LA32-LABEL: mul_i64_minus_992:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $a2, $zero, -992
+; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    mulh.wu $a3, $a0, $a2
+; LA32-NEXT:    sub.w $a3, $a3, $a0
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i64_minus_992:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.w $a1, $zero, -992
+; LA64-NEXT:    mul.d $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %b = mul i64 %a, -992
+  ret i64 %b
+}
+
+;; This multiplication is not transformed, because
+;; 4456448 can be composed via a single LU12I.W.
+define i64 @mul_i64_4456448(i64 %a) {
+; LA32-LABEL: mul_i64_4456448:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a2, 1088
+; LA32-NEXT:    mul.w $a1, $a1, $a2
+; LA32-NEXT:    mulh.wu $a3, $a0, $a2
+; LA32-NEXT:    add.w $a1, $a3, $a1
+; LA32-NEXT:    mul.w $a0, $a0, $a2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i64_4456448:
+; LA64:       # %bb.0:
+; LA64-NEXT:    alsl.d $a0, $a0, $a0, 4
+; LA64-NEXT:    slli.d $a0, $a0, 18
+; LA64-NEXT:    ret
+  %b = mul i64 %a, 4456448
+  ret i64 %b
+}
+
+;; This multiplication is not transformed, because
+;; 65280 is used multiple times.
+define i64 @mul_i64_65280_twice(i64 %a, i64 %b) {
+; LA32-LABEL: mul_i64_65280_twice:
+; LA32:       # %bb.0:
+; LA32-NEXT:    lu12i.w $a4, 15
+; LA32-NEXT:    ori $a4, $a4, 3840
+; LA32-NEXT:    mul.w $a3, $a3, $a4
+; LA32-NEXT:    mulh.wu $a5, $a2, $a4
+; LA32-NEXT:    add.w $a3, $a5, $a3
+; LA32-NEXT:    mul.w $a1, $a1, $a4
+; LA32-NEXT:    mulh.wu $a5, $a0, $a4
+; LA32-NEXT:    add.w $a1, $a5, $a1
+; LA32-NEXT:    xor $a1, $a1, $a3
+; LA32-NEXT:    mul.w $a2, $a2, $a4
+; LA32-NEXT:    mul.w $a0, $a0, $a4
+; LA32-NEXT:    xor $a0, $a0, $a2
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: mul_i64_65280_twice:
+; LA64:       # %bb.0:
+; LA64-NEXT:    lu12i.w $a2, 15
+; LA64-NEXT:    ori $a2, $a2, 3840
+; LA64-NEXT:    mul.d $a1, $a1, $a2
+; LA64-NEXT:    mul.d $a0, $a0, $a2
+; LA64-NEXT:    xor $a0, $a0, $a1
+; LA64-NEXT:    ret
+  %c = mul i64 %a, 65280
+  %d = mul i64 %b, 65280
+  %e = xor i64 %c, %d
+  ret i64 %e
+}
diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h
@@ -50,6 +50,13 @@ class TrackingListener : public RewriterBase::Listener,
   virtual Operation *findReplacementOp(Operation *op,
                                        ValueRange newValues) const;
 
+  /// Notify the listener that the pattern failed to match the given operation,
+  /// and provide a callback to populate a diagnostic with the reason why the
+  /// failure occurred.
+  LogicalResult
+  notifyMatchFailure(Location loc,
+                     function_ref<void(Diagnostic &)> reasonCallback) override;
+
   /// This function is called when a tracked payload op is dropped because no
   /// replacement op was found. Derived classes can implement this function for
   /// custom error handling.
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp
@@ -244,16 +244,16 @@ Value mlir::sparse_tensor::genIsNonzero(OpBuilder &builder, mlir::Location loc,
 }
 
 void mlir::sparse_tensor::genReshapeDstShape(
-    Location loc, PatternRewriter &rewriter, SmallVectorImpl<Value> &dstShape,
+    OpBuilder &builder, Location loc, SmallVectorImpl<Value> &dstShape,
     ArrayRef<Value> srcShape, ArrayRef<StaticSize> staticDstShape,
     ArrayRef<ReassociationIndices> reassociation) {
   // Collapse shape.
   if (reassociation.size() < srcShape.size()) {
     unsigned start = 0;
     for (const auto &map : llvm::enumerate(reassociation)) {
-      auto dstDim = constantIndex(rewriter, loc, 1);
+      auto dstDim = constantIndex(builder, loc, 1);
       for (unsigned i = start; i < start + map.value().size(); i++) {
-        dstDim = rewriter.create<arith::MulIOp>(loc, dstDim, srcShape[i]);
+        dstDim = builder.create<arith::MulIOp>(loc, dstDim, srcShape[i]);
       }
       dstShape.push_back(dstDim);
       start = start + map.value().size();
@@ -285,13 +285,13 @@ void mlir::sparse_tensor::genReshapeDstShape(
           }
         }
         // Compute the dynamic dimension size.
-        Value productVal = constantIndex(rewriter, loc, product);
+        Value productVal = constantIndex(builder, loc, product);
         Value dynamicSize =
-            rewriter.create<arith::DivUIOp>(loc, srcDim, productVal);
+            builder.create<arith::DivUIOp>(loc, srcDim, productVal);
         dstShape.push_back(dynamicSize);
       } else {
         // The expanded dimension is statically known.
-        dstShape.push_back(constantIndex(rewriter, loc, staticDstShape[j]));
+        dstShape.push_back(constantIndex(builder, loc, staticDstShape[j]));
       }
     }
     start = start + map.size();
@@ -512,8 +512,8 @@ Operation *mlir::sparse_tensor::getTop(Operation *op) {
 }
 
 void sparse_tensor::foreachInSparseConstant(
-    Location loc, RewriterBase &rewriter, SparseElementsAttr attr,
-    AffineMap order, function_ref<void(ArrayRef<Value>, Value)> callback) {
+    OpBuilder &builder, Location loc, SparseElementsAttr attr, AffineMap order,
+    function_ref<void(ArrayRef<Value>, Value)> callback) {
   const Dimension dimRank = getSparseTensorType(attr).getDimRank();
   const auto coordinates = attr.getIndices().getValues<IntegerAttr>();
   const auto values = attr.getValues().getValues<Attribute>();
@@ -560,17 +560,17 @@ void sparse_tensor::foreachInSparseConstant(
     cvs.clear();
     for (Dimension d = 0; d < dimRank; d++) {
       auto crd = elems[i].first[d].getInt();
-      cvs.push_back(rewriter.create<arith::ConstantIndexOp>(loc, crd));
+      cvs.push_back(builder.create<arith::ConstantIndexOp>(loc, crd));
     }
     // Remap value.
     Value val;
     if (attr.getElementType().isa<ComplexType>()) {
       auto valAttr = elems[i].second.cast<ArrayAttr>();
-      val = rewriter.create<complex::ConstantOp>(loc, attr.getElementType(),
-                                                 valAttr);
+      val = builder.create<complex::ConstantOp>(loc, attr.getElementType(),
+                                                valAttr);
     } else {
       auto valAttr = elems[i].second.cast<TypedAttr>();
-      val = rewriter.create<arith::ConstantOp>(loc, valAttr);
+      val = builder.create<arith::ConstantOp>(loc, valAttr);
     }
     assert(val);
     callback(cvs, val);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h
@@ -89,7 +89,7 @@ Value genIsNonzero(OpBuilder &builder, Location loc, Value v);
 /// Computes the shape of destination tensor of a reshape operator. This is only
 /// used when operands have dynamic shape. The shape of the destination is
 /// stored into dstShape.
-void genReshapeDstShape(Location loc, PatternRewriter &rewriter,
+void genReshapeDstShape(OpBuilder &builder, Location loc,
                         SmallVectorImpl<Value> &dstShape,
                         ArrayRef<Value> srcShape,
                         ArrayRef<StaticSize> staticDstShape,
@@ -211,8 +211,8 @@ Operation *getTop(Operation *op);
 /// %v3 = complex.constant (5.0, 6.0)
 /// callback({%c3}, %v3)
 void foreachInSparseConstant(
-    Location loc, RewriterBase &rewriter, SparseElementsAttr attr,
-    AffineMap order, function_ref<void(ArrayRef<Value>, Value)> callback);
+    OpBuilder &builder, Location loc, SparseElementsAttr attr, AffineMap order,
+    function_ref<void(ArrayRef<Value>, Value)> callback);
 
 /// Loads `size`-many values from the memref, which must have rank-1 and
 /// size greater-or-equal to `size`.  If the optional `(offsetIdx,offsetVal)`
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -489,7 +489,7 @@ genSparse2SparseReshape(ReshapeOp op, typename ReshapeOp::Adaptor adaptor,
     // Static "shapes" are in fact "sizes".
     fillDimShape(rewriter, loc, dstTp, dstDimSizes);
   else
-    genReshapeDstShape(loc, rewriter, dstDimSizes, srcDimSizes,
+    genReshapeDstShape(rewriter, loc, dstDimSizes, srcDimSizes,
                        dstTp.getDimShape(), op.getReassociationIndices());
   const Value coo =
       params.genBuffers(dstTp, dstDimSizes).genNewCall(Action::kEmptyCOO);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp