
Commit cb8e193

SC llvm team authored and committed
Merged main:b2ea04673b782f95ac9841f87df8bb5f7b561067 into amd-gfx:0ce3951c67f4
Local branch amd-gfx 0ce3951: Merged main:c93c76b562784926b22a69d3f82a5032dcb4a274 into amd-gfx:eb3c7a678fbb
Remote branch main b2ea046: [MLIR] Add missing omp_gen dep to MLIROpenMPDialect (llvm#84552)
2 parents 0ce3951 + b2ea046 commit cb8e193

34 files changed (+1034, -716 lines)

.ci/monolithic-linux.sh

Lines changed: 1 addition & 1 deletion
@@ -54,4 +54,4 @@ cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \
 
 echo "--- ninja"
 # Targets are not escaped as they are passed as separate arguments.
-ninja -C -k 0 "${BUILD_DIR}" ${targets}
+ninja -C "${BUILD_DIR}" -k 0 ${targets}

.ci/monolithic-windows.sh

Lines changed: 1 addition & 1 deletion
@@ -62,4 +62,4 @@ cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \
 
 echo "--- ninja"
 # Targets are not escaped as they are passed as separate arguments.
-ninja -C -k 0 "${BUILD_DIR}" ${targets}
+ninja -C "${BUILD_DIR}" -k 0 ${targets}

.github/workflows/release-binaries.yml

Lines changed: 2 additions & 2 deletions
@@ -71,8 +71,8 @@ jobs:
   # | X.Y.Z | -final
   run: |
     tag="${{ github.ref_name }}"
-    trimmed=$(echo ${{ inputs.tag }} | xargs)
-    [[ "$trimmed" != "" ]] && tag="$trimmed"
+    trimmed=$(echo ${{ inputs.release-version }} | xargs)
+    [[ "$trimmed" != "" ]] && tag="llvmorg-$trimmed"
     if [ "$tag" = "main" ]; then
       # If tag is main, then we've been triggered by a scheduled so pass so
       # use the head commit as the tag.
clang/test/CodeGen/remote-traps.c

Lines changed: 3 additions & 3 deletions
@@ -1,15 +1,15 @@
 // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s
 // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow -mllvm -clang-remove-traps -mllvm -remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call void @llvm.ubsantrap" --check-prefixes=REMOVE
 
-int f(int x) {
+int test(int x) {
   return x + 123;
 }
 
-// CHECK-LABEL: define dso_local noundef i32 @f(
+// CHECK-LABEL: define {{.*}}i32 @test(
 // CHECK: call { i32, i1 } @llvm.sadd.with.overflow.i32(
 // CHECK: trap:
 // CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
 // CHECK-NEXT: unreachable
 
-// REMOVE-LABEL: define dso_local noundef i32 @f(
+// REMOVE-LABEL: define {{.*}}i32 @test(
 // REMOVE: call { i32, i1 } @llvm.sadd.with.overflow.i32(

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 492341
+#define LLVM_MAIN_REVISION 492352
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP

llvm/lib/IR/BasicBlock.cpp

Lines changed: 2 additions & 0 deletions
@@ -348,6 +348,8 @@ const Instruction* BasicBlock::getFirstNonPHI() const {
 
 BasicBlock::const_iterator BasicBlock::getFirstNonPHIIt() const {
   const Instruction *I = getFirstNonPHI();
+  if (!I)
+    return end();
   BasicBlock::const_iterator It = I->getIterator();
   // Set the head-inclusive bit to indicate that this iterator includes
   // any debug-info at the start of the block. This is a no-op unless the
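With the added guard, getFirstNonPHIIt() degrades gracefully when getFirstNonPHI() finds no suitable instruction. A minimal caller-side sketch, assuming a const BasicBlock reference BB and a hypothetical helper name chosen only for illustration (not part of this commit):

// Hypothetical helper: with the guard above, a block that contains no
// non-PHI instruction yields end() instead of dereferencing a null pointer.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

const Instruction *firstNonPHIOrNull(const BasicBlock &BB) {
  BasicBlock::const_iterator It = BB.getFirstNonPHIIt();
  return It != BB.end() ? &*It : nullptr;
}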

llvm/utils/git/github-automation.py

Lines changed: 1 addition & 1 deletion
@@ -586,7 +586,7 @@ def create_pull_request(
             body=body,
             base=release_branch_for_issue,
             head=head,
-            maintainer_can_modify=False,
+            maintainer_can_modify=True,
         )
 
         pull.as_issue().edit(milestone=self.issue.milestone)

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Lines changed: 4 additions & 0 deletions
@@ -481,6 +481,10 @@ struct ControlDropUnitDims {
     if (auto genericOp = dyn_cast_or_null<GenericOp>(op)) {
       return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));
     }
+    if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {
+      return llvm::to_vector(
+          llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
+    }
     return SmallVector<unsigned>{};
   };
 };
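The default control function now offers every dimension of a tensor.pad source as a droppable candidate, mirroring the existing linalg.generic case. A hedged sketch of how a client could narrow that set with a custom controlFn; the populate entry point is the one extended in DropUnitDims.cpp further down, while the function name, the context variable, and the restriction to dimension 0 are illustrative choices, not part of this commit:

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/IR/PatternMatch.h"
using namespace mlir;

// Illustrative configuration: only ever offer the leading dimension of a
// candidate op as a unit-extent dimension to drop.
void addRestrictedUnitDimFolding(RewritePatternSet &patterns) {
  linalg::ControlDropUnitDims options;
  options.controlFn = [](Operation *op) -> SmallVector<unsigned> {
    return SmallVector<unsigned>{0};
  };
  linalg::populateFoldUnitExtentDimsViaReshapesPatterns(patterns, options);
}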

mlir/lib/Conversion/NVVMToLLVM/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRNVVMToLLVM
   Core
 
   LINK_LIBS PUBLIC
+  MLIRFuncDialect
   MLIRGPUDialect
   MLIRLLVMCommonConversion
   MLIRLLVMDialect

mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ add_mlir_dialect_library(MLIRAMDGPUTransforms
   MLIRAMDGPUUtils
   MLIRArithDialect
   MLIRControlFlowDialect
+  MLIRFuncDialect
   MLIRIR
   MLIRPass
   MLIRTransforms

mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp

Lines changed: 122 additions & 0 deletions
@@ -561,6 +561,126 @@ struct DropUnitDims : public OpRewritePattern<GenericOp> {
 };
 } // namespace
 
+//===---------------------------------------------------------------------===//
+// Drop dimensions that are unit-extents within tensor operations.
+//===---------------------------------------------------------------------===//
+
+namespace {
+struct DropPadUnitDims : public OpRewritePattern<tensor::PadOp> {
+  DropPadUnitDims(MLIRContext *context, ControlDropUnitDims options = {},
+                  PatternBenefit benefit = 1)
+      : OpRewritePattern(context, benefit), options(std::move(options)) {}
+
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
+                                PatternRewriter &rewriter) const override {
+    // 1a. Get the allowed list of dimensions to drop from the `options`.
+    SmallVector<unsigned> allowedUnitDims = options.controlFn(padOp);
+    if (allowedUnitDims.empty()) {
+      return rewriter.notifyMatchFailure(
+          padOp, "control function returns no allowed unit dims to prune");
+    }
+
+    if (padOp.getSourceType().getEncoding()) {
+      return rewriter.notifyMatchFailure(
+          padOp, "cannot collapse dims of tensor with encoding");
+    }
+
+    // Fail for non-constant padding values. The body of the pad could
+    // depend on the padding indices and/or properties of the padded
+    // tensor so for now we fail.
+    // TODO: Support non-constant padding values.
+    Value paddingVal = padOp.getConstantPaddingValue();
+    if (!paddingVal) {
+      return rewriter.notifyMatchFailure(
+          padOp, "unimplemented: non-constant padding value");
+    }
+
+    ArrayRef<int64_t> sourceShape = padOp.getSourceType().getShape();
+    int64_t padRank = sourceShape.size();
+
+    auto isStaticZero = [](OpFoldResult f) {
+      std::optional<int64_t> maybeInt = getConstantIntValue(f);
+      return maybeInt && *maybeInt == 0;
+    };
+
+    llvm::SmallDenseSet<unsigned> unitDimsFilter(allowedUnitDims.begin(),
+                                                 allowedUnitDims.end());
+    llvm::SmallDenseSet<unsigned> unitDims;
+    SmallVector<int64_t> newShape;
+    SmallVector<OpFoldResult> newLowPad;
+    SmallVector<OpFoldResult> newHighPad;
+    for (const auto [dim, size, low, high] :
+         zip_equal(llvm::seq(static_cast<int64_t>(0), padRank), sourceShape,
+                   padOp.getMixedLowPad(), padOp.getMixedHighPad())) {
+      if (unitDimsFilter.contains(dim) && size == 1 && isStaticZero(low) &&
+          isStaticZero(high)) {
+        unitDims.insert(dim);
+      } else {
+        newShape.push_back(size);
+        newLowPad.push_back(low);
+        newHighPad.push_back(high);
+      }
+    }
+
+    if (unitDims.empty()) {
+      return rewriter.notifyMatchFailure(padOp, "no unit dims to collapse");
+    }
+
+    ReassociationIndices reassociationGroup;
+    SmallVector<ReassociationIndices> reassociationMap;
+    int64_t dim = 0;
+    while (dim < padRank && unitDims.contains(dim))
+      reassociationGroup.push_back(dim++);
+    while (dim < padRank) {
+      assert(!unitDims.contains(dim) && "expected non unit-extent");
+      reassociationGroup.push_back(dim);
+      dim++;
+      // Fold all following dimensions that are unit-extent.
+      while (dim < padRank && unitDims.contains(dim))
+        reassociationGroup.push_back(dim++);
+      reassociationMap.push_back(reassociationGroup);
+      reassociationGroup.clear();
+    }
+
+    Value collapsedSource =
+        collapseValue(rewriter, padOp.getLoc(), padOp.getSource(), newShape,
+                      reassociationMap, options.rankReductionStrategy);
+
+    auto newPadOp = rewriter.create<tensor::PadOp>(
+        padOp.getLoc(), /*result=*/Type(), collapsedSource, newLowPad,
+        newHighPad, paddingVal, padOp.getNofold());
+
+    Value dest = padOp.getResult();
+    if (options.rankReductionStrategy ==
+        ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice) {
+      SmallVector<OpFoldResult> expandedSizes;
+      int64_t numUnitDims = 0;
+      for (auto dim : llvm::seq(static_cast<int64_t>(0), padRank)) {
+        if (unitDims.contains(dim)) {
+          expandedSizes.push_back(rewriter.getIndexAttr(1));
+          numUnitDims++;
+          continue;
+        }
+        expandedSizes.push_back(tensor::getMixedSize(
+            rewriter, padOp.getLoc(), newPadOp, dim - numUnitDims));
+      }
+      dest = rewriter.create<tensor::EmptyOp>(
+          padOp.getLoc(), expandedSizes,
+          padOp.getResultType().getElementType());
+    }
+
+    Value expandedValue =
+        expandValue(rewriter, padOp.getLoc(), newPadOp.getResult(), dest,
+                    reassociationMap, options.rankReductionStrategy);
+    rewriter.replaceOp(padOp, expandedValue);
+    return success();
+  }
+
+private:
+  ControlDropUnitDims options;
+};
+} // namespace
+
 namespace {
 /// Convert `extract_slice` operations to rank-reduced versions.
 struct RankReducedExtractSliceOp

@@ -640,6 +760,7 @@ populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns,
                                               ControlDropUnitDims &options) {
   auto *context = patterns.getContext();
   patterns.add<DropUnitDims>(context, options);
+  patterns.add<DropPadUnitDims>(context, options);
   // TODO: Patterns unrelated to unit dim folding should be factored out.
   patterns.add<RankReducedExtractSliceOp,
                RankReducedInsertSliceOp<tensor::InsertSliceOp>,

@@ -661,6 +782,7 @@ populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns,
   options.rankReductionStrategy =
       ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice;
   patterns.add<DropUnitDims>(context, options);
+  patterns.add<DropPadUnitDims>(context, options);
   // TODO: Patterns unrelated to unit dim folding should be factored out.
   linalg::FillOp::getCanonicalizationPatterns(patterns, context);
   tensor::EmptyOp::getCanonicalizationPatterns(patterns, context);

mlir/lib/Dialect/OpenMP/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIROpenMPDialect
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/OpenMP
 
   DEPENDS
+  omp_gen
   MLIROpenMPOpsIncGen
   MLIROpenMPOpsInterfacesIncGen
   MLIROpenMPTypeInterfacesIncGen

mlir/lib/Transforms/Utils/Inliner.cpp

Lines changed: 8 additions & 0 deletions
@@ -21,6 +21,7 @@
 #include "mlir/Support/DebugStringHelper.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
 

@@ -711,6 +712,13 @@ bool Inliner::Impl::shouldInline(ResolvedCall &resolvedCall) {
   if (resolvedCall.call->hasTrait<OpTrait::IsTerminator>())
     return false;
 
+  // Don't allow inlining if the target is a self-recursive function.
+  if (llvm::count_if(*resolvedCall.targetNode,
+                     [&](CallGraphNode::Edge const &edge) -> bool {
+                       return edge.getTarget() == resolvedCall.targetNode;
+                     }) > 0)
+    return false;
+
   // Don't allow inlining if the target is an ancestor of the call. This
   // prevents inlining recursively.
   Region *callableRegion = resolvedCall.targetNode->getCallableRegion();

mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir

Lines changed: 87 additions & 0 deletions
@@ -946,3 +946,90 @@ func.func @drop_all_loops(%arg0 : memref<1x1xf32, 3>) -> memref<1x1xf32, 3>
 // CHECK-SLICES-LABEL: func @drop_all_loops
 // CHECK-SLICES: memref.subview %{{.*}}[0, 0] [1, 1] [1, 1] : memref<1x1xf32, 3> to memref<f32, strided<[]>, 3>
 // CHECK-SLICES: linalg.generic{{.*}}memref<f32, strided<[]>, 3>
+
+// -----
+
+func.func @drop_unit_pad_dims(%arg0: tensor<1x1x3x1x1xf32>) -> tensor<1x2x3x1x3xf32>
+{
+  %c0 = arith.constant 0 : index
+  %cst0 = arith.constant 0.0 : f32
+  %0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
+      tensor.yield %cst0 : f32
+  } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+  return %0 : tensor<1x2x3x1x3xf32>
+}
+
+// CHECK-LABEL: func @drop_unit_pad_dims
+// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
+// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<1x1x3x1x1xf32> into tensor<1x3x1xf32>
+// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[1, 0, 0] high[0, 0, 2]
+// CHECK: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
+// CHECK: tensor.expand_shape %[[PADDED]]
+// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>
+
+// CHECK-SLICES-LABEL: func @drop_unit_pad_dims
+// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
+// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 1, 3, 1, 1] [1, 1, 1, 1, 1] : tensor<1x1x3x1x1xf32> to tensor<1x3x1xf32>
+// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[1, 0, 0] high[0, 0, 2]
+// CHECK-SLICES: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
+// CHECK-SLICES: tensor.insert_slice %[[PADDED]]
+// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 2, 3, 1, 3] [1, 1, 1, 1, 1] : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>
+
+// -----
+
+func.func @drop_unit_pad_dynamic_dims(%arg0: tensor<1x?xf32>) -> tensor<1x?xf32>
+{
+  %c0 = arith.constant 0 : index
+  %cst0 = arith.constant 0.0 : f32
+  %0 = tensor.pad %arg0 low[0, 5] high[0, 6] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %cst0 : f32
+  } : tensor<1x?xf32> to tensor<1x?xf32>
+  return %0 : tensor<1x?xf32>
+}
+
+// CHECK-LABEL: func @drop_unit_pad_dynamic_dims
+// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
+// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<1x?xf32> into tensor<?xf32>
+// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[5] high[6]
+// CHECK: } : tensor<?xf32> to tensor<?xf32>
+// CHECK: tensor.expand_shape %[[PADDED]]
+// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<?xf32> into tensor<1x?xf32>
+
+// CHECK-SLICES: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 + 11)>
+
+// CHECK-SLICES-LABEL: func @drop_unit_pad_dynamic_dims
+// CHECK-SLICES-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<1x?xf32>
+// CHECK-SLICES: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c1
+// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
+// CHECK-SLICES-SAME: [0, 0] [1, %[[DIM]]] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
+// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[5] high[6]
+// CHECK-SLICES: } : tensor<?xf32> to tensor<?xf32>
+// CHECK-SLICES: %[[PADDED_DIM:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]]
+// CHECK-SLICES: %[[EMPTY:.+]] = tensor.empty(%[[PADDED_DIM]]) : tensor<1x?xf32>
+// CHECK-SLICES: tensor.insert_slice %[[PADDED]] into %[[EMPTY]]
+// CHECK-SLICES-SAME: [0, 0] [1, %[[PADDED_DIM]]] [1, 1] : tensor<?xf32> into tensor<1x?xf32>
+
+// -----
+
+func.func @do_not_drop_non_constant_padding(%arg0: tensor<1x1x3x1x1xf32>, %pad: f32) -> tensor<1x2x3x1x3xf32>
+{
+  %c0 = arith.constant 0 : index
+  %0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
+      %0 = arith.index_cast %arg3 : index to i64
+      %1 = arith.sitofp %0 : i64 to f32
+      %add = arith.addf %pad, %1 : f32
+      tensor.yield %add : f32
+  } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+  return %0 : tensor<1x2x3x1x3xf32>
+}
+
+// CHECK-LABEL: func @do_not_drop_non_constant_padding
+// CHECK: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
+// CHECK: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+
+// CHECK-SLICES-LABEL: func @do_not_drop_non_constant_padding
+// CHECK-SLICES: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
+// CHECK-SLICES: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
