
Commit cb8e193

SC llvm team authored and committed
Merged main:b2ea04673b782f95ac9841f87df8bb5f7b561067 into amd-gfx:0ce3951c67f4
Local branch amd-gfx 0ce3951: Merged main:c93c76b562784926b22a69d3f82a5032dcb4a274 into amd-gfx:eb3c7a678fbb
Remote branch main b2ea046: [MLIR] Add missing omp_gen dep to MLIROpenMPDialect (llvm#84552)
2 parents 0ce3951 + b2ea046 commit cb8e193

34 files changed (+1034, -716 lines)

.ci/monolithic-linux.sh

Lines changed: 1 addition & 1 deletion
@@ -54,4 +54,4 @@ cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \
 
 echo "--- ninja"
 # Targets are not escaped as they are passed as separate arguments.
-ninja -C -k 0 "${BUILD_DIR}" ${targets}
+ninja -C "${BUILD_DIR}" -k 0 ${targets}

.ci/monolithic-windows.sh

Lines changed: 1 addition & 1 deletion
@@ -62,4 +62,4 @@ cmake -S ${MONOREPO_ROOT}/llvm -B ${BUILD_DIR} \
 
 echo "--- ninja"
 # Targets are not escaped as they are passed as separate arguments.
-ninja -C -k 0 "${BUILD_DIR}" ${targets}
+ninja -C "${BUILD_DIR}" -k 0 ${targets}

.github/workflows/release-binaries.yml

Lines changed: 2 additions & 2 deletions
@@ -71,8 +71,8 @@ jobs:
   # | X.Y.Z | -final
   run: |
     tag="${{ github.ref_name }}"
-    trimmed=$(echo ${{ inputs.tag }} | xargs)
-    [[ "$trimmed" != "" ]] && tag="$trimmed"
+    trimmed=$(echo ${{ inputs.release-version }} | xargs)
+    [[ "$trimmed" != "" ]] && tag="llvmorg-$trimmed"
     if [ "$tag" = "main" ]; then
       # If tag is main, then we've been triggered by a scheduled so pass so
       # use the head commit as the tag.
clang/test/CodeGen/remote-traps.c

Lines changed: 3 additions & 3 deletions
@@ -1,15 +1,15 @@
 // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s
 // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow -mllvm -clang-remove-traps -mllvm -remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call void @llvm.ubsantrap" --check-prefixes=REMOVE
 
-int f(int x) {
+int test(int x) {
   return x + 123;
 }
 
-// CHECK-LABEL: define dso_local noundef i32 @f(
+// CHECK-LABEL: define {{.*}}i32 @test(
 // CHECK: call { i32, i1 } @llvm.sadd.with.overflow.i32(
 // CHECK: trap:
 // CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
 // CHECK-NEXT: unreachable
 
-// REMOVE-LABEL: define dso_local noundef i32 @f(
+// REMOVE-LABEL: define {{.*}}i32 @test(
 // REMOVE: call { i32, i1 } @llvm.sadd.with.overflow.i32(

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 492341
+#define LLVM_MAIN_REVISION 492352
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP

llvm/lib/IR/BasicBlock.cpp

Lines changed: 2 additions & 0 deletions
@@ -348,6 +348,8 @@ const Instruction* BasicBlock::getFirstNonPHI() const {
 
 BasicBlock::const_iterator BasicBlock::getFirstNonPHIIt() const {
   const Instruction *I = getFirstNonPHI();
+  if (!I)
+    return end();
   BasicBlock::const_iterator It = I->getIterator();
   // Set the head-inclusive bit to indicate that this iterator includes
   // any debug-info at the start of the block. This is a no-op unless the
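With the added guard, getFirstNonPHIIt() degrades gracefully when getFirstNonPHI() finds no suitable instruction. A minimal caller-side sketch, assuming a const BasicBlock reference BB and a hypothetical helper name chosen only for illustration (not part of this commit):

// Hypothetical helper: with the guard above, a block that contains no
// non-PHI instruction yields end() instead of dereferencing a null pointer.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

const Instruction *firstNonPHIOrNull(const BasicBlock &BB) {
  BasicBlock::const_iterator It = BB.getFirstNonPHIIt();
  return It != BB.end() ? &*It : nullptr;
}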

llvm/utils/git/github-automation.py

Lines changed: 1 addition & 1 deletion
@@ -586,7 +586,7 @@ def create_pull_request(
             body=body,
             base=release_branch_for_issue,
             head=head,
-            maintainer_can_modify=False,
+            maintainer_can_modify=True,
         )
 
         pull.as_issue().edit(milestone=self.issue.milestone)

mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h

Lines changed: 4 additions & 0 deletions
@@ -481,6 +481,10 @@ struct ControlDropUnitDims {
     if (auto genericOp = dyn_cast_or_null<GenericOp>(op)) {
       return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));
     }
+    if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {
+      return llvm::to_vector(
+          llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
+    }
     return SmallVector<unsigned>{};
   };
 };
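The default control function now offers every dimension of a tensor.pad source as a droppable candidate, mirroring the existing linalg.generic case. A hedged sketch of how a client could narrow that set with a custom controlFn; the populate entry point is the one extended in DropUnitDims.cpp further down, while the function name, the context variable, and the restriction to dimension 0 are illustrative choices, not part of this commit:

#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/IR/PatternMatch.h"
using namespace mlir;

// Illustrative configuration: only ever offer the leading dimension of a
// candidate op as a unit-extent dimension to drop.
void addRestrictedUnitDimFolding(RewritePatternSet &patterns) {
  linalg::ControlDropUnitDims options;
  options.controlFn = [](Operation *op) -> SmallVector<unsigned> {
    return SmallVector<unsigned>{0};
  };
  linalg::populateFoldUnitExtentDimsViaReshapesPatterns(patterns, options);
}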

mlir/lib/Conversion/NVVMToLLVM/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRNVVMToLLVM
   Core
 
   LINK_LIBS PUBLIC
+  MLIRFuncDialect
   MLIRGPUDialect
   MLIRLLVMCommonConversion
   MLIRLLVMDialect

mlir/lib/Dialect/AMDGPU/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -14,6 +14,7 @@ add_mlir_dialect_library(MLIRAMDGPUTransforms
   MLIRAMDGPUUtils
   MLIRArithDialect
   MLIRControlFlowDialect
+  MLIRFuncDialect
   MLIRIR
   MLIRPass
   MLIRTransforms

mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp

Lines changed: 122 additions & 0 deletions
@@ -561,6 +561,126 @@ struct DropUnitDims : public OpRewritePattern<GenericOp> {
 };
 } // namespace
 
+//===---------------------------------------------------------------------===//
+// Drop dimensions that are unit-extents within tensor operations.
+//===---------------------------------------------------------------------===//
+
+namespace {
+struct DropPadUnitDims : public OpRewritePattern<tensor::PadOp> {
+  DropPadUnitDims(MLIRContext *context, ControlDropUnitDims options = {},
+                  PatternBenefit benefit = 1)
+      : OpRewritePattern(context, benefit), options(std::move(options)) {}
+
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
+                                PatternRewriter &rewriter) const override {
+    // 1a. Get the allowed list of dimensions to drop from the `options`.
+    SmallVector<unsigned> allowedUnitDims = options.controlFn(padOp);
+    if (allowedUnitDims.empty()) {
+      return rewriter.notifyMatchFailure(
+          padOp, "control function returns no allowed unit dims to prune");
+    }
+
+    if (padOp.getSourceType().getEncoding()) {
+      return rewriter.notifyMatchFailure(
+          padOp, "cannot collapse dims of tensor with encoding");
+    }
+
+    // Fail for non-constant padding values. The body of the pad could
+    // depend on the padding indices and/or properties of the padded
+    // tensor so for now we fail.
+    // TODO: Support non-constant padding values.
+    Value paddingVal = padOp.getConstantPaddingValue();
+    if (!paddingVal) {
+      return rewriter.notifyMatchFailure(
+          padOp, "unimplemented: non-constant padding value");
+    }
+
+    ArrayRef<int64_t> sourceShape = padOp.getSourceType().getShape();
+    int64_t padRank = sourceShape.size();
+
+    auto isStaticZero = [](OpFoldResult f) {
+      std::optional<int64_t> maybeInt = getConstantIntValue(f);
+      return maybeInt && *maybeInt == 0;
+    };
+
+    llvm::SmallDenseSet<unsigned> unitDimsFilter(allowedUnitDims.begin(),
+                                                 allowedUnitDims.end());
+    llvm::SmallDenseSet<unsigned> unitDims;
+    SmallVector<int64_t> newShape;
+    SmallVector<OpFoldResult> newLowPad;
+    SmallVector<OpFoldResult> newHighPad;
+    for (const auto [dim, size, low, high] :
+         zip_equal(llvm::seq(static_cast<int64_t>(0), padRank), sourceShape,
+                   padOp.getMixedLowPad(), padOp.getMixedHighPad())) {
+      if (unitDimsFilter.contains(dim) && size == 1 && isStaticZero(low) &&
+          isStaticZero(high)) {
+        unitDims.insert(dim);
+      } else {
+        newShape.push_back(size);
+        newLowPad.push_back(low);
+        newHighPad.push_back(high);
+      }
+    }
+
+    if (unitDims.empty()) {
+      return rewriter.notifyMatchFailure(padOp, "no unit dims to collapse");
+    }
+
+    ReassociationIndices reassociationGroup;
+    SmallVector<ReassociationIndices> reassociationMap;
+    int64_t dim = 0;
+    while (dim < padRank && unitDims.contains(dim))
+      reassociationGroup.push_back(dim++);
+    while (dim < padRank) {
+      assert(!unitDims.contains(dim) && "expected non unit-extent");
+      reassociationGroup.push_back(dim);
+      dim++;
+      // Fold all following dimensions that are unit-extent.
+      while (dim < padRank && unitDims.contains(dim))
+        reassociationGroup.push_back(dim++);
+      reassociationMap.push_back(reassociationGroup);
+      reassociationGroup.clear();
+    }
+
+    Value collapsedSource =
+        collapseValue(rewriter, padOp.getLoc(), padOp.getSource(), newShape,
+                      reassociationMap, options.rankReductionStrategy);
+
+    auto newPadOp = rewriter.create<tensor::PadOp>(
+        padOp.getLoc(), /*result=*/Type(), collapsedSource, newLowPad,
+        newHighPad, paddingVal, padOp.getNofold());
+
+    Value dest = padOp.getResult();
+    if (options.rankReductionStrategy ==
+        ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice) {
+      SmallVector<OpFoldResult> expandedSizes;
+      int64_t numUnitDims = 0;
+      for (auto dim : llvm::seq(static_cast<int64_t>(0), padRank)) {
+        if (unitDims.contains(dim)) {
+          expandedSizes.push_back(rewriter.getIndexAttr(1));
+          numUnitDims++;
+          continue;
+        }
+        expandedSizes.push_back(tensor::getMixedSize(
+            rewriter, padOp.getLoc(), newPadOp, dim - numUnitDims));
+      }
+      dest = rewriter.create<tensor::EmptyOp>(
+          padOp.getLoc(), expandedSizes,
+          padOp.getResultType().getElementType());
+    }
+
+    Value expandedValue =
+        expandValue(rewriter, padOp.getLoc(), newPadOp.getResult(), dest,
+                    reassociationMap, options.rankReductionStrategy);
+    rewriter.replaceOp(padOp, expandedValue);
+    return success();
+  }
+
+private:
+  ControlDropUnitDims options;
+};
+} // namespace
+
 namespace {
 /// Convert `extract_slice` operations to rank-reduced versions.
 struct RankReducedExtractSliceOp

@@ -640,6 +760,7 @@ populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns,
                                               ControlDropUnitDims &options) {
   auto *context = patterns.getContext();
   patterns.add<DropUnitDims>(context, options);
+  patterns.add<DropPadUnitDims>(context, options);
   // TODO: Patterns unrelated to unit dim folding should be factored out.
   patterns.add<RankReducedExtractSliceOp,
                RankReducedInsertSliceOp<tensor::InsertSliceOp>,

@@ -661,6 +782,7 @@ populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns,
   options.rankReductionStrategy =
       ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice;
   patterns.add<DropUnitDims>(context, options);
+  patterns.add<DropPadUnitDims>(context, options);
   // TODO: Patterns unrelated to unit dim folding should be factored out.
   linalg::FillOp::getCanonicalizationPatterns(patterns, context);
   tensor::EmptyOp::getCanonicalizationPatterns(patterns, context);

mlir/lib/Dialect/OpenMP/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -5,6 +5,7 @@ add_mlir_dialect_library(MLIROpenMPDialect
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/OpenMP
 
   DEPENDS
+  omp_gen
   MLIROpenMPOpsIncGen
   MLIROpenMPOpsInterfacesIncGen
   MLIROpenMPTypeInterfacesIncGen

mlir/lib/Transforms/Utils/Inliner.cpp

Lines changed: 8 additions & 0 deletions
@@ -21,6 +21,7 @@
 #include "mlir/Support/DebugStringHelper.h"
 #include "mlir/Transforms/InliningUtils.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
 

@@ -711,6 +712,13 @@ bool Inliner::Impl::shouldInline(ResolvedCall &resolvedCall) {
   if (resolvedCall.call->hasTrait<OpTrait::IsTerminator>())
     return false;
 
+  // Don't allow inlining if the target is a self-recursive function.
+  if (llvm::count_if(*resolvedCall.targetNode,
+                     [&](CallGraphNode::Edge const &edge) -> bool {
+                       return edge.getTarget() == resolvedCall.targetNode;
+                     }) > 0)
+    return false;
+
   // Don't allow inlining if the target is an ancestor of the call. This
   // prevents inlining recursively.
   Region *callableRegion = resolvedCall.targetNode->getCallableRegion();

mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir

Lines changed: 87 additions & 0 deletions
@@ -946,3 +946,90 @@ func.func @drop_all_loops(%arg0 : memref<1x1xf32, 3>) -> memref<1x1xf32, 3>
 // CHECK-SLICES-LABEL: func @drop_all_loops
 // CHECK-SLICES: memref.subview %{{.*}}[0, 0] [1, 1] [1, 1] : memref<1x1xf32, 3> to memref<f32, strided<[]>, 3>
 // CHECK-SLICES: linalg.generic{{.*}}memref<f32, strided<[]>, 3>
+
+// -----
+
+func.func @drop_unit_pad_dims(%arg0: tensor<1x1x3x1x1xf32>) -> tensor<1x2x3x1x3xf32>
+{
+  %c0 = arith.constant 0 : index
+  %cst0 = arith.constant 0.0 : f32
+  %0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
+      tensor.yield %cst0 : f32
+  } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+  return %0 : tensor<1x2x3x1x3xf32>
+}
+
+// CHECK-LABEL: func @drop_unit_pad_dims
+// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
+// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<1x1x3x1x1xf32> into tensor<1x3x1xf32>
+// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[1, 0, 0] high[0, 0, 2]
+// CHECK: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
+// CHECK: tensor.expand_shape %[[PADDED]]
+// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>
+
+// CHECK-SLICES-LABEL: func @drop_unit_pad_dims
+// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
+// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 1, 3, 1, 1] [1, 1, 1, 1, 1] : tensor<1x1x3x1x1xf32> to tensor<1x3x1xf32>
+// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[1, 0, 0] high[0, 0, 2]
+// CHECK-SLICES: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
+// CHECK-SLICES: tensor.insert_slice %[[PADDED]]
+// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 2, 3, 1, 3] [1, 1, 1, 1, 1] : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>
+
+// -----
+
+func.func @drop_unit_pad_dynamic_dims(%arg0: tensor<1x?xf32>) -> tensor<1x?xf32>
+{
+  %c0 = arith.constant 0 : index
+  %cst0 = arith.constant 0.0 : f32
+  %0 = tensor.pad %arg0 low[0, 5] high[0, 6] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %cst0 : f32
+  } : tensor<1x?xf32> to tensor<1x?xf32>
+  return %0 : tensor<1x?xf32>
+}
+
+// CHECK-LABEL: func @drop_unit_pad_dynamic_dims
+// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
+// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<1x?xf32> into tensor<?xf32>
+// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[5] high[6]
+// CHECK: } : tensor<?xf32> to tensor<?xf32>
+// CHECK: tensor.expand_shape %[[PADDED]]
+// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<?xf32> into tensor<1x?xf32>
+
+// CHECK-SLICES: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 + 11)>
+
+// CHECK-SLICES-LABEL: func @drop_unit_pad_dynamic_dims
+// CHECK-SLICES-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<1x?xf32>
+// CHECK-SLICES: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c1
+// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
+// CHECK-SLICES-SAME: [0, 0] [1, %[[DIM]]] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
+// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[5] high[6]
+// CHECK-SLICES: } : tensor<?xf32> to tensor<?xf32>
+// CHECK-SLICES: %[[PADDED_DIM:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]]
+// CHECK-SLICES: %[[EMPTY:.+]] = tensor.empty(%[[PADDED_DIM]]) : tensor<1x?xf32>
+// CHECK-SLICES: tensor.insert_slice %[[PADDED]] into %[[EMPTY]]
+// CHECK-SLICES-SAME: [0, 0] [1, %[[PADDED_DIM]]] [1, 1] : tensor<?xf32> into tensor<1x?xf32>
+
+// -----
+
+func.func @do_not_drop_non_constant_padding(%arg0: tensor<1x1x3x1x1xf32>, %pad: f32) -> tensor<1x2x3x1x3xf32>
+{
+  %c0 = arith.constant 0 : index
+  %0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
+      %0 = arith.index_cast %arg3 : index to i64
+      %1 = arith.sitofp %0 : i64 to f32
+      %add = arith.addf %pad, %1 : f32
+      tensor.yield %add : f32
+  } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+  return %0 : tensor<1x2x3x1x3xf32>
+}
+
+// CHECK-LABEL: func @do_not_drop_non_constant_padding
+// CHECK: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
+// CHECK: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
+
+// CHECK-SLICES-LABEL: func @do_not_drop_non_constant_padding
+// CHECK-SLICES: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
+// CHECK-SLICES: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
