Skip to content

Commit c1862bd

Browse files
committed
[mlir][gpu] Add Support for CGA Clusters in gpu.launch
This PR improves `gpu.launch` to handle a new feature called CGA clusters. Now, when using `gpu.launch`, one can include a cluster size, although it's optional. If provided, the outliner will transform `gpu.launch` with the cluster size into `gpu.launch_func`. Previously, PR llvm#72871 introduced the required support for clusters in the MLIR compiler and its CUDA runtime. This PR builds upon that work.
1 parent d659bd1 commit c1862bd

File tree

6 files changed

+210
-23
lines changed

6 files changed

+210
-23
lines changed

mlir/include/mlir/Dialect/GPU/IR/GPUOps.td

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -676,8 +676,11 @@ def GPU_LaunchOp : GPU_Op<"launch", [
676676
DeclareOpInterfaceMethods<InferIntRangeInterface>,
677677
RecursiveMemoryEffects]>,
678678
Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
679-
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
679+
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
680680
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
681+
Optional<Index>:$clusterSizeX,
682+
Optional<Index>:$clusterSizeY,
683+
Optional<Index>:$clusterSizeZ,
681684
Optional<I32>:$dynamicSharedMemorySize)>,
682685
Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
683686
let summary = "GPU kernel launch operation";
@@ -700,8 +703,11 @@ def GPU_LaunchOp : GPU_Op<"launch", [
700703
to the amount of dynamic shared memory a kernel's workgroup should be
701704
allocated; when this operand is not present, a zero size is assumed.
702705

703-
The body region has at least _twelve_ arguments, grouped as follows:
706+
The body region has at least _twelve_ arguments, or _eighteen_ if cluster
707+
dimensions are present, grouped as follows:
704708

709+
- three optional arguments that contain cluster identifiers along x,y,z
710+
dimensions;
705711
- three arguments that contain block identifiers along x,y,z dimensions;
706712
- three arguments that contain thread identifiers along x,y,z dimensions;
707713
- operands of the `gpu.launch` operation as is (i.e. the operands for
@@ -713,6 +719,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
713719

714720
```
715721
operation ::= `gpu.launch` (`async` (`[` ssa-id-list `]`)? )?
722+
( `clusters` `(` ssa-id-list `)` `in` ssa-reassignment )?
716723
`blocks` `(` ssa-id-list `)` `in` ssa-reassignment
717724
`threads` `(` ssa-id-list `)` `in` ssa-reassignment
718725
(dynamic_shared_memory_size ssa-use)?
@@ -763,6 +770,16 @@ def GPU_LaunchOp : GPU_Op<"launch", [
763770
// Assuming %val1 is defined outside the gpu.launch region.
764771
%42 = load %workgroup[%bx] : memref<32xf32, 3>
765772
}
773+
774+
// Launch with clusters.
775+
gpu.launch clusters(%cx, %cy, %cz) in (%sz_cx = %0, %sz_cy = %1, %sz_cz = %2)
776+
blocks(%bx, %by, %bz) in (%sz_bx = %3, %sz_by = %4, %sz_bz = %5)
777+
threads(%tx, %ty, %tz) in (%sz_tx = %6, %sz_ty = %7, %sz_tz = %8)
778+
{
779+
// Cluster, block and thread identifiers, as well as cluster/block/grid
780+
// sizes are immediately usable inside body region.
781+
"some_op"(%cx, %bx, %tx) : (index, index, index) -> ()
782+
}
766783
```
767784

768785
Rationale: using operation/block arguments gives analyses a clear way of
@@ -784,25 +801,35 @@ def GPU_LaunchOp : GPU_Op<"launch", [
784801
CArg<"Type", "nullptr">:$asyncTokenType,
785802
CArg<"ValueRange", "{}">:$asyncDependencies,
786803
CArg<"TypeRange", "{}">:$workgroupAttributions,
787-
CArg<"TypeRange", "{}">:$privateAttributions)>
804+
CArg<"TypeRange", "{}">:$privateAttributions,
805+
CArg<"Value", "nullptr">:$clusterSizeX,
806+
CArg<"Value", "nullptr">:$clusterSizeY,
807+
CArg<"Value", "nullptr">:$clusterSizeZ)>
788808
];
789809

790810
let extraClassDeclaration = [{
791811
/// Get the SSA values corresponding to kernel block identifiers.
792812
KernelDim3 getBlockIds();
793813
/// Get the SSA values corresponding to kernel thread identifiers.
794814
KernelDim3 getThreadIds();
815+
/// Get the SSA values corresponding to kernel cluster identifiers.
816+
std::optional<KernelDim3> getClusterIds();
795817
/// Get the SSA values corresponding to kernel grid size.
796818
KernelDim3 getGridSize();
797819
/// Get the SSA values corresponding to kernel block size.
798820
KernelDim3 getBlockSize();
821+
/// Get the SSA values corresponding to kernel cluster size.
822+
std::optional<KernelDim3> getClusterSize();
799823

800824
/// Get the SSA values passed as operands to specify the grid size.
801825
KernelDim3 getGridSizeOperandValues();
802826
/// Get the SSA values passed as operands to specify the block size.
803827
KernelDim3 getBlockSizeOperandValues();
828+
/// Get the SSA values passed as operands to specify the cluster size.
829+
std::optional<KernelDim3> getClusterSizeOperandValues();
804830

805831
static StringRef getBlocksKeyword() { return "blocks"; }
832+
static StringRef getClustersKeyword() { return "clusters"; }
806833
static StringRef getThreadsKeyword() { return "threads"; }
807834
static StringRef getDynamicSharedMemorySizeKeyword() {
808835
return "dynamic_shared_memory_size";
@@ -816,6 +843,21 @@ def GPU_LaunchOp : GPU_Op<"launch", [
816843
/// placed in the leading positions of the argument list.
817844
static constexpr unsigned kNumConfigRegionAttributes = 12;
818845

846+
/// Returns true if cluster size is specified.
847+
bool hasClusterSize() {
848+
if (getClusterSizeX() && getClusterSizeY() && getClusterSizeZ())
849+
return true;
850+
return false;
851+
}
852+
/// Returns the number of operands including cluster size
853+
unsigned getNumConfigOperands() {
854+
return kNumConfigOperands + (hasClusterSize() ? 3 : 0);
855+
}
856+
/// Returns the number of region attributes including cluster size
857+
unsigned getNumConfigRegionAttributes() {
858+
return kNumConfigRegionAttributes + (hasClusterSize() ? 6 : 0);
859+
}
860+
819861
/// Returns the keywords used in the custom syntax for this Op.
820862
static StringRef getWorkgroupKeyword() { return "workgroup"; }
821863
static StringRef getPrivateKeyword() { return "private"; }
@@ -831,7 +873,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
831873
/// the workgroup memory
832874
ArrayRef<BlockArgument> getWorkgroupAttributions() {
833875
auto begin =
834-
std::next(getBody().args_begin(), kNumConfigRegionAttributes);
876+
std::next(getBody().args_begin(), getNumConfigRegionAttributes());
835877
auto end = std::next(begin, getNumWorkgroupAttributions());
836878
return {begin, end};
837879
}
@@ -842,7 +884,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
842884

843885
/// Returns the number of buffers located in the private memory.
844886
unsigned getNumPrivateAttributions() {
845-
return getBody().getNumArguments() - kNumConfigRegionAttributes -
887+
return getBody().getNumArguments() - getNumConfigRegionAttributes() -
846888
getNumWorkgroupAttributions();
847889
}
848890

@@ -853,7 +895,7 @@ def GPU_LaunchOp : GPU_Op<"launch", [
853895
// memory.
854896
auto begin =
855897
std::next(getBody().args_begin(),
856-
kNumConfigRegionAttributes + getNumWorkgroupAttributions());
898+
getNumConfigRegionAttributes() + getNumWorkgroupAttributions());
857899
return {begin, getBody().args_end()};
858900
}
859901

mlir/lib/Dialect/GPU/IR/GPUDialect.cpp

Lines changed: 70 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
646646
Value getBlockSizeZ, Value dynamicSharedMemorySize,
647647
Type asyncTokenType, ValueRange asyncDependencies,
648648
TypeRange workgroupAttributions,
649-
TypeRange privateAttributions) {
649+
TypeRange privateAttributions, Value clusterSizeX,
650+
Value clusterSizeY, Value clusterSizeZ) {
650651
// Add a WorkGroup attribution attribute. This attribute is required to
651652
// identify private attributions in the list of block arguments.
652653
result.addAttribute(getNumWorkgroupAttributionsAttrName(),
@@ -660,6 +661,8 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
660661
// Add grid and block sizes as op operands, followed by the data operands.
661662
result.addOperands({gridSizeX, gridSizeY, gridSizeZ, getBlockSizeX,
662663
getBlockSizeY, getBlockSizeZ});
664+
if (clusterSizeX && clusterSizeY && clusterSizeZ)
665+
result.addOperands({clusterSizeX, clusterSizeY, clusterSizeZ});
663666
if (dynamicSharedMemorySize)
664667
result.addOperands(dynamicSharedMemorySize);
665668

@@ -678,9 +681,14 @@ void LaunchOp::build(OpBuilder &builder, OperationState &result,
678681
body->addArgument(argTy, result.location);
679682
kernelRegion->push_back(body);
680683
// Fill OperandSegmentSize Attribute.
681-
SmallVector<int32_t, 8> segmentSizes(8, 1);
684+
SmallVector<int32_t, 11> segmentSizes(11, 1);
682685
segmentSizes.front() = asyncDependencies.size();
683686
segmentSizes.back() = dynamicSharedMemorySize ? 1 : 0;
687+
if (!clusterSizeX) {
688+
segmentSizes[7] = 0;
689+
segmentSizes[8] = 0;
690+
segmentSizes[9] = 0;
691+
}
684692
result.addAttribute(getOperandSegmentSizeAttr(),
685693
builder.getDenseI32ArrayAttr(segmentSizes));
686694
}
@@ -709,6 +717,22 @@ KernelDim3 LaunchOp::getBlockSize() {
709717
return KernelDim3{args[9], args[10], args[11]};
710718
}
711719

720+
std::optional<KernelDim3> LaunchOp::getClusterIds() {
721+
assert(!getBody().empty() && "LaunchOp body must not be empty.");
722+
if (!hasClusterSize())
723+
return std::nullopt;
724+
auto args = getBody().getArguments();
725+
return KernelDim3{args[12], args[13], args[14]};
726+
}
727+
728+
std::optional<KernelDim3> LaunchOp::getClusterSize() {
729+
assert(!getBody().empty() && "LaunchOp body must not be empty.");
730+
if (!hasClusterSize())
731+
return std::nullopt;
732+
auto args = getBody().getArguments();
733+
return KernelDim3{args[15], args[16], args[17]};
734+
}
735+
712736
KernelDim3 LaunchOp::getGridSizeOperandValues() {
713737
auto operands = getOperands().drop_front(getAsyncDependencies().size());
714738
return KernelDim3{operands[0], operands[1], operands[2]};
@@ -719,6 +743,13 @@ KernelDim3 LaunchOp::getBlockSizeOperandValues() {
719743
return KernelDim3{operands[3], operands[4], operands[5]};
720744
}
721745

746+
std::optional<KernelDim3> LaunchOp::getClusterSizeOperandValues() {
747+
auto operands = getOperands().drop_front(getAsyncDependencies().size());
748+
if (!hasClusterSize())
749+
return std::nullopt;
750+
return KernelDim3{operands[6], operands[7], operands[8]};
751+
}
752+
722753
LogicalResult LaunchOp::verifyRegions() {
723754
// Kernel launch takes kNumConfigOperands leading operands for grid/block
724755
// sizes and transforms them into kNumConfigRegionAttributes region arguments
@@ -778,6 +809,12 @@ void LaunchOp::print(OpAsmPrinter &p) {
778809
p << " [" << getAsyncDependencies() << ']';
779810
}
780811
// Print the launch configuration.
812+
if (getClusterSizeX()) {
813+
p << ' ' << getClustersKeyword();
814+
printSizeAssignment(p, getClusterSize().value(),
815+
getClusterSizeOperandValues().value(),
816+
getClusterIds().value());
817+
}
781818
p << ' ' << getBlocksKeyword();
782819
printSizeAssignment(p, getGridSize(), getGridSizeOperandValues(),
783820
getBlockIds());
@@ -831,6 +868,7 @@ parseSizeAssignment(OpAsmParser &parser,
831868

832869
/// Parses a Launch operation.
833870
/// operation ::= `gpu.launch` (`async` `[` ssa-id-list `]`)?
871+
/// `clusters` `(` ssa-id-list `)` `in` ssa-reassignment (Optional)
834872
/// `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
835873
/// `threads` `(` ssa-id-list `)` `in` ssa-reassignment
836874
/// memory-attribution
@@ -840,15 +878,13 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
840878
// Sizes of the grid and block.
841879
SmallVector<OpAsmParser::UnresolvedOperand, LaunchOp::kNumConfigOperands>
842880
sizes(LaunchOp::kNumConfigOperands);
843-
MutableArrayRef<OpAsmParser::UnresolvedOperand> sizesRef(sizes);
844881

845882
// Actual (data) operands passed to the kernel.
846883
SmallVector<OpAsmParser::UnresolvedOperand, 4> dataOperands;
847884

848885
// Region arguments to be created.
849886
SmallVector<OpAsmParser::UnresolvedOperand, 16> regionArgs(
850887
LaunchOp::kNumConfigRegionAttributes);
851-
MutableArrayRef<OpAsmParser::UnresolvedOperand> regionArgsRef(regionArgs);
852888

853889
// Parse optional async dependencies.
854890
SmallVector<OpAsmParser::UnresolvedOperand, 4> asyncDependencies;
@@ -861,6 +897,24 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
861897
if (parser.getNumResults() > 0)
862898
result.types.push_back(asyncTokenType);
863899

900+
bool hasCluster = false;
901+
if (succeeded(
902+
parser.parseOptionalKeyword(LaunchOp::getClustersKeyword().data()))) {
903+
hasCluster = true;
904+
sizes.resize(9);
905+
regionArgs.resize(18);
906+
}
907+
MutableArrayRef<OpAsmParser::UnresolvedOperand> sizesRef(sizes);
908+
MutableArrayRef<OpAsmParser::UnresolvedOperand> regionArgsRef(regionArgs);
909+
910+
// The last three segments assign the cluster size. In the region argument
911+
// list, these are the last 6 arguments.
912+
if (hasCluster) {
913+
if (parseSizeAssignment(parser, sizesRef.drop_front(6),
914+
regionArgsRef.slice(15, 3),
915+
regionArgsRef.slice(12, 3)))
916+
return failure();
917+
}
864918
// Parse the size assignment segments: the first segment assigns grid sizes
865919
// and defines values for block identifiers; the second segment assigns block
866920
// sizes and defines values for thread identifiers. In the region argument
@@ -898,7 +952,7 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
898952
// LaunchOp::getNumWorkgroupAttributionsAttrName().
899953
Type index = parser.getBuilder().getIndexType();
900954
SmallVector<Type, LaunchOp::kNumConfigRegionAttributes> dataTypes(
901-
LaunchOp::kNumConfigRegionAttributes, index);
955+
LaunchOp::kNumConfigRegionAttributes + 6, index);
902956

903957
SmallVector<OpAsmParser::Argument> regionArguments;
904958
for (auto ssaValueAndType : llvm::zip(regionArgs, dataTypes)) {
@@ -916,8 +970,9 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
916970

917971
// Store the number of operands we just parsed as the number of workgroup
918972
// memory attributions.
919-
unsigned numWorkgroupAttrs =
920-
regionArguments.size() - LaunchOp::kNumConfigRegionAttributes;
973+
unsigned numWorkgroupAttrs = regionArguments.size() -
974+
LaunchOp::kNumConfigRegionAttributes -
975+
(hasCluster ? 6 : 0);
921976
result.addAttribute(LaunchOp::getNumWorkgroupAttributionsAttrName(),
922977
builder.getI64IntegerAttr(numWorkgroupAttrs));
923978

@@ -934,8 +989,14 @@ ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
934989
parser.parseOptionalAttrDict(result.attributes))
935990
return failure();
936991

937-
SmallVector<int32_t, 8> segmentSizes(8, 1);
992+
SmallVector<int32_t, 11> segmentSizes(11, 1);
938993
segmentSizes.front() = asyncDependencies.size();
994+
995+
if (!hasCluster) {
996+
segmentSizes[7] = 0;
997+
segmentSizes[8] = 0;
998+
segmentSizes[9] = 0;
999+
}
9391000
segmentSizes.back() = hasDynamicSharedMemorySize ? 1 : 0;
9401001
result.addAttribute(LaunchOp::getOperandSegmentSizeAttr(),
9411002
parser.getBuilder().getDenseI32ArrayAttr(segmentSizes));
@@ -992,7 +1053,7 @@ BlockArgument LaunchOp::addWorkgroupAttribution(Type type, Location loc) {
9921053
(*this)->setAttr(attrName,
9931054
IntegerAttr::get(attr.getType(), attr.getValue() + 1));
9941055
return getBody().insertArgument(
995-
LaunchOp::kNumConfigRegionAttributes + attr.getInt(), type, loc);
1056+
LaunchOp::getNumConfigRegionAttributes() + attr.getInt(), type, loc);
9961057
}
9971058

9981059
/// Adds a new block argument that corresponds to buffers located in

mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,21 @@ static void createForAllDimensions(OpBuilder &builder, Location loc,
4949
/// entry block of `launchOpBody`, to the corresponding result value of the
5050
/// added operations.
5151
static void injectGpuIndexOperations(Location loc, Region &launchFuncOpBody,
52-
Region &launchOpBody, IRMapping &map) {
52+
Region &launchOpBody, IRMapping &map,
53+
bool hasCluster = false) {
5354
OpBuilder builder(loc->getContext());
5455
Block &firstBlock = launchOpBody.front();
5556
builder.setInsertionPointToStart(&launchFuncOpBody.front());
56-
SmallVector<Value, 12> indexOps;
57+
SmallVector<Value> indexOps;
58+
// The order is important here, as it must match the order of the arguments
5759
createForAllDimensions<gpu::BlockIdOp>(builder, loc, indexOps);
5860
createForAllDimensions<gpu::ThreadIdOp>(builder, loc, indexOps);
5961
createForAllDimensions<gpu::GridDimOp>(builder, loc, indexOps);
6062
createForAllDimensions<gpu::BlockDimOp>(builder, loc, indexOps);
63+
if (hasCluster) {
64+
createForAllDimensions<gpu::ClusterIdOp>(builder, loc, indexOps);
65+
createForAllDimensions<gpu::ClusterDimOp>(builder, loc, indexOps);
66+
}
6167
// Replace the leading 12 function args with the respective thread/block index
6268
// operations. Iterate backwards since args are erased and indices change.
6369
for (const auto &indexOp : enumerate(indexOps))
@@ -212,9 +218,11 @@ static gpu::GPUFuncOp outlineKernelFuncImpl(gpu::LaunchOp launchOp,
212218
IRMapping map;
213219

214220
// Map the arguments corresponding to the launch parameters like blockIdx,
215-
// threadIdx, etc.
221+
// threadIdx, etc. If cluster is present, then we also generate clusterIdx and
222+
// clusterDim.
216223
Region &outlinedFuncBody = outlinedFunc.getBody();
217-
injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map);
224+
injectGpuIndexOperations(loc, outlinedFuncBody, launchOpBody, map,
225+
launchOp.hasClusterSize());
218226

219227
// Map memory attributions from the LaunchOp to the GPUFuncOp attributions.
220228
for (const auto &[launchArg, funcArg] :
@@ -278,12 +286,14 @@ static void convertToLaunchFuncOp(gpu::LaunchOp launchOp,
278286
// The launch op has an optional dynamic shared memory size. If it doesn't
279287
// exist, we use zero.
280288
Value asyncToken = launchOp.getAsyncToken();
289+
std::optional<gpu::KernelDim3> clusterSize =
290+
launchOp.getClusterSizeOperandValues();
281291
auto launchFunc = builder.create<gpu::LaunchFuncOp>(
282292
launchOp.getLoc(), kernelFunc, launchOp.getGridSizeOperandValues(),
283293
launchOp.getBlockSizeOperandValues(),
284294
launchOp.getDynamicSharedMemorySize(), operands,
285295
asyncToken ? asyncToken.getType() : nullptr,
286-
launchOp.getAsyncDependencies());
296+
launchOp.getAsyncDependencies(), clusterSize);
287297
launchOp.replaceAllUsesWith(launchFunc);
288298
launchOp.erase();
289299
}

mlir/test/Conversion/SCFToGPU/no_blocks_no_threads.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ func.func @one_d_loop(%A : memref<?xf32>, %B : memref<?xf32>) {
1717
// CHECK-BLOCKS-NEXT: %{{.*}} = arith.constant 1 : index
1818
// CHECK-BLOCKS-NEXT: %[[ONE:.*]] = arith.constant 1 : index
1919

20-
// CHECK-THREADS-NEXT: gpu.launch blocks(%[[B0:.*]], %[[B1:.*]], %[[B2:.*]]) in (%{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]], %{{.*}}0 = %[[ONE]]) threads(%[[T0:.*]], %[[T1:.*]], %[[T2:.*]]) in (%{{.*}} = %[[BOUND]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]])
21-
// CHECK-BLOCKS-NEXT: gpu.launch blocks(%[[B0:.*]], %[[B1:.*]], %[[B2:.*]]) in (%{{.*}} = %[[BOUND]], %{{.*}} = %[[ONE]], %{{.*}}0 = %[[ONE]]) threads(%[[T0:.*]], %[[T1:.*]], %[[T2:.*]]) in (%{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]])
20+
// CHECK-THREADS-NEXT: gpu.launch blocks(%[[B0:.*]], %[[B1:.*]], %[[B2:.*]]) in (%{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]]) threads(%[[T0:.*]], %[[T1:.*]], %[[T2:.*]]) in (%{{.*}} = %[[BOUND]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]])
21+
// CHECK-BLOCKS-NEXT: gpu.launch blocks(%[[B0:.*]], %[[B1:.*]], %[[B2:.*]]) in (%{{.*}} = %[[BOUND]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]]) threads(%[[T0:.*]], %[[T1:.*]], %[[T2:.*]]) in (%{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]], %{{.*}} = %[[ONE]])
2222
affine.for %i = 0 to 42 {
2323
// CHECK-THREADS-NEXT: %[[INDEX:.*]] = arith.addi %{{.*}}, %[[T0]]
2424
// CHECK-THREADS-NEXT: memref.load %{{.*}}[%[[INDEX]]]

mlir/test/Dialect/GPU/invalid.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ func.func @no_region_attrs(%sz : index) {
1616
^bb1(%bx: index, %by: index, %bz: index,
1717
%tx: index, %ty: index, %tz: index):
1818
gpu.terminator
19-
}) {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0>} : (index, index, index, index, index, index) -> ()
19+
}) {operandSegmentSizes = array<i32: 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0>} : (index, index, index, index, index, index) -> ()
2020
return
2121
}
2222

0 commit comments

Comments
 (0)