@@ -88,7 +88,9 @@ class ROCDLDialectLLVMIRTranslationInterface
88
88
if (dialect->getKernelAttrHelper ().getName () == attribute.getName ()) {
89
89
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
90
90
if (!func)
91
- return failure ();
91
+ return op->emitOpError (Twine (attribute.getName ()) +
92
+ " is only supported on `llvm.func` operations" );
93
+ ;
92
94
93
95
// For GPU kernels,
94
96
// 1. Insert AMDGPU_KERNEL calling convention.
@@ -100,6 +102,13 @@ class ROCDLDialectLLVMIRTranslationInterface
100
102
if (!llvmFunc->hasFnAttribute (" amdgpu-flat-work-group-size" )) {
101
103
llvmFunc->addFnAttr (" amdgpu-flat-work-group-size" , " 1,256" );
102
104
}
105
+
106
+ // MLIR's GPU kernel APIs all assume and produce uniformly-sized
107
+ // workgroups, so the lowering of the `rocdl.kernel` marker encodes this
108
+ // assumption. This assumption may be overridden by setting
109
+ // `rocdl.uniform_work_group_size` on a given function.
110
+ if (!llvmFunc->hasFnAttribute (" uniform-work-group-size" ))
111
+ llvmFunc->addFnAttr (" uniform-work-group-size" , " true" );
103
112
}
104
113
// Override flat-work-group-size
105
114
// TODO: update clients to rocdl.flat_work_group_size instead,
@@ -108,10 +117,12 @@ class ROCDLDialectLLVMIRTranslationInterface
108
117
attribute.getName ()) {
109
118
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
110
119
if (!func)
111
- return failure ();
120
+ return op->emitOpError (Twine (attribute.getName ()) +
121
+ " is only supported on `llvm.func` operations" );
112
122
auto value = dyn_cast<IntegerAttr>(attribute.getValue ());
113
123
if (!value)
114
- return failure ();
124
+ return op->emitOpError (Twine (attribute.getName ()) +
125
+ " must be an integer" );
115
126
116
127
llvm::Function *llvmFunc =
117
128
moduleTranslation.lookupFunction (func.getName ());
@@ -124,27 +135,45 @@ class ROCDLDialectLLVMIRTranslationInterface
124
135
attribute.getName ()) {
125
136
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
126
137
if (!func)
127
- return failure ();
138
+ return op->emitOpError (Twine (attribute.getName ()) +
139
+ " is only supported on `llvm.func` operations" );
128
140
auto value = dyn_cast<StringAttr>(attribute.getValue ());
129
141
if (!value)
130
- return failure ();
142
+ return op->emitOpError (Twine (attribute.getName ()) +
143
+ " must be a string" );
131
144
132
145
llvm::Function *llvmFunc =
133
146
moduleTranslation.lookupFunction (func.getName ());
134
147
llvm::SmallString<8 > llvmAttrValue;
135
148
llvmAttrValue.append (value.getValue ());
136
149
llvmFunc->addFnAttr (" amdgpu-flat-work-group-size" , llvmAttrValue);
137
150
}
138
-
151
+ if (ROCDL::ROCDLDialect::getUniformWorkGroupSizeAttrName () ==
152
+ attribute.getName ()) {
153
+ auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
154
+ if (!func)
155
+ return op->emitOpError (Twine (attribute.getName ()) +
156
+ " is only supported on `llvm.func` operations" );
157
+ auto value = dyn_cast<BoolAttr>(attribute.getValue ());
158
+ if (!value)
159
+ return op->emitOpError (Twine (attribute.getName ()) +
160
+ " must be a boolean" );
161
+ llvm::Function *llvmFunc =
162
+ moduleTranslation.lookupFunction (func.getName ());
163
+ llvmFunc->addFnAttr (" uniform-work-group-size" ,
164
+ value.getValue () ? " true" : " false" );
165
+ }
139
166
// Set reqd_work_group_size metadata
140
167
if (dialect->getReqdWorkGroupSizeAttrHelper ().getName () ==
141
168
attribute.getName ()) {
142
169
auto func = dyn_cast<LLVM::LLVMFuncOp>(op);
143
170
if (!func)
144
- return failure ();
171
+ return op->emitOpError (Twine (attribute.getName ()) +
172
+ " is only supported on `llvm.func` operations" );
145
173
auto value = dyn_cast<DenseI32ArrayAttr>(attribute.getValue ());
146
174
if (!value)
147
- return failure ();
175
+ return op->emitOpError (Twine (attribute.getName ()) +
176
+ " must be a dense i32 array attribute" );
148
177
llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext ();
149
178
SmallVector<llvm::Metadata *, 3 > metadata;
150
179
llvm::Type *i32 = llvm::IntegerType::get (llvmContext, 32 );
0 commit comments