@@ -256,11 +256,17 @@ struct ConvertLaunch final : ConvertOpPattern<gpu::LaunchFuncOp> {
256
256
257
257
int i = 0 ;
258
258
for (auto arg : kernelArgs) {
259
- if (isa<MemRefType>(gpuLaunch.getKernelOperand (i++).getType ())) {
259
+ if (auto type = gpuLaunch.getKernelOperand (i++).getType ();
260
+ isa<MemRefType>(type)) {
260
261
MemRefDescriptor desc (arg);
261
262
args.emplace_back (desc.alignedPtr (rewriter, loc));
262
263
} else {
263
- args.emplace_back (arg);
264
+ // Store the arg on the stack and pass the pointer
265
+ auto ptr = rewriter.create <LLVM::AllocaOp>(
266
+ loc, helper.ptrType , typeConverter->convertType (type),
267
+ helper.idxConstant (rewriter, loc, 1 ));
268
+ rewriter.create <LLVM::StoreOp>(loc, arg, ptr);
269
+ args.emplace_back (ptr);
264
270
}
265
271
}
266
272
@@ -352,7 +358,7 @@ struct ConvertLaunch final : ConvertOpPattern<gpu::LaunchFuncOp> {
352
358
// ...name_Ptr.
353
359
bool createKernel (
354
360
gpu::LaunchFuncOp &gpuLaunch, OpAdaptor &adaptor,
355
- ConversionPatternRewriter &rewriter, Location &loc, ModuleOp &mod,
361
+ ConversionPatternRewriter &rewriter, const Location &loc, ModuleOp &mod,
356
362
StringRef funcName,
357
363
const std::function<SmallString<128 > &(const char *chars)> &str) const {
358
364
auto kernelModName = gpuLaunch.getKernelModuleName ();
@@ -410,6 +416,8 @@ struct ConvertLaunch final : ConvertOpPattern<gpu::LaunchFuncOp> {
410
416
411
417
for (auto arg : adaptor.getKernelOperands ()) {
412
418
auto type = arg.getType ();
419
+ // Assuming that the value is either an integer, a float, or a pointer.
420
+ // In the latter case, the size is 0 bytes.
413
421
auto size = type.isIntOrFloat () ? type.getIntOrFloatBitWidth () / 8 : 0 ;
414
422
argSize.emplace_back (helper.idxConstant (rewriter, loc, size));
415
423
}
0 commit comments