@@ -639,10 +639,9 @@ struct AsyncCopyGlobalToLocalOpConversion
639
639
(vecTy.getNumElements () * vecTy.getElementTypeBitWidth ()) / 8 ;
640
640
assert (llvm::isPowerOf2_32 (vecBytes));
641
641
Value vecBytesVal = b.i32_val (vecBytes);
642
-
643
- Value cacheModifiers =
644
- b.i32_val (mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget (
645
- op.getCache (), /* isLoad=*/ true , targetInfo));
642
+ int32_t cacheModifiers =
643
+ mlir::LLVM::AMD::getCtrlBitsForCacheModifierOnTarget (
644
+ op.getCache (), /* isLoad=*/ true , targetInfo);
646
645
647
646
Value llMask = adaptor.getMask ();
648
647
SmallVector<Value> maskElems;
@@ -680,7 +679,7 @@ struct AsyncCopyGlobalToLocalOpConversion
680
679
auto globalLoadLdsOp = rewriter.create <ROCDL::GlobalLoadLDSOp>(
681
680
loc,
682
681
/* globalPtr=*/ srcPtr, /* ldsPtr=*/ coalescedShmemAddr[i],
683
- /* size=*/ vecBytesVal , /* offset=*/ b. i32_val ( 0 ) ,
682
+ /* size=*/ vecBytes , /* offset=*/ 0 ,
684
683
/* aux=*/ cacheModifiers, /* alias_scopes=*/ nullptr ,
685
684
/* noalias_scopes=*/ nullptr , /* tbaa=*/ nullptr );
686
685
LLVM::AMD::addAsyncCopyAliasScope (globalLoadLdsOp);
@@ -695,8 +694,8 @@ struct AsyncCopyGlobalToLocalOpConversion
695
694
rewriter.create <LLVM::CondBrOp>(loc, pred, loadBlock, afterLoad);
696
695
rewriter.setInsertionPointToStart (loadBlock);
697
696
auto globalLoadLdsOp = rewriter.create <ROCDL::GlobalLoadLDSOp>(
698
- loc, srcPtr, coalescedShmemAddr[i], vecBytesVal ,
699
- /* offset=*/ b. i32_val ( 0 ) , cacheModifiers, nullptr , nullptr , nullptr );
697
+ loc, srcPtr, coalescedShmemAddr[i], vecBytes ,
698
+ /* offset=*/ 0 , cacheModifiers, nullptr , nullptr , nullptr );
700
699
LLVM::AMD::addAsyncCopyAliasScope (globalLoadLdsOp);
701
700
702
701
rewriter.create <LLVM::BrOp>(loc, afterLoad);
0 commit comments