fixup! fixup! [AMDGPU] Use wider loop lowering type for LowerMemIntrinsics

ritter-x2a · ritter-x2a · commit 734289baec94 · 2024-10-23T08:22:22.000-04:00
Remove StoreSize==AllocSize assertions and add a test where they would be violated.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -79,7 +79,7 @@ static cl::opt<size_t> InlineMaxBB(
 static cl::opt<unsigned> MemcpyLoopUnroll(
     "amdgpu-memcpy-loop-unroll",
     cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory "
-             "operations when lowering memcpy as a loop, must be a power of 2"),
+             "operations when lowering memcpy as a loop"),
     cl::init(16), cl::Hidden);
 
 static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -48,9 +48,6 @@ void llvm::createMemCpyLoopKnownSize(
       Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
   assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
          "Atomic memcpy lowering is not supported for vector operand type");
-  assert((DL.getTypeStoreSize(LoopOpType) == DL.getTypeAllocSize(LoopOpType)) &&
-         "Bytes are missed if store and alloc size of the LoopOpType do not "
-         "match");
 
   Type *Int8Type = Type::getInt8Ty(Ctx);
   unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
@@ -203,9 +200,6 @@ void llvm::createMemCpyLoopUnknownSize(
       Ctx, CopyLen, SrcAS, DstAS, SrcAlign, DstAlign, AtomicElementSize);
   assert((!AtomicElementSize || !LoopOpType->isVectorTy()) &&
          "Atomic memcpy lowering is not supported for vector operand type");
-  assert((DL.getTypeStoreSize(LoopOpType) == DL.getTypeAllocSize(LoopOpType)) &&
-         "Bytes are missed if store and alloc size of the LoopOpType do not "
-         "match");
   unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
   assert((!AtomicElementSize || LoopOpSize % *AtomicElementSize == 0) &&
          "Atomic memcpy lowering is not supported for selected operand size");
@@ -420,9 +414,6 @@ static void createMemMoveLoopUnknownSize(Instruction *InsertBefore,
 
   Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                    SrcAlign, DstAlign);
-  assert((DL.getTypeStoreSize(LoopOpType) == DL.getTypeAllocSize(LoopOpType)) &&
-         "Bytes are missed if store and alloc size of the LoopOpType do not "
-         "match");
   unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
   Type *Int8Type = Type::getInt8Ty(Ctx);
   bool LoopOpIsInt8 = LoopOpType == Int8Type;
@@ -681,9 +672,6 @@ static void createMemMoveLoopKnownSize(Instruction *InsertBefore,
 
   Type *LoopOpType = TTI.getMemcpyLoopLoweringType(Ctx, CopyLen, SrcAS, DstAS,
                                                    SrcAlign, DstAlign);
-  assert((DL.getTypeStoreSize(LoopOpType) == DL.getTypeAllocSize(LoopOpType)) &&
-         "Bytes are missed if store and alloc size of the LoopOpType do not "
-         "match");
   unsigned LoopOpSize = DL.getTypeStoreSize(LoopOpType);
   Type *Int8Type = Type::getInt8Ty(Ctx);
 
diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll