|
32 | 32 | #include "llvm/Support/Casting.h"
|
33 | 33 | #include "llvm/Target/TargetMachine.h"
|
34 | 34 | #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
|
| 35 | +#include "llvm/Transforms/Utils/BuildLibCalls.h" |
35 | 36 | #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
|
36 | 37 |
|
37 | 38 | using namespace llvm;
|
@@ -232,6 +233,59 @@ static bool canEmitLibcall(const TargetMachine *TM, Function *F,
|
232 | 233 | return TLI->getLibcallName(LC) != nullptr;
|
233 | 234 | }
|
234 | 235 |
|
| 236 | +// Return a value appropriate for use with the memset_pattern16 libcall, if |
| 237 | +// possible and if we know how. (Adapted from equivalent helper in |
| 238 | +// LoopIdiomRecognize). |
| 239 | +static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst, |
| 240 | + const TargetLibraryInfo &TLI) { |
| 241 | + // FIXME: This could check for UndefValue because it can be merged into any |
| 242 | + // other valid pattern. |
| 243 | + |
| 244 | + // Don't emit libcalls if a non-default address space is being used. |
| 245 | + if (Inst->getRawDest()->getType()->getPointerAddressSpace() != 0) |
| 246 | + return nullptr; |
| 247 | + |
| 248 | + Value *V = Inst->getValue(); |
| 249 | + const DataLayout &DL = Inst->getDataLayout(); |
| 250 | + Module *M = Inst->getModule(); |
| 251 | + |
| 252 | + if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16)) |
| 253 | + return nullptr; |
| 254 | + |
| 255 | + // If the value isn't a constant, we can't promote it to being in a constant |
| 256 | + // array. We could theoretically do a store to an alloca or something, but |
| 257 | + // that doesn't seem worthwhile. |
| 258 | + Constant *C = dyn_cast<Constant>(V); |
| 259 | + if (!C || isa<ConstantExpr>(C)) |
| 260 | + return nullptr; |
| 261 | + |
| 262 | + // Only handle simple values that are a power of two bytes in size. |
| 263 | + uint64_t Size = DL.getTypeSizeInBits(V->getType()); |
| 264 | + if (Size == 0 || (Size & 7) || (Size & (Size - 1))) |
| 265 | + return nullptr; |
| 266 | + |
| 267 | + // Don't care enough about darwin/ppc to implement this. |
| 268 | + if (DL.isBigEndian()) |
| 269 | + return nullptr; |
| 270 | + |
| 271 | + // Convert to size in bytes. |
| 272 | + Size /= 8; |
| 273 | + |
| 274 | + // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see |
| 275 | + // if the top and bottom are the same (e.g. for vectors and large integers). |
| 276 | + if (Size > 16) |
| 277 | + return nullptr; |
| 278 | + |
| 279 | + // If the constant is exactly 16 bytes, just use it. |
| 280 | + if (Size == 16) |
| 281 | + return C; |
| 282 | + |
| 283 | + // Otherwise, we'll use an array of the constants. |
| 284 | + unsigned ArraySize = 16 / Size; |
| 285 | + ArrayType *AT = ArrayType::get(V->getType(), ArraySize); |
| 286 | + return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C)); |
| 287 | +} |
| 288 | + |
235 | 289 | // TODO: Handle atomic memcpy and memcpy.inline
|
236 | 290 | // TODO: Pass ScalarEvolution
|
237 | 291 | bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
|
@@ -322,7 +376,41 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
|
322 | 376 | }
|
323 | 377 | case Intrinsic::experimental_memset_pattern: {
|
324 | 378 | auto *Memset = cast<MemSetPatternInst>(Inst);
|
325 |
| - expandMemSetPatternAsLoop(Memset); |
| 379 | + const TargetLibraryInfo &TLI = LookupTLI(*Memset->getFunction()); |
| 380 | + if (Constant *PatternValue = getMemSetPattern16Value(Memset, TLI)) { |
| 381 | + // FIXME: There is currently no profitability calculation for emitting |
| 382 | + // the libcall vs expanding the memset.pattern directly. |
| 383 | + IRBuilder<> Builder(Inst); |
| 384 | + Module *M = Memset->getModule(); |
| 385 | + const DataLayout &DL = Memset->getDataLayout(); |
| 386 | + |
| 387 | + StringRef FuncName = "memset_pattern16"; |
| 388 | + FunctionCallee MSP = getOrInsertLibFunc( |
| 389 | + M, TLI, LibFunc_memset_pattern16, Builder.getVoidTy(), |
| 390 | + Memset->getRawDest()->getType(), Builder.getPtrTy(), |
| 391 | + Memset->getLength()->getType()); |
| 392 | + inferNonMandatoryLibFuncAttrs(M, FuncName, TLI); |
| 393 | + |
| 394 | + // Otherwise we should form a memset_pattern16. PatternValue is known |
| 395 | + // to be an constant array of 16-bytes. Put the value into a mergable |
| 396 | + // global. |
| 397 | + GlobalVariable *GV = new GlobalVariable( |
| 398 | + *M, PatternValue->getType(), true, GlobalValue::PrivateLinkage, |
| 399 | + PatternValue, ".memset_pattern"); |
| 400 | + GV->setUnnamedAddr( |
| 401 | + GlobalValue::UnnamedAddr::Global); // Ok to merge these. |
| 402 | + GV->setAlignment(Align(16)); |
| 403 | + Value *PatternPtr = GV; |
| 404 | + Value *NumBytes = Builder.CreateMul( |
| 405 | + Builder.getInt64( |
| 406 | + DL.getTypeSizeInBits(Memset->getValue()->getType()) / 8), |
| 407 | + Memset->getLength()); |
| 408 | + CallInst *MemsetPattern16Call = Builder.CreateCall( |
| 409 | + MSP, {Memset->getRawDest(), PatternPtr, NumBytes}); |
| 410 | + MemsetPattern16Call->setAAMetadata(Memset->getAAMetadata()); |
| 411 | + } else { |
| 412 | + expandMemSetPatternAsLoop(Memset); |
| 413 | + } |
326 | 414 | Changed = true;
|
327 | 415 | Memset->eraseFromParent();
|
328 | 416 | break;
|
|
0 commit comments