@@ -117,8 +117,8 @@ class MemCmpExpansion {
117
117
Value *Lhs = nullptr ;
118
118
Value *Rhs = nullptr ;
119
119
};
120
- LoadPair getLoadPair (Type *LoadSizeType, Type *BSwapSizeType ,
121
- Type *CmpSizeType, unsigned OffsetBytes);
120
+ LoadPair getLoadPair (Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType ,
121
+ unsigned OffsetBytes);
122
122
123
123
static LoadEntryVector
124
124
computeGreedyLoadSequence (uint64_t Size, llvm::ArrayRef<unsigned > LoadSizes,
@@ -128,11 +128,6 @@ class MemCmpExpansion {
128
128
unsigned MaxNumLoads,
129
129
unsigned &NumLoadsNonOneByte);
130
130
131
- static void optimiseLoadSequence (
132
- LoadEntryVector &LoadSequence,
133
- const TargetTransformInfo::MemCmpExpansionOptions &Options,
134
- bool IsUsedForZeroCmp);
135
-
136
131
public:
137
132
MemCmpExpansion (CallInst *CI, uint64_t Size,
138
133
const TargetTransformInfo::MemCmpExpansionOptions &Options,
@@ -215,37 +210,6 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
215
210
return LoadSequence;
216
211
}
217
212
218
- void MemCmpExpansion::optimiseLoadSequence (
219
- LoadEntryVector &LoadSequence,
220
- const TargetTransformInfo::MemCmpExpansionOptions &Options,
221
- bool IsUsedForZeroCmp) {
222
- // This part of code attempts to optimize the LoadSequence by merging allowed
223
- // subsequences into single loads of allowed sizes from
224
- // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero
225
- // comparison or if no allowed tail expansions are specified, we exit early.
226
- if (IsUsedForZeroCmp || Options.AllowedTailExpansions .empty ())
227
- return ;
228
-
229
- while (LoadSequence.size () >= 2 ) {
230
- auto Last = LoadSequence[LoadSequence.size () - 1 ];
231
- auto PreLast = LoadSequence[LoadSequence.size () - 2 ];
232
-
233
- // Exit the loop if the two sequences are not contiguous
234
- if (PreLast.Offset + PreLast.LoadSize != Last.Offset )
235
- break ;
236
-
237
- auto LoadSize = Last.LoadSize + PreLast.LoadSize ;
238
- if (find (Options.AllowedTailExpansions , LoadSize) ==
239
- Options.AllowedTailExpansions .end ())
240
- break ;
241
-
242
- // Remove the last two sequences and replace with the combined sequence
243
- LoadSequence.pop_back ();
244
- LoadSequence.pop_back ();
245
- LoadSequence.emplace_back (PreLast.Offset , LoadSize);
246
- }
247
- }
248
-
249
213
// Initialize the basic block structure required for expansion of memcmp call
250
214
// with given maximum load size and memcmp size parameter.
251
215
// This structure includes:
@@ -291,7 +255,6 @@ MemCmpExpansion::MemCmpExpansion(
291
255
}
292
256
}
293
257
assert (LoadSequence.size () <= Options.MaxNumLoads && " broken invariant" );
294
- optimiseLoadSequence (LoadSequence, Options, IsUsedForZeroCmp);
295
258
}
296
259
297
260
unsigned MemCmpExpansion::getNumBlocks () {
@@ -315,7 +278,7 @@ void MemCmpExpansion::createResultBlock() {
315
278
}
316
279
317
280
MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair (Type *LoadSizeType,
318
- Type *BSwapSizeType ,
281
+ bool NeedsBSwap ,
319
282
Type *CmpSizeType,
320
283
unsigned OffsetBytes) {
321
284
// Get the memory source at offset `OffsetBytes`.
@@ -344,22 +307,16 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
344
307
if (!Rhs)
345
308
Rhs = Builder.CreateAlignedLoad (LoadSizeType, RhsSource, RhsAlign);
346
309
347
- // Zero extend if Byte Swap intrinsic has different type
348
- if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
349
- Lhs = Builder.CreateZExt (Lhs, BSwapSizeType);
350
- Rhs = Builder.CreateZExt (Rhs, BSwapSizeType);
351
- }
352
-
353
310
// Swap bytes if required.
354
- if (BSwapSizeType ) {
355
- Function *Bswap = Intrinsic::getDeclaration (
356
- CI-> getModule (), Intrinsic::bswap, BSwapSizeType );
311
+ if (NeedsBSwap ) {
312
+ Function *Bswap = Intrinsic::getDeclaration (CI-> getModule (),
313
+ Intrinsic::bswap, LoadSizeType );
357
314
Lhs = Builder.CreateCall (Bswap, Lhs);
358
315
Rhs = Builder.CreateCall (Bswap, Rhs);
359
316
}
360
317
361
318
// Zero extend if required.
362
- if (CmpSizeType != nullptr && CmpSizeType != Lhs-> getType () ) {
319
+ if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType ) {
363
320
Lhs = Builder.CreateZExt (Lhs, CmpSizeType);
364
321
Rhs = Builder.CreateZExt (Rhs, CmpSizeType);
365
322
}
@@ -375,7 +332,7 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
375
332
BasicBlock *BB = LoadCmpBlocks[BlockIndex];
376
333
Builder.SetInsertPoint (BB);
377
334
const LoadPair Loads =
378
- getLoadPair (Type::getInt8Ty (CI->getContext ()), nullptr ,
335
+ getLoadPair (Type::getInt8Ty (CI->getContext ()), /* NeedsBSwap= */ false ,
379
336
Type::getInt32Ty (CI->getContext ()), OffsetBytes);
380
337
Value *Diff = Builder.CreateSub (Loads.Lhs , Loads.Rhs );
381
338
@@ -428,12 +385,11 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
428
385
IntegerType *const MaxLoadType =
429
386
NumLoads == 1 ? nullptr
430
387
: IntegerType::get (CI->getContext (), MaxLoadSize * 8 );
431
-
432
388
for (unsigned i = 0 ; i < NumLoads; ++i, ++LoadIndex) {
433
389
const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
434
390
const LoadPair Loads = getLoadPair (
435
- IntegerType::get (CI->getContext (), CurLoadEntry.LoadSize * 8 ), nullptr ,
436
- MaxLoadType, CurLoadEntry.Offset );
391
+ IntegerType::get (CI->getContext (), CurLoadEntry.LoadSize * 8 ),
392
+ /* NeedsBSwap= */ false , MaxLoadType, CurLoadEntry.Offset );
437
393
438
394
if (NumLoads != 1 ) {
439
395
// If we have multiple loads per block, we need to generate a composite
@@ -519,20 +475,14 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
519
475
520
476
Type *LoadSizeType =
521
477
IntegerType::get (CI->getContext (), CurLoadEntry.LoadSize * 8 );
522
- Type *BSwapSizeType =
523
- DL.isLittleEndian ()
524
- ? IntegerType::get (CI->getContext (),
525
- PowerOf2Ceil (CurLoadEntry.LoadSize * 8 ))
526
- : nullptr ;
527
- Type *MaxLoadType = IntegerType::get (
528
- CI->getContext (),
529
- std::max (MaxLoadSize, (unsigned )PowerOf2Ceil (CurLoadEntry.LoadSize )) * 8 );
478
+ Type *MaxLoadType = IntegerType::get (CI->getContext (), MaxLoadSize * 8 );
530
479
assert (CurLoadEntry.LoadSize <= MaxLoadSize && " Unexpected load type" );
531
480
532
481
Builder.SetInsertPoint (LoadCmpBlocks[BlockIndex]);
533
482
534
- const LoadPair Loads = getLoadPair (LoadSizeType, BSwapSizeType, MaxLoadType,
535
- CurLoadEntry.Offset );
483
+ const LoadPair Loads =
484
+ getLoadPair (LoadSizeType, /* NeedsBSwap=*/ DL.isLittleEndian (), MaxLoadType,
485
+ CurLoadEntry.Offset );
536
486
537
487
// Add the loaded values to the phi nodes for calculating memcmp result only
538
488
// if result is not used in a zero equality.
@@ -637,24 +587,19 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
637
587
/// A memcmp expansion that only has one block of load and compare can bypass
638
588
/// the compare, branch, and phi IR that is required in the general case.
639
589
Value *MemCmpExpansion::getMemCmpOneBlock () {
640
- bool NeedsBSwap = DL.isLittleEndian () && Size != 1 ;
641
590
Type *LoadSizeType = IntegerType::get (CI->getContext (), Size * 8 );
642
- Type *BSwapSizeType =
643
- NeedsBSwap ? IntegerType::get (CI->getContext (), PowerOf2Ceil (Size * 8 ))
644
- : nullptr ;
645
- Type *MaxLoadType =
646
- IntegerType::get (CI->getContext (),
647
- std::max (MaxLoadSize, (unsigned )PowerOf2Ceil (Size)) * 8 );
591
+ bool NeedsBSwap = DL.isLittleEndian () && Size != 1 ;
648
592
649
593
// The i8 and i16 cases don't need compares. We zext the loaded values and
650
594
// subtract them to get the suitable negative, zero, or positive i32 result.
651
595
if (Size < 4 ) {
652
- const LoadPair Loads = getLoadPair (LoadSizeType, BSwapSizeType,
653
- Builder.getInt32Ty (), /* Offset*/ 0 );
596
+ const LoadPair Loads =
597
+ getLoadPair (LoadSizeType, NeedsBSwap, Builder.getInt32Ty (),
598
+ /* Offset*/ 0 );
654
599
return Builder.CreateSub (Loads.Lhs , Loads.Rhs );
655
600
}
656
601
657
- const LoadPair Loads = getLoadPair (LoadSizeType, BSwapSizeType, MaxLoadType ,
602
+ const LoadPair Loads = getLoadPair (LoadSizeType, NeedsBSwap, LoadSizeType ,
658
603
/* Offset*/ 0 );
659
604
// The result of memcmp is negative, zero, or positive, so produce that by
660
605
// subtracting 2 extended compare bits: sub (ugt, ult).
0 commit comments