@@ -117,7 +117,7 @@ class MemCmpExpansion {
117
117
Value *Lhs = nullptr ;
118
118
Value *Rhs = nullptr ;
119
119
};
120
- LoadPair getLoadPair (Type *LoadSizeType, bool NeedsBSwap, Type *BSwapSizeType,
120
+ LoadPair getLoadPair (Type *LoadSizeType, Type *BSwapSizeType,
121
121
Type *CmpSizeType, unsigned OffsetBytes);
122
122
123
123
static LoadEntryVector
@@ -128,6 +128,11 @@ class MemCmpExpansion {
128
128
unsigned MaxNumLoads,
129
129
unsigned &NumLoadsNonOneByte);
130
130
131
+ static void optimiseLoadSequence (
132
+ LoadEntryVector &LoadSequence,
133
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
134
+ bool IsUsedForZeroCmp);
135
+
131
136
public:
132
137
MemCmpExpansion (CallInst *CI, uint64_t Size,
133
138
const TargetTransformInfo::MemCmpExpansionOptions &Options,
@@ -210,6 +215,37 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
210
215
return LoadSequence;
211
216
}
212
217
218
// Tail-merging step for a computed load sequence.
//
// Repeatedly looks at the last two entries of `LoadSequence` and, while they
// are back-to-back in memory and their combined size is listed in
// `Options.AllowedTailExpansions`, replaces them with one entry that starts at
// the earlier offset and spans both. `LoadSequence` is modified in place.
//
// Parameters:
//   LoadSequence      - sequence to rewrite; only its trailing entries are
//                       ever touched.
//   Options           - supplies `AllowedTailExpansions`, the set of combined
//                       sizes that may be produced by a merge.
//   IsUsedForZeroCmp  - when true the function returns without changing
//                       anything.
//
// NOTE(review): each entry is assumed to expose `Offset` and `LoadSize`
// members and a two-argument (offset, size) constructor, as used below; the
// entry type's definition is not visible in this hunk.
+ void MemCmpExpansion::optimiseLoadSequence (
219
+ LoadEntryVector &LoadSequence,
220
+ const TargetTransformInfo::MemCmpExpansionOptions &Options,
221
+ bool IsUsedForZeroCmp) {
222
+ // This part of code attempts to optimize the LoadSequence by merging allowed
223
+ // subsequences into single loads of allowed sizes from
224
+ // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero
225
+ // comparison or if no allowed tail expansions are specified, we exit early.
226
+ if (IsUsedForZeroCmp || Options.AllowedTailExpansions .empty ())
227
+ return ;
228
+
229
// Each successful iteration shrinks the sequence by one entry (two popped,
// one appended), so the loop terminates; it also stops at the first pair that
// cannot be merged.
+ while (LoadSequence.size () >= 2 ) {
230
// Declared with plain `auto`, so these are presumably copies of the entries
// rather than references; `PreLast.Offset` is read again after the
// pop_back calls below.
+ auto Last = LoadSequence[LoadSequence.size () - 1 ];
231
+ auto PreLast = LoadSequence[LoadSequence.size () - 2 ];
232
+
233
+ // Exit the loop if the two sequences are not contiguous
234
+ if (PreLast.Offset + PreLast.LoadSize != Last.Offset )
235
+ break ;
236
+
237
// Size of the single load that would cover both entries; it is only
// accepted if it appears in the allowed list.
+ auto LoadSize = Last.LoadSize + PreLast.LoadSize ;
238
+ if (find (Options.AllowedTailExpansions , LoadSize) ==
239
+ Options.AllowedTailExpansions .end ())
240
+ break ;
241
+
242
+ // Remove the last two sequences and replace with the combined sequence
243
+ LoadSequence.pop_back ();
244
+ LoadSequence.pop_back ();
245
+ LoadSequence.emplace_back (PreLast.Offset , LoadSize);
246
+ }
247
+ }
248
+
213
249
// Initialize the basic block structure required for expansion of memcmp call
214
250
// with given maximum load size and memcmp size parameter.
215
251
// This structure includes:
@@ -255,31 +291,7 @@ MemCmpExpansion::MemCmpExpansion(
255
291
}
256
292
}
257
293
assert (LoadSequence.size () <= Options.MaxNumLoads && " broken invariant" );
258
- // This part of code attempts to optimize the LoadSequence by merging allowed
259
- // subsequences into single loads of allowed sizes from
260
- // `AllowedTailExpansions`. If it is for zero comparison or if no allowed tail
261
- // expansions are specified, we exit early.
262
- if (IsUsedForZeroCmp || !Options.AllowedTailExpansions .size ())
263
- return ;
264
-
265
- while (LoadSequence.size () >= 2 ) {
266
- auto Last = LoadSequence[LoadSequence.size () - 1 ];
267
- auto PreLast = LoadSequence[LoadSequence.size () - 2 ];
268
-
269
- // Exit the loop if the two sequences are not contiguous
270
- if (PreLast.Offset + PreLast.LoadSize != Last.Offset )
271
- break ;
272
-
273
- auto LoadSize = Last.LoadSize + PreLast.LoadSize ;
274
- if (find (Options.AllowedTailExpansions , LoadSize) ==
275
- Options.AllowedTailExpansions .end ())
276
- break ;
277
-
278
- // Remove the last two sequences and replace with the combined sequence
279
- LoadSequence.pop_back ();
280
- LoadSequence.pop_back ();
281
- LoadSequence.emplace_back (PreLast.Offset , LoadSize);
282
- }
294
+ optimiseLoadSequence (LoadSequence, Options, IsUsedForZeroCmp);
283
295
}
284
296
285
297
unsigned MemCmpExpansion::getNumBlocks () {
@@ -303,7 +315,6 @@ void MemCmpExpansion::createResultBlock() {
303
315
}
304
316
305
317
MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair (Type *LoadSizeType,
306
- bool NeedsBSwap,
307
318
Type *BSwapSizeType,
308
319
Type *CmpSizeType,
309
320
unsigned OffsetBytes) {
@@ -334,13 +345,13 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
334
345
Rhs = Builder.CreateAlignedLoad (LoadSizeType, RhsSource, RhsAlign);
335
346
336
347
// Zero extend if Byte Swap intrinsic has different type
337
- if (NeedsBSwap && LoadSizeType != BSwapSizeType) {
348
+ if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
338
349
Lhs = Builder.CreateZExt (Lhs, BSwapSizeType);
339
350
Rhs = Builder.CreateZExt (Rhs, BSwapSizeType);
340
351
}
341
352
342
353
// Swap bytes if required.
343
- if (NeedsBSwap ) {
354
+ if (BSwapSizeType ) {
344
355
Function *Bswap = Intrinsic::getDeclaration (
345
356
CI->getModule (), Intrinsic::bswap, BSwapSizeType);
346
357
Lhs = Builder.CreateCall (Bswap, Lhs);
@@ -364,8 +375,8 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
364
375
BasicBlock *BB = LoadCmpBlocks[BlockIndex];
365
376
Builder.SetInsertPoint (BB);
366
377
const LoadPair Loads =
367
- getLoadPair (Type::getInt8Ty (CI->getContext ()), /* NeedsBSwap= */ false ,
368
- nullptr , Type::getInt32Ty (CI->getContext ()), OffsetBytes);
378
+ getLoadPair (Type::getInt8Ty (CI->getContext ()), nullptr ,
379
+ Type::getInt32Ty (CI->getContext ()), OffsetBytes);
369
380
Value *Diff = Builder.CreateSub (Loads.Lhs , Loads.Rhs );
370
381
371
382
PhiRes->addIncoming (Diff, BB);
@@ -421,8 +432,8 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
421
432
for (unsigned i = 0 ; i < NumLoads; ++i, ++LoadIndex) {
422
433
const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
423
434
const LoadPair Loads = getLoadPair (
424
- IntegerType::get (CI->getContext (), CurLoadEntry.LoadSize * 8 ),
425
- /* NeedsBSwap= */ false , nullptr , MaxLoadType, CurLoadEntry.Offset );
435
+ IntegerType::get (CI->getContext (), CurLoadEntry.LoadSize * 8 ), nullptr ,
436
+ MaxLoadType, CurLoadEntry.Offset );
426
437
427
438
if (NumLoads != 1 ) {
428
439
// If we have multiple loads per block, we need to generate a composite
@@ -508,18 +519,20 @@ void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
508
519
509
520
Type *LoadSizeType =
510
521
IntegerType::get (CI->getContext (), CurLoadEntry.LoadSize * 8 );
511
- Type *BSwapSizeType = IntegerType::get (
512
- CI->getContext (), PowerOf2Ceil (CurLoadEntry.LoadSize * 8 ));
522
+ Type *BSwapSizeType =
523
+ DL.isLittleEndian ()
524
+ ? IntegerType::get (CI->getContext (),
525
+ PowerOf2Ceil (CurLoadEntry.LoadSize * 8 ))
526
+ : nullptr ;
513
527
Type *MaxLoadType = IntegerType::get (
514
528
CI->getContext (),
515
529
std::max (MaxLoadSize, (unsigned )PowerOf2Ceil (CurLoadEntry.LoadSize )) * 8 );
516
530
assert (CurLoadEntry.LoadSize <= MaxLoadSize && " Unexpected load type" );
517
531
518
532
Builder.SetInsertPoint (LoadCmpBlocks[BlockIndex]);
519
533
520
- const LoadPair Loads =
521
- getLoadPair (LoadSizeType, /* NeedsBSwap=*/ DL.isLittleEndian (),
522
- BSwapSizeType, MaxLoadType, CurLoadEntry.Offset );
534
+ const LoadPair Loads = getLoadPair (LoadSizeType, BSwapSizeType, MaxLoadType,
535
+ CurLoadEntry.Offset );
523
536
524
537
// Add the loaded values to the phi nodes for calculating memcmp result only
525
538
// if result is not used in a zero equality.
@@ -624,27 +637,25 @@ Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
624
637
// / A memcmp expansion that only has one block of load and compare can bypass
625
638
// / the compare, branch, and phi IR that is required in the general case.
626
639
Value *MemCmpExpansion::getMemCmpOneBlock () {
640
+ bool NeedsBSwap = DL.isLittleEndian () && Size != 1 ;
627
641
Type *LoadSizeType = IntegerType::get (CI->getContext (), Size * 8 );
628
642
Type *BSwapSizeType =
629
- IntegerType::get (CI->getContext (), PowerOf2Ceil (Size * 8 ));
643
+ NeedsBSwap ? IntegerType::get (CI->getContext (), PowerOf2Ceil (Size * 8 ))
644
+ : nullptr ;
630
645
Type *MaxLoadType =
631
646
IntegerType::get (CI->getContext (),
632
647
std::max (MaxLoadSize, (unsigned )PowerOf2Ceil (Size)) * 8 );
633
648
634
- bool NeedsBSwap = DL.isLittleEndian () && Size != 1 ;
635
-
636
649
// The i8 and i16 cases don't need compares. We zext the loaded values and
637
650
// subtract them to get the suitable negative, zero, or positive i32 result.
638
651
if (Size < 4 ) {
639
- const LoadPair Loads = getLoadPair (LoadSizeType, NeedsBSwap, BSwapSizeType,
640
- Builder.getInt32Ty (),
641
- /* Offset*/ 0 );
652
+ const LoadPair Loads = getLoadPair (LoadSizeType, BSwapSizeType,
653
+ Builder.getInt32Ty (), /* Offset*/ 0 );
642
654
return Builder.CreateSub (Loads.Lhs , Loads.Rhs );
643
655
}
644
656
645
- const LoadPair Loads =
646
- getLoadPair (LoadSizeType, NeedsBSwap, BSwapSizeType, MaxLoadType,
647
- /* Offset*/ 0 );
657
+ const LoadPair Loads = getLoadPair (LoadSizeType, BSwapSizeType, MaxLoadType,
658
+ /* Offset*/ 0 );
648
659
// The result of memcmp is negative, zero, or positive, so produce that by
649
660
// subtracting 2 extended compare bits: sub (ugt, ult).
650
661
// If a target prefers to use selects to get -1/0/1, they should be able
0 commit comments