Skip to content

Commit aacfe2b

Browse files
committed
[InstCombine] recognizeBSwapOrBitReverseIdiom - add vector support
Add basic vector handling to recognizeBSwapOrBitReverseIdiom/collectBitParts. This works at the per-element level: all vector element operations must match (splat constants, etc.), and there is no cross-element support (insert/extract/shuffle, etc.).
1 parent 089e628 commit aacfe2b

File tree

2 files changed

+27
-56
lines changed

2 files changed

+27
-56
lines changed

llvm/lib/Transforms/Utils/Local.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2803,7 +2803,7 @@ struct BitPart {
28032803

28042804
/// Analyze the specified subexpression and see if it is capable of providing
28052805
/// pieces of a bswap or bitreverse. The subexpression provides a potential
2806-
/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
2806+
/// piece of a bswap or bitreverse if it can be proved that each non-zero bit in
28072807
/// the output of the expression came from a corresponding bit in some other
28082808
/// value. This function is recursive, and the end result is a mapping of
28092809
/// bitnumber to bitnumber. It is the caller's responsibility to validate that
@@ -2815,6 +2815,10 @@ struct BitPart {
28152815
/// BitPart is returned with Provider set to %X and Provenance[24-31] set to
28162816
/// [0-7].
28172817
///
2818+
/// For vector types, all analysis is performed at the per-element level. No
2819+
/// cross-element analysis is supported (shuffle/insertion/reduction), and all
2820+
/// constant masks must be splatted across all elements.
2821+
///
28182822
/// To avoid revisiting values, the BitPart results are memoized into the
28192823
/// provided map. To avoid unnecessary copying of BitParts, BitParts are
28202824
/// constructed in-place in the \c BPS map. Because of this \c BPS needs to
@@ -3019,14 +3023,14 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
30193023
return false;
30203024
if (!MatchBSwaps && !MatchBitReversals)
30213025
return false;
3022-
IntegerType *ITy = dyn_cast<IntegerType>(I->getType());
3023-
if (!ITy || ITy->getBitWidth() > 128)
3024-
return false; // Can't do vectors or integers > 128 bits.
3026+
Type *ITy = I->getType();
3027+
if (!ITy->isIntOrIntVectorTy() || ITy->getScalarSizeInBits() > 128)
3028+
return false; // Can't do integer/elements > 128 bits.
30253029

3026-
IntegerType *DemandedTy = ITy;
3030+
Type *DemandedTy = ITy;
30273031
if (I->hasOneUse())
30283032
if (auto *Trunc = dyn_cast<TruncInst>(I->user_back()))
3029-
DemandedTy = cast<IntegerType>(Trunc->getType());
3033+
DemandedTy = Trunc->getType();
30303034

30313035
// Try to find all the pieces corresponding to the bswap.
30323036
std::map<Value *, Optional<BitPart>> BPS;
@@ -3044,12 +3048,14 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
30443048
BitProvenance = BitProvenance.drop_back();
30453049
if (BitProvenance.empty())
30463050
return false; // TODO - handle null value?
3047-
DemandedTy = IntegerType::get(I->getContext(), BitProvenance.size());
3051+
DemandedTy = Type::getIntNTy(I->getContext(), BitProvenance.size());
3052+
if (auto *IVecTy = dyn_cast<VectorType>(ITy))
3053+
DemandedTy = VectorType::get(DemandedTy, IVecTy);
30483054
}
30493055

30503056
// Check BitProvenance hasn't found a source larger than the result type.
3051-
unsigned DemandedBW = DemandedTy->getBitWidth();
3052-
if (DemandedBW > ITy->getBitWidth())
3057+
unsigned DemandedBW = DemandedTy->getScalarSizeInBits();
3058+
if (DemandedBW > ITy->getScalarSizeInBits())
30533059
return false;
30543060

30553061
// Now, is the bit permutation correct for a bswap or a bitreverse? We can

llvm/test/Transforms/InstCombine/bswap.ll

Lines changed: 12 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,7 @@ define i32 @test1(i32 %i) {
2222

2323
define <2 x i32> @test1_vector(<2 x i32> %i) {
2424
; CHECK-LABEL: @test1_vector(
25-
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[I:%.*]], <i32 24, i32 24>
26-
; CHECK-NEXT: [[T3:%.*]] = lshr <2 x i32> [[I]], <i32 8, i32 8>
27-
; CHECK-NEXT: [[T4:%.*]] = and <2 x i32> [[T3]], <i32 65280, i32 65280>
28-
; CHECK-NEXT: [[T5:%.*]] = or <2 x i32> [[T1]], [[T4]]
29-
; CHECK-NEXT: [[T7:%.*]] = shl <2 x i32> [[I]], <i32 8, i32 8>
30-
; CHECK-NEXT: [[T8:%.*]] = and <2 x i32> [[T7]], <i32 16711680, i32 16711680>
31-
; CHECK-NEXT: [[T9:%.*]] = or <2 x i32> [[T5]], [[T8]]
32-
; CHECK-NEXT: [[T11:%.*]] = shl <2 x i32> [[I]], <i32 24, i32 24>
33-
; CHECK-NEXT: [[T12:%.*]] = or <2 x i32> [[T9]], [[T11]]
25+
; CHECK-NEXT: [[T12:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[I:%.*]])
3426
; CHECK-NEXT: ret <2 x i32> [[T12]]
3527
;
3628
%t1 = lshr <2 x i32> %i, <i32 24, i32 24>
@@ -64,15 +56,7 @@ define i32 @test2(i32 %arg) {
6456

6557
define <2 x i32> @test2_vector(<2 x i32> %arg) {
6658
; CHECK-LABEL: @test2_vector(
67-
; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], <i32 24, i32 24>
68-
; CHECK-NEXT: [[T4:%.*]] = shl <2 x i32> [[ARG]], <i32 8, i32 8>
69-
; CHECK-NEXT: [[T5:%.*]] = and <2 x i32> [[T4]], <i32 16711680, i32 16711680>
70-
; CHECK-NEXT: [[T6:%.*]] = or <2 x i32> [[T2]], [[T5]]
71-
; CHECK-NEXT: [[T8:%.*]] = lshr <2 x i32> [[ARG]], <i32 8, i32 8>
72-
; CHECK-NEXT: [[T9:%.*]] = and <2 x i32> [[T8]], <i32 65280, i32 65280>
73-
; CHECK-NEXT: [[T10:%.*]] = or <2 x i32> [[T6]], [[T9]]
74-
; CHECK-NEXT: [[T12:%.*]] = lshr <2 x i32> [[ARG]], <i32 24, i32 24>
75-
; CHECK-NEXT: [[T14:%.*]] = or <2 x i32> [[T10]], [[T12]]
59+
; CHECK-NEXT: [[T14:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[ARG:%.*]])
7660
; CHECK-NEXT: ret <2 x i32> [[T14]]
7761
;
7862
%t2 = shl <2 x i32> %arg, <i32 24, i32 24>
@@ -225,15 +209,7 @@ define i32 @test6(i32 %x) nounwind readnone {
225209

226210
define <2 x i32> @test6_vector(<2 x i32> %x) nounwind readnone {
227211
; CHECK-LABEL: @test6_vector(
228-
; CHECK-NEXT: [[T:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
229-
; CHECK-NEXT: [[X_MASK:%.*]] = and <2 x i32> [[X]], <i32 65280, i32 65280>
230-
; CHECK-NEXT: [[T1:%.*]] = lshr <2 x i32> [[X]], <i32 16, i32 16>
231-
; CHECK-NEXT: [[T2:%.*]] = and <2 x i32> [[T1]], <i32 255, i32 255>
232-
; CHECK-NEXT: [[T3:%.*]] = or <2 x i32> [[X_MASK]], [[T]]
233-
; CHECK-NEXT: [[T4:%.*]] = or <2 x i32> [[T3]], [[T2]]
234-
; CHECK-NEXT: [[T5:%.*]] = shl <2 x i32> [[T4]], <i32 8, i32 8>
235-
; CHECK-NEXT: [[T6:%.*]] = lshr <2 x i32> [[X]], <i32 24, i32 24>
236-
; CHECK-NEXT: [[T7:%.*]] = or <2 x i32> [[T5]], [[T6]]
212+
; CHECK-NEXT: [[T7:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
237213
; CHECK-NEXT: ret <2 x i32> [[T7]]
238214
;
239215
%t = shl <2 x i32> %x, <i32 16, i32 16>
@@ -381,12 +357,9 @@ define i16 @test10(i32 %a) {
381357

382358
define <2 x i16> @test10_vector(<2 x i32> %a) {
383359
; CHECK-LABEL: @test10_vector(
384-
; CHECK-NEXT: [[SHR1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 8, i32 8>
385-
; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[SHR1]], <i32 255, i32 255>
386-
; CHECK-NEXT: [[AND2:%.*]] = shl <2 x i32> [[A]], <i32 8, i32 8>
387-
; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[AND1]], [[AND2]]
388-
; CHECK-NEXT: [[CONV:%.*]] = trunc <2 x i32> [[OR]] to <2 x i16>
389-
; CHECK-NEXT: ret <2 x i16> [[CONV]]
360+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[A:%.*]] to <2 x i16>
361+
; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]])
362+
; CHECK-NEXT: ret <2 x i16> [[REV]]
390363
;
391364
%shr1 = lshr <2 x i32> %a, <i32 8, i32 8>
392365
%and1 = and <2 x i32> %shr1, <i32 255, i32 255>
@@ -457,12 +430,10 @@ define i64 @PR39793_bswap_u64_as_u16(i64 %0) {
457430

458431
define <2 x i64> @PR39793_bswap_u64_as_u16_vector(<2 x i64> %0) {
459432
; CHECK-LABEL: @PR39793_bswap_u64_as_u16_vector(
460-
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP0:%.*]], <i64 8, i64 8>
461-
; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 255, i64 255>
462-
; CHECK-NEXT: [[TMP4:%.*]] = shl <2 x i64> [[TMP0]], <i64 8, i64 8>
463-
; CHECK-NEXT: [[TMP5:%.*]] = and <2 x i64> [[TMP4]], <i64 65280, i64 65280>
464-
; CHECK-NEXT: [[TMP6:%.*]] = or <2 x i64> [[TMP3]], [[TMP5]]
465-
; CHECK-NEXT: ret <2 x i64> [[TMP6]]
433+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[TMP0:%.*]] to <2 x i16>
434+
; CHECK-NEXT: [[REV:%.*]] = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> [[TRUNC]])
435+
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[REV]] to <2 x i64>
436+
; CHECK-NEXT: ret <2 x i64> [[TMP2]]
466437
;
467438
%2 = lshr <2 x i64> %0, <i64 8, i64 8>
468439
%3 = and <2 x i64> %2, <i64 255, i64 255>
@@ -550,14 +521,8 @@ declare i32 @llvm.bswap.i32(i32)
550521

551522
define <2 x i32> @partial_bswap_vector(<2 x i32> %x) {
552523
; CHECK-LABEL: @partial_bswap_vector(
553-
; CHECK-NEXT: [[X3:%.*]] = shl <2 x i32> [[X:%.*]], <i32 24, i32 24>
554-
; CHECK-NEXT: [[A2:%.*]] = shl <2 x i32> [[X]], <i32 8, i32 8>
555-
; CHECK-NEXT: [[X2:%.*]] = and <2 x i32> [[A2]], <i32 16711680, i32 16711680>
556-
; CHECK-NEXT: [[X32:%.*]] = or <2 x i32> [[X3]], [[X2]]
557-
; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[X]], <i32 -65536, i32 -65536>
558-
; CHECK-NEXT: [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[T1]])
559-
; CHECK-NEXT: [[R:%.*]] = or <2 x i32> [[X32]], [[T2]]
560-
; CHECK-NEXT: ret <2 x i32> [[R]]
524+
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
525+
; CHECK-NEXT: ret <2 x i32> [[TMP1]]
561526
;
562527
%x3 = shl <2 x i32> %x, <i32 24, i32 24>
563528
%a2 = shl <2 x i32> %x, <i32 8, i32 8>

0 commit comments

Comments
 (0)