@@ -96,6 +96,9 @@ namespace
96
96
void generalGroupI8Stream (
97
97
LLVMContext& C, uint32_t NumI8, uint32_t Align,
98
98
uint32_t & NumI32, Type** Vecs, uint32_t & L, uint32_t BaseTypeSize);
99
+ // support function for replaceCountTheLeadingZeros
100
+ Value* evaluateCtlzUpto32bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison);
101
+ Value* evaluateCtlz64bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison);
99
102
100
103
// / replace member function
101
104
void replaceMemcpy (IntrinsicInst* I);
@@ -104,6 +107,7 @@ namespace
104
107
void replaceExpect (IntrinsicInst* I);
105
108
void replaceFunnelShift (IntrinsicInst* I);
106
109
void replaceLRound (IntrinsicInst* I);
110
+ void replaceCountTheLeadingZeros (IntrinsicInst* I);
107
111
108
112
static const std::map< Intrinsic::ID, MemFuncPtr_t > m_intrinsicToFunc;
109
113
};
@@ -129,7 +133,8 @@ const std::map< Intrinsic::ID, ReplaceUnsupportedIntrinsics::MemFuncPtr_t > Repl
129
133
{ Intrinsic::memmove, &ReplaceUnsupportedIntrinsics::replaceMemMove },
130
134
{ Intrinsic::expect, &ReplaceUnsupportedIntrinsics::replaceExpect },
131
135
{ Intrinsic::lround, &ReplaceUnsupportedIntrinsics::replaceLRound },
132
- { Intrinsic::llround, &ReplaceUnsupportedIntrinsics::replaceLRound }
136
+ { Intrinsic::llround, &ReplaceUnsupportedIntrinsics::replaceLRound },
137
+ { Intrinsic::ctlz, &ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros }
133
138
};
134
139
135
140
ReplaceUnsupportedIntrinsics::ReplaceUnsupportedIntrinsics () : FunctionPass(ID)
@@ -985,6 +990,113 @@ void ReplaceUnsupportedIntrinsics::replaceLRound(IntrinsicInst* I) {
985
990
I->eraseFromParent ();
986
991
}
987
992
993
+ /*
994
+ Replaces llvm.ctlz.* intrinsics (count the leading zeros)
995
+ to llvm.ctlz.i32 because we support llvm.ctlz intrinsic
996
+ only with source type i32.
997
+
998
+ E.g.
999
+ %1 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %0, i1 false)
1000
+ ret <2 x i8> %1
1001
+ =>
1002
+ %1 = extractelement <2 x i8> %0, i64 0
1003
+ %2 = zext i8 %1 to i32
1004
+ %3 = call i32 @llvm.ctlz.i32(i32 %2, i1 false)
1005
+ %4 = trunc i32 %3 to i8
1006
+ %5 = add nsw i8 %4, -24
1007
+ %6 = insertelement <2 x i8> undef, i8 %5, i32 0
1008
+ %7 = extractelement <2 x i8> %0, i64 1
1009
+ %8 = zext i8 %7 to i32
1010
+ %9 = call i32 @llvm.ctlz.i32(i32 %8, i1 false)
1011
+ %10 = trunc i32 %9 to i8
1012
+ %11 = add nsw i8 %10, -24
1013
+ %12 = insertelement <2 x i8> %6, i8 %11, i32 1
1014
+ %13 = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %0, i1 false)
1015
+ ret <2 x i8> %12
1016
+ */
1017
+ void ReplaceUnsupportedIntrinsics::replaceCountTheLeadingZeros (IntrinsicInst* I) {
1018
+ IGC_ASSERT (I->getIntrinsicID () == Intrinsic::ctlz);
1019
+
1020
+ Type* oldIntrinsicDstType = I->getType ();
1021
+ Type* singleElementType = oldIntrinsicDstType;
1022
+ uint32_t numOfElements = 1 ;
1023
+ bool isVector = oldIntrinsicDstType->isVectorTy ();
1024
+
1025
+ if (isVector)
1026
+ {
1027
+ auto oldIntrinsicDstTypeFVT = dyn_cast<IGCLLVM::FixedVectorType>(oldIntrinsicDstType);
1028
+ numOfElements = (uint32_t )oldIntrinsicDstTypeFVT->getNumElements ();
1029
+ singleElementType = oldIntrinsicDstTypeFVT->getElementType ();
1030
+ }
1031
+
1032
+ int singleElementSizeInBits = singleElementType->getScalarSizeInBits ();
1033
+
1034
+ IGC_ASSERT_MESSAGE (singleElementSizeInBits == 8 || singleElementSizeInBits == 16 ||
1035
+ singleElementSizeInBits == 32 || singleElementSizeInBits == 64 ,
1036
+ " Currently for Intrinsic::ctlz we support source bit size: 8,16,32,64" );
1037
+
1038
+ // noting to replace, early return
1039
+ if (!isVector && singleElementSizeInBits == 32 ) return ;
1040
+
1041
+ bool bitSizeLowerThan32 = singleElementSizeInBits < 32 ;
1042
+ bool bitSizeEqual64 = singleElementSizeInBits == 64 ;
1043
+
1044
+ IGCLLVM::IRBuilder<> Builder (I);
1045
+
1046
+ Value* inputVal = I->getArgOperand (0 );
1047
+ Value* canBePoison = I->getArgOperand (1 );
1048
+ Value* outputVal = llvm::UndefValue::get (oldIntrinsicDstType); // Will be overwritten in scalar case.
1049
+ Value* retVal = inputVal;
1050
+
1051
+ for (uint32_t i = 0 ; i < numOfElements; i++)
1052
+ {
1053
+ if (isVector) retVal = Builder.CreateExtractElement (inputVal, i);
1054
+
1055
+ if (bitSizeLowerThan32)
1056
+ retVal = evaluateCtlzUpto32bit (&Builder, retVal, singleElementType, canBePoison);
1057
+ else if (bitSizeEqual64)
1058
+ retVal = evaluateCtlz64bit (&Builder, retVal, singleElementType, canBePoison);
1059
+
1060
+ if (singleElementSizeInBits == 32 )
1061
+ retVal = Builder.CreateIntrinsic (Intrinsic::ctlz, { Builder.getInt32Ty () }, { retVal, canBePoison });
1062
+
1063
+ if (isVector)
1064
+ outputVal = Builder.CreateInsertElement (outputVal, retVal, Builder.getInt32 (i));
1065
+ else // for scalar type
1066
+ outputVal = retVal;
1067
+ }
1068
+ I->replaceAllUsesWith (outputVal);
1069
+ }
1070
+
1071
+ Value* ReplaceUnsupportedIntrinsics::evaluateCtlzUpto32bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison) {
1072
+ int sizeInBits = singleElementType->getScalarSizeInBits ();
1073
+ Value* retVal = Builder->CreateZExt (inVal, Builder->getInt32Ty ());
1074
+ retVal = Builder->CreateIntrinsic (Intrinsic::ctlz, { Builder->getInt32Ty () }, { retVal, canBePoison });
1075
+ retVal = Builder->CreateTrunc (retVal, singleElementType);
1076
+ auto constInt = Builder->getIntN (sizeInBits, sizeInBits - 32 );
1077
+ retVal = Builder->CreateNSWAdd (retVal, constInt);
1078
+ return retVal;
1079
+ }
1080
+
1081
+ Value* ReplaceUnsupportedIntrinsics::evaluateCtlz64bit (IGCLLVM::IRBuilder<>* Builder, Value* inVal, Type* singleElementType, Value* canBePoison) {
1082
+ Value* lowBits = Builder->CreateTrunc (inVal, Builder->getInt32Ty ());
1083
+ lowBits = Builder->CreateIntrinsic (Intrinsic::ctlz, { Builder->getInt32Ty () }, { lowBits, canBePoison });
1084
+
1085
+ Value* hiBits = Builder->CreateLShr (inVal, 32 );
1086
+ hiBits = Builder->CreateTrunc (hiBits, Builder->getInt32Ty ());
1087
+ hiBits = Builder->CreateIntrinsic (Intrinsic::ctlz, { Builder->getInt32Ty () }, { hiBits, canBePoison });
1088
+
1089
+ auto maxValueIn32BitsPlusOne = Builder->getInt64 ((uint64_t )(0xffffffff ) + 1 ); // maxValueIn32Bits + 1
1090
+ Value* cmp = Builder->CreateICmp (CmpInst::Predicate::ICMP_ULT, inVal, maxValueIn32BitsPlusOne);
1091
+
1092
+ auto constInt = Builder->getInt32 (32 );
1093
+ lowBits = Builder->CreateAdd (lowBits, constInt);
1094
+
1095
+ Value* retVal = Builder->CreateSelect (cmp, lowBits, hiBits);
1096
+ retVal = Builder->CreateZExt (retVal, singleElementType);
1097
+ return retVal;
1098
+ }
1099
+
988
1100
void ReplaceUnsupportedIntrinsics::visitIntrinsicInst (IntrinsicInst& I) {
989
1101
if (m_intrinsicToFunc.find (I.getIntrinsicID ()) != m_intrinsicToFunc.end ()) {
990
1102
m_instsToReplace.push_back (&I);
0 commit comments