@@ -2834,7 +2834,7 @@ void GenSpecificPattern::visitAnd(BinaryOperator& I)
2834
2834
}
2835
2835
}
2836
2836
2837
- void GenSpecificPattern::visitAShr (BinaryOperator & I)
2837
+ void GenSpecificPattern::visitAShr (BinaryOperator& I)
2838
2838
{
2839
2839
/*
2840
2840
From:
@@ -2850,70 +2850,36 @@ void GenSpecificPattern::visitAShr(BinaryOperator &I)
2850
2850
%132 = sext i8 %ee1 to i32
2851
2851
%133 = sext i8 %ee2 to i32
2852
2852
Which will end up as regioning instead of 2 instr.
2853
-
2854
- Also change shl 24 + asr 24 -> extractelement <4 x i8> %temp, i32 0
2855
2853
*/
2856
2854
2857
2855
llvm::IRBuilder<> builder (&I);
2858
2856
using namespace llvm ::PatternMatch;
2859
2857
2860
- auto tryTransformAsrToEE = [&](Instruction &I, uint32_t BaseTypeSize, uint32_t ElemSize)
2861
- {
2862
- IGC_ASSERT (BaseTypeSize % ElemSize == 0 );
2863
-
2864
- auto *BaseType = builder.getIntNTy (BaseTypeSize);
2865
- if (I.getType () != BaseType)
2866
- return false ;
2867
-
2868
- Value *AShrSrc = nullptr ;
2869
- uint32_t ShiftBits = BaseTypeSize - ElemSize;
2870
- auto AShrPattern = m_AShr (m_Value (AShrSrc), m_SpecificInt (ShiftBits));
2858
+ Instruction* AShrSrc = nullptr ;
2859
+ auto pattern_1 = m_AShr (m_Instruction (AShrSrc), m_SpecificInt (16 ));
2871
2860
2872
- if (!match (&I, AShrPattern))
2873
- return false ;
2874
- if (!AShrSrc || AShrSrc->getType () != BaseType)
2875
- return false ;
2861
+ if (match (&I, pattern_1) && I.getType ()->isIntegerTy (32 ) && AShrSrc && AShrSrc->getType ()->isIntegerTy (32 ))
2862
+ {
2863
+ Instruction* ShlSrc = nullptr ;
2876
2864
2877
- Value *ShlSrc = nullptr ;
2865
+ auto Shl_Pattern = m_Shl (m_Instruction (ShlSrc), m_SpecificInt (16 ));
2866
+ bool submatch = match (AShrSrc, Shl_Pattern) && ShlSrc && ShlSrc->getType ()->isIntegerTy (32 );
2878
2867
2879
- auto ShlPattern = m_Shl ( m_Value (ShlSrc), m_SpecificInt (ShiftBits));
2880
- bool ShlMatch = match (AShrSrc, ShlPattern) && ShlSrc && ShlSrc-> getType () == BaseType ;
2868
+ // in case there's no Shl, we take upper half
2869
+ uint32_t newIndex = 1 ;
2881
2870
2882
- uint32_t Index = 0 ;
2883
- Value *BaseValue = nullptr ;
2884
- if (ShlMatch)
2871
+ // if there was Shl, we take lower half
2872
+ if (submatch)
2885
2873
{
2886
- BaseValue = ShlSrc;
2887
- Index = 0 ;
2874
+ AShrSrc = ShlSrc;
2875
+ newIndex = 0 ;
2888
2876
}
2889
- else if (ShiftBits * 2 == BaseTypeSize)
2890
- {
2891
- // if Shl is not matched we can still make an EE on the AShr source
2892
- // but extract the upper half. Check we shift exactly the half bits
2893
- BaseValue = AShrSrc;
2894
- Index = 1 ;
2895
- }
2896
- else
2897
- {
2898
- return false ;
2899
- }
2900
-
2901
- VectorType *Vec = VectorType::get (builder.getIntNTy (ElemSize), BaseTypeSize / ElemSize, false );
2902
- Value* BC = builder.CreateBitCast (BaseValue, Vec);
2903
- Value* EE = builder.CreateExtractElement (BC, builder.getIntN (BaseTypeSize, Index));
2904
- Value* SExt = builder.CreateSExt (EE, BaseType);
2905
- I.replaceAllUsesWith (SExt);
2877
+ VectorType* vec2 = VectorType::get (builder.getInt16Ty (), 2 , false );
2878
+ Value* BC = builder.CreateBitCast (AShrSrc, vec2);
2879
+ Value* EE = builder.CreateExtractElement (BC, builder.getInt32 (newIndex));
2880
+ Value* Sext = builder.CreateSExt (EE, builder.getInt32Ty ());
2881
+ I.replaceAllUsesWith (Sext);
2906
2882
I.eraseFromParent ();
2907
-
2908
- return true ;
2909
- };
2910
-
2911
- tryTransformAsrToEE (I, 32 , 16 );
2912
-
2913
- CodeGenContext *CTX = getAnalysis<CodeGenContextWrapper>().getCodeGenContext ();
2914
- if (CTX->platform .supportByteALUOperation ())
2915
- {
2916
- tryTransformAsrToEE (I, 32 , 8 );
2917
2883
}
2918
2884
}
2919
2885
0 commit comments