Skip to content

Commit b870336

Browse files
[SLP] Match poison as instruction with the same opcode
The patch allows vectorizing scalar instructions + poison values as if the poisons were instructions with the same opcode. It allows better vectorization of repeated values, reduces the number of insertelement instructions, and serves as groundwork for copyable elements vectorization. AVX512, -O3 + LTO: JM/ldecod - better vector code; Applications/oggenc - better vectorization; CINT2017speed/625.x264_s CINT2017rate/525.x264_r - better vector code; CFP2017rate/526.blender_r - better vector code; CFP2006/447.dealII - small variations; Benchmarks/Bullet - extra vector code; CFP2017rate/510.parest_r - better vectorization; CINT2017rate/502.gcc_r CINT2017speed/602.gcc_s - extra vector code; Benchmarks/tramp3d-v4 - small variations; CFP2006/453.povray - extra vector code; JM/lencod - better vector code; CFP2017rate/511.povray_r - extra vector code; MemFunctions/MemFunctions - extra vector code; LoopVectorization/LoopVectorizationBenchmarks - extra vector code; XRay/FDRMode - extra vector code; XRay/ReturnReference - extra vector code; LCALS/SubsetCLambdaLoops - extra vector code; LCALS/SubsetCRawLoops - extra vector code; LCALS/SubsetARawLoops - extra vector code; LCALS/SubsetALambdaLoops - extra vector code; DOE-ProxyApps-C++/miniFE - extra vector code; LoopVectorization/LoopInterleavingBenchmarks - extra vector code; LCALS/SubsetBLambdaLoops - extra vector code; MicroBenchmarks/harris - extra vector code; ImageProcessing/Dither - extra vector code; MicroBenchmarks/SLPVectorization - extra vector code; ImageProcessing/Blur - extra vector code; ImageProcessing/Dilate - extra vector code; Builtins/Int128 - extra vector code; ImageProcessing/Interpolation - extra vector code; ImageProcessing/BilateralFiltering - extra vector code; ImageProcessing/AnisotropicDiffusion - extra vector code; MicroBenchmarks/LoopInterchange - extra code vectorized; LCALS/SubsetBRawLoops - extra code vectorized; CINT2006/464.h264ref - extra vectorization with wider vectors; CFP2017rate/508.namd_r - small variations, extra phis vectorized;
CFP2006/444.namd - 2 2 x phi replaced by 4 x phi; DOE-ProxyApps-C/SimpleMOC - extra code vectorized; CINT2017rate/541.leela_r CINT2017speed/641.leela_s - the function better vectorized and inlined; Benchmarks/Misc/oourafft - 2 4 x bit reductions replaced by 2 x vector code; FreeBench/fourinarow - better vectorization. Reviewers: RKSimon. Reviewed By: RKSimon. Pull Request: #115946
1 parent 2fe947b commit b870336

File tree

5 files changed

+238
-96
lines changed

5 files changed

+238
-96
lines changed

llvm/include/llvm/IR/Instruction.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ class Instruction : public User,
278278
bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
279279
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
280280
bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
281+
bool isFPDivRem() const { return isFPDivRem(getOpcode()); }
281282
bool isShift() const { return isShift(getOpcode()); }
282283
bool isCast() const { return isCast(getOpcode()); }
283284
bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
@@ -304,6 +305,10 @@ class Instruction : public User,
304305
return Opcode == UDiv || Opcode == SDiv || Opcode == URem || Opcode == SRem;
305306
}
306307

308+
static inline bool isFPDivRem(unsigned Opcode) {
309+
return Opcode == FDiv || Opcode == FRem;
310+
}
311+
307312
/// Determine if the Opcode is one of the shift instructions.
308313
static inline bool isShift(unsigned Opcode) {
309314
return Opcode >= Shl && Opcode <= AShr;

0 commit comments

Comments
 (0)