@@ -62,8 +62,6 @@ cmp+sel to avoid expensive VxH mov.
62
62
#include " GenISAIntrinsics/GenIntrinsicInst.h"
63
63
#include " common/IGCConstantFolder.h"
64
64
#include " common/LLVMWarningsPush.hpp"
65
- #include " llvm/Config/llvm-config.h"
66
- #include < llvmWrapper/ADT/APInt.h>
67
65
#include " llvmWrapper/IR/IntrinsicInst.h"
68
66
#include < llvmWrapper/IR/DIBuilder.h>
69
67
#include < llvmWrapper/IR/DerivedTypes.h>
@@ -73,6 +71,7 @@ cmp+sel to avoid expensive VxH mov.
73
71
#include < llvm/ADT/Statistic.h>
74
72
#include < llvm/ADT/SetVector.h>
75
73
#include < llvm/Analysis/ConstantFolding.h>
74
+ #include < llvm/Analysis/InstructionSimplify.h>
76
75
#include < llvm/IR/Constants.h>
77
76
#include " llvm/IR/DebugInfo.h"
78
77
#include < llvm/IR/Function.h>
@@ -2737,50 +2736,80 @@ void GenSpecificPattern::visitMul(llvm::BinaryOperator& I)
2737
2736
Value* ValOp = nullptr ;
2738
2737
const APInt* ConstOp = nullptr ;
2739
2738
using namespace llvm ::PatternMatch;
2740
- if (match (&I, m_c_Mul (m_Value (ValOp), m_APInt (ConstOp))))
2741
- {
2742
- IRBuilder<> builder (&I);
2743
- if (ConstOp->isPowerOf2 ())
2744
- {
2745
- I.replaceAllUsesWith (
2746
- builder.CreateShl (ValOp, (uint64_t )ConstOp->exactLogBase2 ()));
2747
- I.eraseFromParent ();
2748
- return ;
2749
- }
2750
- else if (!IGCLLVM::isNegatedPowerOf2 (*ConstOp))
2751
- return ;
2739
+ if (!match (&I, m_c_Mul (m_Value (ValOp), m_APInt (ConstOp))))
2740
+ return ;
2741
+
2742
+ const bool HasNUW = I.hasNoUnsignedWrap ();
2743
+ const bool HasNSW = I.hasNoSignedWrap ();
2744
+ IRBuilder<> builder (&I);
2745
+ // 0. Skip the optimization for mul(x, {1; -1}) - in most cases, such
2746
+ // instances are generated within i64 emulation sequences, and the emitter
2747
+ // is geared toward such patterns.
2748
+ if (ConstOp->isOne () || ConstOp->isAllOnes ())
2749
+ return ;
2752
2750
2753
- APInt ConstOpAbs = ConstOp->abs ();
2751
+ // 1. 2^n case
2752
+ if (ConstOp->isPowerOf2 ())
2753
+ {
2754
2754
Value* Shl = builder.CreateShl (
2755
- ValOp, (uint64_t )ConstOpAbs.exactLogBase2 ());
2756
- for (User* UI : I.users ())
2757
- {
2758
- // We're going a little further and making sure that we merge the
2759
- // shift result's negation with any subsequent adds:
2760
- // '%shift = shl i64 %x, n'
2761
- // '%res' = sub %var, %shift
2762
- // preventing the additional negation in between the shift and the
2763
- // `var` addition. This kind of a peephole optimization may not be
2764
- // available later down the pipeline.
2765
- // TODO: Consider moving this logic to add/sub visitors once the
2766
- // whole GenSpecificPattern iteration logic is guaranteed to allow
2767
- // deferred instructions.
2768
- Value* Addend = nullptr ;
2769
- if (match (UI, m_c_Add (m_Specific (&I), m_Value (Addend))))
2770
- {
2771
- // Propagate the 'shl' <- 'sub' results instead. No way to
2772
- // erase the original adds within this pass just yet, as we'd
2773
- // be invalidating the InstVisitor iteration, but subsequent
2774
- // DCE instances will handle the orphaned instructions anyway.
2775
- builder.SetInsertPoint (cast<Instruction>(UI));
2776
- UI->replaceAllUsesWith (builder.CreateSub (Addend, Shl));
2777
- }
2778
- }
2779
- // Make sure to reset the insertion point for the shl negation after
2780
- // the possible transformations above
2781
- builder.SetInsertPoint (&I);
2782
- I.replaceAllUsesWith (builder.CreateNeg (Shl));
2755
+ ValOp, (uint64_t )ConstOp->exactLogBase2 (), " " ,
2756
+ /* bool NUW=*/ HasNUW, /* bool NSW=*/ HasNSW);
2757
+ I.replaceAllUsesWith (Shl);
2783
2758
I.eraseFromParent ();
2759
+ return ;
2760
+ }
2761
+
2762
+ // 2. -2^n case
2763
+ if (!ConstOp->isNegatedPowerOf2 ())
2764
+ return ;
2765
+ APInt ConstOpAbs = ConstOp->abs ();
2766
+ Value* Shl = builder.CreateShl (
2767
+ ValOp, (uint64_t )ConstOpAbs.exactLogBase2 (), " " ,
2768
+ /* bool NUW=*/ HasNUW, /* bool NSW=*/ HasNSW);
2769
+ // NB: We should retain NUW information in our case, so a simpler
2770
+ // IRBuilder<>::CreateNeg doesn't cut it.
2771
+ Value* Sub = builder.CreateSub (Constant::getNullValue (Shl->getType ()), Shl,
2772
+ " " , /* bool NUW=*/ HasNUW,/* bool NSW=*/ HasNSW);
2773
+ I.replaceAllUsesWith (Sub);
2774
+ I.eraseFromParent ();
2775
+
2776
+ // Go a little further and make sure to merge the shift result's negation
2777
+ // with any subsequent adds:
2778
+ // '%shift = shl i64 %x, n'
2779
+ // '%res = sub %var, %shift'
2780
+ // preventing the additional negation in between the shift and `var`
2781
+ // addition.
2782
+ //
2783
+ // TODO: Consider also folding sdiv/srem uses with a constant operand by
2784
+ // negating said operand and using the 'shl' result directly. No reason to
2785
+ // consider 'mul's though - in case of matching overflow flags, we'd expect
2786
+ // these to be constant-folded with the original 'mul' earlier in the
2787
+ // pipeline.
2788
+ //
2789
+ // TODO: Consider re-using an LLVM routine of some sort for this
2790
+ // optimization. That said, on LLVM 14, llvm::SimplifyBinOp() and
2791
+ // higher-level IRBuilder<InstSimplifyFolder>::CreateAdd()/CreateBinOp()
2792
+ // replacements fail to work out of the box.
2793
+ // Alternatively, move this logic to add/sub visitors once the whole
2794
+ // GenSpecificPattern iteration logic allows for deferred instructions.
2795
+ for (User* UI : Sub->users ())
2796
+ {
2797
+ Value* Addend = nullptr ;
2798
+ if (!match (UI, m_c_Add (m_Specific (Sub), m_Value (Addend))))
2799
+ continue ;
2800
+ // Propagate the 'shl' <- 'sub' results instead. No way to erase the
2801
+ // original adds within this pass just yet, as we'd invalidate the
2802
+ // ongoing InstVisitor iteration, however subsequent DCE instances will
2803
+ // handle the orphaned instructions anyway.
2804
+ auto * AddI = cast<Instruction>(UI);
2805
+ if (AddI->hasNoUnsignedWrap () != HasNUW ||
2806
+ AddI->hasNoSignedWrap () != HasNSW)
2807
+ continue ;
2808
+
2809
+ builder.SetInsertPoint (AddI);
2810
+ Value* Sub = builder.CreateSub (
2811
+ Addend, Shl, " " , /* bool NUW=*/ HasNUW, /* bool NSW=*/ HasNSW);
2812
+ AddI->replaceAllUsesWith (Sub);
2784
2813
}
2785
2814
}
2786
2815
0 commit comments