Mix function optimization (3rd try).

mtargows · igcbot · commit fa056a099d81 · 2022-05-21T00:54:34.000+02:00
Mix function optimization, found in GLSL shaders.
diff --git a/IGC/Compiler/CustomSafeOptPass.cpp b/IGC/Compiler/CustomSafeOptPass.cpp
@@ -1227,6 +1227,159 @@ void CustomSafeOptPass::matchDp4a(BinaryOperator &I) {
     I.replaceAllUsesWith(Res);
 }
 
+// Optimize mix (linear interpolation) operation if detected.
+// Mix is computed as x*(1 - a) + y*a
+// Replace it with a*(y - x) + x to save one instruction ('add' ISA, 'sub' in IR).
+// This pattern also optimizes a similar operation:
+// x*(a - 1) + y*a which can be replaced with a*(x + y) - x
+//
+// I is the candidate FSub instruction ("1 - a" or "a - 1"). The rewrite is done only
+// when every user of I is an FMul whose only user is an FAdd that adds it to a second
+// FMul having 'a' as one of its operands. The new instructions are inserted before
+// each such FAdd and all uses of the FAdd are redirected to the last new instruction.
+// The replaced instructions are not erased by this function.
+void CustomSafeOptPass::matchMixOperation(BinaryOperator& I)
+{
+    // Pattern Mix check step 1: find a FSub instruction with a constant value of 1.
+    if (I.getOpcode() != BinaryOperator::FSub)
+    {
+        return;
+    }
+
+    const bool isOp0Const = llvm::isa<llvm::ConstantFP>(I.getOperand(0));
+    const bool isOp1Const = llvm::isa<llvm::ConstantFP>(I.getOperand(1));
+    if (isOp0Const == isOp1Const)
+    {
+        return; // Exactly one of the FSub operands has to be a constant.
+    }
+
+    // Index of the constant operand: 0 for the "1 - a" form, 1 for the "a - 1" form.
+    const unsigned int fSubConstOpIdx = isOp0Const ? 0 : 1;
+    const APFloat& APF = llvm::cast<llvm::ConstantFP>(I.getOperand(fSubConstOpIdx))->getValueAPF();
+
+    // Only single and double precision constants are recognized. For any other
+    // format, as well as for Inf and NaN, val stays 0.0 and the pattern is rejected.
+    double val = 0.0;
+    if (!APF.isInfinity() && !APF.isNaN())
+    {
+        if (&APF.getSemantics() == &APFloat::IEEEdouble())
+        {
+            val = APF.convertToDouble();
+        }
+        else if (&APF.getSemantics() == &APFloat::IEEEsingle())
+        {
+            val = (double)APF.convertToFloat();
+        }
+    }
+    if (val != 1.0)
+    {
+        return;
+    }
+
+    // The non-constant FSub operand ('a' in the formulas above).
+    Value* const a = I.getOperand(1 - fSubConstOpIdx);
+    // Matched (FMul using the FSub, second FMul using 'a') pairs, one per user of I.
+    SmallVector<std::pair<Instruction*, Instruction*>, 3> fMulInsts;
+
+    // Pattern Mix check step 2: there should be only FMul users of this FSub instruction
+    for (User* U : I.users())
+    {
+        // Nothing is modified before all users are matched, so the first user that
+        // does not fit the pattern ends the whole attempt.
+        Instruction* fMul = dyn_cast_or_null<Instruction>(U);
+        if (!fMul || fMul->getOpcode() != BinaryOperator::FMul)
+        {
+            return;
+        }
+
+        // Pattern Mix check step 3: there should be only one fAdd user for such an FMul instruction
+        if (!fMul->hasOneUse())
+        {
+            return;
+        }
+        Instruction* fAdd = dyn_cast_or_null<Instruction>(*fMul->users().begin());
+        if (!fAdd || fAdd->getOpcode() != BinaryOperator::FAdd)
+        {
+            return;
+        }
+
+        // Pattern Mix check step 4: fAdd should be a user of two FMul instructions
+        const unsigned int fMulOpIdx = (fMul == fAdd->getOperand(0)) ? 0 : 1;
+        Instruction* fMul2nd = dyn_cast_or_null<Instruction>(fAdd->getOperand(1 - fMulOpIdx));
+        if (!fMul2nd || fMul2nd->getOpcode() != BinaryOperator::FMul)
+        {
+            return;
+        }
+
+        // Pattern Mix check step 5: Second fMul should be a user of the same,
+        // other than a value of 1.0, operand as fSub instruction. Both operands of
+        // the second fMul are checked, as 'a' may be on either side of the multiplication.
+        if (fMul2nd->getOperand(0) != a && fMul2nd->getOperand(1) != a)
+        {
+            return;
+        }
+
+        fMulInsts.push_back(std::make_pair(fMul, fMul2nd));
+    }
+
+    if (fMulInsts.empty())
+    {
+        return; // The FSub instruction has no users, there is nothing to rewrite.
+    }
+
+    // Pattern Mix fully detected. Replace sequence of detected instructions with new ones.
+    IGC_ASSERT_MESSAGE(
+        fMulInsts.size() == static_cast<size_t>(std::distance(I.users().begin(), I.users().end())),
+        "Incorrect pattern match data");
+
+    // If Pattern Mix with 1-a in the first instruction was detected then create
+    // this sequence of new instructions: FSub, FMul, FAdd.
+    // But if Pattern Mix with a-1 in the first instruction was detected then create
+    // this sequence of new instructions: FAdd, FMul, FSub.
+    const bool isOneMinusA = (fSubConstOpIdx == 0);
+    const Instruction::BinaryOps newFirstInstType = isOneMinusA ? Instruction::FSub : Instruction::FAdd;
+    const Instruction::BinaryOps newLastInstType = isOneMinusA ? Instruction::FAdd : Instruction::FSub;
+    Value* const fSubVal = &I;
+
+    for (const std::pair<Instruction*, Instruction*>& fMulPair : fMulInsts)
+    {
+        Instruction* fMul = fMulPair.first;
+        Instruction* fMul2nd = fMulPair.second;
+        Instruction* fAdd = cast<Instruction>(*fMul->users().begin());
+
+        // 'y' is the second FMul operand other than 'a',
+        // 'x' is the first FMul operand other than the FSub.
+        Value* y = fMul2nd->getOperand((a == fMul2nd->getOperand(0)) ? 1 : 0);
+        Value* x = fMul->getOperand((fSubVal == fMul->getOperand(0)) ? 1 : 0);
+
+        // y - x (or y + x); takes fast math flags of the first FMul and debug location of the FSub.
+        Instruction* newFirstInst = BinaryOperator::Create(newFirstInstType, y, x, "", fAdd);
+        newFirstInst->copyFastMathFlags(fMul);
+        if (DILocation* DL1st = I.getDebugLoc())
+        {
+            newFirstInst->setDebugLoc(DL1st);
+        }
+
+        // a * (y - x) (or a * (y + x))
+        Instruction* newFMul = BinaryOperator::CreateFMul(a, newFirstInst, "", fAdd);
+        newFMul->copyFastMathFlags(fMul2nd);
+        if (DILocation* DL2nd = fMul2nd->getDebugLoc())
+        {
+            newFMul->setDebugLoc(DL2nd);
+        }
+
+        // a * (y - x) + x (or a * (y + x) - x)
+        Instruction* newLastInst = BinaryOperator::Create(newLastInstType, newFMul, x, "", fAdd);
+        newLastInst->copyFastMathFlags(fAdd);
+        if (DILocation* DL3rd = fAdd->getDebugLoc())
+        {
+            newLastInst->setDebugLoc(DL3rd);
+        }
+
+        fAdd->replaceAllUsesWith(newLastInst);
+    }
+}
+
 void CustomSafeOptPass::hoistDp3(BinaryOperator& I)
 {
     if (I.getOpcode() != Instruction::BinaryOps::FAdd)
@@ -1602,6 +1755,15 @@ void CustomSafeOptPass::visitBinaryOperator(BinaryOperator& I)
 {
     matchDp4a(I);
 
+    CodeGenContext* pContext = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
+
+    if (!pContext->platform.supportLRPInstruction())
+    {
+        // Optimize mix operation if detected.
+        // Mix is computed as x*(1 - a) + y*a
+        matchMixOperation(I);
+    }
+
     // move immediate value in consecutive integer adds to the last added value.
     // this can allow more chance of doing CSE and memopt.
     //    a = b + 8
@@ -1610,8 +1772,6 @@ void CustomSafeOptPass::visitBinaryOperator(BinaryOperator& I)
     //    a = b + c
     //    d = a + 8
 
-    CodeGenContext* pContext = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
-
     // Before WA if() as it's validated behavior.
     if (I.getType()->isIntegerTy() && I.getOpcode() == Instruction::Or)
     {
diff --git a/IGC/Compiler/CustomSafeOptPass.hpp b/IGC/Compiler/CustomSafeOptPass.hpp
@@ -91,6 +91,7 @@ namespace IGC
         void visitBitCast(llvm::BitCastInst& BC);
 
         void matchDp4a(llvm::BinaryOperator& I);
+        void matchMixOperation(llvm::BinaryOperator& I);
         void hoistDp3(llvm::BinaryOperator& I);
 
         template <typename MaskType> void matchReverse(llvm::BinaryOperator& I);