Add FNeg support to ScalarizeFunction pass

agrabezh · igcbot · commit 312883ca7330 · 2024-04-08T13:28:56.000+02:00
Add support for the `FNeg` instruction to the ScalarizeFunction pass: a vector `fneg` is now split into one scalar `fneg` per element.
diff --git a/IGC/Compiler/Optimizer/Scalarizer.cpp b/IGC/Compiler/Optimizer/Scalarizer.cpp
@@ -310,6 +310,9 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
 
     switch (I->getOpcode())
     {
+    case Instruction::FNeg:
+        scalarizeInstruction(dyn_cast<UnaryOperator>(I));
+        break;
     case Instruction::Add:
     case Instruction::Sub:
     case Instruction::Mul:
@@ -415,6 +418,55 @@ void ScalarizeFunction::recoverNonScalarizableInst(Instruction* Inst)
     }
 }
 
+void ScalarizeFunction::scalarizeInstruction(UnaryOperator* UI)
+{ // Splits a vector unary operator (dispatched for FNeg) into one scalar operator per element.
+    V_PRINT(scalarizer, "\t\tUnary instruction\n");
+    IGC_ASSERT_MESSAGE(UI, "instruction type dynamic cast failed");
+    IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(UI->getType());
+    // Only vector unary ops need handling; non-vector results are left untouched
+    if (!instType) return;
+
+    // Prepare the SCM entry for the instruction (it is filled in below)
+    SCMEntry* newEntry = getSCMEntry(UI);
+
+    // Number of vector elements, i.e. how many scalar instructions to emit
+    unsigned numElements = int_cast<unsigned>(instType->getNumElements());
+
+    // Obtain the scalarized operand (indexed per element below)
+    SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>operand0;
+    bool op0IsConst;
+
+    obtainScalarizedValues(operand0, &op0IsConst, UI->getOperand(0), UI);
+
+    // If the operand is constant, don't scalarize; UI is not marked for removal
+    if (op0IsConst) return;
+
+    // Generate new (scalar) instructions, one per element, created relative to UI
+    SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH>newScalarizedInsts;
+    newScalarizedInsts.resize(numElements);
+    for (unsigned dup = 0; dup < numElements; dup++)
+    {
+        Value* Val = UnaryOperator::Create(
+            UI->getOpcode(),
+            operand0[dup],
+            UI->getName(),
+            UI
+        );
+        if (UnaryOperator* UO = dyn_cast<UnaryOperator>(Val)) {
+            // Propagate the original instruction's fast-math flags to the scalar op.
+            if (isa<FPMathOperator>(UO))
+                UO->setFastMathFlags(UI->getFastMathFlags());
+        }
+        newScalarizedInsts[dup] = Val;
+    }
+
+    // Record the new scalar value/s in the SCM entry
+    updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), UI, true);
+
+    // Queue the original vector instruction for removal (added to m_removedInsts)
+    m_removedInsts.insert(UI);
+}
+
 void ScalarizeFunction::scalarizeInstruction(BinaryOperator* BI)
 {
     V_PRINT(scalarizer, "\t\tBinary instruction\n");
diff --git a/IGC/Compiler/Optimizer/Scalarizer.h b/IGC/Compiler/Optimizer/Scalarizer.h
@@ -92,6 +92,7 @@ namespace IGC
          *  \{ */
          /// @brief Scalarize an instruction
          /// @param I Instruction to scalarize
+        void scalarizeInstruction(llvm::UnaryOperator* UI);
         void scalarizeInstruction(llvm::BinaryOperator* BI);
         void scalarizeInstruction(llvm::CmpInst* CI);
         void scalarizeInstruction(llvm::CastInst* CI);
diff --git a/IGC/Compiler/tests/ScalarizeFunction/basic.ll b/IGC/Compiler/tests/ScalarizeFunction/basic.ll
@@ -11,6 +11,24 @@
 ; ScalarizeFunction
 ; ------------------------------------------------
 
+define spir_kernel void @test_unary(<2 x float> %src1) {
+; CHECK-LABEL: @test_unary(
+; CHECK:    [[SRC1_SCALAR:%.*]] = extractelement <2 x float> [[SRC1:%.*]], i32 0
+; CHECK:    [[SRC1_SCALAR1:%.*]] = extractelement <2 x float> [[SRC1]], i32 1
+; CHECK:    [[TMP1:%.*]] = alloca <2 x float>, align 4
+; CHECK:    [[TMP2:%.*]] = fneg float [[SRC1_SCALAR]]
+; CHECK:    [[TMP3:%.*]] = fneg float [[SRC1_SCALAR1]]
+; CHECK:    [[ASSEMBLED_VECT:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
+; CHECK:    [[ASSEMBLED_VECT2:%.*]] = insertelement <2 x float> [[ASSEMBLED_VECT]], float [[TMP3]], i32 1
+; CHECK:    store <2 x float> [[ASSEMBLED_VECT2]], <2 x float>* [[TMP1]], align 8
+; CHECK:    ret void
+; Input: a <2 x float> fneg; expected: one scalar fneg per element, reassembled into a vector for the store.
+  %1 = alloca <2 x float>, align 4
+  %2 = fneg <2 x float> %src1
+  store <2 x float> %2, <2 x float>* %1, align 8
+  ret void
+}
+
 define spir_kernel void @test_binary(<2 x i32> %src1, <2 x i32> %src2) {
 ; CHECK-LABEL: @test_binary(
 ; CHECK:    [[SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2:%.*]], i32 0