Skip to content

Commit 312883c

Browse files
agrabezh authored and igcbot committed
Add FNeg support to ScalarizeFunction pass
Adding `FNeg` instruction support to ScalarizeFunction pass
1 parent a0caa5f commit 312883c

File tree

3 files changed

+71
-0
lines changed

3 files changed

+71
-0
lines changed

IGC/Compiler/Optimizer/Scalarizer.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,9 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
310310

311311
switch (I->getOpcode())
312312
{
313+
case Instruction::FNeg:
314+
scalarizeInstruction(dyn_cast<UnaryOperator>(I));
315+
break;
313316
case Instruction::Add:
314317
case Instruction::Sub:
315318
case Instruction::Mul:
@@ -415,6 +418,55 @@ void ScalarizeFunction::recoverNonScalarizableInst(Instruction* Inst)
415418
}
416419
}
417420

421+
/// @brief Scalarize a vector unary instruction (the dispatcher routes FNeg here).
///
/// Instructions whose result is not a fixed vector are left untouched. Otherwise
/// one scalar unary instruction with the same opcode is created per vector
/// element (inserted before @p UI), the new values are recorded in the SCM entry
/// of @p UI, and @p UI is added to m_removedInsts.
/// Nothing is generated when the operand is reported as constant.
///
/// @param UI Unary instruction to scalarize; must not be null.
void ScalarizeFunction::scalarizeInstruction(UnaryOperator* UI)
{
    V_PRINT(scalarizer, "\t\tUnary instruction\n");
    IGC_ASSERT_MESSAGE(UI, "instruction type dynamic cast failed");
    IGCLLVM::FixedVectorType* instType = dyn_cast<IGCLLVM::FixedVectorType>(UI->getType());
    // Only need handling for vector unary ops
    if (!instType) return;

    // Prepare empty SCM entry for the instruction
    SCMEntry* newEntry = getSCMEntry(UI);

    // Get additional info from instruction
    unsigned numElements = int_cast<unsigned>(instType->getNumElements());

    // Obtain scalarized argument
    SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
    // Initialized defensively; obtainScalarizedValues receives it as an out-parameter.
    bool op0IsConst = false;

    obtainScalarizedValues(operand0, &op0IsConst, UI->getOperand(0), UI);

    // If argument is constant, don't bother Scalarizing inst
    if (op0IsConst) return;

    // Generate new (scalar) instructions
    SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
    newScalarizedInsts.resize(numElements);
    for (unsigned dup = 0; dup < numElements; dup++)
    {
        // UnaryOperator::Create already returns a UnaryOperator*, so the result
        // can be used directly without a dynamic cast.
        UnaryOperator* UO = UnaryOperator::Create(
            UI->getOpcode(),
            operand0[dup],
            UI->getName(),
            UI
        );
        // Copy fast math flags if any.
        if (isa<FPMathOperator>(UO))
            UO->setFastMathFlags(UI->getFastMathFlags());
        newScalarizedInsts[dup] = UO;
    }

    // Add new value/s to SCM
    updateSCMEntryWithValues(newEntry, newScalarizedInsts.data(), UI, true);

    // Remove original instruction
    m_removedInsts.insert(UI);
}
469+
418470
void ScalarizeFunction::scalarizeInstruction(BinaryOperator* BI)
419471
{
420472
V_PRINT(scalarizer, "\t\tBinary instruction\n");

IGC/Compiler/Optimizer/Scalarizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ namespace IGC
9292
* \{ */
9393
/// @brief Scalarize an instruction
9494
/// @param I Instruction to scalarize
95+
void scalarizeInstruction(llvm::UnaryOperator* UI);
9596
void scalarizeInstruction(llvm::BinaryOperator* BI);
9697
void scalarizeInstruction(llvm::CmpInst* CI);
9798
void scalarizeInstruction(llvm::CastInst* CI);

IGC/Compiler/tests/ScalarizeFunction/basic.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,24 @@
1111
; ScalarizeFunction
1212
; ------------------------------------------------
1313

14+
; Unary-operator scalarization test.
; Input: a single `fneg` on a <2 x float> kernel argument whose result is stored
; to an alloca.
; Expected output: each element of %src1 is extracted, negated by its own scalar
; `fneg float`, and the two results are reassembled with insertelement into the
; <2 x float> value that feeds the store.
define spir_kernel void @test_unary(<2 x float> %src1) {
15+
; CHECK-LABEL: @test_unary(
16+
; CHECK: [[SRC1_SCALAR:%.*]] = extractelement <2 x float> [[SRC1:%.*]], i32 0
17+
; CHECK: [[SRC1_SCALAR1:%.*]] = extractelement <2 x float> [[SRC1]], i32 1
18+
; CHECK: [[TMP1:%.*]] = alloca <2 x float>, align 4
19+
; CHECK: [[TMP2:%.*]] = fneg float [[SRC1_SCALAR]]
20+
; CHECK: [[TMP3:%.*]] = fneg float [[SRC1_SCALAR1]]
21+
; CHECK: [[ASSEMBLED_VECT:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
22+
; CHECK: [[ASSEMBLED_VECT2:%.*]] = insertelement <2 x float> [[ASSEMBLED_VECT]], float [[TMP3]], i32 1
23+
; CHECK: store <2 x float> [[ASSEMBLED_VECT2]], <2 x float>* [[TMP1]], align 8
24+
; CHECK: ret void
25+
;
26+
%1 = alloca <2 x float>, align 4
27+
%2 = fneg <2 x float> %src1
28+
store <2 x float> %2, <2 x float>* %1, align 8
29+
ret void
30+
}
31+
1432
define spir_kernel void @test_binary(<2 x i32> %src1, <2 x i32> %src2) {
1533
; CHECK-LABEL: @test_binary(
1634
; CHECK: [[SCALAR2:%.*]] = extractelement <2 x i32> [[SRC2:%.*]], i32 0

0 commit comments

Comments
 (0)