Skip to content

Commit 778e445

Browse files
committed
[LoopVectorize] Add FNeg instruction support
Differential Revision: https://reviews.llvm.org/D62510
llvm-svn: 362124
1 parent 5d5f629 commit 778e445

File tree

4 files changed

+43
-25
lines changed

4 files changed

+43
-25
lines changed

llvm/include/llvm/IR/IRBuilder.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1383,6 +1383,24 @@ class IRBuilder : public IRBuilderBase, public Inserter {
13831383
return Insert(UnOp, Name);
13841384
}
13851385

1386+
/// Create either a UnaryOperator or BinaryOperator depending on \p Opc.
1387+
/// Correct number of operands must be passed accordingly: exactly two entries in \p Ops for a binary opcode, exactly one for a unary opcode (both checked by assert). \p Name and \p FPMathTag are forwarded unchanged to CreateBinOp / CreateUnOp, whose result is returned.
1388+
Value *CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
1389+
const Twine &Name = "",
1390+
MDNode *FPMathTag = nullptr) {
1391+
if (Instruction::isBinaryOp(Opc)) { // Binary opcode: dispatch to CreateBinOp.
1392+
assert(Ops.size() == 2 && "Invalid number of operands!"); // Ops[0] and Ops[1] are read below.
1393+
return CreateBinOp(static_cast<Instruction::BinaryOps>(Opc),
1394+
Ops[0], Ops[1], Name, FPMathTag);
1395+
}
1396+
if (Instruction::isUnaryOp(Opc)) { // Unary opcode: dispatch to CreateUnOp.
1397+
assert(Ops.size() == 1 && "Invalid number of operands!"); // Only Ops[0] is read below.
1398+
return CreateUnOp(static_cast<Instruction::UnaryOps>(Opc),
1399+
Ops[0], Name, FPMathTag);
1400+
}
1401+
llvm_unreachable("Unexpected opcode!"); // Reached only when Opc is neither a binary nor a unary opcode.
1402+
}
1403+
13861404
//===--------------------------------------------------------------------===//
13871405
// Instruction creation methods: Memory Instructions
13881406
//===--------------------------------------------------------------------===//

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3969,6 +3969,7 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
39693969
case Instruction::FAdd:
39703970
case Instruction::Sub:
39713971
case Instruction::FSub:
3972+
case Instruction::FNeg:
39723973
case Instruction::Mul:
39733974
case Instruction::FMul:
39743975
case Instruction::FDiv:
@@ -3979,21 +3980,22 @@ void InnerLoopVectorizer::widenInstruction(Instruction &I) {
39793980
case Instruction::And:
39803981
case Instruction::Or:
39813982
case Instruction::Xor: {
3982-
// Just widen binops.
3983-
auto *BinOp = cast<BinaryOperator>(&I);
3984-
setDebugLocFromInst(Builder, BinOp);
3983+
// Just widen unops and binops.
3984+
setDebugLocFromInst(Builder, &I);
39853985

39863986
for (unsigned Part = 0; Part < UF; ++Part) {
3987-
Value *A = getOrCreateVectorValue(BinOp->getOperand(0), Part);
3988-
Value *B = getOrCreateVectorValue(BinOp->getOperand(1), Part);
3989-
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
3987+
SmallVector<Value *, 2> Ops;
3988+
for (Value *Op : I.operands())
3989+
Ops.push_back(getOrCreateVectorValue(Op, Part));
3990+
3991+
Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
39903992

3991-
if (BinaryOperator *VecOp = dyn_cast<BinaryOperator>(V))
3992-
VecOp->copyIRFlags(BinOp);
3993+
if (auto *VecOp = dyn_cast<Instruction>(V))
3994+
VecOp->copyIRFlags(&I);
39933995

39943996
// Use this vector value for all users of the original instruction.
39953997
VectorLoopValueMap.setVectorValue(&I, Part, V);
3996-
addMetadata(V, BinOp);
3998+
addMetadata(V, &I);
39973999
}
39984000

39994001
break;
@@ -5960,6 +5962,14 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
59605962
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
59615963
Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands);
59625964
}
5965+
case Instruction::FNeg: {
5966+
unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1;
5967+
return N * TTI.getArithmeticInstrCost(
5968+
I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue,
5969+
TargetTransformInfo::OK_AnyValue,
5970+
TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
5971+
I->getOperand(0));
5972+
}
59635973
case Instruction::Select: {
59645974
SelectInst *SI = cast<SelectInst>(I);
59655975
const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
@@ -6589,6 +6599,7 @@ bool VPRecipeBuilder::tryToWiden(Instruction *I, VPBasicBlock *VPBB,
65896599
case Instruction::FCmp:
65906600
case Instruction::FDiv:
65916601
case Instruction::FMul:
6602+
case Instruction::FNeg:
65926603
case Instruction::FPExt:
65936604
case Instruction::FPToSI:
65946605
case Instruction::FPToUI:

llvm/test/Transforms/LoopVectorize/X86/fneg-cost.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
66
target triple = "x86_64-apple-macosx10.8.0"
77

8-
; CHECK: Found an estimated cost of 2 for VF 1 For instruction: %neg = fneg float %{{.*}}
9-
; CHECK: Found an estimated cost of 6 for VF 2 For instruction: %neg = fneg float %{{.*}}
10-
; CHECK: Found an estimated cost of 14 for VF 4 For instruction: %neg = fneg float %{{.*}}
8+
; CHECK: Found an estimated cost of 4 for VF 1 For instruction: %neg = fneg float %{{.*}}
9+
; CHECK: Found an estimated cost of 4 for VF 2 For instruction: %neg = fneg float %{{.*}}
10+
; CHECK: Found an estimated cost of 4 for VF 4 For instruction: %neg = fneg float %{{.*}}
1111
define void @fneg_cost(float* %a, i64 %n) {
1212
entry:
1313
br label %for.body

llvm/test/Transforms/LoopVectorize/fneg.ll

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,8 @@
33
define void @foo(float* %a, i64 %n) {
44
; CHECK: vector.body:
55
; CHECK: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* {{.*}}, align 4
6-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 0
7-
; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
8-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 1
9-
; CHECK-NEXT: [[TMP7:%.*]] = fneg float [[TMP6]]
10-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 2
11-
; CHECK-NEXT: [[TMP9:%.*]] = fneg float [[TMP8]]
12-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[WIDE_LOAD]], i32 3
13-
; CHECK-NEXT: [[TMP11:%.*]] = fneg float [[TMP10]]
14-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
15-
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i32 1
16-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i32 2
17-
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i32 3
18-
; CHECK: store <4 x float> [[TMP15]], <4 x float>* {{.*}}, align 4
6+
; CHECK-NEXT: [[TMP4:%.*]] = fneg <4 x float> [[WIDE_LOAD]]
7+
; CHECK: store <4 x float> [[TMP4]], <4 x float>* {{.*}}, align 4
198
;
209
entry:
2110
br label %for.body

0 commit comments

Comments
 (0)