Skip to content

Commit 069436a

Browse files
SC llvm team
authored and committed
Merged main:ce0235779569b150acad6d6aaa648edba4ade014 into amd-gfx:a7f33e870fdb
Local branch amd-gfx a7f33e8 Merged main:a01b58aef0e42fb1b52e358adf4c56678a884d37 into amd-gfx:e01c1ad5de58 Remote branch main ce02357 [llvm-profdata] Make tests more readable (NFC)
2 parents a7f33e8 + ce02357 commit 069436a

File tree

12 files changed

+123
-26
lines changed

12 files changed

+123
-26
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,18 @@ class TargetTransformInfo {
12431243
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
12441244
const Instruction *CxtI = nullptr) const;
12451245

1246+
/// Returns the cost estimation for alternating opcode pattern that can be
1247+
/// lowered to a single instruction on the target. In X86 this is for the
1248+
/// addsub instruction which corresponds to a Shuffle + FAdd + FSub pattern in
1249+
/// IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
1250+
/// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
1251+
/// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
1252+
/// \p VecTy is the vector type of the instruction to be generated.
1253+
InstructionCost getAltInstrCost(
1254+
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1255+
const SmallBitVector &OpcodeMask,
1256+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1257+
12461258
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
12471259
/// The exact mask may be passed as Mask, or else the array will be empty.
12481260
/// The index and subtype parameters are used by the subvector insertion and
@@ -1944,6 +1956,10 @@ class TargetTransformInfo::Concept {
19441956
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
19451957
OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
19461958
ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1959+
virtual InstructionCost getAltInstrCost(
1960+
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
1961+
const SmallBitVector &OpcodeMask,
1962+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0;
19471963

19481964
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
19491965
ArrayRef<int> Mask,
@@ -2555,6 +2571,12 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
25552571
return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
25562572
Args, CxtI);
25572573
}
2574+
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
2575+
unsigned Opcode1,
2576+
const SmallBitVector &OpcodeMask,
2577+
TTI::TargetCostKind CostKind) const override {
2578+
return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
2579+
}
25582580

25592581
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
25602582
ArrayRef<int> Mask,

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,13 @@ class TargetTransformInfoImplBase {
554554
return 1;
555555
}
556556

557+
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
558+
unsigned Opcode1,
559+
const SmallBitVector &OpcodeMask,
560+
TTI::TargetCostKind CostKind) const {
561+
return InstructionCost::getInvalid();
562+
}
563+
557564
InstructionCost
558565
getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
559566
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
/* Indicate that this is LLVM compiled from the amd-gfx branch. */
1818
#define LLVM_HAVE_BRANCH_AMD_GFX
19-
#define LLVM_MAIN_REVISION 484816
19+
#define LLVM_MAIN_REVISION 484819
2020

2121
/* Define if LLVM_ENABLE_DUMP is enabled */
2222
#cmakedefine LLVM_ENABLE_DUMP

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -862,6 +862,15 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
862862
return Cost;
863863
}
864864

865+
InstructionCost TargetTransformInfo::getAltInstrCost(
866+
VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
867+
const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const {
868+
InstructionCost Cost =
869+
TTIImpl->getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
870+
assert(Cost >= 0 && "TTI should not produce negative costs!");
871+
return Cost;
872+
}
873+
865874
InstructionCost TargetTransformInfo::getShuffleCost(
866875
ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
867876
TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,6 +1459,15 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
14591459
Args, CxtI);
14601460
}
14611461

1462+
InstructionCost
1463+
X86TTIImpl::getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
1464+
unsigned Opcode1, const SmallBitVector &OpcodeMask,
1465+
TTI::TargetCostKind CostKind) const {
1466+
if (isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask))
1467+
return TTI::TCC_Basic;
1468+
return InstructionCost::getInvalid();
1469+
}
1470+
14621471
InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
14631472
VectorType *BaseTp,
14641473
ArrayRef<int> Mask,

llvm/lib/Target/X86/X86TargetTransformInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,11 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
140140
TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
141141
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
142142
const Instruction *CxtI = nullptr);
143+
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
144+
unsigned Opcode1,
145+
const SmallBitVector &OpcodeMask,
146+
TTI::TargetCostKind CostKind) const;
147+
143148
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
144149
ArrayRef<int> Mask,
145150
TTI::TargetCostKind CostKind, int Index,

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1685,8 +1685,8 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
16851685
assert(NotLHS != nullptr && NotRHS != nullptr &&
16861686
"isFreeToInvert desynced with getFreelyInverted");
16871687
Value *LHSPlusRHS = Builder.CreateAdd(NotLHS, NotRHS);
1688-
return BinaryOperator::CreateSub(ConstantInt::get(RHS->getType(), -2),
1689-
LHSPlusRHS);
1688+
return BinaryOperator::CreateSub(
1689+
ConstantInt::getSigned(RHS->getType(), -2), LHSPlusRHS);
16901690
}
16911691
}
16921692

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8428,6 +8428,25 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
84288428
Mask);
84298429
VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
84308430
FinalVecTy, Mask);
8431+
// Patterns like [fadd,fsub] can be combined into a single instruction
8432+
// in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
8433+
// need to take into account their order when looking for the most used
8434+
// order.
8435+
unsigned Opcode0 = E->getOpcode();
8436+
unsigned Opcode1 = E->getAltOpcode();
8437+
// The opcode mask selects between the two opcodes.
8438+
SmallBitVector OpcodeMask(E->Scalars.size(), false);
8439+
for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
8440+
if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
8441+
OpcodeMask.set(Lane);
8442+
// If this pattern is supported by the target then we consider the
8443+
// order.
8444+
if (TTI->isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
8445+
InstructionCost AltVecCost =
8446+
TTI->getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind);
8447+
return AltVecCost < VecCost ? AltVecCost : VecCost;
8448+
}
8449+
// TODO: Check the reverse order too.
84318450
return VecCost;
84328451
};
84338452
return GetCostDiff(GetScalarCost, GetVectorCost);

llvm/test/Transforms/InstCombine/free-inversion.ll

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,24 @@ define i8 @sub_2(i8 %a, i1 %c, i8 %x, i8 %y) {
133133
ret i8 %not_ab
134134
}
135135

136+
; Same as above but with a type larger than i64 to make sure we create -2
137+
; correctly.
138+
define i128 @sub_3(i128 %a, i1 %c, i128 %x, i128 %y) {
139+
; CHECK-LABEL: @sub_3(
140+
; CHECK-NEXT: [[TMP1:%.*]] = xor i128 [[Y:%.*]], -124
141+
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C:%.*]], i128 [[X:%.*]], i128 [[TMP1]]
142+
; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[TMP2]], [[A:%.*]]
143+
; CHECK-NEXT: [[NOT_AB:%.*]] = sub i128 -2, [[TMP3]]
144+
; CHECK-NEXT: ret i128 [[NOT_AB]]
145+
;
146+
%nx = xor i128 %x, -1
147+
%yy = xor i128 %y, 123
148+
%b = select i1 %c, i128 %nx, i128 %yy
149+
%ab = sub i128 %a, %b
150+
%not_ab = xor i128 %ab, -1
151+
ret i128 %not_ab
152+
}
153+
136154
define i8 @sub_fail(i8 %a, i1 %c, i8 %x, i8 %y) {
137155
; CHECK-LABEL: @sub_fail(
138156
; CHECK-NEXT: [[NX:%.*]] = xor i8 [[X:%.*]], -1

llvm/test/Transforms/SLPVectorizer/X86/supernode.ll

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -103,21 +103,23 @@ define void @test_supernode_addsub_alt(ptr %Aarray, ptr %Barray, ptr %Carray, pt
103103
; ENABLED-LABEL: @test_supernode_addsub_alt(
104104
; ENABLED-NEXT: entry:
105105
; ENABLED-NEXT: [[IDXA1:%.*]] = getelementptr inbounds double, ptr [[AARRAY:%.*]], i64 1
106-
; ENABLED-NEXT: [[IDXB1:%.*]] = getelementptr inbounds double, ptr [[BARRAY:%.*]], i64 1
107106
; ENABLED-NEXT: [[IDXC1:%.*]] = getelementptr inbounds double, ptr [[CARRAY:%.*]], i64 1
108-
; ENABLED-NEXT: [[IDXS1:%.*]] = getelementptr inbounds double, ptr [[SARRAY:%.*]], i64 1
109107
; ENABLED-NEXT: [[A0:%.*]] = load double, ptr [[AARRAY]], align 8
110108
; ENABLED-NEXT: [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
111-
; ENABLED-NEXT: [[B0:%.*]] = load double, ptr [[BARRAY]], align 8
112-
; ENABLED-NEXT: [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
113109
; ENABLED-NEXT: [[C0:%.*]] = load double, ptr [[CARRAY]], align 8
114110
; ENABLED-NEXT: [[C1:%.*]] = load double, ptr [[IDXC1]], align 8
115-
; ENABLED-NEXT: [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
116-
; ENABLED-NEXT: [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
117-
; ENABLED-NEXT: [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
118-
; ENABLED-NEXT: [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
119-
; ENABLED-NEXT: store double [[SUB0]], ptr [[SARRAY]], align 8
120-
; ENABLED-NEXT: store double [[ADD1]], ptr [[IDXS1]], align 8
111+
; ENABLED-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[BARRAY:%.*]], align 8
112+
; ENABLED-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
113+
; ENABLED-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[C1]], i32 1
114+
; ENABLED-NEXT: [[TMP3:%.*]] = fsub fast <2 x double> [[TMP2]], [[TMP0]]
115+
; ENABLED-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP0]]
116+
; ENABLED-NEXT: [[TMP5:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP4]], <2 x i32> <i32 0, i32 3>
117+
; ENABLED-NEXT: [[TMP6:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
118+
; ENABLED-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[A1]], i32 1
119+
; ENABLED-NEXT: [[TMP8:%.*]] = fsub fast <2 x double> [[TMP5]], [[TMP7]]
120+
; ENABLED-NEXT: [[TMP9:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP7]]
121+
; ENABLED-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP9]], <2 x i32> <i32 0, i32 3>
122+
; ENABLED-NEXT: store <2 x double> [[TMP10]], ptr [[SARRAY:%.*]], align 8
121123
; ENABLED-NEXT: ret void
122124
;
123125
entry:

llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,24 @@ define void @foo() {
1212
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1
1313
; CHECK-NEXT: br label [[BB2:%.*]]
1414
; CHECK: bb2:
15-
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP10:%.*]], [[BB3:%.*]] ]
15+
; CHECK-NEXT: [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ]
1616
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr undef, align 8
1717
; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]]
1818
; CHECK: bb4:
1919
; CHECK-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP2]] to <4 x double>
2020
; CHECK-NEXT: [[CONV2:%.*]] = uitofp i16 undef to double
21-
; CHECK-NEXT: [[ADD1:%.*]] = fadd double [[TMP3]], [[CONV2]]
22-
; CHECK-NEXT: [[SUB1:%.*]] = fsub double undef, undef
23-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> <double poison, double poison, double undef, double undef>, double [[SUB1]], i32 0
24-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[ADD1]], i32 1
25-
; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x double> [[TMP6]], [[TMP4]]
26-
; CHECK-NEXT: [[TMP8:%.*]] = fptrunc <4 x double> [[TMP6]] to <4 x float>
27-
; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP2]], <4 x float> [[TMP8]]
21+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[TMP3]], i32 1
22+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> <double undef, double poison>, double [[CONV2]], i32 1
23+
; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x double> [[TMP5]], [[TMP6]]
24+
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
25+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
26+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
27+
; CHECK-NEXT: [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
28+
; CHECK-NEXT: [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
29+
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]
2830
; CHECK-NEXT: br label [[BB3]]
2931
; CHECK: bb3:
30-
; CHECK-NEXT: [[TMP10]] = phi <4 x float> [ [[TMP9]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
32+
; CHECK-NEXT: [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
3133
; CHECK-NEXT: br label [[BB2]]
3234
;
3335
entry:

llvm/test/tools/llvm-profdata/raw-64-bits-be.test

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,21 @@ RUN: printf '\0\0\0\1\0\4\0\0' >> %t
1919
RUN: printf '\0\0\0\3\0\4\0\0' >> %t
2020
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
2121
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
22-
RUN: printf '\0\0\0\1\0\0\0\0' >> %t
23-
RUN: printf '\0\0\0\3\0\0\0\0' >> %t
22+
RUN: printf '\0\0\0\1' >> %t
23+
RUN: printf '\0\0\0\0' >> %t
24+
RUN: printf '\0\0\0\3' >> %t
25+
RUN: printf '\0\0\0\0' >> %t
2426

2527
RUN: printf '\344\023\165\112\031\035\265\067' >> %t
2628
RUN: printf '\0\0\0\0\0\0\0\02' >> %t
2729
RUN: printf '\0\0\0\1\0\3\xff\xc8' >> %t
2830
RUN: printf '\0\0\0\3\0\3\xff\xc3' >> %t
2931
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
3032
RUN: printf '\0\0\0\0\0\0\0\0' >> %t
31-
RUN: printf '\0\0\0\02\0\0\0\0' >> %t
32-
RUN: printf '\0\0\0\1\0\0\0\0' >> %t
33+
RUN: printf '\0\0\0\02' >> %t
34+
RUN: printf '\0\0\0\0' >> %t
35+
RUN: printf '\0\0\0\1' >> %t
36+
RUN: printf '\0\0\0\0' >> %t
3337

3438
RUN: printf '\0\0\0\0\0\0\0\023' >> %t
3539
RUN: printf '\0\0\0\0\0\0\0\067' >> %t

0 commit comments

Comments
 (0)