Skip to content

Commit cc59752

Browse files
[LLVM][CodeGen][AArch64] Don't scalarise v8{f16,bf16} vsetcc operations.
I have also removed custom promotion code for the v4{f16,bf16} cases because the same common code can be used.
1 parent b7cfa4a commit cc59752

File tree

7 files changed

+442
-2067
lines changed

7 files changed

+442
-2067
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 11 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -840,18 +840,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
840840
setOperationPromotedToType(ISD::FRINT, V4Narrow, MVT::v4f32);
841841
setOperationPromotedToType(ISD::FNEARBYINT, V4Narrow, MVT::v4f32);
842842
setOperationPromotedToType(ISD::FCANONICALIZE, V4Narrow, MVT::v4f32);
843+
setOperationPromotedToType(ISD::SETCC, V4Narrow, MVT::v4f32);
843844

844845
setOperationAction(ISD::FABS, V4Narrow, Legal);
845-
setOperationAction(ISD::FNEG, V4Narrow, Legal);
846+
setOperationAction(ISD::FNEG, V4Narrow, Legal);
846847
setOperationAction(ISD::FMA, V4Narrow, Expand);
847-
setOperationAction(ISD::SETCC, V4Narrow, Custom);
848848
setOperationAction(ISD::BR_CC, V4Narrow, Expand);
849849
setOperationAction(ISD::SELECT, V4Narrow, Expand);
850850
setOperationAction(ISD::SELECT_CC, V4Narrow, Expand);
851851
setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
852852
setOperationAction(ISD::FSQRT, V4Narrow, Expand);
853853

854854
auto V8Narrow = MVT::getVectorVT(ScalarVT, 8);
855+
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
856+
setOperationPromotedToType(ISD::SETCC, V8Narrow, MVT::v8f32);
857+
855858
setOperationAction(ISD::FABS, V8Narrow, Legal);
856859
setOperationAction(ISD::FADD, V8Narrow, Legal);
857860
setOperationAction(ISD::FCEIL, V8Narrow, Legal);
@@ -861,19 +864,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
861864
setOperationAction(ISD::FMA, V8Narrow, Expand);
862865
setOperationAction(ISD::FMUL, V8Narrow, Legal);
863866
setOperationAction(ISD::FNEARBYINT, V8Narrow, Legal);
864-
setOperationAction(ISD::FNEG, V8Narrow, Legal);
867+
setOperationAction(ISD::FNEG, V8Narrow, Legal);
865868
setOperationAction(ISD::FROUND, V8Narrow, Legal);
866869
setOperationAction(ISD::FROUNDEVEN, V8Narrow, Legal);
867870
setOperationAction(ISD::FRINT, V8Narrow, Legal);
868871
setOperationAction(ISD::FSQRT, V8Narrow, Expand);
869872
setOperationAction(ISD::FSUB, V8Narrow, Legal);
870873
setOperationAction(ISD::FTRUNC, V8Narrow, Legal);
871-
setOperationAction(ISD::SETCC, V8Narrow, Expand);
872874
setOperationAction(ISD::BR_CC, V8Narrow, Expand);
873875
setOperationAction(ISD::SELECT, V8Narrow, Expand);
874876
setOperationAction(ISD::SELECT_CC, V8Narrow, Expand);
875877
setOperationAction(ISD::FP_EXTEND, V8Narrow, Expand);
876-
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
877878
};
878879

879880
if (!Subtarget->hasFullFP16()) {
@@ -15898,6 +15899,11 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1589815899
if (LHS.getValueType().getVectorElementType().isInteger())
1589915900
return Op;
1590015901

15902+
assert((!Subtarget->hasFullFP16() &&
15903+
LHS.getValueType().getVectorElementType() != MVT::f16) ||
15904+
LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15905+
LHS.getValueType().getVectorElementType() != MVT::f128);
15906+
1590115907
// Lower isnan(x) | isnan(never-nan) to x != x.
1590215908
// Lower !isnan(x) & !isnan(never-nan) to x == x.
1590315909
if (CC == ISD::SETUO || CC == ISD::SETO) {
@@ -15916,26 +15922,6 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1591615922
}
1591715923
}
1591815924

15919-
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
15920-
15921-
// Make v4f16 (only) fcmp operations utilise vector instructions
15922-
// v8f16 support will be a little more complicated
15923-
if ((!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) ||
15924-
LHS.getValueType().getVectorElementType() == MVT::bf16) {
15925-
if (LHS.getValueType().getVectorNumElements() == 4) {
15926-
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
15927-
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
15928-
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
15929-
DAG.ReplaceAllUsesWith(Op, NewSetcc);
15930-
CmpVT = MVT::v4i32;
15931-
} else
15932-
return SDValue();
15933-
}
15934-
15935-
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
15936-
LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15937-
LHS.getValueType().getVectorElementType() != MVT::f128);
15938-
1593915925
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1594015926
// clean. Some of them require two branches to implement.
1594115927
AArch64CC::CondCode CC1, CC2;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4241,11 +4241,6 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
42414241
ValScalarTy->isBFloatTy()) {
42424242
auto *ValVTy = cast<FixedVectorType>(ValTy);
42434243

4244-
// FIXME: We currently scalarise these.
4245-
if (ValVTy->getNumElements() > 4)
4246-
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred,
4247-
CostKind, Op1Info, Op2Info, I);
4248-
42494244
// Without dedicated instructions we promote [b]f16 compares to f32.
42504245
auto *PromotedTy =
42514246
VectorType::get(Type::getFloatTy(ValTy->getContext()), ValVTy);

llvm/test/Analysis/CostModel/AArch64/cmp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ define void @cmps() {
1717
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef
1818
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef
1919
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cbf64 = fcmp ogt bfloat undef, undef
20-
; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
20+
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
2121
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef
2222
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef
23-
; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
23+
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
2424
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
2525
;
2626
%c8 = icmp slt i8 undef, undef

llvm/test/Analysis/CostModel/AArch64/vector-select.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
119119

120120
define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
121121
; COST-NOFP16-LABEL: 'v8f16_select_ogt'
122-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
122+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
123123
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
124124
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
125125
;
@@ -179,7 +179,7 @@ define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
179179

180180
define <8 x bfloat> @v8bf16_select_ogt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
181181
; COST-LABEL: 'v8bf16_select_ogt'
182-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
182+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
183183
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
184184
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
185185
;
@@ -206,7 +206,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c)
206206

207207
define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
208208
; COST-NOFP16-LABEL: 'v8f16_select_oge'
209-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
209+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
210210
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
211211
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
212212
;
@@ -266,7 +266,7 @@ define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
266266

267267
define <8 x bfloat> @v8bf16_select_oge(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
268268
; COST-LABEL: 'v8bf16_select_oge'
269-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
269+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
270270
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
271271
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
272272
;
@@ -293,7 +293,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
293293

294294
define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
295295
; COST-NOFP16-LABEL: 'v8f16_select_olt'
296-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
296+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
297297
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
298298
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
299299
;
@@ -353,7 +353,7 @@ define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
353353

354354
define <8 x bfloat> @v8bf16_select_olt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
355355
; COST-LABEL: 'v8bf16_select_olt'
356-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
356+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
357357
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
358358
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
359359
;
@@ -380,7 +380,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c)
380380

381381
define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
382382
; COST-NOFP16-LABEL: 'v8f16_select_ole'
383-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
383+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
384384
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
385385
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
386386
;
@@ -440,7 +440,7 @@ define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
440440

441441
define <8 x bfloat> @v8bf16_select_ole(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
442442
; COST-LABEL: 'v8bf16_select_ole'
443-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
443+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
444444
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
445445
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
446446
;
@@ -467,7 +467,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c)
467467

468468
define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
469469
; COST-NOFP16-LABEL: 'v8f16_select_oeq'
470-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
470+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
471471
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
472472
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
473473
;
@@ -527,7 +527,7 @@ define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
527527

528528
define <8 x bfloat> @v8bf16_select_oeq(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
529529
; COST-LABEL: 'v8bf16_select_oeq'
530-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
530+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
531531
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
532532
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
533533
;
@@ -554,7 +554,7 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c)
554554

555555
define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
556556
; COST-NOFP16-LABEL: 'v8f16_select_one'
557-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x half> %a, %b
557+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x half> %a, %b
558558
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
559559
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
560560
;
@@ -614,7 +614,7 @@ define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
614614

615615
define <8 x bfloat> @v8bf16_select_one(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
616616
; COST-LABEL: 'v8bf16_select_one'
617-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
617+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
618618
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
619619
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
620620
;
@@ -641,7 +641,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c)
641641

642642
define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
643643
; COST-NOFP16-LABEL: 'v8f16_select_une'
644-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
644+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
645645
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
646646
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
647647
;
@@ -701,7 +701,7 @@ define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
701701

702702
define <8 x bfloat> @v8bf16_select_une(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
703703
; COST-LABEL: 'v8bf16_select_une'
704-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
704+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
705705
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
706706
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
707707
;
@@ -728,7 +728,7 @@ define <4 x half> @v4f16_select_ord(<4 x half> %a, <4 x half> %b, <4 x half> %c)
728728

729729
define <8 x half> @v8f16_select_ord(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
730730
; COST-NOFP16-LABEL: 'v8f16_select_ord'
731-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x half> %a, %b
731+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x half> %a, %b
732732
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
733733
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
734734
;
@@ -788,7 +788,7 @@ define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
788788

789789
define <8 x bfloat> @v8bf16_select_ord(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
790790
; COST-LABEL: 'v8bf16_select_ord'
791-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
791+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
792792
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
793793
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
794794
;

0 commit comments

Comments
 (0)