Skip to content

Commit 0fab741

Browse files
[LLVM][CodeGen][AArch64] Don't scalarise v8{f16,bf16} vsetcc operations. (#135398)
I have also removed custom promotion code for the v4{f16,bf16} cases because the same common code can be used.
1 parent 3015edf commit 0fab741

File tree

7 files changed

+443
-2067
lines changed

7 files changed

+443
-2067
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -843,18 +843,21 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
843843
setOperationPromotedToType(ISD::FRINT, V4Narrow, MVT::v4f32);
844844
setOperationPromotedToType(ISD::FNEARBYINT, V4Narrow, MVT::v4f32);
845845
setOperationPromotedToType(ISD::FCANONICALIZE, V4Narrow, MVT::v4f32);
846+
setOperationPromotedToType(ISD::SETCC, V4Narrow, MVT::v4f32);
846847

847848
setOperationAction(ISD::FABS, V4Narrow, Legal);
848-
setOperationAction(ISD::FNEG, V4Narrow, Legal);
849+
setOperationAction(ISD::FNEG, V4Narrow, Legal);
849850
setOperationAction(ISD::FMA, V4Narrow, Expand);
850-
setOperationAction(ISD::SETCC, V4Narrow, Custom);
851851
setOperationAction(ISD::BR_CC, V4Narrow, Expand);
852852
setOperationAction(ISD::SELECT, V4Narrow, Expand);
853853
setOperationAction(ISD::SELECT_CC, V4Narrow, Expand);
854854
setOperationAction(ISD::FCOPYSIGN, V4Narrow, Custom);
855855
setOperationAction(ISD::FSQRT, V4Narrow, Expand);
856856

857857
auto V8Narrow = MVT::getVectorVT(ScalarVT, 8);
858+
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
859+
setOperationPromotedToType(ISD::SETCC, V8Narrow, MVT::v8f32);
860+
858861
setOperationAction(ISD::FABS, V8Narrow, Legal);
859862
setOperationAction(ISD::FADD, V8Narrow, Legal);
860863
setOperationAction(ISD::FCEIL, V8Narrow, Legal);
@@ -864,19 +867,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
864867
setOperationAction(ISD::FMA, V8Narrow, Expand);
865868
setOperationAction(ISD::FMUL, V8Narrow, Legal);
866869
setOperationAction(ISD::FNEARBYINT, V8Narrow, Legal);
867-
setOperationAction(ISD::FNEG, V8Narrow, Legal);
870+
setOperationAction(ISD::FNEG, V8Narrow, Legal);
868871
setOperationAction(ISD::FROUND, V8Narrow, Legal);
869872
setOperationAction(ISD::FROUNDEVEN, V8Narrow, Legal);
870873
setOperationAction(ISD::FRINT, V8Narrow, Legal);
871874
setOperationAction(ISD::FSQRT, V8Narrow, Expand);
872875
setOperationAction(ISD::FSUB, V8Narrow, Legal);
873876
setOperationAction(ISD::FTRUNC, V8Narrow, Legal);
874-
setOperationAction(ISD::SETCC, V8Narrow, Expand);
875877
setOperationAction(ISD::BR_CC, V8Narrow, Expand);
876878
setOperationAction(ISD::SELECT, V8Narrow, Expand);
877879
setOperationAction(ISD::SELECT_CC, V8Narrow, Expand);
878880
setOperationAction(ISD::FP_EXTEND, V8Narrow, Expand);
879-
setOperationPromotedToType(ISD::FCANONICALIZE, V8Narrow, MVT::v8f32);
880881
};
881882

882883
if (!Subtarget->hasFullFP16()) {
@@ -15892,6 +15893,12 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1589215893
if (LHS.getValueType().getVectorElementType().isInteger())
1589315894
return Op;
1589415895

15896+
assert(((!Subtarget->hasFullFP16() &&
15897+
LHS.getValueType().getVectorElementType() != MVT::f16) ||
15898+
LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15899+
LHS.getValueType().getVectorElementType() != MVT::f128) &&
15900+
"Unexpected type!");
15901+
1589515902
// Lower isnan(x) | isnan(never-nan) to x != x.
1589615903
// Lower !isnan(x) & !isnan(never-nan) to x == x.
1589715904
if (CC == ISD::SETUO || CC == ISD::SETO) {
@@ -15910,26 +15917,6 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
1591015917
}
1591115918
}
1591215919

15913-
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
15914-
15915-
// Make v4f16 (only) fcmp operations utilise vector instructions
15916-
// v8f16 support will be a little more complicated
15917-
if ((!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) ||
15918-
LHS.getValueType().getVectorElementType() == MVT::bf16) {
15919-
if (LHS.getValueType().getVectorNumElements() == 4) {
15920-
LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
15921-
RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
15922-
SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
15923-
DAG.ReplaceAllUsesWith(Op, NewSetcc);
15924-
CmpVT = MVT::v4i32;
15925-
} else
15926-
return SDValue();
15927-
}
15928-
15929-
assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
15930-
LHS.getValueType().getVectorElementType() != MVT::bf16 ||
15931-
LHS.getValueType().getVectorElementType() != MVT::f128);
15932-
1593315920
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1593415921
// clean. Some of them require two branches to implement.
1593515922
AArch64CC::CondCode CC1, CC2;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4285,11 +4285,6 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
42854285
ValScalarTy->isBFloatTy()) {
42864286
auto *ValVTy = cast<FixedVectorType>(ValTy);
42874287

4288-
// FIXME: We currently scalarise these.
4289-
if (ValVTy->getNumElements() > 4)
4290-
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred,
4291-
CostKind, Op1Info, Op2Info, I);
4292-
42934288
// Without dedicated instructions we promote [b]f16 compares to f32.
42944289
auto *PromotedTy =
42954290
VectorType::get(Type::getFloatTy(ValTy->getContext()), ValVTy);

llvm/test/Analysis/CostModel/AArch64/cmp.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ define void @cmps() {
1717
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf32 = fcmp ogt float undef, undef
1818
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cf64 = fcmp ogt double undef, undef
1919
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cbf64 = fcmp ogt bfloat undef, undef
20-
; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
20+
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cfv816 = fcmp olt <8 x half> undef, undef
2121
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv432 = fcmp oge <4 x float> undef, undef
2222
; CHECK-NEXT: Cost Model: Found costs of 1 for: %cfv264 = fcmp oge <2 x double> undef, undef
23-
; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
23+
; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cbfv816 = fcmp olt <8 x bfloat> undef, undef
2424
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
2525
;
2626
%c8 = icmp slt i8 undef, undef

llvm/test/Analysis/CostModel/AArch64/vector-select.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
119119

120120
define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
121121
; COST-NOFP16-LABEL: 'v8f16_select_ogt'
122-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
122+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x half> %a, %b
123123
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
124124
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
125125
;
@@ -179,7 +179,7 @@ define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
179179

180180
define <8 x bfloat> @v8bf16_select_ogt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
181181
; COST-LABEL: 'v8bf16_select_ogt'
182-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
182+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <8 x bfloat> %a, %b
183183
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
184184
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
185185
;
@@ -206,7 +206,7 @@ define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c)
206206

207207
define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
208208
; COST-NOFP16-LABEL: 'v8f16_select_oge'
209-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
209+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x half> %a, %b
210210
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
211211
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
212212
;
@@ -266,7 +266,7 @@ define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
266266

267267
define <8 x bfloat> @v8bf16_select_oge(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
268268
; COST-LABEL: 'v8bf16_select_oge'
269-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
269+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <8 x bfloat> %a, %b
270270
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
271271
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
272272
;
@@ -293,7 +293,7 @@ define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c)
293293

294294
define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
295295
; COST-NOFP16-LABEL: 'v8f16_select_olt'
296-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
296+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x half> %a, %b
297297
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
298298
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
299299
;
@@ -353,7 +353,7 @@ define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
353353

354354
define <8 x bfloat> @v8bf16_select_olt(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
355355
; COST-LABEL: 'v8bf16_select_olt'
356-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
356+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <8 x bfloat> %a, %b
357357
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
358358
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
359359
;
@@ -380,7 +380,7 @@ define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c)
380380

381381
define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
382382
; COST-NOFP16-LABEL: 'v8f16_select_ole'
383-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
383+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x half> %a, %b
384384
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
385385
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
386386
;
@@ -440,7 +440,7 @@ define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
440440

441441
define <8 x bfloat> @v8bf16_select_ole(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
442442
; COST-LABEL: 'v8bf16_select_ole'
443-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
443+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <8 x bfloat> %a, %b
444444
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
445445
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
446446
;
@@ -467,7 +467,7 @@ define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c)
467467

468468
define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
469469
; COST-NOFP16-LABEL: 'v8f16_select_oeq'
470-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
470+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x half> %a, %b
471471
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
472472
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
473473
;
@@ -527,7 +527,7 @@ define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
527527

528528
define <8 x bfloat> @v8bf16_select_oeq(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
529529
; COST-LABEL: 'v8bf16_select_oeq'
530-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
530+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <8 x bfloat> %a, %b
531531
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
532532
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
533533
;
@@ -554,7 +554,7 @@ define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c)
554554

555555
define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
556556
; COST-NOFP16-LABEL: 'v8f16_select_one'
557-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x half> %a, %b
557+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x half> %a, %b
558558
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
559559
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
560560
;
@@ -614,7 +614,7 @@ define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
614614

615615
define <8 x bfloat> @v8bf16_select_one(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
616616
; COST-LABEL: 'v8bf16_select_one'
617-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
617+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <8 x bfloat> %a, %b
618618
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
619619
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
620620
;
@@ -641,7 +641,7 @@ define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c)
641641

642642
define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
643643
; COST-NOFP16-LABEL: 'v8f16_select_une'
644-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
644+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x half> %a, %b
645645
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
646646
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
647647
;
@@ -701,7 +701,7 @@ define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
701701

702702
define <8 x bfloat> @v8bf16_select_une(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
703703
; COST-LABEL: 'v8bf16_select_une'
704-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
704+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <8 x bfloat> %a, %b
705705
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
706706
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
707707
;
@@ -728,7 +728,7 @@ define <4 x half> @v4f16_select_ord(<4 x half> %a, <4 x half> %b, <4 x half> %c)
728728

729729
define <8 x half> @v8f16_select_ord(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
730730
; COST-NOFP16-LABEL: 'v8f16_select_ord'
731-
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x half> %a, %b
731+
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x half> %a, %b
732732
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
733733
; COST-NOFP16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x half> %s.1
734734
;
@@ -788,7 +788,7 @@ define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bf
788788

789789
define <8 x bfloat> @v8bf16_select_ord(<8 x bfloat> %a, <8 x bfloat> %b, <8 x bfloat> %c) {
790790
; COST-LABEL: 'v8bf16_select_ord'
791-
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
791+
; COST-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <8 x bfloat> %a, %b
792792
; COST-NEXT: Cost Model: Found costs of RThru:22 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <8 x i1> %cmp.1, <8 x bfloat> %a, <8 x bfloat> %c
793793
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x bfloat> %s.1
794794
;

0 commit comments

Comments
 (0)