Skip to content

Commit 7273284

Browse files
committed
[X86] Add back fmaddsub intrinsics to work towards fixing the strict fp implementation
Previously we emitted an fmadd and a fmadd+fneg and combined them with a shufflevector. But this doesn't follow the correct exception behavior for unselected elements so the backend can't merge them into the fmaddsub/fmsubadd instructions. This patch restores the the fmaddsub intrinsics so we don't have two arithmetic operations. We lose out on optimization opportunity in the non-strict FP case, but I don't think this is a big loss. If someone gives us a test case we can look into adding instcombine/dagcombine improvements. I'd rather not have the frontend do completely different things for strict and non-strict. This still has problems because target specific intrinsics don't support strict semantics yet. We also still have all of the problems with masking. But we at least generate the right instruction in constrained mode now. Differential Revision: https://reviews.llvm.org/D74268
1 parent 4135077 commit 7273284

File tree

9 files changed

+157
-313
lines changed

9 files changed

+157
-313
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10152,7 +10152,8 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
1015210152

1015310153
// Only handle in case of _MM_FROUND_CUR_DIRECTION/4 (no rounding).
1015410154
if (IID != Intrinsic::not_intrinsic &&
10155-
cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
10155+
(cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4 ||
10156+
IsAddSub)) {
1015610157
Function *Intr = CGF.CGM.getIntrinsic(IID);
1015710158
Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
1015810159
} else {
@@ -10165,24 +10166,6 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
1016510166
FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
1016610167
Res = CGF.Builder.CreateCall(FMA, {A, B, C});
1016710168
}
10168-
10169-
if (IsAddSub) {
10170-
// Negate even elts in C using a mask.
10171-
unsigned NumElts = Ty->getVectorNumElements();
10172-
SmallVector<uint32_t, 16> Indices(NumElts);
10173-
for (unsigned i = 0; i != NumElts; ++i)
10174-
Indices[i] = i + (i % 2) * NumElts;
10175-
10176-
// FIXME: This code isn't exception safe for constrained FP. We need to
10177-
// suppress exceptions on the unselected elements.
10178-
Value *NegC = CGF.Builder.CreateFNeg(C);
10179-
Value *FMSub;
10180-
if (CGF.Builder.getIsFPConstrained())
10181-
FMSub = CGF.Builder.CreateConstrainedFPCall(FMA, {A, B, NegC} );
10182-
else
10183-
FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
10184-
Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
10185-
}
1018610169
}
1018710170

1018810171
// Handle any required masking.
@@ -10818,10 +10801,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
1081810801
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
1081910802
case X86::BI__builtin_ia32_vfmsubpd512_mask3:
1082010803
return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
10821-
case X86::BI__builtin_ia32_vfmaddsubps:
10822-
case X86::BI__builtin_ia32_vfmaddsubpd:
10823-
case X86::BI__builtin_ia32_vfmaddsubps256:
10824-
case X86::BI__builtin_ia32_vfmaddsubpd256:
1082510804
case X86::BI__builtin_ia32_vfmaddsubps512_mask:
1082610805
case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
1082710806
case X86::BI__builtin_ia32_vfmaddsubps512_mask3:

clang/test/CodeGen/avx512f-builtins.c

Lines changed: 24 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -887,66 +887,52 @@ __m512d test_mm512_maskz_fmsubadd_round_pd(__mmask8 __U, __m512d __A, __m512d __
887887
}
888888
__m512d test_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) {
889889
// CHECK-LABEL: @test_mm512_fmaddsub_pd
890-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
891-
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
892-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
893-
// CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
890+
// CHECK-NOT: fneg
891+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
894892
return _mm512_fmaddsub_pd(__A, __B, __C);
895893
}
896894
__m512d test_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
897895
// CHECK-LABEL: @test_mm512_mask_fmaddsub_pd
898-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
899-
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
900-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
901-
// CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
896+
// CHECK-NOT: fneg
897+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
902898
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
903899
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
904900
return _mm512_mask_fmaddsub_pd(__A, __U, __B, __C);
905901
}
906902
__m512d test_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
907903
// CHECK-LABEL: @test_mm512_mask3_fmaddsub_pd
908-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
909-
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
910-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
911-
// CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
904+
// CHECK-NOT: fneg
905+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
912906
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
913907
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
914908
return _mm512_mask3_fmaddsub_pd(__A, __B, __C, __U);
915909
}
916910
__m512d test_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
917911
// CHECK-LABEL: @test_mm512_maskz_fmaddsub_pd
918-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
919-
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
920-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
921-
// CHECK: shufflevector <8 x double> [[SUB]], <8 x double> [[ADD]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
912+
// CHECK-NOT: fneg
913+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 4)
922914
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
923915
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
924916
return _mm512_maskz_fmaddsub_pd(__U, __A, __B, __C);
925917
}
926918
__m512d test_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) {
927919
// CHECK-LABEL: @test_mm512_fmsubadd_pd
928920
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
929-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
930-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
931-
// CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
921+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
932922
return _mm512_fmsubadd_pd(__A, __B, __C);
933923
}
934924
__m512d test_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) {
935925
// CHECK-LABEL: @test_mm512_mask_fmsubadd_pd
936926
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
937-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
938-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
939-
// CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
927+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
940928
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
941929
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
942930
return _mm512_mask_fmsubadd_pd(__A, __U, __B, __C);
943931
}
944932
__m512d test_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) {
945933
// CHECK-LABEL: @test_mm512_maskz_fmsubadd_pd
946934
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
947-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
948-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
949-
// CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
935+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
950936
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
951937
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> zeroinitializer
952938
return _mm512_maskz_fmsubadd_pd(__U, __A, __B, __C);
@@ -1001,66 +987,52 @@ __m512 test_mm512_maskz_fmsubadd_round_ps(__mmask16 __U, __m512 __A, __m512 __B,
1001987
}
1002988
__m512 test_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) {
1003989
// CHECK-LABEL: @test_mm512_fmaddsub_ps
1004-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1005-
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1006-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1007-
// CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
990+
// CHECK-NOT: fneg
991+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
1008992
return _mm512_fmaddsub_ps(__A, __B, __C);
1009993
}
1010994
__m512 test_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
1011995
// CHECK-LABEL: @test_mm512_mask_fmaddsub_ps
1012-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1013-
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1014-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1015-
// CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
996+
// CHECK-NOT: fneg
997+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
1016998
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
1017999
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
10181000
return _mm512_mask_fmaddsub_ps(__A, __U, __B, __C);
10191001
}
10201002
__m512 test_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
10211003
// CHECK-LABEL: @test_mm512_mask3_fmaddsub_ps
1022-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1023-
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1024-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1025-
// CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1004+
// CHECK-NOT: fneg
1005+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
10261006
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
10271007
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
10281008
return _mm512_mask3_fmaddsub_ps(__A, __B, __C, __U);
10291009
}
10301010
__m512 test_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
10311011
// CHECK-LABEL: @test_mm512_maskz_fmaddsub_ps
1032-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1033-
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1034-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1035-
// CHECK: shufflevector <16 x float> [[SUB]], <16 x float> [[ADD]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1012+
// CHECK-NOT: fneg
1013+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 4)
10361014
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
10371015
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
10381016
return _mm512_maskz_fmaddsub_ps(__U, __A, __B, __C);
10391017
}
10401018
__m512 test_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) {
10411019
// CHECK-LABEL: @test_mm512_fmsubadd_ps
10421020
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1043-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1044-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1045-
// CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1021+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
10461022
return _mm512_fmsubadd_ps(__A, __B, __C);
10471023
}
10481024
__m512 test_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) {
10491025
// CHECK-LABEL: @test_mm512_mask_fmsubadd_ps
10501026
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1051-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1052-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1053-
// CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1027+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
10541028
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
10551029
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
10561030
return _mm512_mask_fmsubadd_ps(__A, __U, __B, __C);
10571031
}
10581032
__m512 test_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) {
10591033
// CHECK-LABEL: @test_mm512_maskz_fmsubadd_ps
10601034
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1061-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1062-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1063-
// CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1035+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
10641036
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
10651037
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> zeroinitializer
10661038
return _mm512_maskz_fmsubadd_ps(__U, __A, __B, __C);
@@ -1108,9 +1080,7 @@ __m512d test_mm512_mask3_fmsubadd_round_pd(__m512d __A, __m512d __B, __m512d __C
11081080
__m512d test_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) {
11091081
// CHECK-LABEL: @test_mm512_mask3_fmsubadd_pd
11101082
// CHECK: [[NEG:%.+]] = fneg <8 x double> %{{.*}}
1111-
// CHECK: [[SUB:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]]
1112-
// CHECK: [[ADD:%.+]] = call <8 x double> @llvm.fma.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}})
1113-
// CHECK: shufflevector <8 x double> [[ADD]], <8 x double> [[SUB]], <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
1083+
// CHECK: call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x double> [[NEG]], i32 4)
11141084
// CHECK: bitcast i8 %{{.*}} to <8 x i1>
11151085
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
11161086
return _mm512_mask3_fmsubadd_pd(__A, __B, __C, __U);
@@ -1126,9 +1096,7 @@ __m512 test_mm512_mask3_fmsubadd_round_ps(__m512 __A, __m512 __B, __m512 __C, __
11261096
__m512 test_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) {
11271097
// CHECK-LABEL: @test_mm512_mask3_fmsubadd_ps
11281098
// CHECK: [[NEG:%.+]] = fneg <16 x float> %{{.*}}
1129-
// CHECK: [[SUB:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]]
1130-
// CHECK: [[ADD:%.+]] = call <16 x float> @llvm.fma.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}})
1131-
// CHECK: shufflevector <16 x float> [[ADD]], <16 x float> [[SUB]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
1099+
// CHECK: call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x float> [[NEG]], i32 4)
11321100
// CHECK: bitcast i16 %{{.*}} to <16 x i1>
11331101
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
11341102
return _mm512_mask3_fmsubadd_ps(__A, __B, __C, __U);

0 commit comments

Comments
 (0)