Skip to content

Commit f66ba4c

Browse files
committed
[x86] propagate FMF from x86-specific intrinsic nodes to others during lowering
This is another failure to propagate fast-math flags (FMF), exposed by D90901.
1 parent 25207d5 commit f66ba4c

File tree

2 files changed

+7
-2
lines changed

2 files changed

+7
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25049,6 +25049,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2504925049
MVT VT = Op.getSimpleValueType();
2505025050
const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo);
2505125051

25052+
// Propagate flags from original node to transformed node(s).
25053+
SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags());
25054+
2505225055
if (IntrData) {
2505325056
switch(IntrData->Type) {
2505425057
case INTR_TYPE_1OP: {

llvm/test/CodeGen/X86/fmf-propagation.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,15 @@ define float @fmf_setcc_canon(float %x, float %y) {
4747

4848
declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
4949

50+
; Check that FMF are propagated twice: first from the IR call to the x86-specific intrinsic node, and then from that x86-specific node to the generic node created during lowering.
51+
5052
; CHECK-LABEL: Initial selection DAG: %bb.0 'fmf_target_intrinsic:'
5153
; CHECK: v16f32 = llvm.x86.avx512.vfmadd.ps.512 ninf nsz TargetConstant:i64<{{.*}}>
5254
; CHECK: v16f32 = llvm.x86.avx512.vfmadd.ps.512 nsz TargetConstant:i64<{{.*}}>
5355

5456
; CHECK-LABEL: Legalized selection DAG: %bb.0 'fmf_target_intrinsic:'
55-
; CHECK: v16f32 = fma t{{.*}}
56-
; CHECK: v16f32 = fma t{{.*}}
57+
; CHECK: v16f32 = fma ninf nsz t{{.*}}
58+
; CHECK: v16f32 = fma nsz t{{.*}}
5759

5860
define <16 x float> @fmf_target_intrinsic(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind {
5961
%t0 = tail call ninf nsz <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4)

0 commit comments

Comments
 (0)