Skip to content

Commit 1984c75

Browse files
authored
[ValueTracking] Do not use FMF from fcmp (#142266)
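This patch adds an `FMF` parameter to `matchDecomposedSelectPattern` so that the fast-math flags are taken from the select instead of from the fcmp. The one exception is `nnan`: if the fcmp carries `nnan`, it is still merged into the flags that were passed in. Closes #137998. Closes #141017.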
1 parent b3fd2ea commit 1984c75

File tree

13 files changed: +323 / -298 lines changed

llvm/include/llvm/Analysis/ValueTracking.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -889,7 +889,8 @@ inline SelectPatternResult matchSelectPattern(const Value *V, const Value *&LHS,
889889
/// predicate and given values as its true/false operands would match.
890890
LLVM_ABI SelectPatternResult matchDecomposedSelectPattern(
891891
CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
892-
Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0);
892+
FastMathFlags FMF = FastMathFlags(), Instruction::CastOps *CastOp = nullptr,
893+
unsigned Depth = 0);
893894

894895
/// Determine the pattern for predicate `X Pred Y ? X : Y`.
895896
LLVM_ABI SelectPatternResult getSelectPattern(

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8898,19 +8898,20 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
88988898
Value *TrueVal = SI->getTrueValue();
88998899
Value *FalseVal = SI->getFalseValue();
89008900

8901-
return llvm::matchDecomposedSelectPattern(CmpI, TrueVal, FalseVal, LHS, RHS,
8902-
CastOp, Depth);
8901+
return llvm::matchDecomposedSelectPattern(
8902+
CmpI, TrueVal, FalseVal, LHS, RHS,
8903+
isa<FPMathOperator>(SI) ? SI->getFastMathFlags() : FastMathFlags(),
8904+
CastOp, Depth);
89038905
}
89048906

89058907
SelectPatternResult llvm::matchDecomposedSelectPattern(
89068908
CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
8907-
Instruction::CastOps *CastOp, unsigned Depth) {
8909+
FastMathFlags FMF, Instruction::CastOps *CastOp, unsigned Depth) {
89088910
CmpInst::Predicate Pred = CmpI->getPredicate();
89098911
Value *CmpLHS = CmpI->getOperand(0);
89108912
Value *CmpRHS = CmpI->getOperand(1);
8911-
FastMathFlags FMF;
8912-
if (isa<FPMathOperator>(CmpI))
8913-
FMF = CmpI->getFastMathFlags();
8913+
if (isa<FPMathOperator>(CmpI) && CmpI->hasNoNaNs())
8914+
FMF.setNoNaNs();
89148915

89158916
// Bail out early.
89168917
if (CmpI->isEquality())

llvm/test/CodeGen/AArch64/arm64-fmax-safe.ll

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,3 +53,22 @@ define i64 @test_integer(i64 %in) {
5353
%val = select i1 %cmp, i64 0, i64 %in
5454
ret i64 %val
5555
}
56+
57+
; Make sure we don't translate it into fminnm when the nsz flag is set only on the fcmp (and not on the select).
58+
define float @minnum_fcmp_nsz(float %x, float %y) {
59+
; CHECK-LABEL: minnum_fcmp_nsz:
60+
%cmp = fcmp nnan nsz ole float %x, %y
61+
%sel = select i1 %cmp, float %x, float %y
62+
ret float %sel
63+
; CHECK-NOT: fminnm
64+
; CHECK: fcsel s0, s0, s1, le
65+
}
66+
67+
; Make sure we translate it into fminnm when the nsz flag is set on the select.
68+
define float @minnum_select_nsz(float %x, float %y) {
69+
; CHECK-LABEL: minnum_select_nsz:
70+
%cmp = fcmp nnan ole float %x, %y
71+
%sel = select nsz i1 %cmp, float %x, float %y
72+
ret float %sel
73+
; CHECK: fminnm s0, s0, s1
74+
}

llvm/test/CodeGen/AMDGPU/select-flags-to-fmin-fmax.ll

Lines changed: 62 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -355,17 +355,15 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
355355
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
356356
; GFX7: ; %bb.0:
357357
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358-
; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
359-
; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
358+
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
359+
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
360360
; GFX7-NEXT: s_setpc_b64 s[30:31]
361361
;
362362
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
363363
; GFX9: ; %bb.0:
364364
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365-
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v0, v2
366-
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
367-
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v1, v3
368-
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
365+
; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
366+
; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
369367
; GFX9-NEXT: s_setpc_b64 s[30:31]
370368
;
371369
; GFX12-LABEL: v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag:
@@ -375,12 +373,7 @@ define <2 x float> @v_test_fmin_legacy_ule_v2f32_nnan_nsz_flag(<2 x float> %a, <
375373
; GFX12-NEXT: s_wait_samplecnt 0x0
376374
; GFX12-NEXT: s_wait_bvhcnt 0x0
377375
; GFX12-NEXT: s_wait_kmcnt 0x0
378-
; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v2
379-
; GFX12-NEXT: s_wait_alu 0xfffd
380-
; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
381-
; GFX12-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v3
382-
; GFX12-NEXT: s_wait_alu 0xfffd
383-
; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
376+
; GFX12-NEXT: v_dual_min_num_f32 v0, v0, v2 :: v_dual_min_num_f32 v1, v1, v3
384377
; GFX12-NEXT: s_setpc_b64 s[30:31]
385378
%cmp = fcmp ule <2 x float> %a, %b
386379
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -499,17 +492,15 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
499492
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
500493
; GFX7: ; %bb.0:
501494
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502-
; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
503-
; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
495+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
496+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
504497
; GFX7-NEXT: s_setpc_b64 s[30:31]
505498
;
506499
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
507500
; GFX9: ; %bb.0:
508501
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509-
; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v0, v2
510-
; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
511-
; GFX9-NEXT: v_cmp_nlt_f32_e32 vcc, v1, v3
512-
; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
502+
; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
503+
; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
513504
; GFX9-NEXT: s_setpc_b64 s[30:31]
514505
;
515506
; GFX12-LABEL: v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag:
@@ -519,12 +510,7 @@ define <2 x float> @v_test_fmax_legacy_uge_v2f32_nnan_nsz_flag(<2 x float> %a, <
519510
; GFX12-NEXT: s_wait_samplecnt 0x0
520511
; GFX12-NEXT: s_wait_bvhcnt 0x0
521512
; GFX12-NEXT: s_wait_kmcnt 0x0
522-
; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v2
523-
; GFX12-NEXT: s_wait_alu 0xfffd
524-
; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
525-
; GFX12-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v3
526-
; GFX12-NEXT: s_wait_alu 0xfffd
527-
; GFX12-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc_lo
513+
; GFX12-NEXT: v_dual_max_num_f32 v0, v0, v2 :: v_dual_max_num_f32 v1, v1, v3
528514
; GFX12-NEXT: s_setpc_b64 s[30:31]
529515
%cmp = fcmp uge <2 x float> %a, %b
530516
%val = select nnan nsz <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
@@ -673,10 +659,10 @@ define half @v_test_fmin_legacy_ule_f16_nnan_nsz_flag(half %a, half %b) {
673659
; GFX7-LABEL: v_test_fmin_legacy_ule_f16_nnan_nsz_flag:
674660
; GFX7: ; %bb.0:
675661
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676-
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
677662
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
678-
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
663+
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
679664
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
665+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
680666
; GFX7-NEXT: v_min_f32_e32 v0, v0, v1
681667
; GFX7-NEXT: s_setpc_b64 s[30:31]
682668
;
@@ -852,10 +838,10 @@ define half @v_test_fmax_legacy_uge_f16_nnan_nsz_flag(half %a, half %b) {
852838
; GFX7-LABEL: v_test_fmax_legacy_uge_f16_nnan_nsz_flag:
853839
; GFX7: ; %bb.0:
854840
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855-
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
856841
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
857-
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
842+
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
858843
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
844+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
859845
; GFX7-NEXT: v_max_f32_e32 v0, v0, v1
860846
; GFX7-NEXT: s_setpc_b64 s[30:31]
861847
;
@@ -1097,16 +1083,16 @@ define <2 x half> @v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag(<2 x half> %a, <2
10971083
; GFX7-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
10981084
; GFX7: ; %bb.0:
10991085
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1100-
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1101-
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1102-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
11031086
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1104-
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1105-
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1106-
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1087+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1088+
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1089+
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
11071090
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1108-
; GFX7-NEXT: v_min_legacy_f32_e32 v0, v2, v0
1109-
; GFX7-NEXT: v_min_legacy_f32_e32 v1, v3, v1
1091+
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1092+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1093+
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1094+
; GFX7-NEXT: v_min_f32_e32 v0, v0, v2
1095+
; GFX7-NEXT: v_min_f32_e32 v1, v1, v3
11101096
; GFX7-NEXT: s_setpc_b64 s[30:31]
11111097
;
11121098
; GFX9-LABEL: v_test_fmin_legacy_ule_v2f16_nnan_nsz_flag:
@@ -1337,16 +1323,16 @@ define <2 x half> @v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag(<2 x half> %a, <2
13371323
; GFX7-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
13381324
; GFX7: ; %bb.0:
13391325
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1340-
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1341-
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1342-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
13431326
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1344-
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1345-
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1346-
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1327+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
1328+
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1329+
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
13471330
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1348-
; GFX7-NEXT: v_max_legacy_f32_e32 v0, v2, v0
1349-
; GFX7-NEXT: v_max_legacy_f32_e32 v1, v3, v1
1331+
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1332+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1333+
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1334+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v2
1335+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v3
13501336
; GFX7-NEXT: s_setpc_b64 s[30:31]
13511337
;
13521338
; GFX9-LABEL: v_test_fmax_legacy_uge_v2f16_nnan_nsz_flag:
@@ -1667,26 +1653,26 @@ define <4 x half> @v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag(<4 x half> %a, <4
16671653
; GFX7-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
16681654
; GFX7: ; %bb.0:
16691655
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1670-
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1671-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
16721656
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
16731657
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
1674-
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
1675-
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1676-
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1658+
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
1659+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
16771660
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
1678-
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1679-
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1680-
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1681-
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1682-
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1683-
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1684-
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1661+
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
1662+
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
1663+
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
16851664
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
1686-
; GFX7-NEXT: v_min_legacy_f32_e32 v0, v4, v0
1687-
; GFX7-NEXT: v_min_legacy_f32_e32 v1, v5, v1
1688-
; GFX7-NEXT: v_min_legacy_f32_e32 v2, v6, v2
1689-
; GFX7-NEXT: v_min_legacy_f32_e32 v3, v7, v3
1665+
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
1666+
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
1667+
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
1668+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
1669+
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
1670+
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
1671+
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
1672+
; GFX7-NEXT: v_min_f32_e32 v0, v0, v4
1673+
; GFX7-NEXT: v_min_f32_e32 v1, v1, v5
1674+
; GFX7-NEXT: v_min_f32_e32 v2, v2, v6
1675+
; GFX7-NEXT: v_min_f32_e32 v3, v3, v7
16901676
; GFX7-NEXT: s_setpc_b64 s[30:31]
16911677
;
16921678
; GFX9-LABEL: v_test_fmin_legacy_ule_v4f16_nnan_nsz_flag:
@@ -2009,26 +1995,26 @@ define <4 x half> @v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag(<4 x half> %a, <4
20091995
; GFX7-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:
20101996
; GFX7: ; %bb.0:
20111997
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012-
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
2013-
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
20141998
; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7
20151999
; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6
2016-
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
2017-
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
2018-
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
2000+
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
2001+
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
20192002
; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5
2020-
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
2021-
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
2022-
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
2023-
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
2024-
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
2025-
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
2026-
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
2003+
; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4
2004+
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
2005+
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
20272006
; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7
2028-
; GFX7-NEXT: v_max_legacy_f32_e32 v0, v4, v0
2029-
; GFX7-NEXT: v_max_legacy_f32_e32 v1, v5, v1
2030-
; GFX7-NEXT: v_max_legacy_f32_e32 v2, v6, v2
2031-
; GFX7-NEXT: v_max_legacy_f32_e32 v3, v7, v3
2007+
; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v6
2008+
; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v5
2009+
; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4
2010+
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
2011+
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
2012+
; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2
2013+
; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3
2014+
; GFX7-NEXT: v_max_f32_e32 v0, v0, v4
2015+
; GFX7-NEXT: v_max_f32_e32 v1, v1, v5
2016+
; GFX7-NEXT: v_max_f32_e32 v2, v2, v6
2017+
; GFX7-NEXT: v_max_f32_e32 v3, v3, v7
20322018
; GFX7-NEXT: s_setpc_b64 s[30:31]
20332019
;
20342020
; GFX9-LABEL: v_test_fmax_legacy_uge_v4f16_nnan_nsz_flag:

llvm/test/CodeGen/ARM/fp16-vminmaxnm-safe.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -517,9 +517,9 @@ define half @fp16_vminmaxnm_e_0(half %a) {
517517
; CHECK-NEXT: .short 0x0000 @ half 0
518518
entry:
519519
%cmp1 = fcmp nsz ole half 0., %a
520-
%cond1 = select i1 %cmp1, half 0., half %a
520+
%cond1 = select nsz i1 %cmp1, half 0., half %a
521521
%cmp2 = fcmp nsz uge half 0., %cond1
522-
%cond2 = select i1 %cmp2, half 0., half %cond1
522+
%cond2 = select nsz i1 %cmp2, half 0., half %cond1
523523
ret half %cond2
524524
}
525525

@@ -540,7 +540,7 @@ define half @fp16_vminmaxnm_e_neg0(half %a) {
540540
; CHECK-NEXT: .short 0x8000 @ half -0
541541
entry:
542542
%cmp1 = fcmp nsz ule half -0., %a
543-
%cond1 = select i1 %cmp1, half -0., half %a
543+
%cond1 = select nsz i1 %cmp1, half -0., half %a
544544
%cmp2 = fcmp nsz oge half -0., %cond1
545545
%cond2 = select i1 %cmp2, half -0., half %cond1
546546
ret half %cond2

0 commit comments

Comments
 (0)