Skip to content

Commit 2bfa7d0

Browse files
authored
[InstCombine] Fold fmul X, -0.0 into copysign(0.0, -X) (#85772)
`fneg` + `copysign` is better than `fmul` for analysis/codegen.
godbolt: https://godbolt.org/z/eEs6dGd1G
Alive2: https://alive2.llvm.org/ce/z/K3M5BA
1 parent 686f459 commit 2bfa7d0

File tree

4 files changed

+116
-15
lines changed

4 files changed

+116
-15
lines changed

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -814,8 +814,19 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
814814
if (match(Op1, m_SpecificFP(-1.0)))
815815
return UnaryOperator::CreateFNegFMF(Op0, &I);
816816

817-
// With no-nans: X * 0.0 --> copysign(0.0, X)
818-
if (I.hasNoNaNs() && match(Op1, m_PosZeroFP())) {
817+
// With no-nans/no-infs:
818+
// X * 0.0 --> copysign(0.0, X)
819+
// X * -0.0 --> copysign(0.0, -X)
820+
const APFloat *FPC;
821+
if (match(Op1, m_APFloatAllowUndef(FPC)) && FPC->isZero() &&
822+
((I.hasNoInfs() &&
823+
isKnownNeverNaN(Op0, /*Depth=*/0, SQ.getWithInstruction(&I))) ||
824+
isKnownNeverNaN(&I, /*Depth=*/0, SQ.getWithInstruction(&I)))) {
825+
if (FPC->isNegative())
826+
Op0 = Builder.CreateFNegFMF(Op0, &I);
827+
Op1 = Constant::replaceUndefsWith(
828+
cast<Constant>(Op1),
829+
ConstantFP::get(Op1->getType()->getScalarType(), *FPC));
819830
CallInst *CopySign = Builder.CreateIntrinsic(Intrinsic::copysign,
820831
{I.getType()}, {Op1, Op0}, &I);
821832
return replaceInstUsesWith(I, CopySign);

llvm/test/Transforms/InstCombine/binop-itofp.ll

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,7 +1012,7 @@ define float @missed_nonzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g
10121012
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
10131013
; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
10141014
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
1015-
; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], 0.000000e+00
1015+
; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[CONV1_I]])
10161016
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
10171017
; CHECK-NEXT: ret float [[MUL3_I_I]]
10181018
;
@@ -1031,7 +1031,7 @@ define <2 x float> @missed_nonzero_check_on_constant_for_si_fmul_vec(i1 %c, i1 %
10311031
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
10321032
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
10331033
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
1034-
; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], zeroinitializer
1034+
; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[CONV1_I]])
10351035
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
10361036
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
10371037
;
@@ -1050,7 +1050,8 @@ define float @negzero_check_on_constant_for_si_fmul(i1 %c, i1 %.b, ptr %g_2345)
10501050
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[C:%.*]], i32 65529, i32 53264
10511051
; CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[SEL]] to i16
10521052
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp i16 [[CONV_I]] to float
1053-
; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul float [[CONV1_I]], -0.000000e+00
1053+
; CHECK-NEXT: [[TMP1:%.*]] = fneg float [[CONV1_I]]
1054+
; CHECK-NEXT: [[MUL3_I_I:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]])
10541055
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
10551056
; CHECK-NEXT: ret float [[MUL3_I_I]]
10561057
;
@@ -1069,7 +1070,7 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_vec_w_undef(i1 %c, i1
10691070
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
10701071
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
10711072
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
1072-
; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float 0.000000e+00>
1073+
; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[CONV1_I]])
10731074
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
10741075
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
10751076
;
@@ -1111,7 +1112,8 @@ define <2 x float> @nonzero_check_on_constant_for_si_fmul_negz_vec_w_undef(i1 %c
11111112
; CHECK-NEXT: [[CONV_I_V:%.*]] = insertelement <2 x i16> poison, i16 [[CONV_I_S]], i64 0
11121113
; CHECK-NEXT: [[CONV_I:%.*]] = shufflevector <2 x i16> [[CONV_I_V]], <2 x i16> poison, <2 x i32> zeroinitializer
11131114
; CHECK-NEXT: [[CONV1_I:%.*]] = sitofp <2 x i16> [[CONV_I]] to <2 x float>
1114-
; CHECK-NEXT: [[MUL3_I_I:%.*]] = fmul <2 x float> [[CONV1_I]], <float undef, float -0.000000e+00>
1115+
; CHECK-NEXT: [[TMP1:%.*]] = fneg <2 x float> [[CONV1_I]]
1116+
; CHECK-NEXT: [[MUL3_I_I:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[TMP1]])
11151117
; CHECK-NEXT: store i32 [[SEL]], ptr [[G_2345:%.*]], align 4
11161118
; CHECK-NEXT: ret <2 x float> [[MUL3_I_I]]
11171119
;

llvm/test/Transforms/InstCombine/fmul.ll

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,7 @@ define half @mul_zero_nnan(half %x) {
12501250

12511251
define <2 x float> @mul_zero_nnan_vec_poison(<2 x float> %x) {
12521252
; CHECK-LABEL: @mul_zero_nnan_vec_poison(
1253-
; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> <float 0.000000e+00, float poison>, <2 x float> [[X:%.*]])
1253+
; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> zeroinitializer, <2 x float> [[X:%.*]])
12541254
; CHECK-NEXT: ret <2 x float> [[R]]
12551255
;
12561256
%r = fmul nnan <2 x float> %x, <float 0.0, float poison>
@@ -1268,13 +1268,104 @@ define half @mul_zero(half %x) {
12681268
ret half %r
12691269
}
12701270

1271-
; TODO: This could be fneg+copysign.
1272-
12731271
define half @mul_negzero_nnan(half %x) {
12741272
; CHECK-LABEL: @mul_negzero_nnan(
1275-
; CHECK-NEXT: [[R:%.*]] = fmul nnan half [[X:%.*]], 0xH8000
1273+
; CHECK-NEXT: [[TMP1:%.*]] = fneg nnan half [[X:%.*]]
1274+
; CHECK-NEXT: [[R:%.*]] = call nnan half @llvm.copysign.f16(half 0xH0000, half [[TMP1]])
12761275
; CHECK-NEXT: ret half [[R]]
12771276
;
12781277
%r = fmul nnan half %x, -0.0
12791278
ret half %r
12801279
}
1280+
1281+
define float @mul_pos_zero_nnan_ninf(float nofpclass(inf nan) %a) {
1282+
; CHECK-LABEL: @mul_pos_zero_nnan_ninf(
1283+
; CHECK-NEXT: entry:
1284+
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[A:%.*]])
1285+
; CHECK-NEXT: ret float [[RET]]
1286+
;
1287+
entry:
1288+
%ret = fmul float %a, 0.000000e+00
1289+
ret float %ret
1290+
}
1291+
1292+
define float @mul_pos_zero_nnan(float nofpclass(nan) %a) {
1293+
; CHECK-LABEL: @mul_pos_zero_nnan(
1294+
; CHECK-NEXT: entry:
1295+
; CHECK-NEXT: [[RET:%.*]] = fmul float [[A:%.*]], 0.000000e+00
1296+
; CHECK-NEXT: ret float [[RET]]
1297+
;
1298+
entry:
1299+
%ret = fmul float %a, 0.000000e+00
1300+
ret float %ret
1301+
}
1302+
1303+
define float @mul_pos_zero_nnan_ninf_fmf(float nofpclass(nan) %a) {
1304+
; CHECK-LABEL: @mul_pos_zero_nnan_ninf_fmf(
1305+
; CHECK-NEXT: entry:
1306+
; CHECK-NEXT: [[RET:%.*]] = call ninf float @llvm.copysign.f32(float 0.000000e+00, float [[A:%.*]])
1307+
; CHECK-NEXT: ret float [[RET]]
1308+
;
1309+
entry:
1310+
%ret = fmul ninf float %a, 0.000000e+00
1311+
ret float %ret
1312+
}
1313+
1314+
define float @mul_neg_zero_nnan_ninf(float nofpclass(inf nan) %a) {
1315+
; CHECK-LABEL: @mul_neg_zero_nnan_ninf(
1316+
; CHECK-NEXT: entry:
1317+
; CHECK-NEXT: [[TMP0:%.*]] = fneg float [[A:%.*]]
1318+
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
1319+
; CHECK-NEXT: ret float [[RET]]
1320+
;
1321+
entry:
1322+
%ret = fmul float %a, -0.000000e+00
1323+
ret float %ret
1324+
}
1325+
1326+
define float @mul_neg_zero_nnan_fmf(float %a) {
1327+
; CHECK-LABEL: @mul_neg_zero_nnan_fmf(
1328+
; CHECK-NEXT: entry:
1329+
; CHECK-NEXT: [[TMP0:%.*]] = fneg nnan float [[A:%.*]]
1330+
; CHECK-NEXT: [[RET:%.*]] = call nnan float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
1331+
; CHECK-NEXT: ret float [[RET]]
1332+
;
1333+
entry:
1334+
%ret = fmul nnan float %a, -0.000000e+00
1335+
ret float %ret
1336+
}
1337+
1338+
define float @mul_neg_zero_nnan_ninf_fmf(float nofpclass(inf nan) %a) {
1339+
; CHECK-LABEL: @mul_neg_zero_nnan_ninf_fmf(
1340+
; CHECK-NEXT: entry:
1341+
; CHECK-NEXT: [[TMP0:%.*]] = fneg nnan ninf float [[A:%.*]]
1342+
; CHECK-NEXT: [[RET:%.*]] = call nnan ninf float @llvm.copysign.f32(float 0.000000e+00, float [[TMP0]])
1343+
; CHECK-NEXT: ret float [[RET]]
1344+
;
1345+
entry:
1346+
%ret = fmul nnan ninf float %a, -0.000000e+00
1347+
ret float %ret
1348+
}
1349+
1350+
define <3 x float> @mul_neg_zero_nnan_ninf_vec(<3 x float> nofpclass(inf nan) %a) {
1351+
; CHECK-LABEL: @mul_neg_zero_nnan_ninf_vec(
1352+
; CHECK-NEXT: entry:
1353+
; CHECK-NEXT: [[TMP0:%.*]] = fneg <3 x float> [[A:%.*]]
1354+
; CHECK-NEXT: [[RET:%.*]] = call <3 x float> @llvm.copysign.v3f32(<3 x float> zeroinitializer, <3 x float> [[TMP0]])
1355+
; CHECK-NEXT: ret <3 x float> [[RET]]
1356+
;
1357+
entry:
1358+
%ret = fmul <3 x float> %a, <float -0.0, float undef, float poison>
1359+
ret <3 x float> %ret
1360+
}
1361+
1362+
define <3 x float> @mul_mixed_zero_nnan_ninf_vec(<3 x float> nofpclass(inf nan) %a) {
1363+
; CHECK-LABEL: @mul_mixed_zero_nnan_ninf_vec(
1364+
; CHECK-NEXT: entry:
1365+
; CHECK-NEXT: [[RET:%.*]] = fmul <3 x float> [[A:%.*]], <float -0.000000e+00, float 0.000000e+00, float poison>
1366+
; CHECK-NEXT: ret <3 x float> [[RET]]
1367+
;
1368+
entry:
1369+
%ret = fmul <3 x float> %a, <float -0.0, float 0.0, float poison>
1370+
ret <3 x float> %ret
1371+
}

llvm/test/Transforms/InstCombine/fpcast.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,10 +424,7 @@ define i32 @fptosi_select(i1 %cond) {
424424
define i32 @mul_pos_zero_convert(i32 %a) {
425425
; CHECK-LABEL: @mul_pos_zero_convert(
426426
; CHECK-NEXT: entry:
427-
; CHECK-NEXT: [[FP:%.*]] = sitofp i32 [[A:%.*]] to float
428-
; CHECK-NEXT: [[RET:%.*]] = fmul float [[FP]], 0.000000e+00
429-
; CHECK-NEXT: [[CONV:%.*]] = fptosi float [[RET]] to i32
430-
; CHECK-NEXT: ret i32 [[CONV]]
427+
; CHECK-NEXT: ret i32 0
431428
;
432429
entry:
433430
%fp = sitofp i32 %a to float

0 commit comments

Comments
 (0)