Skip to content

Commit 15a7de6

Browse files
committed
[SelectionDAG] Support sign tracking through {S|U}INT_TO_FP
Just a minimal amount of easily provable tracking. Proofs: https://alive2.llvm.org/ce/z/RQYbdw Closes #82808 Alive2 has an issue with `(sitofp i1)`, but it can be verified by hand: https://godbolt.org/z/qKr7hT7s9
1 parent e9cdd16 commit 15a7de6

File tree

5 files changed

+75
-166
lines changed

5 files changed

+75
-166
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4004,6 +4004,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
40044004

40054005
break;
40064006
}
4007+
case ISD::UINT_TO_FP: {
4008+
Known.makeNonNegative();
4009+
break;
4010+
}
4011+
case ISD::SINT_TO_FP: {
4012+
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
4013+
if (Known2.isNonNegative())
4014+
Known.makeNonNegative();
4015+
else if (Known2.isNegative())
4016+
Known.makeNegative();
4017+
break;
4018+
}
40074019
case ISD::FP_TO_UINT_SAT: {
40084020
// FP_TO_UINT_SAT produces an unsigned value that fits in the saturating VT.
40094021
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();

llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -525,12 +525,10 @@ define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
525525
define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
526526
; CHECK-LABEL: fdiv_pow_shl_cnt:
527527
; CHECK: // %bb.0:
528-
; CHECK-NEXT: mov w8, #8 // =0x8
529-
; CHECK-NEXT: and x9, x0, #0x1f
530-
; CHECK-NEXT: fmov s1, #-0.50000000
531-
; CHECK-NEXT: lsl x8, x8, x9
532-
; CHECK-NEXT: scvtf s0, x8
533-
; CHECK-NEXT: fdiv s0, s1, s0
528+
; CHECK-NEXT: mov w8, #-1115684864 // =0xbd800000
529+
; CHECK-NEXT: and w9, w0, #0x1f
530+
; CHECK-NEXT: sub w8, w8, w9, lsl #23
531+
; CHECK-NEXT: fmov s0, w8
534532
; CHECK-NEXT: ret
535533
%cnt = and i64 %cnt_in, 31
536534
%shl = shl i64 8, %cnt

llvm/test/CodeGen/AMDGPU/bf16.ll

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -32379,15 +32379,15 @@ define bfloat @v_uitofp_i16_to_bf16(i16 %x) {
3237932379
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3238032380
; GCN-NEXT: v_and_b32_e32 v0, 0xffff, v0
3238132381
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
32382-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32382+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
3238332383
; GCN-NEXT: s_setpc_b64 s[30:31]
3238432384
;
3238532385
; GFX7-LABEL: v_uitofp_i16_to_bf16:
3238632386
; GFX7: ; %bb.0:
3238732387
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3238832388
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
3238932389
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
32390-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32390+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
3239132391
; GFX7-NEXT: s_setpc_b64 s[30:31]
3239232392
;
3239332393
; GFX8-LABEL: v_uitofp_i16_to_bf16:
@@ -32455,8 +32455,8 @@ define <2 x bfloat> @v_uitofp_v2i16_to_v2bf16(<2 x i16> %x) {
3245532455
; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v1
3245632456
; GCN-NEXT: v_cvt_f32_u32_e32 v1, v1
3245732457
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
32458-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32459-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32458+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32459+
; GCN-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
3246032460
; GCN-NEXT: s_setpc_b64 s[30:31]
3246132461
;
3246232462
; GFX7-LABEL: v_uitofp_v2i16_to_v2bf16:
@@ -32466,8 +32466,8 @@ define <2 x bfloat> @v_uitofp_v2i16_to_v2bf16(<2 x i16> %x) {
3246632466
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
3246732467
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
3246832468
; GFX7-NEXT: v_cvt_f32_u32_e32 v1, v1
32469-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32470-
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32469+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32470+
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
3247132471
; GFX7-NEXT: s_setpc_b64 s[30:31]
3247232472
;
3247332473
; GFX8-LABEL: v_uitofp_v2i16_to_v2bf16:
@@ -32566,9 +32566,9 @@ define <3 x bfloat> @v_uitofp_v3i16_to_v3bf16(<3 x i16> %x) {
3256632566
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v2
3256732567
; GCN-NEXT: v_cvt_f32_u32_e32 v1, v1
3256832568
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
32569-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32570-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32571-
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
32569+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32570+
; GCN-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
32571+
; GCN-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
3257232572
; GCN-NEXT: s_setpc_b64 s[30:31]
3257332573
;
3257432574
; GFX7-LABEL: v_uitofp_v3i16_to_v3bf16:
@@ -32580,9 +32580,9 @@ define <3 x bfloat> @v_uitofp_v3i16_to_v3bf16(<3 x i16> %x) {
3258032580
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
3258132581
; GFX7-NEXT: v_cvt_f32_u32_e32 v1, v1
3258232582
; GFX7-NEXT: v_cvt_f32_u32_e32 v2, v2
32583-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32584-
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32585-
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
32583+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32584+
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
32585+
; GFX7-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
3258632586
; GFX7-NEXT: s_setpc_b64 s[30:31]
3258732587
;
3258832588
; GFX8-LABEL: v_uitofp_v3i16_to_v3bf16:
@@ -32682,10 +32682,10 @@ define <4 x bfloat> @v_uitofp_v4i16_to_v4bf16(<4 x i16> %x) {
3268232682
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v2
3268332683
; GCN-NEXT: v_cvt_f32_u32_e32 v1, v1
3268432684
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
32685-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32686-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32687-
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
32688-
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
32685+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32686+
; GCN-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
32687+
; GCN-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
32688+
; GCN-NEXT: v_and_b32_e32 v3, 0x7fff0000, v3
3268932689
; GCN-NEXT: s_setpc_b64 s[30:31]
3269032690
;
3269132691
; GFX7-LABEL: v_uitofp_v4i16_to_v4bf16:
@@ -32699,10 +32699,10 @@ define <4 x bfloat> @v_uitofp_v4i16_to_v4bf16(<4 x i16> %x) {
3269932699
; GFX7-NEXT: v_cvt_f32_u32_e32 v1, v1
3270032700
; GFX7-NEXT: v_cvt_f32_u32_e32 v2, v2
3270132701
; GFX7-NEXT: v_cvt_f32_u32_e32 v3, v3
32702-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32703-
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32704-
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
32705-
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
32702+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32703+
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
32704+
; GFX7-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
32705+
; GFX7-NEXT: v_and_b32_e32 v3, 0x7fff0000, v3
3270632706
; GFX7-NEXT: s_setpc_b64 s[30:31]
3270732707
;
3270832708
; GFX8-LABEL: v_uitofp_v4i16_to_v4bf16:
@@ -32857,14 +32857,14 @@ define bfloat @v_uitofp_i32_to_bf16(i32 %x) {
3285732857
; GCN: ; %bb.0:
3285832858
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3285932859
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
32860-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32860+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
3286132861
; GCN-NEXT: s_setpc_b64 s[30:31]
3286232862
;
3286332863
; GFX7-LABEL: v_uitofp_i32_to_bf16:
3286432864
; GFX7: ; %bb.0:
3286532865
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3286632866
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
32867-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32867+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
3286832868
; GFX7-NEXT: s_setpc_b64 s[30:31]
3286932869
;
3287032870
; GFX8-LABEL: v_uitofp_i32_to_bf16:
@@ -32928,17 +32928,17 @@ define <2 x bfloat> @v_uitofp_v2i32_to_v2bf16(<2 x i32> %x) {
3292832928
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3292932929
; GCN-NEXT: v_cvt_f32_u32_e32 v1, v1
3293032930
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
32931-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32932-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32931+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32932+
; GCN-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
3293332933
; GCN-NEXT: s_setpc_b64 s[30:31]
3293432934
;
3293532935
; GFX7-LABEL: v_uitofp_v2i32_to_v2bf16:
3293632936
; GFX7: ; %bb.0:
3293732937
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3293832938
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
3293932939
; GFX7-NEXT: v_cvt_f32_u32_e32 v1, v1
32940-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
32941-
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
32940+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
32941+
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
3294232942
; GFX7-NEXT: s_setpc_b64 s[30:31]
3294332943
;
3294432944
; GFX8-LABEL: v_uitofp_v2i32_to_v2bf16:
@@ -33031,9 +33031,9 @@ define <3 x bfloat> @v_uitofp_v3i32_to_v3bf16(<3 x i32> %x) {
3303133031
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v2
3303233032
; GCN-NEXT: v_cvt_f32_u32_e32 v1, v1
3303333033
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
33034-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
33035-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
33036-
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
33034+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
33035+
; GCN-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
33036+
; GCN-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
3303733037
; GCN-NEXT: s_setpc_b64 s[30:31]
3303833038
;
3303933039
; GFX7-LABEL: v_uitofp_v3i32_to_v3bf16:
@@ -33042,9 +33042,9 @@ define <3 x bfloat> @v_uitofp_v3i32_to_v3bf16(<3 x i32> %x) {
3304233042
; GFX7-NEXT: v_cvt_f32_u32_e32 v0, v0
3304333043
; GFX7-NEXT: v_cvt_f32_u32_e32 v1, v1
3304433044
; GFX7-NEXT: v_cvt_f32_u32_e32 v2, v2
33045-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
33046-
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
33047-
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
33045+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
33046+
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
33047+
; GFX7-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
3304833048
; GFX7-NEXT: s_setpc_b64 s[30:31]
3304933049
;
3305033050
; GFX8-LABEL: v_uitofp_v3i32_to_v3bf16:
@@ -33140,10 +33140,10 @@ define <4 x bfloat> @v_uitofp_v4i32_to_v4bf16(<4 x i32> %x) {
3314033140
; GCN-NEXT: v_cvt_f32_u32_e32 v2, v2
3314133141
; GCN-NEXT: v_cvt_f32_u32_e32 v1, v1
3314233142
; GCN-NEXT: v_cvt_f32_u32_e32 v0, v0
33143-
; GCN-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
33144-
; GCN-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
33145-
; GCN-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
33146-
; GCN-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
33143+
; GCN-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
33144+
; GCN-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
33145+
; GCN-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
33146+
; GCN-NEXT: v_and_b32_e32 v3, 0x7fff0000, v3
3314733147
; GCN-NEXT: s_setpc_b64 s[30:31]
3314833148
;
3314933149
; GFX7-LABEL: v_uitofp_v4i32_to_v4bf16:
@@ -33153,10 +33153,10 @@ define <4 x bfloat> @v_uitofp_v4i32_to_v4bf16(<4 x i32> %x) {
3315333153
; GFX7-NEXT: v_cvt_f32_u32_e32 v1, v1
3315433154
; GFX7-NEXT: v_cvt_f32_u32_e32 v2, v2
3315533155
; GFX7-NEXT: v_cvt_f32_u32_e32 v3, v3
33156-
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
33157-
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
33158-
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff0000, v2
33159-
; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v3
33156+
; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff0000, v0
33157+
; GFX7-NEXT: v_and_b32_e32 v1, 0x7fff0000, v1
33158+
; GFX7-NEXT: v_and_b32_e32 v2, 0x7fff0000, v2
33159+
; GFX7-NEXT: v_and_b32_e32 v3, 0x7fff0000, v3
3316033160
; GFX7-NEXT: s_setpc_b64 s[30:31]
3316133161
;
3316233162
; GFX8-LABEL: v_uitofp_v4i32_to_v4bf16:

llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 6 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -2532,98 +2532,25 @@ define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
25322532
; VI: ; %bb.0:
25332533
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25342534
; VI-NEXT: v_and_b32_e32 v0, 31, v0
2535-
; VI-NEXT: v_lshlrev_b64 v[0:1], v0, 8
2536-
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
2537-
; VI-NEXT: v_ffbh_i32_e32 v3, v1
2538-
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
2539-
; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
2540-
; VI-NEXT: v_min_u32_e32 v2, v3, v2
2541-
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
2542-
; VI-NEXT: v_min_u32_e32 v0, 1, v0
2543-
; VI-NEXT: v_or_b32_e32 v0, v1, v0
2544-
; VI-NEXT: v_cvt_f32_i32_e32 v0, v0
2545-
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v2
2546-
; VI-NEXT: v_ldexp_f32 v0, v0, v1
2547-
; VI-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, -0.5
2548-
; VI-NEXT: v_div_scale_f32 v2, vcc, -0.5, v0, -0.5
2549-
; VI-NEXT: v_rcp_f32_e32 v3, v1
2550-
; VI-NEXT: v_fma_f32 v4, -v1, v3, 1.0
2551-
; VI-NEXT: v_fma_f32 v3, v4, v3, v3
2552-
; VI-NEXT: v_mul_f32_e32 v4, v2, v3
2553-
; VI-NEXT: v_fma_f32 v5, -v1, v4, v2
2554-
; VI-NEXT: v_fma_f32 v4, v5, v3, v4
2555-
; VI-NEXT: v_fma_f32 v1, -v1, v4, v2
2556-
; VI-NEXT: v_div_fmas_f32 v1, v1, v3, v4
2557-
; VI-NEXT: v_div_fixup_f32 v0, v1, v0, -0.5
2535+
; VI-NEXT: v_lshlrev_b32_e32 v0, 23, v0
2536+
; VI-NEXT: v_sub_u32_e32 v0, vcc, 0xbd800000, v0
25582537
; VI-NEXT: s_setpc_b64 s[30:31]
25592538
;
25602539
; GFX10-LABEL: fdiv_pow_shl_cnt:
25612540
; GFX10: ; %bb.0:
25622541
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25632542
; GFX10-NEXT: v_and_b32_e32 v0, 31, v0
2564-
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, 8
2565-
; GFX10-NEXT: v_ashrrev_i32_e32 v2, 31, v0
2566-
; GFX10-NEXT: v_ffbh_i32_e32 v3, v1
2567-
; GFX10-NEXT: v_add_nc_u32_e32 v2, 32, v2
2568-
; GFX10-NEXT: v_add_nc_u32_e32 v3, -1, v3
2569-
; GFX10-NEXT: v_min_u32_e32 v2, v3, v2
2570-
; GFX10-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
2571-
; GFX10-NEXT: v_min_u32_e32 v0, 1, v0
2572-
; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
2573-
; GFX10-NEXT: v_sub_nc_u32_e32 v1, 32, v2
2574-
; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0
2575-
; GFX10-NEXT: v_ldexp_f32 v0, v0, v1
2576-
; GFX10-NEXT: v_div_scale_f32 v1, s4, v0, v0, -0.5
2577-
; GFX10-NEXT: v_rcp_f32_e32 v2, v1
2578-
; GFX10-NEXT: v_fma_f32 v3, -v1, v2, 1.0
2579-
; GFX10-NEXT: v_fmac_f32_e32 v2, v3, v2
2580-
; GFX10-NEXT: v_div_scale_f32 v3, vcc_lo, -0.5, v0, -0.5
2581-
; GFX10-NEXT: v_mul_f32_e32 v4, v3, v2
2582-
; GFX10-NEXT: v_fma_f32 v5, -v1, v4, v3
2583-
; GFX10-NEXT: v_fmac_f32_e32 v4, v5, v2
2584-
; GFX10-NEXT: v_fma_f32 v1, -v1, v4, v3
2585-
; GFX10-NEXT: v_div_fmas_f32 v1, v1, v2, v4
2586-
; GFX10-NEXT: v_div_fixup_f32 v0, v1, v0, -0.5
2543+
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 23, v0
2544+
; GFX10-NEXT: v_sub_nc_u32_e32 v0, 0xbd800000, v0
25872545
; GFX10-NEXT: s_setpc_b64 s[30:31]
25882546
;
25892547
; GFX11-LABEL: fdiv_pow_shl_cnt:
25902548
; GFX11: ; %bb.0:
25912549
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25922550
; GFX11-NEXT: v_and_b32_e32 v0, 31, v0
25932551
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2594-
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, 8
2595-
; GFX11-NEXT: v_ashrrev_i32_e32 v2, 31, v0
2596-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2597-
; GFX11-NEXT: v_cls_i32_e32 v3, v1
2598-
; GFX11-NEXT: v_add_nc_u32_e32 v2, 32, v2
2599-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2600-
; GFX11-NEXT: v_add_nc_u32_e32 v3, -1, v3
2601-
; GFX11-NEXT: v_min_u32_e32 v2, v3, v2
2602-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2603-
; GFX11-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
2604-
; GFX11-NEXT: v_min_u32_e32 v0, 1, v0
2605-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2606-
; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
2607-
; GFX11-NEXT: v_sub_nc_u32_e32 v1, 32, v2
2608-
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0
2609-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2610-
; GFX11-NEXT: v_ldexp_f32 v0, v0, v1
2611-
; GFX11-NEXT: v_div_scale_f32 v1, null, v0, v0, -0.5
2612-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
2613-
; GFX11-NEXT: v_rcp_f32_e32 v2, v1
2614-
; GFX11-NEXT: s_waitcnt_depctr 0xfff
2615-
; GFX11-NEXT: v_fma_f32 v3, -v1, v2, 1.0
2616-
; GFX11-NEXT: v_fmac_f32_e32 v2, v3, v2
2617-
; GFX11-NEXT: v_div_scale_f32 v3, vcc_lo, -0.5, v0, -0.5
2618-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2619-
; GFX11-NEXT: v_mul_f32_e32 v4, v3, v2
2620-
; GFX11-NEXT: v_fma_f32 v5, -v1, v4, v3
2621-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2622-
; GFX11-NEXT: v_fmac_f32_e32 v4, v5, v2
2623-
; GFX11-NEXT: v_fma_f32 v1, -v1, v4, v3
2624-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2625-
; GFX11-NEXT: v_div_fmas_f32 v1, v1, v2, v4
2626-
; GFX11-NEXT: v_div_fixup_f32 v0, v1, v0, -0.5
2552+
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 23, v0
2553+
; GFX11-NEXT: v_sub_nc_u32_e32 v0, 0xbd800000, v0
26272554
; GFX11-NEXT: s_setpc_b64 s[30:31]
26282555
%cnt = and i64 %cnt_in, 31
26292556
%shl = shl i64 8, %cnt

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 13 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,49 +1368,21 @@ define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
13681368
define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
13691369
; CHECK-SSE-LABEL: fdiv_pow_shl_cnt:
13701370
; CHECK-SSE: # %bb.0:
1371-
; CHECK-SSE-NEXT: movq %rdi, %rcx
1372-
; CHECK-SSE-NEXT: andb $31, %cl
1373-
; CHECK-SSE-NEXT: movl $8, %eax
1374-
; CHECK-SSE-NEXT: # kill: def $cl killed $cl killed $rcx
1375-
; CHECK-SSE-NEXT: shlq %cl, %rax
1376-
; CHECK-SSE-NEXT: cvtsi2ss %rax, %xmm1
1377-
; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
1378-
; CHECK-SSE-NEXT: divss %xmm1, %xmm0
1371+
; CHECK-SSE-NEXT: andl $31, %edi
1372+
; CHECK-SSE-NEXT: shll $23, %edi
1373+
; CHECK-SSE-NEXT: movl $-1115684864, %eax # imm = 0xBD800000
1374+
; CHECK-SSE-NEXT: subl %edi, %eax
1375+
; CHECK-SSE-NEXT: movd %eax, %xmm0
13791376
; CHECK-SSE-NEXT: retq
13801377
;
1381-
; CHECK-AVX2-LABEL: fdiv_pow_shl_cnt:
1382-
; CHECK-AVX2: # %bb.0:
1383-
; CHECK-AVX2-NEXT: movq %rdi, %rcx
1384-
; CHECK-AVX2-NEXT: andb $31, %cl
1385-
; CHECK-AVX2-NEXT: movl $8, %eax
1386-
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
1387-
; CHECK-AVX2-NEXT: shlq %cl, %rax
1388-
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
1389-
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
1390-
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
1391-
; CHECK-AVX2-NEXT: retq
1392-
;
1393-
; CHECK-NO-FASTFMA-LABEL: fdiv_pow_shl_cnt:
1394-
; CHECK-NO-FASTFMA: # %bb.0:
1395-
; CHECK-NO-FASTFMA-NEXT: movq %rdi, %rcx
1396-
; CHECK-NO-FASTFMA-NEXT: andb $31, %cl
1397-
; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
1398-
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
1399-
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
1400-
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
1401-
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
1402-
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
1403-
; CHECK-NO-FASTFMA-NEXT: retq
1404-
;
1405-
; CHECK-FMA-LABEL: fdiv_pow_shl_cnt:
1406-
; CHECK-FMA: # %bb.0:
1407-
; CHECK-FMA-NEXT: andb $31, %dil
1408-
; CHECK-FMA-NEXT: movl $8, %eax
1409-
; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
1410-
; CHECK-FMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
1411-
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-5.0E-1,0.0E+0,0.0E+0,0.0E+0]
1412-
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
1413-
; CHECK-FMA-NEXT: retq
1378+
; CHECK-AVX-LABEL: fdiv_pow_shl_cnt:
1379+
; CHECK-AVX: # %bb.0:
1380+
; CHECK-AVX-NEXT: andl $31, %edi
1381+
; CHECK-AVX-NEXT: shll $23, %edi
1382+
; CHECK-AVX-NEXT: movl $-1115684864, %eax # imm = 0xBD800000
1383+
; CHECK-AVX-NEXT: subl %edi, %eax
1384+
; CHECK-AVX-NEXT: vmovd %eax, %xmm0
1385+
; CHECK-AVX-NEXT: retq
14141386
%cnt = and i64 %cnt_in, 31
14151387
%shl = shl i64 8, %cnt
14161388
%conv = sitofp i64 %shl to float

0 commit comments

Comments
 (0)