Skip to content

Commit 6c21e6a

Browse files
committed
[X86][SSE] Improve recognition of uitofp conversions that can be performed as sitofp
With D24253 we can now use SelectionDAG::SignBitIsZero with vector operations. This patch uses SelectionDAG::SignBitIsZero to recognise that a zero sign bit means that we can use a sitofp instead of a uitofp (which is not directly supported on pre-AVX512 hardware). While AVX512 does provide support for uitofp, the conversion to sitofp should not cause any regressions. Differential Revision: https://reviews.llvm.org/D24343 llvm-svn: 281852
1 parent a1a0e7d commit 6c21e6a

File tree

6 files changed

+112
-495
lines changed

6 files changed

+112
-495
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,10 +1980,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
19801980
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
19811981
/// use this predicate to simplify operations downstream.
19821982
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
1983-
// This predicate is not safe for vector operations.
1984-
if (Op.getValueType().isVector())
1985-
return false;
1986-
19871983
unsigned BitWidth = Op.getScalarValueSizeInBits();
19881984
return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
19891985
}

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13871,15 +13871,15 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
1387113871
SDLoc dl(Op);
1387213872
auto PtrVT = getPointerTy(DAG.getDataLayout());
1387313873

13874-
if (Op.getSimpleValueType().isVector())
13875-
return lowerUINT_TO_FP_vec(Op, DAG);
13876-
1387713874
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
1387813875
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
1387913876
// the optimization here.
1388013877
if (DAG.SignBitIsZero(N0))
1388113878
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
1388213879

13880+
if (Op.getSimpleValueType().isVector())
13881+
return lowerUINT_TO_FP_vec(Op, DAG);
13882+
1388313883
MVT SrcVT = N0.getSimpleValueType();
1388413884
MVT DstVT = Op.getSimpleValueType();
1388513885

@@ -31204,6 +31204,12 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
3120431204
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
3120531205
}
3120631206

31207+
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
31208+
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
31209+
// the optimization here.
31210+
if (DAG.SignBitIsZero(Op0))
31211+
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
31212+
3120731213
return SDValue();
3120831214
}
3120931215

llvm/test/CodeGen/X86/avx512-cvt.ll

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -924,7 +924,7 @@ define <16 x float> @uitofp_16i8(<16 x i8>%a) {
924924
; ALL-LABEL: uitofp_16i8:
925925
; ALL: ## BB#0:
926926
; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
927-
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
927+
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
928928
; ALL-NEXT: retq
929929
%b = uitofp <16 x i8> %a to <16 x float>
930930
ret <16 x float>%b
@@ -934,7 +934,7 @@ define <16 x float> @uitofp_16i16(<16 x i16>%a) {
934934
; ALL-LABEL: uitofp_16i16:
935935
; ALL: ## BB#0:
936936
; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
937-
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
937+
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
938938
; ALL-NEXT: retq
939939
%b = uitofp <16 x i16> %a to <16 x float>
940940
ret <16 x float>%b
@@ -1036,9 +1036,8 @@ define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
10361036
; KNL: ## BB#0:
10371037
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
10381038
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
1039-
; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
1040-
; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
1041-
; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
1039+
; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
1040+
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
10421041
; KNL-NEXT: retq
10431042
;
10441043
; SKX-LABEL: uitofp_4i1_float:
@@ -1059,8 +1058,7 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
10591058
; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
10601059
; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
10611060
; KNL-NEXT: vpsrld $31, %xmm0, %xmm0
1062-
; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0
1063-
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
1061+
; KNL-NEXT: vcvtdq2pd %xmm0, %ymm0
10641062
; KNL-NEXT: retq
10651063
;
10661064
; SKX-LABEL: uitofp_4i1_double:
@@ -1113,12 +1111,7 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
11131111
; KNL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
11141112
; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
11151113
; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
1116-
; KNL-NEXT: vpsrlq $63, %xmm0, %xmm0
1117-
; KNL-NEXT: vpextrq $1, %xmm0, %rax
1118-
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm1
1119-
; KNL-NEXT: vmovq %xmm0, %rax
1120-
; KNL-NEXT: vcvtusi2sdq %rax, %xmm0, %xmm0
1121-
; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
1114+
; KNL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
11221115
; KNL-NEXT: retq
11231116
;
11241117
; SKX-LABEL: uitofp_2i1_double:

0 commit comments

Comments
 (0)