Skip to content

Commit 754f2ca

Browse files
authored
[X86][FP16] Widen UI2FP for FP16 when VLX not enabled (#142956)
Fixes: https://godbolt.org/z/5vc8oMhxz
1 parent 2b3e07f commit 754f2ca

File tree

2 files changed, with 30 additions (+30) and 17 deletions (-17).

2 files changed

+30
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -20361,10 +20361,16 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
2036120361
if (VT == MVT::v8f64)
2036220362
return Op;
2036320363

20364-
assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&
20364+
assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64 ||
20365+
VT == MVT::v8f16) &&
2036520366
"Unexpected VT!");
20366-
MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
20367-
MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
20367+
MVT WideVT = VT == MVT::v8f16 ? MVT::v16f16 : MVT::v16f32;
20368+
MVT WideIntVT = MVT::v16i32;
20369+
if (VT == MVT::v4f64) {
20370+
WideVT = MVT::v8f64;
20371+
WideIntVT = MVT::v8i32;
20372+
}
20373+
2036820374
// Need to concat with zero vector for strict fp to avoid spurious
2036920375
// exceptions.
2037020376
SDValue Tmp =

llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,8 @@ define <2 x half> @vector_sint64ToHalf(<2 x i64> %int64) {
88
; CHECK-NEXT: vcvtqq2ph %zmm0, %xmm0
99
; CHECK-NEXT: vzeroupper
1010
; CHECK-NEXT: retq
11-
%fp16 = sitofp <2 x i64> %int64 to <2 x half>
12-
ret <2 x half> %fp16
11+
%fp16 = sitofp <2 x i64> %int64 to <2 x half>
12+
ret <2 x half> %fp16
1313
}
1414

1515
define <4 x half> @vector_sint32ToHalf(<4 x i32> %int32) {
@@ -27,8 +27,8 @@ define <4 x half> @vector_sint32ToHalf(<4 x i32> %int32) {
2727
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2828
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
2929
; CHECK-NEXT: retq
30-
%fp16 = sitofp <4 x i32> %int32 to <4 x half>
31-
ret <4 x half> %fp16
30+
%fp16 = sitofp <4 x i32> %int32 to <4 x half>
31+
ret <4 x half> %fp16
3232
}
3333

3434
define <8 x half> @vector_sint16ToHalf(<8 x i16> %int16) {
@@ -66,8 +66,8 @@ define <8 x half> @vector_sint16ToHalf(<8 x i16> %int16) {
6666
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
6767
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
6868
; CHECK-NEXT: retq
69-
%fp16 = sitofp <8 x i16> %int16 to <8 x half>
70-
ret <8 x half> %fp16
69+
%fp16 = sitofp <8 x i16> %int16 to <8 x half>
70+
ret <8 x half> %fp16
7171
}
7272

7373
define <2 x half> @vector_uint64ToHalf(<2 x i64> %int64) {
@@ -77,14 +77,21 @@ define <2 x half> @vector_uint64ToHalf(<2 x i64> %int64) {
7777
; CHECK-NEXT: vcvtuqq2ph %zmm0, %xmm0
7878
; CHECK-NEXT: vzeroupper
7979
; CHECK-NEXT: retq
80-
%fp16 = uitofp <2 x i64> %int64 to <2 x half>
81-
ret <2 x half> %fp16
80+
%fp16 = uitofp <2 x i64> %int64 to <2 x half>
81+
ret <2 x half> %fp16
8282
}
8383

84-
; define <4 x half> @vector_uint32ToHalf(<4 x i32> %int32) {
85-
; %fp16 = uitofp <4 x i32> %int32 to <4 x half>
86-
; ret <4 x half> %fp16
87-
; }
84+
define <4 x half> @vector_uint32ToHalf(<4 x i32> %int32) {
85+
; CHECK-LABEL: vector_uint32ToHalf:
86+
; CHECK: # %bb.0:
87+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
88+
; CHECK-NEXT: vcvtudq2ph %zmm0, %ymm0
89+
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
90+
; CHECK-NEXT: vzeroupper
91+
; CHECK-NEXT: retq
92+
%fp16 = uitofp <4 x i32> %int32 to <4 x half>
93+
ret <4 x half> %fp16
94+
}
8895

8996
define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) {
9097
; CHECK-LABEL: vector_uint16ToHalf:
@@ -113,6 +120,6 @@ define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) {
113120
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
114121
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
115122
; CHECK-NEXT: retq
116-
%fp16 = uitofp <8 x i16> %int16 to <8 x half>
117-
ret <8 x half> %fp16
123+
%fp16 = uitofp <8 x i16> %int16 to <8 x half>
124+
ret <8 x half> %fp16
118125
}

0 commit comments

Comments
 (0)