Skip to content

[X86][FP16] Widen UI2FP for FP16 when VLX not enabled #142956

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from inttofp
Jun 5, 2025

Conversation

phoebewang
Copy link
Contributor

@llvmbot
Copy link
Member

llvmbot commented Jun 5, 2025

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

Fixes: https://godbolt.org/z/5vc8oMhxz


Full diff: https://github.com/llvm/llvm-project/pull/142956.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+9-3)
  • (modified) llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll (+21-14)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9cff3d76913ab..760119bc62604 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -20361,10 +20361,16 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, const SDLoc &DL,
     if (VT == MVT::v8f64)
       return Op;
 
-    assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) &&
+    assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64 ||
+            VT == MVT::v8f16) &&
            "Unexpected VT!");
-    MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
-    MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
+    MVT WideVT = VT == MVT::v8f16 ? MVT::v16f16 : MVT::v16f32;
+    MVT WideIntVT = MVT::v16i32;
+    if (VT == MVT::v4f64) {
+      WideVT = MVT::v8f64;
+      WideIntVT = MVT::v8i32;
+    }
+
     // Need to concat with zero vector for strict fp to avoid spurious
     // exceptions.
     SDValue Tmp =
diff --git a/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll b/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll
index 38c833ee89bc0..26947b5eb3022 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-cvt-novl.ll
@@ -8,8 +8,8 @@ define <2 x half> @vector_sint64ToHalf(<2 x i64> %int64) {
 ; CHECK-NEXT:    vcvtqq2ph %zmm0, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
-    %fp16 = sitofp <2 x i64> %int64 to <2 x half>
-    ret <2 x half> %fp16
+  %fp16 = sitofp <2 x i64> %int64 to <2 x half>
+  ret <2 x half> %fp16
 }
 
 define <4 x half> @vector_sint32ToHalf(<4 x i32> %int32) {
@@ -27,8 +27,8 @@ define <4 x half> @vector_sint32ToHalf(<4 x i32> %int32) {
 ; CHECK-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
 ; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
 ; CHECK-NEXT:    retq
-    %fp16 = sitofp <4 x i32> %int32 to <4 x half>
-    ret <4 x half> %fp16
+  %fp16 = sitofp <4 x i32> %int32 to <4 x half>
+  ret <4 x half> %fp16
 }
 
 define <8 x half> @vector_sint16ToHalf(<8 x i16> %int16) {
@@ -66,8 +66,8 @@ define <8 x half> @vector_sint16ToHalf(<8 x i16> %int16) {
 ; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
-    %fp16 = sitofp <8 x i16> %int16 to <8 x half>
-    ret <8 x half> %fp16
+  %fp16 = sitofp <8 x i16> %int16 to <8 x half>
+  ret <8 x half> %fp16
 }
 
 define <2 x half> @vector_uint64ToHalf(<2 x i64> %int64) {
@@ -77,14 +77,21 @@ define <2 x half> @vector_uint64ToHalf(<2 x i64> %int64) {
 ; CHECK-NEXT:    vcvtuqq2ph %zmm0, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
-    %fp16 = uitofp <2 x i64> %int64 to <2 x half>
-    ret <2 x half> %fp16
+  %fp16 = uitofp <2 x i64> %int64 to <2 x half>
+  ret <2 x half> %fp16
 }
 
-; define <4 x half> @vector_uint32ToHalf(<4 x i32> %int32) {
-;     %fp16 = uitofp <4 x i32> %int32 to <4 x half>
-;     ret <4 x half> %fp16
-; }
+define <4 x half> @vector_uint32ToHalf(<4 x i32> %int32) {
+; CHECK-LABEL: vector_uint32ToHalf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; CHECK-NEXT:    vcvtudq2ph %zmm0, %ymm0
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %fp16 = uitofp <4 x i32> %int32 to <4 x half>
+  ret <4 x half> %fp16
+}
 
 define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) {
 ; CHECK-LABEL: vector_uint16ToHalf:
@@ -113,6 +120,6 @@ define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) {
 ; CHECK-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-NEXT:    retq
-    %fp16 = uitofp <8 x i16> %int16 to <8 x half>
-    ret <8 x half> %fp16
+  %fp16 = uitofp <8 x i16> %int16 to <8 x half>
+  ret <8 x half> %fp16
 }

Copy link
Contributor

@e-kud e-kud left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@RKSimon RKSimon changed the title from "[X86][FP16] Winden UI2FP for FP16 when VLX not enabled" to "[X86][FP16] Widen UI2FP for FP16 when VLX not enabled" on Jun 5, 2025
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@phoebewang phoebewang merged commit 754f2ca into llvm:main Jun 5, 2025
10 of 12 checks passed
@phoebewang phoebewang deleted the inttofp branch June 5, 2025 13:14
rorth pushed a commit to rorth/llvm-project that referenced this pull request Jun 11, 2025
DhruvSrivastavaX pushed a commit to DhruvSrivastavaX/lldb-for-aix that referenced this pull request Jun 12, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants