-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) #126508
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Found when addressing comment on llvm#126477
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) Changes: Found when addressing a comment on #126477. Full diff: https://github.com/llvm/llvm-project/pull/126508.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb2102..72f6fd7a96c3d28 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1159,6 +1159,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::v2i32, Custom);
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
@@ -34017,6 +34018,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::LRINT:
+ if (N->getValueType(0) == MVT::v2i32) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() == MVT::v2f64)
+ Results.push_back(DAG.getNode(X86ISD::CVTP2SI, dl, MVT::v4i32, Src));
+ return;
+ }
+ [[fallthrough]];
case ISD::LLRINT: {
if (SDValue V = LRINT_LLRINTHelper(N, DAG))
Results.push_back(V);
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 3612205bf1bfa9c..b1c8d46f497f320 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -269,31 +269,17 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v2f64:
; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax
-; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx
-; X86-AVX-NEXT: vmovd %ecx, %xmm0
-; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X86-AVX-NEXT: vcvtpd2dq %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X64-AVX-i32-LABEL: lrint_v2f64:
; X64-AVX-i32: # %bb.0:
-; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
-; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax
-; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx
-; X64-AVX-i32-NEXT: vmovd %ecx, %xmm0
-; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X64-AVX-i32-NEXT: vcvtpd2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT: retq
;
; X64-AVX1-i64-LABEL: lrint_v2f64:
@@ -328,20 +314,9 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2: # %bb.0:
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm2
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
+; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT: retl
;
; X86-AVX-LABEL: lrint_v4f64:
@@ -411,34 +386,12 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT: andl $-16, %esp
; X86-SSE2-NEXT: subl $16, %esp
-; X86-SSE2-NEXT: movapd %xmm0, %xmm3
-; X86-SSE2-NEXT: movapd 8(%ebp), %xmm4
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm5
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm0
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
-; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm3
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm1
-; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
-; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
-; X86-SSE2-NEXT: movd %eax, %xmm2
-; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE2-NEXT: cvtpd2dq %xmm2, %xmm1
+; X86-SSE2-NEXT: cvtpd2dq 8(%ebp), %xmm2
+; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT: movl %ebp, %esp
; X86-SSE2-NEXT: popl %ebp
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Found when addressing comment on llvm#126477
Found when addressing comment on llvm#126477
Found when addressing comment on llvm#126477
Found when addressing comment on #126477