Skip to content

Commit 0c95516

Browse files
authored
[X86] Generate cvtpd2dq for (v2i32 lrint(v2f64)) (#126508)
Found while addressing a comment on #126477.
1 parent af522c5 commit 0c95516

File tree

2 files changed

+20
-59
lines changed

2 files changed

+20
-59
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
11591159
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
11601160

11611161
setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
1162+
setOperationAction(ISD::LRINT, MVT::v2i32, Custom);
11621163

11631164
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
11641165
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
@@ -34029,6 +34030,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3402934030
return;
3403034031
}
3403134032
case ISD::LRINT:
34033+
if (N->getValueType(0) == MVT::v2i32) {
34034+
SDValue Src = N->getOperand(0);
34035+
if (Src.getValueType() == MVT::v2f64)
34036+
Results.push_back(DAG.getNode(X86ISD::CVTP2SI, dl, MVT::v4i32, Src));
34037+
return;
34038+
}
34039+
[[fallthrough]];
3403234040
case ISD::LLRINT: {
3403334041
if (SDValue V = LRINT_LLRINTHelper(N, DAG))
3403434042
Results.push_back(V);

llvm/test/CodeGen/X86/vector-lrint.ll

Lines changed: 12 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -269,31 +269,17 @@ declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
269269
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
270270
; X86-SSE2-LABEL: lrint_v2f64:
271271
; X86-SSE2: # %bb.0:
272-
; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
273-
; X86-SSE2-NEXT: movd %eax, %xmm1
274-
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
275-
; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
276-
; X86-SSE2-NEXT: movd %eax, %xmm0
277-
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
278-
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
272+
; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
279273
; X86-SSE2-NEXT: retl
280274
;
281275
; X86-AVX-LABEL: lrint_v2f64:
282276
; X86-AVX: # %bb.0:
283-
; X86-AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
284-
; X86-AVX-NEXT: vcvtsd2si %xmm1, %eax
285-
; X86-AVX-NEXT: vcvtsd2si %xmm0, %ecx
286-
; X86-AVX-NEXT: vmovd %ecx, %xmm0
287-
; X86-AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
277+
; X86-AVX-NEXT: vcvtpd2dq %xmm0, %xmm0
288278
; X86-AVX-NEXT: retl
289279
;
290280
; X64-AVX-i32-LABEL: lrint_v2f64:
291281
; X64-AVX-i32: # %bb.0:
292-
; X64-AVX-i32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
293-
; X64-AVX-i32-NEXT: vcvtsd2si %xmm1, %eax
294-
; X64-AVX-i32-NEXT: vcvtsd2si %xmm0, %ecx
295-
; X64-AVX-i32-NEXT: vmovd %ecx, %xmm0
296-
; X64-AVX-i32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
282+
; X64-AVX-i32-NEXT: vcvtpd2dq %xmm0, %xmm0
297283
; X64-AVX-i32-NEXT: retq
298284
;
299285
; X64-AVX1-i64-LABEL: lrint_v2f64:
@@ -328,20 +314,9 @@ declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
328314
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
329315
; X86-SSE2-LABEL: lrint_v4f64:
330316
; X86-SSE2: # %bb.0:
331-
; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
332-
; X86-SSE2-NEXT: movd %eax, %xmm2
333-
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
334-
; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
335-
; X86-SSE2-NEXT: movd %eax, %xmm1
336-
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
337-
; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
338-
; X86-SSE2-NEXT: movd %eax, %xmm1
339-
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
340-
; X86-SSE2-NEXT: cvtsd2si %xmm0, %eax
341-
; X86-SSE2-NEXT: movd %eax, %xmm0
342-
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
343-
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
344-
; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
317+
; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
318+
; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
319+
; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
345320
; X86-SSE2-NEXT: retl
346321
;
347322
; X86-AVX-LABEL: lrint_v4f64:
@@ -411,34 +386,12 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
411386
; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp
412387
; X86-SSE2-NEXT: andl $-16, %esp
413388
; X86-SSE2-NEXT: subl $16, %esp
414-
; X86-SSE2-NEXT: movapd %xmm0, %xmm3
415-
; X86-SSE2-NEXT: movapd 8(%ebp), %xmm4
416-
; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
417-
; X86-SSE2-NEXT: movd %eax, %xmm5
418-
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
419-
; X86-SSE2-NEXT: cvtsd2si %xmm1, %eax
420-
; X86-SSE2-NEXT: movd %eax, %xmm0
421-
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
422-
; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
423-
; X86-SSE2-NEXT: movd %eax, %xmm0
424-
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
425-
; X86-SSE2-NEXT: cvtsd2si %xmm3, %eax
426-
; X86-SSE2-NEXT: movd %eax, %xmm1
427-
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
428-
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
429-
; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
430-
; X86-SSE2-NEXT: movd %eax, %xmm3
431-
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
432-
; X86-SSE2-NEXT: cvtsd2si %xmm4, %eax
433-
; X86-SSE2-NEXT: movd %eax, %xmm1
434-
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
435-
; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
436-
; X86-SSE2-NEXT: movd %eax, %xmm1
437-
; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
438-
; X86-SSE2-NEXT: cvtsd2si %xmm2, %eax
439-
; X86-SSE2-NEXT: movd %eax, %xmm2
440-
; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
441-
; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
389+
; X86-SSE2-NEXT: cvtpd2dq %xmm1, %xmm1
390+
; X86-SSE2-NEXT: cvtpd2dq %xmm0, %xmm0
391+
; X86-SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
392+
; X86-SSE2-NEXT: cvtpd2dq %xmm2, %xmm1
393+
; X86-SSE2-NEXT: cvtpd2dq 8(%ebp), %xmm2
394+
; X86-SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
442395
; X86-SSE2-NEXT: movl %ebp, %esp
443396
; X86-SSE2-NEXT: popl %ebp
444397
; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4

0 commit comments

Comments
 (0)