Skip to content

Commit 3a32590

Browse files
[AArch64] Avoid using NEON FCVTXN in Streaming-SVE mode. (#91981)
1 parent f5c8242 commit 3a32590

File tree

2 files changed

+39
-24
lines changed

2 files changed

+39
-24
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19501,7 +19501,8 @@ static SDValue performBuildVectorCombine(SDNode *N,
1950119501
SDLoc DL(N);
1950219502
EVT VT = N->getValueType(0);
1950319503

19504-
if (VT == MVT::v4f16 || VT == MVT::v4bf16) {
19504+
if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
19505+
(VT == MVT::v4f16 || VT == MVT::v4bf16)) {
1950519506
SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
1950619507
Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
1950719508
if (Elt0->getOpcode() == ISD::FP_ROUND &&

llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll

Lines changed: 37 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -427,35 +427,49 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
427427
define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
428428
; SVE-LABEL: test_copysign_v4f16_v4f64:
429429
; SVE: // %bb.0:
430-
; SVE-NEXT: ldp q0, q1, [x1]
431-
; SVE-NEXT: ptrue p0.s, vl2
432-
; SVE-NEXT: fcvtxn v1.2s, v1.2d
433-
; SVE-NEXT: fcvtxn v0.2s, v0.2d
434-
; SVE-NEXT: splice z0.s, p0, z0.s, z1.s
435-
; SVE-NEXT: ptrue p0.s
436-
; SVE-NEXT: ldr d1, [x0]
437-
; SVE-NEXT: and z1.h, z1.h, #0x7fff
438-
; SVE-NEXT: fcvt z0.h, p0/m, z0.s
439-
; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
430+
; SVE-NEXT: sub sp, sp, #16
431+
; SVE-NEXT: .cfi_def_cfa_offset 16
432+
; SVE-NEXT: ldp q1, q0, [x1]
433+
; SVE-NEXT: ldr d4, [x0]
434+
; SVE-NEXT: and z4.h, z4.h, #0x7fff
435+
; SVE-NEXT: mov z2.d, z0.d[1]
436+
; SVE-NEXT: mov z3.d, z1.d[1]
437+
; SVE-NEXT: fcvt h0, d0
438+
; SVE-NEXT: fcvt h1, d1
439+
; SVE-NEXT: fcvt h2, d2
440+
; SVE-NEXT: fcvt h3, d3
441+
; SVE-NEXT: str h0, [sp, #12]
442+
; SVE-NEXT: str h1, [sp, #8]
443+
; SVE-NEXT: str h2, [sp, #14]
444+
; SVE-NEXT: str h3, [sp, #10]
445+
; SVE-NEXT: ldr d0, [sp, #8]
440446
; SVE-NEXT: and z0.h, z0.h, #0x8000
441-
; SVE-NEXT: orr z0.d, z1.d, z0.d
447+
; SVE-NEXT: orr z0.d, z4.d, z0.d
442448
; SVE-NEXT: str d0, [x0]
449+
; SVE-NEXT: add sp, sp, #16
443450
; SVE-NEXT: ret
444451
;
445452
; SVE2-LABEL: test_copysign_v4f16_v4f64:
446453
; SVE2: // %bb.0:
447-
; SVE2-NEXT: ldp q0, q1, [x1]
448-
; SVE2-NEXT: ptrue p0.s, vl2
449-
; SVE2-NEXT: ldr d2, [x0]
450-
; SVE2-NEXT: fcvtxn v1.2s, v1.2d
451-
; SVE2-NEXT: fcvtxn v0.2s, v0.2d
452-
; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
453-
; SVE2-NEXT: ptrue p0.s
454-
; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
455-
; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
456-
; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
457-
; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
458-
; SVE2-NEXT: str d2, [x0]
454+
; SVE2-NEXT: sub sp, sp, #16
455+
; SVE2-NEXT: .cfi_def_cfa_offset 16
456+
; SVE2-NEXT: ldp q2, q1, [x1]
457+
; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
458+
; SVE2-NEXT: ldr d5, [x0]
459+
; SVE2-NEXT: mov z3.d, z1.d[1]
460+
; SVE2-NEXT: mov z4.d, z2.d[1]
461+
; SVE2-NEXT: fcvt h1, d1
462+
; SVE2-NEXT: fcvt h2, d2
463+
; SVE2-NEXT: fcvt h3, d3
464+
; SVE2-NEXT: fcvt h4, d4
465+
; SVE2-NEXT: str h1, [sp, #12]
466+
; SVE2-NEXT: str h2, [sp, #8]
467+
; SVE2-NEXT: str h3, [sp, #14]
468+
; SVE2-NEXT: str h4, [sp, #10]
469+
; SVE2-NEXT: ldr d1, [sp, #8]
470+
; SVE2-NEXT: bsl z5.d, z5.d, z1.d, z0.d
471+
; SVE2-NEXT: str d5, [x0]
472+
; SVE2-NEXT: add sp, sp, #16
459473
; SVE2-NEXT: ret
460474
%a = load <4 x half>, ptr %ap
461475
%b = load <4 x double>, ptr %bp

0 commit comments

Comments
 (0)