@@ -427,35 +427,49 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
427
427
define void @test_copysign_v4f16_v4f64 (ptr %ap , ptr %bp ) {
428
428
; SVE-LABEL: test_copysign_v4f16_v4f64:
429
429
; SVE: // %bb.0:
430
- ; SVE-NEXT: ldp q0, q1, [x1]
431
- ; SVE-NEXT: ptrue p0.s, vl2
432
- ; SVE-NEXT: fcvtxn v1.2s, v1.2d
433
- ; SVE-NEXT: fcvtxn v0.2s, v0.2d
434
- ; SVE-NEXT: splice z0.s, p0, z0.s, z1.s
435
- ; SVE-NEXT: ptrue p0.s
436
- ; SVE-NEXT: ldr d1, [x0]
437
- ; SVE-NEXT: and z1.h, z1.h, #0x7fff
438
- ; SVE-NEXT: fcvt z0.h, p0/m, z0.s
439
- ; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
430
+ ; SVE-NEXT: sub sp, sp, #16
431
+ ; SVE-NEXT: .cfi_def_cfa_offset 16
432
+ ; SVE-NEXT: ldp q1, q0, [x1]
433
+ ; SVE-NEXT: ldr d4, [x0]
434
+ ; SVE-NEXT: and z4.h, z4.h, #0x7fff
435
+ ; SVE-NEXT: mov z2.d, z0.d[1]
436
+ ; SVE-NEXT: mov z3.d, z1.d[1]
437
+ ; SVE-NEXT: fcvt h0, d0
438
+ ; SVE-NEXT: fcvt h1, d1
439
+ ; SVE-NEXT: fcvt h2, d2
440
+ ; SVE-NEXT: fcvt h3, d3
441
+ ; SVE-NEXT: str h0, [sp, #12]
442
+ ; SVE-NEXT: str h1, [sp, #8]
443
+ ; SVE-NEXT: str h2, [sp, #14]
444
+ ; SVE-NEXT: str h3, [sp, #10]
445
+ ; SVE-NEXT: ldr d0, [sp, #8]
440
446
; SVE-NEXT: and z0.h, z0.h, #0x8000
441
- ; SVE-NEXT: orr z0.d, z1 .d, z0.d
447
+ ; SVE-NEXT: orr z0.d, z4 .d, z0.d
442
448
; SVE-NEXT: str d0, [x0]
449
+ ; SVE-NEXT: add sp, sp, #16
443
450
; SVE-NEXT: ret
444
451
;
445
452
; SVE2-LABEL: test_copysign_v4f16_v4f64:
446
453
; SVE2: // %bb.0:
447
- ; SVE2-NEXT: ldp q0, q1, [x1]
448
- ; SVE2-NEXT: ptrue p0.s, vl2
449
- ; SVE2-NEXT: ldr d2, [x0]
450
- ; SVE2-NEXT: fcvtxn v1.2s, v1.2d
451
- ; SVE2-NEXT: fcvtxn v0.2s, v0.2d
452
- ; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
453
- ; SVE2-NEXT: ptrue p0.s
454
- ; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
455
- ; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
456
- ; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
457
- ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
458
- ; SVE2-NEXT: str d2, [x0]
454
+ ; SVE2-NEXT: sub sp, sp, #16
455
+ ; SVE2-NEXT: .cfi_def_cfa_offset 16
456
+ ; SVE2-NEXT: ldp q2, q1, [x1]
457
+ ; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
458
+ ; SVE2-NEXT: ldr d5, [x0]
459
+ ; SVE2-NEXT: mov z3.d, z1.d[1]
460
+ ; SVE2-NEXT: mov z4.d, z2.d[1]
461
+ ; SVE2-NEXT: fcvt h1, d1
462
+ ; SVE2-NEXT: fcvt h2, d2
463
+ ; SVE2-NEXT: fcvt h3, d3
464
+ ; SVE2-NEXT: fcvt h4, d4
465
+ ; SVE2-NEXT: str h1, [sp, #12]
466
+ ; SVE2-NEXT: str h2, [sp, #8]
467
+ ; SVE2-NEXT: str h3, [sp, #14]
468
+ ; SVE2-NEXT: str h4, [sp, #10]
469
+ ; SVE2-NEXT: ldr d1, [sp, #8]
470
+ ; SVE2-NEXT: bsl z5.d, z5.d, z1.d, z0.d
471
+ ; SVE2-NEXT: str d5, [x0]
472
+ ; SVE2-NEXT: add sp, sp, #16
459
473
; SVE2-NEXT: ret
460
474
%a = load <4 x half >, ptr %ap
461
475
%b = load <4 x double >, ptr %bp
0 commit comments