Skip to content

Commit 2da7057

Browse files
committed
[AArch64][GISel] Scalarize fp128 fadd/fsub/fmul/etc.
Like other fp128/i128 vectors, we scalarize these operations to allow them to be libcalled.
1 parent 1ed65fe commit 2da7057

File tree

2 files changed

+186
-95
lines changed

2 files changed

+186
-95
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
239239
{G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
240240
.legalFor({{s32, s32}, {s64, s32}})
241241
.clampScalar(0, s32, s64)
242-
.clampScalar(1, s32, s64)
242+
.clampScalar(1, s32, s64)
243243
.widenScalarToNextPow2(0);
244244

245245
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
@@ -253,6 +253,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
253253
return (Ty == v8s16 || Ty == v4s16) && HasFP16;
254254
})
255255
.libcallFor({s128})
256+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
256257
.minScalarOrElt(0, MinFPScalar)
257258
.clampNumElements(0, v4s16, v8s16)
258259
.clampNumElements(0, v2s32, v4s32)

llvm/test/CodeGen/AArch64/arm64-fp128.ll

Lines changed: 184 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,6 @@
44

55
; CHECK-GI: warning: Instruction selection used fallback path for test_neg_sub
66
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_neg
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_add
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_sub
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_mul
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_div
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_neg_sub
127
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for vec_neg
138

149
define fp128 @test_add(fp128 %lhs, fp128 %rhs) {
@@ -426,89 +421,161 @@ define fp128 @test_neg(fp128 %in) {
426421

427422

428423
define <2 x fp128> @vec_add(<2 x fp128> %lhs, <2 x fp128> %rhs) {
429-
; CHECK-LABEL: vec_add:
430-
; CHECK: // %bb.0:
431-
; CHECK-NEXT: sub sp, sp, #64
432-
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
433-
; CHECK-NEXT: .cfi_def_cfa_offset 64
434-
; CHECK-NEXT: .cfi_offset w30, -16
435-
; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
436-
; CHECK-NEXT: mov v1.16b, v2.16b
437-
; CHECK-NEXT: bl __addtf3
438-
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
439-
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
440-
; CHECK-NEXT: bl __addtf3
441-
; CHECK-NEXT: mov v1.16b, v0.16b
442-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
443-
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
444-
; CHECK-NEXT: add sp, sp, #64
445-
; CHECK-NEXT: ret
424+
; CHECK-SD-LABEL: vec_add:
425+
; CHECK-SD: // %bb.0:
426+
; CHECK-SD-NEXT: sub sp, sp, #64
427+
; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
428+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
429+
; CHECK-SD-NEXT: .cfi_offset w30, -16
430+
; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
431+
; CHECK-SD-NEXT: mov v1.16b, v2.16b
432+
; CHECK-SD-NEXT: bl __addtf3
433+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
434+
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
435+
; CHECK-SD-NEXT: bl __addtf3
436+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
437+
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
438+
; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
439+
; CHECK-SD-NEXT: add sp, sp, #64
440+
; CHECK-SD-NEXT: ret
441+
;
442+
; CHECK-GI-LABEL: vec_add:
443+
; CHECK-GI: // %bb.0:
444+
; CHECK-GI-NEXT: sub sp, sp, #64
445+
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
446+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
447+
; CHECK-GI-NEXT: .cfi_offset w30, -16
448+
; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
449+
; CHECK-GI-NEXT: mov v1.16b, v2.16b
450+
; CHECK-GI-NEXT: bl __addtf3
451+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
452+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
453+
; CHECK-GI-NEXT: bl __addtf3
454+
; CHECK-GI-NEXT: mov v1.16b, v0.16b
455+
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
456+
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
457+
; CHECK-GI-NEXT: add sp, sp, #64
458+
; CHECK-GI-NEXT: ret
446459
%val = fadd <2 x fp128> %lhs, %rhs
447460
ret <2 x fp128> %val
448461
}
449462

450463
define <2 x fp128> @vec_sub(<2 x fp128> %lhs, <2 x fp128> %rhs) {
451-
; CHECK-LABEL: vec_sub:
452-
; CHECK: // %bb.0:
453-
; CHECK-NEXT: sub sp, sp, #64
454-
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
455-
; CHECK-NEXT: .cfi_def_cfa_offset 64
456-
; CHECK-NEXT: .cfi_offset w30, -16
457-
; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
458-
; CHECK-NEXT: mov v1.16b, v2.16b
459-
; CHECK-NEXT: bl __subtf3
460-
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
461-
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
462-
; CHECK-NEXT: bl __subtf3
463-
; CHECK-NEXT: mov v1.16b, v0.16b
464-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
465-
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
466-
; CHECK-NEXT: add sp, sp, #64
467-
; CHECK-NEXT: ret
464+
; CHECK-SD-LABEL: vec_sub:
465+
; CHECK-SD: // %bb.0:
466+
; CHECK-SD-NEXT: sub sp, sp, #64
467+
; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
468+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
469+
; CHECK-SD-NEXT: .cfi_offset w30, -16
470+
; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
471+
; CHECK-SD-NEXT: mov v1.16b, v2.16b
472+
; CHECK-SD-NEXT: bl __subtf3
473+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
474+
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
475+
; CHECK-SD-NEXT: bl __subtf3
476+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
477+
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
478+
; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
479+
; CHECK-SD-NEXT: add sp, sp, #64
480+
; CHECK-SD-NEXT: ret
481+
;
482+
; CHECK-GI-LABEL: vec_sub:
483+
; CHECK-GI: // %bb.0:
484+
; CHECK-GI-NEXT: sub sp, sp, #64
485+
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
486+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
487+
; CHECK-GI-NEXT: .cfi_offset w30, -16
488+
; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
489+
; CHECK-GI-NEXT: mov v1.16b, v2.16b
490+
; CHECK-GI-NEXT: bl __subtf3
491+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
492+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
493+
; CHECK-GI-NEXT: bl __subtf3
494+
; CHECK-GI-NEXT: mov v1.16b, v0.16b
495+
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
496+
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
497+
; CHECK-GI-NEXT: add sp, sp, #64
498+
; CHECK-GI-NEXT: ret
468499
%val = fsub <2 x fp128> %lhs, %rhs
469500
ret <2 x fp128> %val
470501
}
471502

472503
define <2 x fp128> @vec_mul(<2 x fp128> %lhs, <2 x fp128> %rhs) {
473-
; CHECK-LABEL: vec_mul:
474-
; CHECK: // %bb.0:
475-
; CHECK-NEXT: sub sp, sp, #64
476-
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
477-
; CHECK-NEXT: .cfi_def_cfa_offset 64
478-
; CHECK-NEXT: .cfi_offset w30, -16
479-
; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
480-
; CHECK-NEXT: mov v1.16b, v2.16b
481-
; CHECK-NEXT: bl __multf3
482-
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
483-
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
484-
; CHECK-NEXT: bl __multf3
485-
; CHECK-NEXT: mov v1.16b, v0.16b
486-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
487-
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
488-
; CHECK-NEXT: add sp, sp, #64
489-
; CHECK-NEXT: ret
504+
; CHECK-SD-LABEL: vec_mul:
505+
; CHECK-SD: // %bb.0:
506+
; CHECK-SD-NEXT: sub sp, sp, #64
507+
; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
508+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
509+
; CHECK-SD-NEXT: .cfi_offset w30, -16
510+
; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
511+
; CHECK-SD-NEXT: mov v1.16b, v2.16b
512+
; CHECK-SD-NEXT: bl __multf3
513+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
514+
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
515+
; CHECK-SD-NEXT: bl __multf3
516+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
517+
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
518+
; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
519+
; CHECK-SD-NEXT: add sp, sp, #64
520+
; CHECK-SD-NEXT: ret
521+
;
522+
; CHECK-GI-LABEL: vec_mul:
523+
; CHECK-GI: // %bb.0:
524+
; CHECK-GI-NEXT: sub sp, sp, #64
525+
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
526+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
527+
; CHECK-GI-NEXT: .cfi_offset w30, -16
528+
; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
529+
; CHECK-GI-NEXT: mov v1.16b, v2.16b
530+
; CHECK-GI-NEXT: bl __multf3
531+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
532+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
533+
; CHECK-GI-NEXT: bl __multf3
534+
; CHECK-GI-NEXT: mov v1.16b, v0.16b
535+
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
536+
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
537+
; CHECK-GI-NEXT: add sp, sp, #64
538+
; CHECK-GI-NEXT: ret
490539
%val = fmul <2 x fp128> %lhs, %rhs
491540
ret <2 x fp128> %val
492541
}
493542

494543
define <2 x fp128> @vec_div(<2 x fp128> %lhs, <2 x fp128> %rhs) {
495-
; CHECK-LABEL: vec_div:
496-
; CHECK: // %bb.0:
497-
; CHECK-NEXT: sub sp, sp, #64
498-
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
499-
; CHECK-NEXT: .cfi_def_cfa_offset 64
500-
; CHECK-NEXT: .cfi_offset w30, -16
501-
; CHECK-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
502-
; CHECK-NEXT: mov v1.16b, v2.16b
503-
; CHECK-NEXT: bl __divtf3
504-
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
505-
; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
506-
; CHECK-NEXT: bl __divtf3
507-
; CHECK-NEXT: mov v1.16b, v0.16b
508-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
509-
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
510-
; CHECK-NEXT: add sp, sp, #64
511-
; CHECK-NEXT: ret
544+
; CHECK-SD-LABEL: vec_div:
545+
; CHECK-SD: // %bb.0:
546+
; CHECK-SD-NEXT: sub sp, sp, #64
547+
; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
548+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
549+
; CHECK-SD-NEXT: .cfi_offset w30, -16
550+
; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
551+
; CHECK-SD-NEXT: mov v1.16b, v2.16b
552+
; CHECK-SD-NEXT: bl __divtf3
553+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
554+
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
555+
; CHECK-SD-NEXT: bl __divtf3
556+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
557+
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
558+
; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
559+
; CHECK-SD-NEXT: add sp, sp, #64
560+
; CHECK-SD-NEXT: ret
561+
;
562+
; CHECK-GI-LABEL: vec_div:
563+
; CHECK-GI: // %bb.0:
564+
; CHECK-GI-NEXT: sub sp, sp, #64
565+
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
566+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
567+
; CHECK-GI-NEXT: .cfi_offset w30, -16
568+
; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
569+
; CHECK-GI-NEXT: mov v1.16b, v2.16b
570+
; CHECK-GI-NEXT: bl __divtf3
571+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
572+
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
573+
; CHECK-GI-NEXT: bl __divtf3
574+
; CHECK-GI-NEXT: mov v1.16b, v0.16b
575+
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
576+
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
577+
; CHECK-GI-NEXT: add sp, sp, #64
578+
; CHECK-GI-NEXT: ret
512579
%val = fdiv <2 x fp128> %lhs, %rhs
513580
ret <2 x fp128> %val
514581
}
@@ -1381,27 +1448,50 @@ define <2 x fp128> @vec_extend_f64(<2 x double> %val) {
13811448
}
13821449

13831450
define <2 x fp128> @vec_neg_sub(<2 x fp128> %in) {
1384-
; CHECK-LABEL: vec_neg_sub:
1385-
; CHECK: // %bb.0:
1386-
; CHECK-NEXT: sub sp, sp, #64
1387-
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
1388-
; CHECK-NEXT: .cfi_def_cfa_offset 64
1389-
; CHECK-NEXT: .cfi_offset w30, -16
1390-
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1391-
; CHECK-NEXT: mov v1.16b, v0.16b
1392-
; CHECK-NEXT: adrp x8, .LCPI47_0
1393-
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
1394-
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
1395-
; CHECK-NEXT: bl __subtf3
1396-
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
1397-
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1398-
; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1399-
; CHECK-NEXT: bl __subtf3
1400-
; CHECK-NEXT: mov v1.16b, v0.16b
1401-
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
1402-
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
1403-
; CHECK-NEXT: add sp, sp, #64
1404-
; CHECK-NEXT: ret
1451+
; CHECK-SD-LABEL: vec_neg_sub:
1452+
; CHECK-SD: // %bb.0:
1453+
; CHECK-SD-NEXT: sub sp, sp, #64
1454+
; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
1455+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
1456+
; CHECK-SD-NEXT: .cfi_offset w30, -16
1457+
; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1458+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
1459+
; CHECK-SD-NEXT: adrp x8, .LCPI47_0
1460+
; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
1461+
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
1462+
; CHECK-SD-NEXT: bl __subtf3
1463+
; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
1464+
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1465+
; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1466+
; CHECK-SD-NEXT: bl __subtf3
1467+
; CHECK-SD-NEXT: mov v1.16b, v0.16b
1468+
; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
1469+
; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
1470+
; CHECK-SD-NEXT: add sp, sp, #64
1471+
; CHECK-SD-NEXT: ret
1472+
;
1473+
; CHECK-GI-LABEL: vec_neg_sub:
1474+
; CHECK-GI: // %bb.0:
1475+
; CHECK-GI-NEXT: sub sp, sp, #64
1476+
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
1477+
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
1478+
; CHECK-GI-NEXT: .cfi_offset w30, -16
1479+
; CHECK-GI-NEXT: mov v2.16b, v0.16b
1480+
; CHECK-GI-NEXT: adrp x8, .LCPI47_0
1481+
; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
1482+
; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI47_0]
1483+
; CHECK-GI-NEXT: mov v1.16b, v2.16b
1484+
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
1485+
; CHECK-GI-NEXT: bl __subtf3
1486+
; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
1487+
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
1488+
; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
1489+
; CHECK-GI-NEXT: bl __subtf3
1490+
; CHECK-GI-NEXT: mov v1.16b, v0.16b
1491+
; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
1492+
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
1493+
; CHECK-GI-NEXT: add sp, sp, #64
1494+
; CHECK-GI-NEXT: ret
14051495
%ret = fsub <2 x fp128> zeroinitializer, %in
14061496
ret <2 x fp128> %ret
14071497
}

0 commit comments

Comments
 (0)