Skip to content

Commit c4a8d38

Browse files
committed
[AArch64][GlobalISel] Legalize 128-bit types for FABS
- Generate an AND to clear the sign bit for s128
- Vectors are scalarized
1 parent 2e386b2 commit c4a8d38

File tree

3 files changed

+102
-41
lines changed

3 files changed

+102
-41
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -242,9 +242,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
242242
.widenScalarToNextPow2(0);
243243

244244
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
245-
G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM,
246-
G_FMAXIMUM, G_FMINIMUM, G_FCEIL, G_FFLOOR,
247-
G_FRINT, G_FNEARBYINT, G_INTRINSIC_TRUNC,
245+
G_FSQRT, G_FMAXNUM, G_FMINNUM, G_FMAXIMUM,
246+
G_FMINIMUM, G_FCEIL, G_FFLOOR, G_FRINT,
247+
G_FNEARBYINT, G_INTRINSIC_TRUNC,
248248
G_INTRINSIC_ROUND, G_INTRINSIC_ROUNDEVEN})
249249
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
250250
.legalIf([=](const LegalityQuery &Query) {
@@ -258,6 +258,20 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
258258
.clampNumElements(0, v2s64, v2s64)
259259
.moreElementsToNextPow2(0);
260260

261+
getActionDefinitionsBuilder(G_FABS)
262+
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
263+
.legalIf([=](const LegalityQuery &Query) {
264+
const auto &Ty = Query.Types[0];
265+
return (Ty == v8s16 || Ty == v4s16) && HasFP16;
266+
})
267+
.customFor({s128})
268+
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
269+
.minScalarOrElt(0, MinFPScalar)
270+
.clampNumElements(0, v4s16, v8s16)
271+
.clampNumElements(0, v2s32, v4s32)
272+
.clampNumElements(0, v2s64, v2s64)
273+
.moreElementsToNextPow2(0);
274+
261275
getActionDefinitionsBuilder(G_FREM)
262276
.libcallFor({s32, s64})
263277
.minScalar(0, s32)
@@ -1346,6 +1360,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
13461360
return legalizePrefetch(MI, Helper);
13471361
case TargetOpcode::G_ABS:
13481362
return Helper.lowerAbsToCNeg(MI);
1363+
case TargetOpcode::G_FABS:
1364+
return legalizeFABS(MI, MRI, MIRBuilder);
13491365
case TargetOpcode::G_ICMP:
13501366
return legalizeICMP(MI, MRI, MIRBuilder);
13511367
}
@@ -1406,6 +1422,25 @@ bool AArch64LegalizerInfo::legalizeFunnelShift(MachineInstr &MI,
14061422
return true;
14071423
}
14081424

1425+
/// Custom legalization for a G_FABS whose source and destination are both
/// s128: the instruction is replaced by an AND of the source with a 128-bit
/// constant that has every bit set except the sign bit.
///
/// \returns false, leaving \p MI untouched, if either register is not s128;
/// true once the replacement has been built and \p MI has been erased.
bool AArch64LegalizerInfo::legalizeFABS(MachineInstr &MI,
1426+
MachineRegisterInfo &MRI,
1427+
MachineIRBuilder &MIRBuilder) const {
1428+
// Operand 1 is read as the source, operand 0 as the destination.
Register SrcReg = MI.getOperand(1).getReg();
1429+
Register DstReg = MI.getOperand(0).getReg();
1430+
1431+
// Only the 128-bit scalar case is handled here; anything else is rejected.
constexpr LLT S128 = LLT::scalar(128);
1432+
if (MRI.getType(SrcReg) != S128 || MRI.getType(DstReg) != S128)
1433+
return false;
1434+
1435+
// The signed maximum of width 128 is 0x7fff...f, so the AND clears only the
// top (sign) bit and leaves the remaining 127 bits unchanged.
MIRBuilder.buildAnd(
1436+
DstReg, SrcReg,
1437+
MIRBuilder.buildConstant(
1438+
S128, APInt::getSignedMaxValue(128)));
1439+
1440+
// The original instruction is removed once its replacement has been built.
MI.eraseFromParent();
1441+
return true;
1442+
}
1443+
14091444
bool AArch64LegalizerInfo::legalizeICMP(MachineInstr &MI,
14101445
MachineRegisterInfo &MRI,
14111446
MachineIRBuilder &MIRBuilder) const {

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ class AArch64LegalizerInfo : public LegalizerInfo {
5050
LegalizerHelper &Helper) const;
5151
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
5252
LegalizerHelper &Helper) const;
53+
bool legalizeFABS(MachineInstr &MI, MachineRegisterInfo &MRI,
54+
MachineIRBuilder &MIRBuilder) const;
5355
bool legalizeICMP(MachineInstr &MI, MachineRegisterInfo &MRI,
5456
MachineIRBuilder &MIRBuilder) const;
5557
bool legalizeFunnelShift(MachineInstr &MI, MachineRegisterInfo &MRI,
Lines changed: 62 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,82 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2-
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc -mtriple=aarch64 -global-isel=1 -global-isel-abort=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK: warning: Instruction selection used fallback path for fabs_f128
6-
; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v1f128
7-
; CHECK-NEXT: warning: Instruction selection used fallback path for fabs_v2f128
2+
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3+
; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
84

95
define fp128 @fabs_f128(fp128 %a) {
10-
; CHECK-LABEL: fabs_f128:
11-
; CHECK: // %bb.0: // %entry
12-
; CHECK-NEXT: str q0, [sp, #-16]!
13-
; CHECK-NEXT: .cfi_def_cfa_offset 16
14-
; CHECK-NEXT: ldrb w8, [sp, #15]
15-
; CHECK-NEXT: and w8, w8, #0x7f
16-
; CHECK-NEXT: strb w8, [sp, #15]
17-
; CHECK-NEXT: ldr q0, [sp], #16
18-
; CHECK-NEXT: ret
6+
; CHECK-SD-LABEL: fabs_f128:
7+
; CHECK-SD: // %bb.0: // %entry
8+
; CHECK-SD-NEXT: str q0, [sp, #-16]!
9+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
10+
; CHECK-SD-NEXT: ldrb w8, [sp, #15]
11+
; CHECK-SD-NEXT: and w8, w8, #0x7f
12+
; CHECK-SD-NEXT: strb w8, [sp, #15]
13+
; CHECK-SD-NEXT: ldr q0, [sp], #16
14+
; CHECK-SD-NEXT: ret
15+
;
16+
; CHECK-GI-LABEL: fabs_f128:
17+
; CHECK-GI: // %bb.0: // %entry
18+
; CHECK-GI-NEXT: mov x8, v0.d[1]
19+
; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
20+
; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
21+
; CHECK-GI-NEXT: mov v0.d[1], x8
22+
; CHECK-GI-NEXT: ret
1923
entry:
2024
%c = call fp128 @llvm.fabs.f128(fp128 %a)
2125
ret fp128 %c
2226
}
2327

2428
define <1 x fp128> @fabs_v1f128(<1 x fp128> %a) {
25-
; CHECK-LABEL: fabs_v1f128:
26-
; CHECK: // %bb.0: // %entry
27-
; CHECK-NEXT: str q0, [sp, #-16]!
28-
; CHECK-NEXT: .cfi_def_cfa_offset 16
29-
; CHECK-NEXT: ldrb w8, [sp, #15]
30-
; CHECK-NEXT: and w8, w8, #0x7f
31-
; CHECK-NEXT: strb w8, [sp, #15]
32-
; CHECK-NEXT: ldr q0, [sp], #16
33-
; CHECK-NEXT: ret
29+
; CHECK-SD-LABEL: fabs_v1f128:
30+
; CHECK-SD: // %bb.0: // %entry
31+
; CHECK-SD-NEXT: str q0, [sp, #-16]!
32+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
33+
; CHECK-SD-NEXT: ldrb w8, [sp, #15]
34+
; CHECK-SD-NEXT: and w8, w8, #0x7f
35+
; CHECK-SD-NEXT: strb w8, [sp, #15]
36+
; CHECK-SD-NEXT: ldr q0, [sp], #16
37+
; CHECK-SD-NEXT: ret
38+
;
39+
; CHECK-GI-LABEL: fabs_v1f128:
40+
; CHECK-GI: // %bb.0: // %entry
41+
; CHECK-GI-NEXT: mov x8, v0.d[1]
42+
; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
43+
; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
44+
; CHECK-GI-NEXT: mov v0.d[1], x8
45+
; CHECK-GI-NEXT: ret
3446
entry:
3547
%c = call <1 x fp128> @llvm.fabs.v1f128(<1 x fp128> %a)
3648
ret <1 x fp128> %c
3749
}
3850

3951
define <2 x fp128> @fabs_v2f128(<2 x fp128> %a) {
40-
; CHECK-LABEL: fabs_v2f128:
41-
; CHECK: // %bb.0: // %entry
42-
; CHECK-NEXT: stp q0, q1, [sp, #-32]!
43-
; CHECK-NEXT: .cfi_def_cfa_offset 32
44-
; CHECK-NEXT: ldrb w8, [sp, #15]
45-
; CHECK-NEXT: and w8, w8, #0x7f
46-
; CHECK-NEXT: strb w8, [sp, #15]
47-
; CHECK-NEXT: ldrb w8, [sp, #31]
48-
; CHECK-NEXT: and w8, w8, #0x7f
49-
; CHECK-NEXT: strb w8, [sp, #31]
50-
; CHECK-NEXT: ldp q0, q1, [sp], #32
51-
; CHECK-NEXT: ret
52+
; CHECK-SD-LABEL: fabs_v2f128:
53+
; CHECK-SD: // %bb.0: // %entry
54+
; CHECK-SD-NEXT: stp q0, q1, [sp, #-32]!
55+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 32
56+
; CHECK-SD-NEXT: ldrb w8, [sp, #15]
57+
; CHECK-SD-NEXT: and w8, w8, #0x7f
58+
; CHECK-SD-NEXT: strb w8, [sp, #15]
59+
; CHECK-SD-NEXT: ldrb w8, [sp, #31]
60+
; CHECK-SD-NEXT: and w8, w8, #0x7f
61+
; CHECK-SD-NEXT: strb w8, [sp, #31]
62+
; CHECK-SD-NEXT: ldp q0, q1, [sp], #32
63+
; CHECK-SD-NEXT: ret
64+
;
65+
; CHECK-GI-LABEL: fabs_v2f128:
66+
; CHECK-GI: // %bb.0: // %entry
67+
; CHECK-GI-NEXT: mov x8, v0.d[1]
68+
; CHECK-GI-NEXT: mov x9, v1.d[1]
69+
; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
70+
; CHECK-GI-NEXT: mov v1.d[0], v1.d[0]
71+
; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
72+
; CHECK-GI-NEXT: and x9, x9, #0x7fffffffffffffff
73+
; CHECK-GI-NEXT: mov v0.d[1], x8
74+
; CHECK-GI-NEXT: mov v1.d[1], x9
75+
; CHECK-GI-NEXT: ret
5276
entry:
5377
%c = call <2 x fp128> @llvm.fabs.v2f128(<2 x fp128> %a)
5478
ret <2 x fp128> %c
5579
}
80+
5681
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
57-
; CHECK-GI: {{.*}}
58-
; CHECK-SD: {{.*}}
82+
; CHECK: {{.*}}

0 commit comments

Comments
 (0)