Skip to content

Commit b85fd55

Browse files
committed
Apply suggestions:
- Support vectors in lowerFAbs
- Simplify legality predicates
1 parent b8e3193 commit b85fd55

File tree

3 files changed

+104
-8
lines changed

3 files changed

+104
-8
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8770,9 +8770,10 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFAbs(MachineInstr &MI) {
87708770
LLT Ty = MRI.getType(DstReg);
87718771

87728772
// Reset sign bit
8773-
MIRBuilder.buildAnd(DstReg, SrcReg,
8774-
MIRBuilder.buildConstant(
8775-
Ty, APInt::getSignedMaxValue(Ty.getSizeInBits())));
8773+
MIRBuilder.buildAnd(
8774+
DstReg, SrcReg,
8775+
MIRBuilder.buildConstant(
8776+
Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
87768777

87778778
MI.eraseFromParent();
87788779
return Legalized;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
8989
const bool HasFP16 = ST.hasFullFP16();
9090
const LLT &MinFPScalar = HasFP16 ? s16 : s32;
9191

92+
// A legality predicate that returns true if the subtarget has FP16 support.
93+
// To be used in combination with other predicates, e.g.:
94+
// .legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
95+
const auto hasFP16 = [=]() -> LegalityPredicate {
96+
return [=](const LegalityQuery &) { return HasFP16; };
97+
};
98+
9299
const bool HasCSSC = ST.hasCSSC();
93100
const bool HasRCPC3 = ST.hasRCPC3();
94101

@@ -260,12 +267,12 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
260267

261268
getActionDefinitionsBuilder(G_FABS)
262269
.legalFor({MinFPScalar, s32, s64, v2s32, v4s32, v2s64})
263-
.legalIf([=](const LegalityQuery &Query) {
264-
const auto &Ty = Query.Types[0];
265-
return (Ty == v8s16 || Ty == v4s16) && HasFP16;
266-
})
267-
.lowerFor({s128})
270+
.legalIf(all(hasFP16(), typeInSet(0, {v8s16, v4s16})))
271+
// TODO: Lower supports 128-bit types, but the G_AND generated by Lower does
272+
// not support them yet.
273+
// Once it does, the scalarizeIf below can be removed.
268274
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
275+
.lowerIf(scalarOrEltWiderThan(0, 64))
269276
.minScalarOrElt(0, MinFPScalar)
270277
.clampNumElements(0, v4s16, v8s16)
271278
.clampNumElements(0, v2s32, v4s32)

llvm/test/CodeGen/AArch64/fabs-fp128.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,5 +78,93 @@ entry:
7878
ret <2 x fp128> %c
7979
}
8080

81+
define <3 x fp128> @fabs_v3f128(<3 x fp128> %a) {
82+
; CHECK-SD-LABEL: fabs_v3f128:
83+
; CHECK-SD: // %bb.0: // %entry
84+
; CHECK-SD-NEXT: stp q0, q1, [sp, #-48]!
85+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
86+
; CHECK-SD-NEXT: ldrb w8, [sp, #15]
87+
; CHECK-SD-NEXT: str q2, [sp, #32]
88+
; CHECK-SD-NEXT: and w8, w8, #0x7f
89+
; CHECK-SD-NEXT: strb w8, [sp, #15]
90+
; CHECK-SD-NEXT: ldrb w8, [sp, #31]
91+
; CHECK-SD-NEXT: and w8, w8, #0x7f
92+
; CHECK-SD-NEXT: strb w8, [sp, #31]
93+
; CHECK-SD-NEXT: ldrb w8, [sp, #47]
94+
; CHECK-SD-NEXT: ldp q0, q1, [sp]
95+
; CHECK-SD-NEXT: and w8, w8, #0x7f
96+
; CHECK-SD-NEXT: strb w8, [sp, #47]
97+
; CHECK-SD-NEXT: ldr q2, [sp, #32]
98+
; CHECK-SD-NEXT: add sp, sp, #48
99+
; CHECK-SD-NEXT: ret
100+
;
101+
; CHECK-GI-LABEL: fabs_v3f128:
102+
; CHECK-GI: // %bb.0: // %entry
103+
; CHECK-GI-NEXT: mov x8, v0.d[1]
104+
; CHECK-GI-NEXT: mov x9, v1.d[1]
105+
; CHECK-GI-NEXT: mov x10, v2.d[1]
106+
; CHECK-GI-NEXT: mov v0.d[0], v0.d[0]
107+
; CHECK-GI-NEXT: mov v1.d[0], v1.d[0]
108+
; CHECK-GI-NEXT: mov v2.d[0], v2.d[0]
109+
; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
110+
; CHECK-GI-NEXT: and x9, x9, #0x7fffffffffffffff
111+
; CHECK-GI-NEXT: and x10, x10, #0x7fffffffffffffff
112+
; CHECK-GI-NEXT: mov v0.d[1], x8
113+
; CHECK-GI-NEXT: mov v1.d[1], x9
114+
; CHECK-GI-NEXT: mov v2.d[1], x10
115+
; CHECK-GI-NEXT: ret
116+
entry:
117+
%c = call <3 x fp128> @llvm.fabs.v3f128(<3 x fp128> %a)
118+
ret <3 x fp128> %c
119+
}
120+
121+
define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) {
122+
; CHECK-SD-LABEL: fabs_v4f128:
123+
; CHECK-SD: // %bb.0: // %entry
124+
; CHECK-SD-NEXT: stp q0, q1, [sp, #-64]!
125+
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
126+
; CHECK-SD-NEXT: ldrb w8, [sp, #15]
127+
; CHECK-SD-NEXT: stp q2, q3, [sp, #32]
128+
; CHECK-SD-NEXT: and w8, w8, #0x7f
129+
; CHECK-SD-NEXT: strb w8, [sp, #15]
130+
; CHECK-SD-NEXT: ldrb w8, [sp, #31]
131+
; CHECK-SD-NEXT: and w8, w8, #0x7f
132+
; CHECK-SD-NEXT: strb w8, [sp, #31]
133+
; CHECK-SD-NEXT: ldrb w8, [sp, #47]
134+
; CHECK-SD-NEXT: ldp q0, q1, [sp]
135+
; CHECK-SD-NEXT: and w8, w8, #0x7f
136+
; CHECK-SD-NEXT: strb w8, [sp, #47]
137+
; CHECK-SD-NEXT: ldrb w8, [sp, #63]
138+
; CHECK-SD-NEXT: and w8, w8, #0x7f
139+
; CHECK-SD-NEXT: strb w8, [sp, #63]
140+
; CHECK-SD-NEXT: ldp q2, q3, [sp, #32]
141+
; CHECK-SD-NEXT: add sp, sp, #64
142+
; CHECK-SD-NEXT: ret
143+
;
144+
; CHECK-GI-LABEL: fabs_v4f128:
145+
; CHECK-GI: // %bb.0: // %entry
146+
; CHECK-GI-NEXT: mov x8, v0.d[1]
147+
; CHECK-GI-NEXT: mov v7.d[0], v0.d[0]
148+
; CHECK-GI-NEXT: mov x9, v1.d[1]
149+
; CHECK-GI-NEXT: mov x10, v2.d[1]
150+
; CHECK-GI-NEXT: mov x11, v3.d[1]
151+
; CHECK-GI-NEXT: mov v1.d[0], v1.d[0]
152+
; CHECK-GI-NEXT: mov v2.d[0], v2.d[0]
153+
; CHECK-GI-NEXT: mov v3.d[0], v3.d[0]
154+
; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff
155+
; CHECK-GI-NEXT: mov v7.d[1], x8
156+
; CHECK-GI-NEXT: and x8, x9, #0x7fffffffffffffff
157+
; CHECK-GI-NEXT: and x9, x10, #0x7fffffffffffffff
158+
; CHECK-GI-NEXT: and x10, x11, #0x7fffffffffffffff
159+
; CHECK-GI-NEXT: mov v1.d[1], x8
160+
; CHECK-GI-NEXT: mov v2.d[1], x9
161+
; CHECK-GI-NEXT: mov v3.d[1], x10
162+
; CHECK-GI-NEXT: mov v0.16b, v7.16b
163+
; CHECK-GI-NEXT: ret
164+
entry:
165+
%c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a)
166+
ret <4 x fp128> %c
167+
}
168+
81169
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
82170
; CHECK: {{.*}}

0 commit comments

Comments
 (0)