Skip to content

Commit 9f255d8

Browse files
authored
[AArch64][GlobalISel] Lower fp16 abs and neg without fullfp16. (#110096)
This changes the existing promote logic to a lower action, so that fp16 abs and neg can be implemented with normal integer operations. A minor change to the fneg lowering code was needed so that it handles vectors.
1 parent 9a36168 commit 9f255d8

File tree

6 files changed

+44
-106
lines changed

6 files changed

+44
-106
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4051,12 +4051,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
40514051
auto [Res, SubByReg] = MI.getFirst2Regs();
40524052
LLT Ty = MRI.getType(Res);
40534053

4054-
// TODO: Handle vector types once we are able to
4055-
// represent them.
4056-
if (Ty.isVector())
4057-
return UnableToLegalize;
4058-
auto SignMask =
4059-
MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
4054+
auto SignMask = MIRBuilder.buildConstant(
4055+
Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
40604056
MIRBuilder.buildXor(Res, SubByReg, SignMask);
40614057
MI.eraseFromParent();
40624058
return Legalized;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,11 +268,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
268268
})
269269
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
270270
.lowerIf(scalarOrEltWiderThan(0, 64))
271-
.minScalarOrElt(0, MinFPScalar)
272271
.clampNumElements(0, v4s16, v8s16)
273272
.clampNumElements(0, v2s32, v4s32)
274273
.clampNumElements(0, v2s64, v2s64)
275-
.moreElementsToNextPow2(0);
274+
.moreElementsToNextPow2(0)
275+
.lowerFor({s16, v4s16, v8s16});
276276

277277
getActionDefinitionsBuilder(G_FREM)
278278
.libcallFor({s32, s64})

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith-fp16.mir

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,9 +150,10 @@ body: |
150150
; NO-FP16: liveins: $h0
151151
; NO-FP16-NEXT: {{ $}}
152152
; NO-FP16-NEXT: %x:_(s16) = COPY $h0
153-
; NO-FP16-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %x(s16)
154-
; NO-FP16-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[FPEXT]]
155-
; NO-FP16-NEXT: %op:_(s16) = G_FPTRUNC [[FNEG]](s32)
153+
; NO-FP16-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %x(s16)
154+
; NO-FP16-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768
155+
; NO-FP16-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[C]]
156+
; NO-FP16-NEXT: %op:_(s16) = G_TRUNC [[XOR]](s32)
156157
; NO-FP16-NEXT: $h0 = COPY %op(s16)
157158
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
158159
;

llvm/test/CodeGen/AArch64/f16-instructions.ll

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,26 +1392,19 @@ define half @test_fma(half %a, half %b, half %c) #0 {
13921392
}
13931393

13941394
define half @test_fabs(half %a) #0 {
1395-
; CHECK-CVT-SD-LABEL: test_fabs:
1396-
; CHECK-CVT-SD: // %bb.0:
1397-
; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 def $s0
1398-
; CHECK-CVT-SD-NEXT: fmov w8, s0
1399-
; CHECK-CVT-SD-NEXT: and w8, w8, #0x7fff
1400-
; CHECK-CVT-SD-NEXT: fmov s0, w8
1401-
; CHECK-CVT-SD-NEXT: // kill: def $h0 killed $h0 killed $s0
1402-
; CHECK-CVT-SD-NEXT: ret
1395+
; CHECK-CVT-LABEL: test_fabs:
1396+
; CHECK-CVT: // %bb.0:
1397+
; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 def $s0
1398+
; CHECK-CVT-NEXT: fmov w8, s0
1399+
; CHECK-CVT-NEXT: and w8, w8, #0x7fff
1400+
; CHECK-CVT-NEXT: fmov s0, w8
1401+
; CHECK-CVT-NEXT: // kill: def $h0 killed $h0 killed $s0
1402+
; CHECK-CVT-NEXT: ret
14031403
;
14041404
; CHECK-FP16-LABEL: test_fabs:
14051405
; CHECK-FP16: // %bb.0:
14061406
; CHECK-FP16-NEXT: fabs h0, h0
14071407
; CHECK-FP16-NEXT: ret
1408-
;
1409-
; CHECK-CVT-GI-LABEL: test_fabs:
1410-
; CHECK-CVT-GI: // %bb.0:
1411-
; CHECK-CVT-GI-NEXT: fcvt s0, h0
1412-
; CHECK-CVT-GI-NEXT: fabs s0, s0
1413-
; CHECK-CVT-GI-NEXT: fcvt h0, s0
1414-
; CHECK-CVT-GI-NEXT: ret
14151408
%r = call half @llvm.fabs.f16(half %a)
14161409
ret half %r
14171410
}

llvm/test/CodeGen/AArch64/fabs.ll

Lines changed: 14 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
4141
;
4242
; CHECK-GI-NOFP16-LABEL: fabs_f16:
4343
; CHECK-GI-NOFP16: // %bb.0: // %entry
44-
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
45-
; CHECK-GI-NOFP16-NEXT: fabs s0, s0
46-
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
44+
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 def $s0
45+
; CHECK-GI-NOFP16-NEXT: fmov w8, s0
46+
; CHECK-GI-NOFP16-NEXT: and w8, w8, #0x7fff
47+
; CHECK-GI-NOFP16-NEXT: fmov s0, w8
48+
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
4749
; CHECK-GI-NOFP16-NEXT: ret
4850
;
4951
; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -160,22 +162,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
160162
;
161163
; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
162164
; CHECK-GI-NOFP16: // %bb.0: // %entry
163-
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
164-
; CHECK-GI-NOFP16-NEXT: mov v2.h[0], v0.h[4]
165-
; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
166-
; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5]
167-
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
168-
; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6]
169-
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0]
170-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
171-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1]
172-
; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v2.4s
173-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[2]
174-
; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
175-
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[3]
176-
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v2.h[0]
177-
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[1]
178-
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v2.h[2]
165+
; CHECK-GI-NOFP16-NEXT: mvni v1.8h, #128, lsl #8
166+
; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v1.16b
179167
; CHECK-GI-NOFP16-NEXT: ret
180168
;
181169
; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -200,9 +188,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
200188
;
201189
; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
202190
; CHECK-GI-NOFP16: // %bb.0: // %entry
203-
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
204-
; CHECK-GI-NOFP16-NEXT: fabs v0.4s, v0.4s
205-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
191+
; CHECK-GI-NOFP16-NEXT: mvni v1.4h, #128, lsl #8
192+
; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b
206193
; CHECK-GI-NOFP16-NEXT: ret
207194
;
208195
; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -227,12 +214,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
227214
;
228215
; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
229216
; CHECK-GI-NOFP16: // %bb.0: // %entry
230-
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
231-
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
232-
; CHECK-GI-NOFP16-NEXT: fabs v1.4s, v1.4s
233-
; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v0.4s
234-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
235-
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
217+
; CHECK-GI-NOFP16-NEXT: mvni v1.8h, #128, lsl #8
218+
; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v1.16b
236219
; CHECK-GI-NOFP16-NEXT: ret
237220
;
238221
; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -259,18 +242,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
259242
;
260243
; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
261244
; CHECK-GI-NOFP16: // %bb.0: // %entry
262-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
263-
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
264-
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
265-
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
266-
; CHECK-GI-NOFP16-NEXT: fabs v2.4s, v2.4s
267-
; CHECK-GI-NOFP16-NEXT: fabs v3.4s, v3.4s
268-
; CHECK-GI-NOFP16-NEXT: fabs v4.4s, v0.4s
269-
; CHECK-GI-NOFP16-NEXT: fabs v5.4s, v1.4s
270-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
271-
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
272-
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
273-
; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
245+
; CHECK-GI-NOFP16-NEXT: mvni v2.8h, #128, lsl #8
246+
; CHECK-GI-NOFP16-NEXT: and v0.16b, v0.16b, v2.16b
247+
; CHECK-GI-NOFP16-NEXT: and v1.16b, v1.16b, v2.16b
274248
; CHECK-GI-NOFP16-NEXT: ret
275249
;
276250
; CHECK-GI-FP16-LABEL: fabs_v16f16:

llvm/test/CodeGen/AArch64/fneg.ll

Lines changed: 14 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@ define half @fabs_f16(half %a) {
4141
;
4242
; CHECK-GI-NOFP16-LABEL: fabs_f16:
4343
; CHECK-GI-NOFP16: // %bb.0: // %entry
44-
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
45-
; CHECK-GI-NOFP16-NEXT: fneg s0, s0
46-
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
44+
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 def $s0
45+
; CHECK-GI-NOFP16-NEXT: fmov w8, s0
46+
; CHECK-GI-NOFP16-NEXT: eor w8, w8, #0xffff8000
47+
; CHECK-GI-NOFP16-NEXT: fmov s0, w8
48+
; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
4749
; CHECK-GI-NOFP16-NEXT: ret
4850
;
4951
; CHECK-GI-FP16-LABEL: fabs_f16:
@@ -161,22 +163,8 @@ define <7 x half> @fabs_v7f16(<7 x half> %a) {
161163
;
162164
; CHECK-GI-NOFP16-LABEL: fabs_v7f16:
163165
; CHECK-GI-NOFP16: // %bb.0: // %entry
164-
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
165-
; CHECK-GI-NOFP16-NEXT: mov v2.h[0], v0.h[4]
166-
; CHECK-GI-NOFP16-NEXT: fneg v1.4s, v1.4s
167-
; CHECK-GI-NOFP16-NEXT: mov v2.h[1], v0.h[5]
168-
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
169-
; CHECK-GI-NOFP16-NEXT: mov v2.h[2], v0.h[6]
170-
; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v1.h[0]
171-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v2.4h
172-
; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[1]
173-
; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v2.4s
174-
; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[2]
175-
; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
176-
; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v1.h[3]
177-
; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v2.h[0]
178-
; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[1]
179-
; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v2.h[2]
166+
; CHECK-GI-NOFP16-NEXT: movi v1.8h, #128, lsl #8
167+
; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v1.16b
180168
; CHECK-GI-NOFP16-NEXT: ret
181169
;
182170
; CHECK-GI-FP16-LABEL: fabs_v7f16:
@@ -202,9 +190,8 @@ define <4 x half> @fabs_v4f16(<4 x half> %a) {
202190
;
203191
; CHECK-GI-NOFP16-LABEL: fabs_v4f16:
204192
; CHECK-GI-NOFP16: // %bb.0: // %entry
205-
; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
206-
; CHECK-GI-NOFP16-NEXT: fneg v0.4s, v0.4s
207-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
193+
; CHECK-GI-NOFP16-NEXT: movi v1.4h, #128, lsl #8
194+
; CHECK-GI-NOFP16-NEXT: eor v0.8b, v0.8b, v1.8b
208195
; CHECK-GI-NOFP16-NEXT: ret
209196
;
210197
; CHECK-GI-FP16-LABEL: fabs_v4f16:
@@ -230,12 +217,8 @@ define <8 x half> @fabs_v8f16(<8 x half> %a) {
230217
;
231218
; CHECK-GI-NOFP16-LABEL: fabs_v8f16:
232219
; CHECK-GI-NOFP16: // %bb.0: // %entry
233-
; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
234-
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
235-
; CHECK-GI-NOFP16-NEXT: fneg v1.4s, v1.4s
236-
; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v0.4s
237-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v1.4s
238-
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
220+
; CHECK-GI-NOFP16-NEXT: movi v1.8h, #128, lsl #8
221+
; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v1.16b
239222
; CHECK-GI-NOFP16-NEXT: ret
240223
;
241224
; CHECK-GI-FP16-LABEL: fabs_v8f16:
@@ -263,18 +246,9 @@ define <16 x half> @fabs_v16f16(<16 x half> %a) {
263246
;
264247
; CHECK-GI-NOFP16-LABEL: fabs_v16f16:
265248
; CHECK-GI-NOFP16: // %bb.0: // %entry
266-
; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
267-
; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
268-
; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
269-
; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
270-
; CHECK-GI-NOFP16-NEXT: fneg v2.4s, v2.4s
271-
; CHECK-GI-NOFP16-NEXT: fneg v3.4s, v3.4s
272-
; CHECK-GI-NOFP16-NEXT: fneg v4.4s, v0.4s
273-
; CHECK-GI-NOFP16-NEXT: fneg v5.4s, v1.4s
274-
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
275-
; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v3.4s
276-
; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v4.4s
277-
; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v5.4s
249+
; CHECK-GI-NOFP16-NEXT: movi v2.8h, #128, lsl #8
250+
; CHECK-GI-NOFP16-NEXT: eor v0.16b, v0.16b, v2.16b
251+
; CHECK-GI-NOFP16-NEXT: eor v1.16b, v1.16b, v2.16b
278252
; CHECK-GI-NOFP16-NEXT: ret
279253
;
280254
; CHECK-GI-FP16-LABEL: fabs_v16f16:

0 commit comments

Comments
 (0)