Skip to content

Commit 7bb87d5

Browse files
authored
[AArch64][GlobalISel] Take abs scalar codegen closer to SDAG (#84886)
This patch improves codegen for the scalar (< 128 bits) version of the llvm.abs intrinsic by using the existing non-XOR based lowering. This takes the generated code closer to SDAG. Codegen with GISel for types of 128 bits or wider is not very good with this method, so the lowering is not applied to them.
1 parent a5d7fc1 commit 7bb87d5

File tree

5 files changed

+51
-26
lines changed

5 files changed

+51
-26
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ class LegalizerHelper {
429429
LegalizeResult lowerDIVREM(MachineInstr &MI);
430430
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
431431
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
432+
LegalizeResult lowerAbsToCNeg(MachineInstr &MI);
432433
LegalizeResult lowerVectorReduction(MachineInstr &MI);
433434
LegalizeResult lowerMemcpyInline(MachineInstr &MI);
434435
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8215,9 +8215,22 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
82158215
// %res = G_SMAX %a, %v2
82168216
Register SrcReg = MI.getOperand(1).getReg();
82178217
LLT Ty = MRI.getType(SrcReg);
8218+
auto Zero = MIRBuilder.buildConstant(Ty, 0);
8219+
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8220+
MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8221+
MI.eraseFromParent();
8222+
return Legalized;
8223+
}
8224+
8225+
LegalizerHelper::LegalizeResult
8226+
LegalizerHelper::lowerAbsToCNeg(MachineInstr &MI) {
8227+
Register SrcReg = MI.getOperand(1).getReg();
8228+
Register DestReg = MI.getOperand(0).getReg();
8229+
LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
82188230
auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
82198231
auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8220-
MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8232+
auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8233+
MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
82218234
MI.eraseFromParent();
82228235
return Legalized;
82238236
}

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,11 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10121012
ABSActions
10131013
.legalFor({s32, s64});
10141014
ABSActions.legalFor(PackedVectorAllTypeList)
1015+
.customIf([=](const LegalityQuery &Q) {
1016+
// TODO: Fix suboptimal codegen for 128+ bit types.
1017+
LLT SrcTy = Q.Types[0];
1018+
return SrcTy.isScalar() && SrcTy.getSizeInBits() < 128;
1019+
})
10151020
.widenScalarIf(
10161021
[=](const LegalityQuery &Query) { return Query.Types[0] == v4s8; },
10171022
[=](const LegalityQuery &Query) { return std::make_pair(0, v4s16); })
@@ -1264,6 +1269,8 @@ bool AArch64LegalizerInfo::legalizeCustom(
12641269
return legalizeDynStackAlloc(MI, Helper);
12651270
case TargetOpcode::G_PREFETCH:
12661271
return legalizePrefetch(MI, Helper);
1272+
case TargetOpcode::G_ABS:
1273+
return Helper.lowerAbsToCNeg(MI);
12671274
}
12681275

12691276
llvm_unreachable("expected switch to return");

llvm/test/CodeGen/AArch64/GlobalISel/legalize-abs.mir

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,12 @@ body: |
88
bb.0:
99
; CHECK-LABEL: name: abs_s32
1010
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
11-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
12-
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s64)
13-
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
14-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
15-
; CHECK-NEXT: $w0 = COPY [[XOR]](s32)
11+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
12+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[COPY]]
13+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s32), [[C]]
14+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
15+
; CHECK-NEXT: $w0 = COPY [[SELECT]](s32)
16+
;
1617
; CHECK-CSSC-LABEL: name: abs_s32
1718
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
1819
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s32) = G_ABS [[COPY]]
@@ -28,11 +29,12 @@ body: |
2829
bb.0:
2930
; CHECK-LABEL: name: abs_s64
3031
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
31-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63
32-
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s64)
33-
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[ASHR]]
34-
; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ADD]], [[ASHR]]
35-
; CHECK-NEXT: $x0 = COPY [[XOR]](s64)
32+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
33+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[C]], [[COPY]]
34+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(sgt), [[COPY]](s64), [[C]]
35+
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s32), [[COPY]], [[SUB]]
36+
; CHECK-NEXT: $x0 = COPY [[SELECT]](s64)
37+
;
3638
; CHECK-CSSC-LABEL: name: abs_s64
3739
; CHECK-CSSC: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
3840
; CHECK-CSSC-NEXT: [[ABS:%[0-9]+]]:_(s64) = G_ABS [[COPY]]
@@ -55,6 +57,7 @@ body: |
5557
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s16>) = G_ABS [[COPY]]
5658
; CHECK-NEXT: $d0 = COPY [[ABS]](<4 x s16>)
5759
; CHECK-NEXT: RET_ReallyLR implicit $d0
60+
;
5861
; CHECK-CSSC-LABEL: name: abs_v4s16
5962
; CHECK-CSSC: liveins: $d0
6063
; CHECK-CSSC-NEXT: {{ $}}
@@ -82,6 +85,7 @@ body: |
8285
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s16>) = G_ABS [[COPY]]
8386
; CHECK-NEXT: $q0 = COPY [[ABS]](<8 x s16>)
8487
; CHECK-NEXT: RET_ReallyLR implicit $q0
88+
;
8589
; CHECK-CSSC-LABEL: name: abs_v8s16
8690
; CHECK-CSSC: liveins: $q0
8791
; CHECK-CSSC-NEXT: {{ $}}
@@ -109,6 +113,7 @@ body: |
109113
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<2 x s32>) = G_ABS [[COPY]]
110114
; CHECK-NEXT: $d0 = COPY [[ABS]](<2 x s32>)
111115
; CHECK-NEXT: RET_ReallyLR implicit $d0
116+
;
112117
; CHECK-CSSC-LABEL: name: abs_v2s32
113118
; CHECK-CSSC: liveins: $d0
114119
; CHECK-CSSC-NEXT: {{ $}}
@@ -136,6 +141,7 @@ body: |
136141
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<4 x s32>) = G_ABS [[COPY]]
137142
; CHECK-NEXT: $q0 = COPY [[ABS]](<4 x s32>)
138143
; CHECK-NEXT: RET_ReallyLR implicit $q0
144+
;
139145
; CHECK-CSSC-LABEL: name: abs_v4s32
140146
; CHECK-CSSC: liveins: $q0
141147
; CHECK-CSSC-NEXT: {{ $}}
@@ -163,6 +169,7 @@ body: |
163169
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<8 x s8>) = G_ABS [[COPY]]
164170
; CHECK-NEXT: $d0 = COPY [[ABS]](<8 x s8>)
165171
; CHECK-NEXT: RET_ReallyLR implicit $d0
172+
;
166173
; CHECK-CSSC-LABEL: name: abs_v4s8
167174
; CHECK-CSSC: liveins: $d0
168175
; CHECK-CSSC-NEXT: {{ $}}
@@ -190,6 +197,7 @@ body: |
190197
; CHECK-NEXT: [[ABS:%[0-9]+]]:_(<16 x s8>) = G_ABS [[COPY]]
191198
; CHECK-NEXT: $q0 = COPY [[ABS]](<16 x s8>)
192199
; CHECK-NEXT: RET_ReallyLR implicit $q0
200+
;
193201
; CHECK-CSSC-LABEL: name: abs_v16s8
194202
; CHECK-CSSC: liveins: $q0
195203
; CHECK-CSSC-NEXT: {{ $}}

llvm/test/CodeGen/AArch64/abs.ll

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,8 @@ define i8 @abs_i8(i8 %a){
1515
; CHECK-GI-LABEL: abs_i8:
1616
; CHECK-GI: // %bb.0: // %entry
1717
; CHECK-GI-NEXT: sxtb w8, w0
18-
; CHECK-GI-NEXT: asr w8, w8, #7
19-
; CHECK-GI-NEXT: add w9, w0, w8
20-
; CHECK-GI-NEXT: eor w0, w9, w8
18+
; CHECK-GI-NEXT: cmp w8, #0
19+
; CHECK-GI-NEXT: cneg w0, w0, le
2120
; CHECK-GI-NEXT: ret
2221
entry:
2322
%res = call i8 @llvm.abs.i8(i8 %a, i1 0)
@@ -36,9 +35,8 @@ define i16 @abs_i16(i16 %a){
3635
; CHECK-GI-LABEL: abs_i16:
3736
; CHECK-GI: // %bb.0: // %entry
3837
; CHECK-GI-NEXT: sxth w8, w0
39-
; CHECK-GI-NEXT: asr w8, w8, #15
40-
; CHECK-GI-NEXT: add w9, w0, w8
41-
; CHECK-GI-NEXT: eor w0, w9, w8
38+
; CHECK-GI-NEXT: cmp w8, #0
39+
; CHECK-GI-NEXT: cneg w0, w0, le
4240
; CHECK-GI-NEXT: ret
4341
entry:
4442
%res = call i16 @llvm.abs.i16(i16 %a, i1 0)
@@ -55,9 +53,8 @@ define i32 @abs_i32(i32 %a){
5553
;
5654
; CHECK-GI-LABEL: abs_i32:
5755
; CHECK-GI: // %bb.0: // %entry
58-
; CHECK-GI-NEXT: asr w8, w0, #31
59-
; CHECK-GI-NEXT: add w9, w0, w8
60-
; CHECK-GI-NEXT: eor w0, w9, w8
56+
; CHECK-GI-NEXT: cmp w0, #0
57+
; CHECK-GI-NEXT: cneg w0, w0, le
6158
; CHECK-GI-NEXT: ret
6259
entry:
6360
%res = call i32 @llvm.abs.i32(i32 %a, i1 0)
@@ -74,9 +71,8 @@ define i64 @abs_i64(i64 %a){
7471
;
7572
; CHECK-GI-LABEL: abs_i64:
7673
; CHECK-GI: // %bb.0: // %entry
77-
; CHECK-GI-NEXT: asr x8, x0, #63
78-
; CHECK-GI-NEXT: add x9, x0, x8
79-
; CHECK-GI-NEXT: eor x0, x9, x8
74+
; CHECK-GI-NEXT: cmp x0, #0
75+
; CHECK-GI-NEXT: cneg x0, x0, le
8076
; CHECK-GI-NEXT: ret
8177
entry:
8278
%res = call i64 @llvm.abs.i64(i64 %a, i1 0)
@@ -248,9 +244,9 @@ define <1 x i32> @abs_v1i32(<1 x i32> %a){
248244
; CHECK-GI-LABEL: abs_v1i32:
249245
; CHECK-GI: // %bb.0: // %entry
250246
; CHECK-GI-NEXT: fmov w8, s0
251-
; CHECK-GI-NEXT: asr w9, w8, #31
252-
; CHECK-GI-NEXT: add w8, w8, w9
253-
; CHECK-GI-NEXT: eor w8, w8, w9
247+
; CHECK-GI-NEXT: fmov w9, s0
248+
; CHECK-GI-NEXT: cmp w8, #0
249+
; CHECK-GI-NEXT: cneg w8, w9, le
254250
; CHECK-GI-NEXT: fmov s0, w8
255251
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
256252
; CHECK-GI-NEXT: ret

0 commit comments

Comments
 (0)