Skip to content

Commit 7345753

Browse files
committed
[AArch64][GlobalISel] Use custom legalization for G_TRUNC for v8i8 vectors.
Truncating to v8i8 is a case where we want to split the source but also generate intermediate truncates to reduce the size of the source vector before truncating down to v8i8. This implements the same strategy as SelectionDAG, but I'm not certain where, if anywhere, in generic code it should live. Use it for legalization of v8s8 = G_ICMP v8s32. Differential Revision: https://reviews.llvm.org/D88191
1 parent 4c265ce commit 7345753

File tree

3 files changed

+140
-2
lines changed

3 files changed

+140
-2
lines changed

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/IR/DerivedTypes.h"
2525
#include "llvm/IR/Type.h"
2626
#include <initializer_list>
27+
#include "llvm/Support/MathExtras.h"
2728

2829
#define DEBUG_TYPE "aarch64-legalinfo"
2930

@@ -373,7 +374,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
373374
.minScalarOrEltIf(
374375
[=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
375376
s64)
376-
.widenScalarOrEltToNextPow2(1);
377+
.widenScalarOrEltToNextPow2(1)
378+
.clampNumElements(0, v2s32, v4s32);
377379

378380
getActionDefinitionsBuilder(G_FCMP)
379381
.legalFor({{s32, s32}, {s32, s64}})
@@ -412,7 +414,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
412414
.legalIf(ExtLegalFunc)
413415
.clampScalar(0, s64, s64); // Just for s128, others are handled above.
414416

415-
getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
417+
getActionDefinitionsBuilder(G_TRUNC)
418+
.minScalarOrEltIf(
419+
[=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
420+
0, s8)
421+
.customIf([=](const LegalityQuery &Query) {
422+
LLT DstTy = Query.Types[0];
423+
LLT SrcTy = Query.Types[1];
424+
return DstTy == v8s8 && SrcTy.getSizeInBits() > 128;
425+
})
426+
.alwaysLegal();
416427

417428
getActionDefinitionsBuilder(G_SEXT_INREG).legalFor({s32, s64}).lower();
418429

@@ -709,11 +720,60 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
709720
return legalizeShlAshrLshr(MI, MRI, MIRBuilder, Observer);
710721
case TargetOpcode::G_GLOBAL_VALUE:
711722
return legalizeSmallCMGlobalValue(MI, MRI, MIRBuilder, Observer);
723+
case TargetOpcode::G_TRUNC:
724+
return legalizeVectorTrunc(MI, Helper);
712725
}
713726

714727
llvm_unreachable("expected switch to return");
715728
}
716729

730+
static void extractParts(Register Reg, MachineRegisterInfo &MRI,
731+
MachineIRBuilder &MIRBuilder, LLT Ty, int NumParts,
732+
SmallVectorImpl<Register> &VRegs) {
733+
for (int I = 0; I < NumParts; ++I)
734+
VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
735+
MIRBuilder.buildUnmerge(VRegs, Reg);
736+
}
737+
738+
bool AArch64LegalizerInfo::legalizeVectorTrunc(
739+
MachineInstr &MI, LegalizerHelper &Helper) const {
740+
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
741+
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
742+
// Similar to how operand splitting is done in SelectiondDAG, we can handle
743+
// %res(v8s8) = G_TRUNC %in(v8s32) by generating:
744+
// %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
745+
// %lo16(<4 x s16>) = G_TRUNC %inlo
746+
// %hi16(<4 x s16>) = G_TRUNC %inhi
747+
// %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
748+
// %res(<8 x s8>) = G_TRUNC %in16
749+
750+
Register DstReg = MI.getOperand(0).getReg();
751+
Register SrcReg = MI.getOperand(1).getReg();
752+
LLT DstTy = MRI.getType(DstReg);
753+
LLT SrcTy = MRI.getType(SrcReg);
754+
assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
755+
isPowerOf2_32(SrcTy.getSizeInBits()));
756+
757+
// Split input type.
758+
LLT SplitSrcTy = SrcTy.changeNumElements(SrcTy.getNumElements() / 2);
759+
// First, split the source into two smaller vectors.
760+
SmallVector<Register, 2> SplitSrcs;
761+
extractParts(SrcReg, MRI, MIRBuilder, SplitSrcTy, 2, SplitSrcs);
762+
763+
// Truncate the splits into intermediate narrower elements.
764+
LLT InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
765+
for (unsigned I = 0; I < SplitSrcs.size(); ++I)
766+
SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
767+
768+
auto Concat = MIRBuilder.buildConcatVectors(
769+
DstTy.changeElementSize(DstTy.getScalarSizeInBits() * 2), SplitSrcs);
770+
771+
Helper.Observer.changingInstr(MI);
772+
MI.getOperand(1).setReg(Concat.getReg(0));
773+
Helper.Observer.changedInstr(MI);
774+
return true;
775+
}
776+
717777
bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
718778
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder,
719779
GISelChangeObserver &Observer) const {

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
1616

1717
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18+
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
1819
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
1920

2021
namespace llvm {
@@ -45,6 +46,7 @@ class AArch64LegalizerInfo : public LegalizerInfo {
4546
bool legalizeSmallCMGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
4647
MachineIRBuilder &MIRBuilder,
4748
GISelChangeObserver &Observer) const;
49+
bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
4850
const AArch64Subtarget *ST;
4951
};
5052
} // End llvm namespace.

llvm/test/CodeGen/AArch64/GlobalISel/legalize-vector-icmp.mir

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1920,3 +1920,79 @@ body: |
19201920
RET_ReallyLR implicit $d0
19211921
19221922
...
1923+
---
1924+
name: icmp_8xs1
1925+
alignment: 4
1926+
tracksRegLiveness: true
1927+
liveins:
1928+
- { reg: '$q0' }
1929+
- { reg: '$q1' }
1930+
- { reg: '$q2' }
1931+
- { reg: '$q3' }
1932+
body: |
1933+
bb.1:
1934+
liveins: $q0, $q1, $q2, $q3
1935+
1936+
; CHECK-LABEL: name: icmp_8xs1
1937+
; CHECK: liveins: $q0, $q1, $q2, $q3
1938+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
1939+
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
1940+
; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
1941+
; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
1942+
; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]]
1943+
; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]]
1944+
; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
1945+
; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
1946+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
1947+
; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
1948+
; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>)
1949+
; CHECK: RET_ReallyLR implicit $d0
1950+
%2:_(<4 x s32>) = COPY $q0
1951+
%3:_(<4 x s32>) = COPY $q1
1952+
%0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>)
1953+
%4:_(<4 x s32>) = COPY $q2
1954+
%5:_(<4 x s32>) = COPY $q3
1955+
%1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>)
1956+
%6:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s32>), %1
1957+
%7:_(<8 x s8>) = G_ANYEXT %6(<8 x s1>)
1958+
$d0 = COPY %7(<8 x s8>)
1959+
RET_ReallyLR implicit $d0
1960+
...
1961+
---
1962+
name: icmp_8xs32
1963+
alignment: 4
1964+
tracksRegLiveness: true
1965+
liveins:
1966+
- { reg: '$q0' }
1967+
- { reg: '$q1' }
1968+
- { reg: '$q2' }
1969+
- { reg: '$q3' }
1970+
body: |
1971+
bb.1:
1972+
liveins: $q0, $q1, $q2, $q3
1973+
1974+
; CHECK-LABEL: name: icmp_8xs32
1975+
; CHECK: liveins: $q0, $q1, $q2, $q3
1976+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
1977+
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
1978+
; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2
1979+
; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3
1980+
; CHECK: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY]](<4 x s32>), [[COPY2]]
1981+
; CHECK: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(eq), [[COPY1]](<4 x s32>), [[COPY3]]
1982+
; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
1983+
; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
1984+
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
1985+
; CHECK: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
1986+
; CHECK: $d0 = COPY [[TRUNC2]](<8 x s8>)
1987+
; CHECK: RET_ReallyLR implicit $d0
1988+
%2:_(<4 x s32>) = COPY $q0
1989+
%3:_(<4 x s32>) = COPY $q1
1990+
%0:_(<8 x s32>) = G_CONCAT_VECTORS %2(<4 x s32>), %3(<4 x s32>)
1991+
%4:_(<4 x s32>) = COPY $q2
1992+
%5:_(<4 x s32>) = COPY $q3
1993+
%1:_(<8 x s32>) = G_CONCAT_VECTORS %4(<4 x s32>), %5(<4 x s32>)
1994+
%6:_(<8 x s32>) = G_ICMP intpred(eq), %0(<8 x s32>), %1
1995+
%7:_(<8 x s8>) = G_TRUNC %6(<8 x s32>)
1996+
$d0 = COPY %7(<8 x s8>)
1997+
RET_ReallyLR implicit $d0
1998+
...

0 commit comments

Comments
 (0)