Skip to content

Commit a39ed08

Browse files
committed
[AArch64][GlobalISel] Improve and expand fcopysign lowering
This alters the lowering of G_FCOPYSIGN to support vector types. The general idea is that we just lower it to vector operations using and/or and a mask, which are now converted to a BIF/BIT/BSP. In the process the existing AArch64LegalizerInfo::legalizeFCopySign can be removed, relying on expanding the scalar versions to vector instead, which just needs a small adjustment to allow widening scalars to vectors. With vector immediates now supported, they are lowered to movi instructions, except for the f64 "negative zero", which was previously lowered as a fneg(mov 0), which can be added as a separate optimization.
1 parent 3d87043 commit a39ed08

File tree

7 files changed

+473
-414
lines changed

7 files changed

+473
-414
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5008,6 +5008,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
50085008
case TargetOpcode::G_FSUB:
50095009
case TargetOpcode::G_FMUL:
50105010
case TargetOpcode::G_FDIV:
5011+
case TargetOpcode::G_FCOPYSIGN:
50115012
case TargetOpcode::G_UADDSAT:
50125013
case TargetOpcode::G_USUBSAT:
50135014
case TargetOpcode::G_SADDSAT:

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -269,14 +269,18 @@ MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
269269
LLT ResTy = Res.getLLTTy(*getMRI());
270270
LLT Op0Ty = Op0.getLLTTy(*getMRI());
271271

272-
assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
273-
assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
272+
assert(Op0Ty.isVector() && "Non vector type");
273+
assert(((ResTy.isScalar() && (ResTy == Op0Ty.getElementType())) ||
274+
(ResTy.isVector() &&
275+
(ResTy.getElementType() == Op0Ty.getElementType()))) &&
274276
"Different vector element types");
275-
assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
277+
assert((ResTy.isScalar() || (ResTy.getNumElements() < Op0Ty.getNumElements())) &&
276278
"Op0 has fewer elements");
277279

278-
SmallVector<Register, 8> Regs;
279280
auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
281+
if (ResTy.isScalar())
282+
return buildCopy(Res, Unmerge.getReg(0));
283+
SmallVector<Register, 8> Regs;
280284
for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
281285
Regs.push_back(Unmerge.getReg(i));
282286
return buildMergeLikeInstr(Res, Regs);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 8 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,10 +1070,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10701070
getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
10711071
.legalFor({{s64, s32}, {s64, s64}});
10721072

1073-
// TODO: Custom legalization for vector types.
10741073
// TODO: Custom legalization for mismatched types.
1075-
// TODO: s16 support.
1076-
getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
1074+
getActionDefinitionsBuilder(G_FCOPYSIGN)
1075+
.moreElementsIf(
1076+
[](const LegalityQuery &Query) { return Query.Types[0].isScalar(); },
1077+
[=](const LegalityQuery &Query) {
1078+
const LLT Ty = Query.Types[0];
1079+
return std::pair(0, LLT::fixed_vector(Ty == s16 ? 4 : 2, Ty));
1080+
})
1081+
.lower();
10771082

10781083
getActionDefinitionsBuilder(G_FMAD).lower();
10791084

@@ -1124,8 +1129,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
11241129
case TargetOpcode::G_MEMMOVE:
11251130
case TargetOpcode::G_MEMSET:
11261131
return legalizeMemOps(MI, Helper);
1127-
case TargetOpcode::G_FCOPYSIGN:
1128-
return legalizeFCopySign(MI, Helper);
11291132
case TargetOpcode::G_EXTRACT_VECTOR_ELT:
11301133
return legalizeExtractVectorElt(MI, MRI, Helper);
11311134
}
@@ -1829,66 +1832,6 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
18291832
return false;
18301833
}
18311834

1832-
bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
1833-
LegalizerHelper &Helper) const {
1834-
MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
1835-
MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
1836-
Register Dst = MI.getOperand(0).getReg();
1837-
LLT DstTy = MRI.getType(Dst);
1838-
assert(DstTy.isScalar() && "Only expected scalars right now!");
1839-
const unsigned DstSize = DstTy.getSizeInBits();
1840-
assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
1841-
assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
1842-
"Expected homogeneous types!");
1843-
1844-
// We want to materialize a mask with the high bit set.
1845-
uint64_t EltMask;
1846-
LLT VecTy;
1847-
1848-
// TODO: s16 support.
1849-
switch (DstSize) {
1850-
default:
1851-
llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
1852-
case 64: {
1853-
// AdvSIMD immediate moves cannot materialize out mask in a single
1854-
// instruction for 64-bit elements. Instead, materialize zero and then
1855-
// negate it.
1856-
EltMask = 0;
1857-
VecTy = LLT::fixed_vector(2, DstTy);
1858-
break;
1859-
}
1860-
case 32:
1861-
EltMask = 0x80000000ULL;
1862-
VecTy = LLT::fixed_vector(4, DstTy);
1863-
break;
1864-
}
1865-
1866-
// Widen In1 and In2 to 128 bits. We want these to eventually become
1867-
// INSERT_SUBREGs.
1868-
auto Undef = MIRBuilder.buildUndef(VecTy);
1869-
auto Zero = MIRBuilder.buildConstant(DstTy, 0);
1870-
auto Ins1 = MIRBuilder.buildInsertVectorElement(
1871-
VecTy, Undef, MI.getOperand(1).getReg(), Zero);
1872-
auto Ins2 = MIRBuilder.buildInsertVectorElement(
1873-
VecTy, Undef, MI.getOperand(2).getReg(), Zero);
1874-
1875-
// Construct the mask.
1876-
auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
1877-
if (DstSize == 64)
1878-
Mask = MIRBuilder.buildFNeg(VecTy, Mask);
1879-
1880-
auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins2, Ins1});
1881-
1882-
// Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
1883-
// want this to eventually become an EXTRACT_SUBREG.
1884-
SmallVector<Register, 2> DstRegs(1, Dst);
1885-
for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
1886-
DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
1887-
MIRBuilder.buildUnmerge(DstRegs, Sel);
1888-
MI.eraseFromParent();
1889-
return true;
1890-
}
1891-
18921835
bool AArch64LegalizerInfo::legalizeExtractVectorElt(
18931836
MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
18941837
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@ class AArch64LegalizerInfo : public LegalizerInfo {
6060
LegalizerHelper &Helper) const;
6161
bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const;
6262
bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const;
63-
bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const;
6463
bool legalizeExtractVectorElt(MachineInstr &MI, MachineRegisterInfo &MRI,
6564
LegalizerHelper &Helper) const;
6665
const AArch64Subtarget *ST;

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,18 @@ body: |
1313
; CHECK-NEXT: {{ $}}
1414
; CHECK-NEXT: %val:_(s32) = COPY $s0
1515
; CHECK-NEXT: %sign:_(s32) = COPY $s1
16-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
17-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
18-
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
19-
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
20-
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
21-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
22-
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<4 x s32>) = G_BSP [[BUILD_VECTOR]], [[IVEC1]], [[IVEC]]
23-
; CHECK-NEXT: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BSP]](<4 x s32>)
16+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
17+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %val(s32), [[DEF]](s32)
18+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR %sign(s32), [[DEF]](s32)
19+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
20+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
21+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
22+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
23+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
24+
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
25+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
26+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>)
27+
; CHECK-NEXT: %fcopysign:_(s32) = COPY [[UV]](s32)
2428
; CHECK-NEXT: $s0 = COPY %fcopysign(s32)
2529
; CHECK-NEXT: RET_ReallyLR implicit $s0
2630
%val:_(s32) = COPY $s0
@@ -41,14 +45,18 @@ body: |
4145
; CHECK-NEXT: {{ $}}
4246
; CHECK-NEXT: %val:_(s64) = COPY $d0
4347
; CHECK-NEXT: %sign:_(s64) = COPY $d1
44-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
45-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
46-
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
47-
; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
48-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
49-
; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
50-
; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<2 x s64>) = G_BSP [[FNEG]], [[IVEC1]], [[IVEC]]
51-
; CHECK-NEXT: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BSP]](<2 x s64>)
48+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
49+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %val(s64), [[DEF]](s64)
50+
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR %sign(s64), [[DEF]](s64)
51+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808
52+
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
53+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807
54+
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
55+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]]
56+
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
57+
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[AND]], [[AND1]]
58+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[OR]](<2 x s64>)
59+
; CHECK-NEXT: %fcopysign:_(s64) = COPY [[UV]](s64)
5260
; CHECK-NEXT: $d0 = COPY %fcopysign(s64)
5361
; CHECK-NEXT: RET_ReallyLR implicit $d0
5462
%val:_(s64) = COPY $d0

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -521,8 +521,8 @@
521521
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
522522
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
523523
# DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
524-
# DEBUG-NEXT: .. the first uncovered type index: 2, OK
525-
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
524+
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
525+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
526526
# DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
527527
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
528528
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined

0 commit comments

Comments
 (0)