Skip to content

Commit 713463a

Browse files
committed
[AArch64][GlobalISel] FNeg constant materialization
This is the GlobalISel equivalent of #80641: it creates fneg(movi) instead of falling back to the alternatives, a constant pool load or a GPR dup.
1 parent 0f8680b commit 713463a

File tree

2 files changed

+62
-26
lines changed

2 files changed

+62
-26
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 52 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5786,24 +5786,60 @@ AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
57865786

57875787
if (CV->getSplatValue()) {
57885788
APInt DefBits = APInt::getSplat(DstSize, CV->getUniqueInteger());
5789-
MachineInstr *NewOp;
5790-
bool Inv = false;
5791-
if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5792-
(NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5793-
(NewOp =
5794-
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5795-
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5796-
(NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5797-
(NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5798-
return NewOp;
5789+
auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5790+
MachineInstr *NewOp;
5791+
bool Inv = false;
5792+
if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5793+
(NewOp =
5794+
tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5795+
(NewOp =
5796+
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5797+
(NewOp =
5798+
tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5799+
(NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5800+
(NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5801+
return NewOp;
5802+
5803+
DefBits = ~DefBits;
5804+
Inv = true;
5805+
if ((NewOp =
5806+
tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5807+
(NewOp =
5808+
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5809+
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5810+
return NewOp;
5811+
return nullptr;
5812+
};
57995813

5800-
DefBits = ~DefBits;
5801-
Inv = true;
5802-
if ((NewOp = tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5803-
(NewOp =
5804-
tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5805-
(NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5814+
if (auto *NewOp = TryMOVIWithBits(DefBits))
58065815
return NewOp;
5816+
5817+
// See if a fneg of the constant can be materialized with a MOVI, etc
5818+
auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5819+
unsigned NegOpc) -> MachineInstr * {
5820+
// FNegate each sub-element of the constant
5821+
APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5822+
APInt NegBits(DstSize, 0);
5823+
unsigned NumElts = DstSize / NumBits;
5824+
for (unsigned i = 0; i < NumElts; i++)
5825+
NegBits |= Neg << (NumBits * i);
5826+
NegBits = DefBits ^ NegBits;
5827+
5828+
// Try to create the new constants with MOVI, and if so generate a fneg
5829+
// for it.
5830+
if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5831+
Register NewDst = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
5832+
NewOp->getOperand(0).setReg(NewDst);
5833+
return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5834+
}
5835+
return nullptr;
5836+
};
5837+
MachineInstr *R;
5838+
if ((R = TryWithFNeg(DefBits, 32, AArch64::FNEGv4f32)) ||
5839+
(R = TryWithFNeg(DefBits, 64, AArch64::FNEGv2f64)) ||
5840+
(STI.hasFullFP16() &&
5841+
(R = TryWithFNeg(DefBits, 16, AArch64::FNEGv8f16))))
5842+
return R;
58075843
}
58085844

58095845
auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);

llvm/test/CodeGen/AArch64/neon-mov.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,14 @@ define <4 x i32> @movi4s_fneg() {
125125
;
126126
; CHECK-NOFP16-GI-LABEL: movi4s_fneg:
127127
; CHECK-NOFP16-GI: // %bb.0:
128-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI13_0
129-
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
128+
; CHECK-NOFP16-GI-NEXT: movi v0.4s, #240, lsl #8
129+
; CHECK-NOFP16-GI-NEXT: fneg v0.4s, v0.4s
130130
; CHECK-NOFP16-GI-NEXT: ret
131131
;
132132
; CHECK-FP16-GI-LABEL: movi4s_fneg:
133133
; CHECK-FP16-GI: // %bb.0:
134-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI13_0
135-
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI13_0]
134+
; CHECK-FP16-GI-NEXT: movi v0.4s, #240, lsl #8
135+
; CHECK-FP16-GI-NEXT: fneg v0.4s, v0.4s
136136
; CHECK-FP16-GI-NEXT: ret
137137
ret <4 x i32> <i32 2147545088, i32 2147545088, i32 2147545088, i32 2147545088>
138138
}
@@ -306,8 +306,8 @@ define <8 x i16> @mvni8h_neg() {
306306
;
307307
; CHECK-FP16-GI-LABEL: mvni8h_neg:
308308
; CHECK-FP16-GI: // %bb.0:
309-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI32_0
310-
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI32_0]
309+
; CHECK-FP16-GI-NEXT: movi v0.8h, #240
310+
; CHECK-FP16-GI-NEXT: fneg v0.8h, v0.8h
311311
; CHECK-FP16-GI-NEXT: ret
312312
ret <8 x i16> <i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008, i16 33008>
313313
}
@@ -492,14 +492,14 @@ define <2 x double> @fmov2d_neg0() {
492492
;
493493
; CHECK-NOFP16-GI-LABEL: fmov2d_neg0:
494494
; CHECK-NOFP16-GI: // %bb.0:
495-
; CHECK-NOFP16-GI-NEXT: adrp x8, .LCPI51_0
496-
; CHECK-NOFP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
495+
; CHECK-NOFP16-GI-NEXT: movi v0.2d, #0000000000000000
496+
; CHECK-NOFP16-GI-NEXT: fneg v0.2d, v0.2d
497497
; CHECK-NOFP16-GI-NEXT: ret
498498
;
499499
; CHECK-FP16-GI-LABEL: fmov2d_neg0:
500500
; CHECK-FP16-GI: // %bb.0:
501-
; CHECK-FP16-GI-NEXT: adrp x8, .LCPI51_0
502-
; CHECK-FP16-GI-NEXT: ldr q0, [x8, :lo12:.LCPI51_0]
501+
; CHECK-FP16-GI-NEXT: movi v0.2d, #0000000000000000
502+
; CHECK-FP16-GI-NEXT: fneg v0.2d, v0.2d
503503
; CHECK-FP16-GI-NEXT: ret
504504
ret <2 x double> <double -0.0, double -0.0>
505505
}

0 commit comments

Comments
 (0)