Skip to content

Commit 922fafa

Browse files
authored
[GlobalISel] Micro-optimize getConstantVRegValWithLookThrough (#91969)
I was benchmarking the MatchTable when I found that `getConstantVRegValWithLookThrough` took a non-negligible amount of time, about 7.5% of all of `AArch64PreLegalizerCombinerImpl::tryCombineAll`. I decided to take a closer look to see if I could squeeze some performance out of it, and I landed on a few changes that:
- Avoid copying APInt unnecessarily; in particular, returning std::optional<APInt> can be expensive when an out parameter also works.
- Avoid an indirect call by using templated function pointers instead of function_ref/std::function.
Both of those changes seem to speed up this function by about 50%, but my benchmarking (`perf record`) seems inconsistent, so take the measurements with a grain of salt. I saw as high as 4.5% and as low as 2% for this function on the exact same input after the changes, but it never got close to 7% again in a few runs, so this looks like a stable improvement.
1 parent 12c0024 commit 922fafa

File tree

1 file changed

+44
-32
lines changed

1 file changed

+44
-32
lines changed

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 44 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -313,13 +313,22 @@ llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
313313

314314
namespace {
315315

316-
typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
317-
typedef std::function<std::optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
318-
319-
std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
320-
Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
321-
GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
322-
bool LookThroughAnyExt = false) {
316+
// This function is used in many places, and as such, it has some
317+
// micro-optimizations to try and make it as fast as it can be.
318+
//
319+
// - We use template arguments to avoid an indirect call caused by passing a
320+
// function_ref/std::function
321+
// - GetAPCstValue does not return std::optional<APInt> as that's expensive.
322+
// Instead it returns true/false and places the result in a pre-constructed
323+
// APInt.
324+
//
325+
// Please change this function carefully and benchmark your changes.
326+
template <bool (*IsConstantOpcode)(const MachineInstr *),
327+
bool (*GetAPCstValue)(const MachineInstr *MI, APInt &)>
328+
std::optional<ValueAndVReg>
329+
getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
330+
bool LookThroughInstrs = true,
331+
bool LookThroughAnyExt = false) {
323332
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
324333
MachineInstr *MI;
325334

@@ -353,26 +362,25 @@ std::optional<ValueAndVReg> getConstantVRegValWithLookThrough(
353362
if (!MI || !IsConstantOpcode(MI))
354363
return std::nullopt;
355364

356-
std::optional<APInt> MaybeVal = getAPCstValue(MI);
357-
if (!MaybeVal)
365+
APInt Val;
366+
if (!GetAPCstValue(MI, Val))
358367
return std::nullopt;
359-
APInt &Val = *MaybeVal;
360-
for (auto [Opcode, Size] : reverse(SeenOpcodes)) {
361-
switch (Opcode) {
368+
for (auto &Pair : reverse(SeenOpcodes)) {
369+
switch (Pair.first) {
362370
case TargetOpcode::G_TRUNC:
363-
Val = Val.trunc(Size);
371+
Val = Val.trunc(Pair.second);
364372
break;
365373
case TargetOpcode::G_ANYEXT:
366374
case TargetOpcode::G_SEXT:
367-
Val = Val.sext(Size);
375+
Val = Val.sext(Pair.second);
368376
break;
369377
case TargetOpcode::G_ZEXT:
370-
Val = Val.zext(Size);
378+
Val = Val.zext(Pair.second);
371379
break;
372380
}
373381
}
374382

375-
return ValueAndVReg{Val, VReg};
383+
return ValueAndVReg{std::move(Val), VReg};
376384
}
377385

378386
bool isIConstant(const MachineInstr *MI) {
@@ -394,42 +402,46 @@ bool isAnyConstant(const MachineInstr *MI) {
394402
return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
395403
}
396404

397-
std::optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
405+
bool getCImmAsAPInt(const MachineInstr *MI, APInt &Result) {
398406
const MachineOperand &CstVal = MI->getOperand(1);
399-
if (CstVal.isCImm())
400-
return CstVal.getCImm()->getValue();
401-
return std::nullopt;
407+
if (!CstVal.isCImm())
408+
return false;
409+
Result = CstVal.getCImm()->getValue();
410+
return true;
402411
}
403412

404-
std::optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
413+
bool getCImmOrFPImmAsAPInt(const MachineInstr *MI, APInt &Result) {
405414
const MachineOperand &CstVal = MI->getOperand(1);
406415
if (CstVal.isCImm())
407-
return CstVal.getCImm()->getValue();
408-
if (CstVal.isFPImm())
409-
return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
410-
return std::nullopt;
416+
Result = CstVal.getCImm()->getValue();
417+
else if (CstVal.isFPImm())
418+
Result = CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
419+
else
420+
return false;
421+
return true;
411422
}
412423

413424
} // end anonymous namespace
414425

415426
std::optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
416427
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
417-
return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
418-
getCImmAsAPInt, LookThroughInstrs);
428+
return getConstantVRegValWithLookThrough<isIConstant, getCImmAsAPInt>(
429+
VReg, MRI, LookThroughInstrs);
419430
}
420431

421432
std::optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
422433
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
423434
bool LookThroughAnyExt) {
424-
return getConstantVRegValWithLookThrough(
425-
VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
426-
LookThroughAnyExt);
435+
return getConstantVRegValWithLookThrough<isAnyConstant,
436+
getCImmOrFPImmAsAPInt>(
437+
VReg, MRI, LookThroughInstrs, LookThroughAnyExt);
427438
}
428439

429440
std::optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
430441
Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
431-
auto Reg = getConstantVRegValWithLookThrough(
432-
VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
442+
auto Reg =
443+
getConstantVRegValWithLookThrough<isFConstant, getCImmOrFPImmAsAPInt>(
444+
VReg, MRI, LookThroughInstrs);
433445
if (!Reg)
434446
return std::nullopt;
435447
return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),

0 commit comments

Comments
 (0)