Introduce DIExpression::foldConstantMath()

rastogishubham · rastogishubham · commit 884ded7e0b1d · 2024-05-16T12:50:11.000-07:00
DIExpressions can get very long and have a lot of redundant operations.
This function uses simple pattern matching to fold constant math that
can be evaluated at compile time.
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -3121,6 +3121,11 @@ class DIExpression : public MDNode {
   /// expression and constant on failure.
   std::pair<DIExpression *, const ConstantInt *>
   constantFold(const ConstantInt *CI);
+
+  /// Try to shorten an expression with constant math operations that can be
+  /// evaluated at compile time. Returns a new expression on success, or the old
+  /// expression if there is nothing to be reduced.
+  DIExpression *foldConstantMath();
 };
 
 inline bool operator==(const DIExpression::FragmentInfo &A,
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_component_library(LLVMCore
   DataLayout.cpp
   DebugInfo.cpp
   DebugInfoMetadata.cpp
+  DIExpressionOptimizer.cpp
   DebugProgramInstruction.cpp
   DebugLoc.cpp
   DiagnosticHandler.cpp
diff --git a/llvm/lib/IR/DIExpressionOptimizer.cpp b/llvm/lib/IR/DIExpressionOptimizer.cpp
@@ -0,0 +1,349 @@
+//===- DIExpressionOptimizer.cpp - Constant folding of DIExpressions ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements functions to constant fold DIExpressions. Which were
+// declared in DIExpressionOptimizer.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+using namespace llvm;
+
+static std::optional<uint64_t> isConstantVal(DIExpression::ExprOperand Op) {
+  if (Op.getOp() == dwarf::DW_OP_constu)
+    return Op.getArg(0);
+  return std::nullopt;
+}
+
+static bool isNeutralElement(uint64_t Op, uint64_t Val) {
+  switch (Op) {
+  case dwarf::DW_OP_plus:
+  case dwarf::DW_OP_minus:
+  case dwarf::DW_OP_shl:
+  case dwarf::DW_OP_shr:
+    return Val == 0;
+  case dwarf::DW_OP_mul:
+  case dwarf::DW_OP_div:
+    return Val == 1;
+  default:
+    return false;
+  }
+}
+
+static std::optional<uint64_t> foldOperationIfPossible(
+    DIExpression::ExprOperand Op1, DIExpression::ExprOperand Op2,
+    DIExpression::ExprOperand Op3, bool &ConstantValCheckFailed) {
+
+  auto Operand1 = isConstantVal(Op1);
+  auto Operand2 = isConstantVal(Op2);
+
+  if (!Operand1 || !Operand2) {
+    ConstantValCheckFailed = true;
+    return std::nullopt;
+  }
+
+  auto Oper1 = *Operand1;
+  auto Oper2 = *Operand2;
+
+  bool ResultOverflowed;
+  switch (Op3.getOp()) {
+  case dwarf::DW_OP_plus: {
+    auto Result = SaturatingAdd(Oper1, Oper2, &ResultOverflowed);
+    if (ResultOverflowed)
+      return std::nullopt;
+    return Result;
+  }
+  case dwarf::DW_OP_minus: {
+    if (Oper1 < Oper2)
+      return std::nullopt;
+    return Oper1 - Oper2;
+  }
+  case dwarf::DW_OP_shl: {
+    if ((uint64_t)countl_zero(Oper1) < Oper2)
+      return std::nullopt;
+    return Oper1 << Oper2;
+  }
+  case dwarf::DW_OP_shr: {
+    if ((uint64_t)countr_zero(Oper1) < Oper2)
+      return std::nullopt;
+    return Oper1 >> Oper2;
+  }
+  case dwarf::DW_OP_mul: {
+    auto Result = SaturatingMultiply(Oper1, Oper2, &ResultOverflowed);
+    if (ResultOverflowed)
+      return std::nullopt;
+    return Result;
+  }
+  case dwarf::DW_OP_div: {
+    if (Oper2)
+      return Oper1 / Oper2;
+    return std::nullopt;
+  }
+  default:
+    return std::nullopt;
+  }
+}
+
+static bool operationsAreFoldableAndCommutative(uint64_t Op1, uint64_t Op2) {
+  return Op1 == Op2 && (Op1 == dwarf::DW_OP_plus || Op1 == dwarf::DW_OP_mul);
+}
+
+static void consumeOneOperator(DIExpressionCursor &Cursor, uint64_t &Loc,
+                               const DIExpression::ExprOperand &Op) {
+  Cursor.consume(1);
+  Loc = Loc + Op.getSize();
+}
+
+void startFromBeginning(uint64_t &Loc, DIExpressionCursor &Cursor,
+                        ArrayRef<uint64_t> WorkingOps) {
+  Cursor.assignNewExpr(WorkingOps);
+  Loc = 0;
+}
+
+static SmallVector<uint64_t>
+canonicalizeDwarfOperations(ArrayRef<uint64_t> WorkingOps) {
+  DIExpressionCursor Cursor(WorkingOps);
+  uint64_t Loc = 0;
+  SmallVector<uint64_t> ResultOps;
+  while (Loc < WorkingOps.size()) {
+    auto Op = Cursor.peek();
+    /// Expression has no operations, break.
+    if (!Op)
+      break;
+    auto OpRaw = Op->getOp();
+    auto OpArg = Op->getArg(0);
+
+    if (OpRaw >= dwarf::DW_OP_lit0 && OpRaw <= dwarf::DW_OP_lit31) {
+      ResultOps.push_back(dwarf::DW_OP_constu);
+      ResultOps.push_back(OpRaw - dwarf::DW_OP_lit0);
+      consumeOneOperator(Cursor, Loc, *Cursor.peek());
+      continue;
+    }
+    if (OpRaw == dwarf::DW_OP_plus_uconst) {
+      ResultOps.push_back(dwarf::DW_OP_constu);
+      ResultOps.push_back(OpArg);
+      ResultOps.push_back(dwarf::DW_OP_plus);
+      consumeOneOperator(Cursor, Loc, *Cursor.peek());
+      continue;
+    }
+    uint64_t PrevLoc = Loc;
+    consumeOneOperator(Cursor, Loc, *Cursor.peek());
+    ResultOps.append(WorkingOps.begin() + PrevLoc, WorkingOps.begin() + Loc);
+  }
+  return ResultOps;
+}
+
+static SmallVector<uint64_t>
+optimizeDwarfOperations(ArrayRef<uint64_t> WorkingOps) {
+  DIExpressionCursor Cursor(WorkingOps);
+  uint64_t Loc = 0;
+  SmallVector<uint64_t> ResultOps;
+  while (Loc < WorkingOps.size()) {
+    auto Op1 = Cursor.peek();
+    /// Expression has no operations, exit.
+    if (!Op1)
+      break;
+    auto Op1Raw = Op1->getOp();
+    auto Op1Arg = Op1->getArg(0);
+
+    if (Op1Raw == dwarf::DW_OP_constu && Op1Arg == 0) {
+      ResultOps.push_back(dwarf::DW_OP_lit0);
+      consumeOneOperator(Cursor, Loc, *Cursor.peek());
+      continue;
+    }
+
+    auto Op2 = Cursor.peekNext();
+    /// Expression has no more operations, copy into ResultOps and exit.
+    if (!Op2) {
+      uint64_t PrevLoc = Loc;
+      consumeOneOperator(Cursor, Loc, *Cursor.peek());
+      ResultOps.append(WorkingOps.begin() + PrevLoc, WorkingOps.begin() + Loc);
+      break;
+    }
+    auto Op2Raw = Op2->getOp();
+
+    if (Op1Raw == dwarf::DW_OP_constu && Op2Raw == dwarf::DW_OP_plus) {
+      ResultOps.push_back(dwarf::DW_OP_plus_uconst);
+      ResultOps.push_back(Op1Arg);
+      consumeOneOperator(Cursor, Loc, *Cursor.peek());
+      consumeOneOperator(Cursor, Loc, *Cursor.peek());
+      continue;
+    }
+    uint64_t PrevLoc = Loc;
+    consumeOneOperator(Cursor, Loc, *Cursor.peek());
+    ResultOps.append(WorkingOps.begin() + PrevLoc, WorkingOps.begin() + Loc);
+  }
+  return ResultOps;
+}
+
+static bool tryFoldNoOpMath(ArrayRef<DIExpression::ExprOperand> Ops,
+                            uint64_t &Loc, DIExpressionCursor &Cursor,
+                            SmallVectorImpl<uint64_t> &WorkingOps) {
+
+  if (isConstantVal(Ops[0]) &&
+      isNeutralElement(Ops[1].getOp(), Ops[0].getArg(0))) {
+    WorkingOps.erase(WorkingOps.begin() + Loc, WorkingOps.begin() + Loc + 3);
+    startFromBeginning(Loc, Cursor, WorkingOps);
+    return true;
+  }
+  return false;
+}
+
+static bool tryFoldConstants(ArrayRef<DIExpression::ExprOperand> Ops,
+                             uint64_t &Loc, DIExpressionCursor &Cursor,
+                             SmallVectorImpl<uint64_t> &WorkingOps) {
+
+  bool ConstantValCheckFailed = false;
+  auto Result =
+      foldOperationIfPossible(Ops[0], Ops[1], Ops[2], ConstantValCheckFailed);
+  if (ConstantValCheckFailed)
+    return false;
+  if (!Result) {
+    consumeOneOperator(Cursor, Loc, Ops[0]);
+    return true;
+    }
+    WorkingOps.erase(WorkingOps.begin() + Loc + 2,
+                     WorkingOps.begin() + Loc + 5);
+    WorkingOps[Loc] = dwarf::DW_OP_constu;
+    WorkingOps[Loc + 1] = *Result;
+    startFromBeginning(Loc, Cursor, WorkingOps);
+    return true;
+}
+
+static bool tryFoldCommutativeMath(ArrayRef<DIExpression::ExprOperand> Ops,
+                                   uint64_t &Loc, DIExpressionCursor &Cursor,
+                                   SmallVectorImpl<uint64_t> &WorkingOps) {
+
+  bool ConstantValCheckFailed = false;
+  if (operationsAreFoldableAndCommutative(Ops[1].getOp(), Ops[3].getOp())) {
+    auto Result =
+        foldOperationIfPossible(Ops[0], Ops[2], Ops[1], ConstantValCheckFailed);
+    if (ConstantValCheckFailed)
+      return false;
+    if (!Result) {
+      consumeOneOperator(Cursor, Loc, Ops[0]);
+      return true;
+    }
+    WorkingOps.erase(WorkingOps.begin() + Loc + 3,
+                     WorkingOps.begin() + Loc + 6);
+    WorkingOps[Loc] = dwarf::DW_OP_constu;
+    WorkingOps[Loc + 1] = *Result;
+    startFromBeginning(Loc, Cursor, WorkingOps);
+    return true;
+  }
+  return false;
+}
+
+static bool tryFoldCommutativeMathWithArgInBetween(
+    ArrayRef<DIExpression::ExprOperand> Ops, uint64_t &Loc,
+    DIExpressionCursor &Cursor, SmallVectorImpl<uint64_t> &WorkingOps) {
+
+  bool ConstantValCheckFailed = false;
+  if (Ops[2].getOp() == dwarf::DW_OP_LLVM_arg &&
+      operationsAreFoldableAndCommutative(Ops[1].getOp(), Ops[3].getOp()) &&
+      operationsAreFoldableAndCommutative(Ops[3].getOp(), Ops[5].getOp())) {
+    auto Result =
+        foldOperationIfPossible(Ops[0], Ops[4], Ops[1], ConstantValCheckFailed);
+    if (ConstantValCheckFailed)
+      return false;
+    if (!Result) {
+      consumeOneOperator(Cursor, Loc, Ops[0]);
+      return true;
+    }
+    WorkingOps.erase(WorkingOps.begin() + Loc + 6,
+                     WorkingOps.begin() + Loc + 9);
+    WorkingOps[Loc] = dwarf::DW_OP_constu;
+    WorkingOps[Loc + 1] = *Result;
+    startFromBeginning(Loc, Cursor, WorkingOps);
+    return true;
+  }
+  return false;
+}
+
+DIExpression *DIExpression::foldConstantMath() {
+
+  SmallVector<uint64_t, 8> WorkingOps(Elements.begin(), Elements.end());
+  uint64_t Loc = 0;
+  SmallVector<uint64_t> ResultOps = canonicalizeDwarfOperations(WorkingOps);
+  DIExpressionCursor Cursor(ResultOps);
+  SmallVector<DIExpression::ExprOperand, 8> Ops;
+
+  while (Loc < ResultOps.size()) {
+    Ops.clear();
+
+    auto Op = Cursor.peek();
+    // Expression has no operations, exit.
+    if (!Op)
+      break;
+
+    Ops.push_back(*Op);
+
+    if (!isConstantVal(Ops[0])) {
+      // Early exit, all of the following patterns start with a constant value.
+      consumeOneOperator(Cursor, Loc, *Op);
+      continue;
+    }
+
+    Op = Cursor.peekNext();
+    // All following patterns require at least 2 Operations, exit.
+    if (!Op)
+      break;
+
+    Ops.push_back(*Op);
+
+    if (tryFoldNoOpMath(Ops, Loc, Cursor, ResultOps))
+      continue;
+
+    Op = Cursor.peekNextN(2);
+    // Op[1] could still match a pattern, skip iteration.
+    if (!Op) {
+      consumeOneOperator(Cursor, Loc, Ops[0]);
+      continue;
+    }
+
+    Ops.push_back(*Op);
+    if (tryFoldConstants(Ops, Loc, Cursor, ResultOps))
+      continue;
+
+    Op = Cursor.peekNextN(3);
+    // Op[1] and Op[2] could still match a pattern, skip iteration.
+    if (!Op) {
+      consumeOneOperator(Cursor, Loc, Ops[0]);
+      continue;
+    }
+
+    Ops.push_back(*Op);
+    if (tryFoldCommutativeMath(Ops, Loc, Cursor, ResultOps))
+      continue;
+
+    Op = Cursor.peekNextN(4);
+    if (!Op) {
+      consumeOneOperator(Cursor, Loc, Ops[0]);
+      continue;
+    }
+
+    Ops.push_back(*Op);
+    Op = Cursor.peekNextN(5);
+    if (!Op) {
+      consumeOneOperator(Cursor, Loc, Ops[0]);
+      continue;
+    }
+
+    Ops.push_back(*Op);
+    if (tryFoldCommutativeMathWithArgInBetween(Ops, Loc, Cursor, ResultOps))
+      continue;
+
+    consumeOneOperator(Cursor, Loc, Ops[0]);
+  }
+  ResultOps = optimizeDwarfOperations(ResultOps);
+  auto *Result = DIExpression::get(getContext(), ResultOps);
+  assert(Result->isValid() && "concatenated expression is not valid");
+  return Result;
+}
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 
+#include <bit>
 #include <numeric>
 #include <optional>
 
@@ -1880,7 +1881,6 @@ DIExpression *DIExpression::append(const DIExpression *Expr,
     }
     Op.appendToVector(NewOps);
   }
-
   NewOps.append(Ops.begin(), Ops.end());
   auto *result = DIExpression::get(Expr->getContext(), NewOps);
   assert(result->isValid() && "concatenated expression is not valid");
diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp

Original file line number	Diff line number	Diff line change
`@@ -22,6 +22,7 @@`
`22`	`22`	`#include "llvm/IR/Type.h"`
`23`	`23`	`#include "llvm/IR/Value.h"`
`24`	`24`
	`25`	`+#include <bit>`
`25`	`26`	`#include <numeric>`
`26`	`27`	`#include <optional>`
`27`	`28`
`@@ -1880,7 +1881,6 @@ DIExpression DIExpression::append(const DIExpression Expr,`
`1880`	`1881`	`}`
`1881`	`1882`	`Op.appendToVector(NewOps);`
`1882`	`1883`	`}`
`1883`		`-`
`1884`	`1884`	`NewOps.append(Ops.begin(), Ops.end());`
`1885`	`1885`	`auto *result = DIExpression::get(Expr->getContext(), NewOps);`
`1886`	`1886`	`assert(result->isValid() && "concatenated expression is not valid");`