[MLIR][Presburger] Implement function to evaluate the number of terms in a generating function. (#78078)

Abhinav271828 · web-flow · commit 68a5261d260e · 2024-01-22T14:22:01.000+05:30
We implement `computeNumTerms()`, which counts the number of terms in a
generating function by substituting the unit vector in it.
This is the main function in Barvinok's algorithm – the number of points
in a polytope is given by the number of terms in the generating function
corresponding to it.
We also modify the GeneratingFunction class to have `const` getters and
improve the simplification of QuasiPolynomials.
diff --git a/mlir/include/mlir/Analysis/Presburger/Barvinok.h b/mlir/include/mlir/Analysis/Presburger/Barvinok.h
@@ -99,6 +99,12 @@ QuasiPolynomial getCoefficientInRationalFunction(unsigned power,
                                                  ArrayRef<QuasiPolynomial> num,
                                                  ArrayRef<Fraction> den);
 
+/// Find the number of terms in a generating function, as
+/// a quasipolynomial in the parameter space of the input function.
+/// The generating function must be such that for all values of the
+/// parameters, the number of terms is finite.
+QuasiPolynomial computeNumTerms(const GeneratingFunction &gf);
+
 } // namespace detail
 } // namespace presburger
 } // namespace mlir
diff --git a/mlir/include/mlir/Analysis/Presburger/GeneratingFunction.h b/mlir/include/mlir/Analysis/Presburger/GeneratingFunction.h
@@ -62,13 +62,15 @@ class GeneratingFunction {
 #endif // NDEBUG
   }
 
-  unsigned getNumParams() { return numParam; }
+  unsigned getNumParams() const { return numParam; }
 
-  SmallVector<int> getSigns() { return signs; }
+  SmallVector<int> getSigns() const { return signs; }
 
-  std::vector<ParamPoint> getNumerators() { return numerators; }
+  std::vector<ParamPoint> getNumerators() const { return numerators; }
 
-  std::vector<std::vector<Point>> getDenominators() { return denominators; }
+  std::vector<std::vector<Point>> getDenominators() const {
+    return denominators;
+  }
 
   GeneratingFunction operator+(GeneratingFunction &gf) const {
     assert(numParam == gf.getNumParams() &&
diff --git a/mlir/include/mlir/Analysis/Presburger/QuasiPolynomial.h b/mlir/include/mlir/Analysis/Presburger/QuasiPolynomial.h
@@ -59,9 +59,14 @@ class QuasiPolynomial : public PresburgerSpace {
   QuasiPolynomial operator*(const QuasiPolynomial &x) const;
   QuasiPolynomial operator/(const Fraction x) const;
 
-  // Removes terms which evaluate to zero from the expression.
+  // Removes terms which evaluate to zero from the expression
+  // and folds affine functions which are constant into the
+  // constant coefficients.
   QuasiPolynomial simplify();
 
+  // Group together like terms in the expression.
+  QuasiPolynomial collectTerms();
+
   Fraction getConstantTerm();
 
 private:
diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h
@@ -281,6 +281,11 @@ SmallVector<MPInt, 8> getComplementIneq(ArrayRef<MPInt> ineq);
 /// The vectors must have the same sizes.
 Fraction dotProduct(ArrayRef<Fraction> a, ArrayRef<Fraction> b);
 
+/// Find the product of two polynomials, each given by an array of
+/// coefficients.
+std::vector<Fraction> multiplyPolynomials(ArrayRef<Fraction> a,
+                                          ArrayRef<Fraction> b);
+
 } // namespace presburger
 } // namespace mlir
 
diff --git a/mlir/lib/Analysis/Presburger/Barvinok.cpp b/mlir/lib/Analysis/Presburger/Barvinok.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Analysis/Presburger/Barvinok.h"
+#include "mlir/Analysis/Presburger/Utils.h"
 #include "llvm/ADT/Sequence.h"
 #include <algorithm>
 
@@ -245,3 +246,241 @@ QuasiPolynomial mlir::presburger::detail::getCoefficientInRationalFunction(
   }
   return coefficients[power].simplify();
 }
+
+/// Substitute x_i = t^μ_i in one term of a generating function, returning
+/// a quasipolynomial which represents the exponent of the numerator
+/// of the result, and a vector which represents the exponents of the
+/// denominator of the result.
+/// If the returned value is {num, dens}, it represents the function
+/// t^num / \prod_j (1 - t^dens[j]).
+/// v represents the affine functions whose floors are multiplied by the
+/// generators, and ds represents the list of generators.
+/// numParams is passed on as the number of inputs of the returned
+/// quasipolynomial, and μ must have the same number of dimensions as every
+/// generator in ds.
+std::pair<QuasiPolynomial, std::vector<Fraction>>
+substituteMuInTerm(unsigned numParams, ParamPoint v, std::vector<Point> ds,
+                   Point mu) {
+  unsigned numDims = mu.size();
+  // A single assert (rather than an assert inside a loop) leaves no empty
+  // loop behind when assertions are compiled out.
+  assert(std::all_of(ds.begin(), ds.end(),
+                     [numDims](const Point &d) { return d.size() == numDims; }) &&
+         "μ has to have the same number of dimensions as the generators!");
+
+  // After the substitution, the exponent in the numerator becomes
+  // - (μ • u_1) * (floor(first col of v))
+  // - (μ • u_2) * (floor(second col of v)) - ...
+  // - (μ • u_d) * (floor(d'th col of v))
+  // while the j'th factor of the denominator becomes (1 - t^(μ • u_j)).
+  // Both are determined by the same dot products, so each μ • u_j is
+  // computed once: its negation is the j'th coefficient of the numerator
+  // exponent and the value itself is the j'th denominator exponent.
+  SmallVector<Fraction> coefficients;
+  coefficients.reserve(ds.size());
+  std::vector<Fraction> dens;
+  dens.reserve(ds.size());
+  for (const Point &d : ds) {
+    Fraction exponent = dotProduct(mu, d);
+    coefficients.push_back(-exponent);
+    dens.push_back(exponent);
+  }
+
+  // Then, the affine function multiplying the j'th coefficient is a single
+  // floor expression, given by the corresponding column of v.
+  ParamPoint vTranspose = v.transpose();
+  std::vector<std::vector<SmallVector<Fraction>>> affine;
+  affine.reserve(numDims);
+  for (unsigned j = 0; j < numDims; ++j)
+    affine.push_back({SmallVector<Fraction>(vTranspose.getRow(j))});
+
+  QuasiPolynomial num(numParams, coefficients, affine);
+  num = num.simplify();
+
+  return {num, dens};
+}
+
+/// Normalize all denominator exponents `dens` to their absolute values
+/// by multiplying and dividing by the inverses, in a function of the form
+/// sign * t^num / prod_j (1 - t^dens[j]).
+/// Here, sign = ± 1,
+/// num is a QuasiPolynomial, and
+/// each dens[j] is a Fraction.
+/// All three arguments are updated in place so that the function they
+/// represent is unchanged: every negative dens[j] is replaced by its
+/// absolute value, num is increased by the sum of those absolute values,
+/// and sign is flipped once for every exponent that was negative.
+void normalizeDenominatorExponents(int &sign, QuasiPolynomial &num,
+                                   std::vector<Fraction> &dens) {
+  // We track the number of exponents that are negative in the
+  // denominator, and convert them to their absolute values.
+  unsigned numNegExps = 0;
+  Fraction sumNegExps(0, 1);
+  for (Fraction &den : dens) {
+    if (den < 0) {
+      numNegExps += 1;
+      sumNegExps += den;
+      den = -den;
+    }
+  }
+
+  // If we have (1 - t^-c) in the denominator, for positive c,
+  // multiply and divide by t^c. Since
+  // (1 - t^-c) * t^c = -(1 - t^c),
+  // each such factor becomes -(1 - t^c) in the denominator,
+  // increases the numerator exponent by c, and
+  // flips the sign of the function.
+  // All negative exponents are handled at once: sumNegExps is the
+  // (non-positive) sum of the negative exponents, so subtracting it
+  // increases the numerator exponent by the sum of the c's.
+  if (numNegExps % 2 == 1)
+    sign = -sign;
+  num = num - QuasiPolynomial(num.getNumInputs(), sumNegExps);
+}
+
+/// Build the list of binomial coefficients C(n, 0), C(n, 1), ..., C(n, r)
+/// for a QuasiPolynomial n. Each entry is simplified before it is stored.
+std::vector<QuasiPolynomial> getBinomialCoefficients(QuasiPolynomial n,
+                                                     unsigned r) {
+  unsigned numParams = n.getNumInputs();
+  std::vector<QuasiPolynomial> coefficients;
+  coefficients.reserve(r + 1);
+
+  // C(n, 0) = 1.
+  coefficients.push_back(QuasiPolynomial(numParams, 1));
+
+  // Every later entry follows from its predecessor through the recurrence
+  // C(n, k) = C(n, k - 1) * (n - (k - 1)) / k.
+  for (unsigned k = 1; k <= r; ++k) {
+    QuasiPolynomial factor = n - QuasiPolynomial(numParams, k - 1);
+    QuasiPolynomial next = coefficients[k - 1] * factor / Fraction(k, 1);
+    coefficients.push_back(next.simplify());
+  }
+  return coefficients;
+}
+
+/// Compute the binomial coefficients nCi for 0 ≤ i ≤ r,
+/// where n and r are Fractions. The entries are produced by the recurrence
+/// nCi = nC(i-1) * (n - (i - 1)) / i, starting from nC0 = 1.
+std::vector<Fraction> getBinomialCoefficients(Fraction n, Fraction r) {
+  std::vector<Fraction> coefficients;
+  // The result holds one entry for every integer i with 0 ≤ i ≤ r: that is
+  // floor(r) + 1 entries when r ≥ 0, and just the leading 1 otherwise.
+  int64_t maxIndex = std::max<int64_t>(static_cast<int64_t>(floor(r)), 0);
+  coefficients.reserve(maxIndex + 1);
+  coefficients.push_back(1);
+  for (unsigned j = 1; j <= r; ++j)
+    coefficients.push_back(coefficients[j - 1] * (n - (j - 1)) / (j));
+  return coefficients;
+}
+
+/// We have a generating function of the form
+/// f_p(x) = \sum_i sign_i * (x^n_i(p)) / (\prod_j (1 - x^d_{ij}))
+///
+/// where sign_i is ±1,
+/// n_i \in Q^p -> Q^d is the sum of the vectors d_{ij}, weighted by the
+/// floors of d affine functions on p parameters.
+/// d_{ij} \in Q^d are vectors.
+///
+/// We need to find the number of terms of the form x^t in the expansion of
+/// this function.
+/// However, direct substitution (x = (1, ..., 1)) causes the denominator
+/// to become zero.
+///
+/// We therefore use the following procedure instead:
+/// 1. Substitute x_i = (s+1)^μ_i for some vector μ. This makes the generating
+/// function a function of a scalar s.
+/// 2. Write each term in this function as P(s)/Q(s), where P and Q are
+/// polynomials. P has coefficients as quasipolynomials in the p parameters,
+/// while Q has coefficients as scalars.
+/// 3. Find the constant term in the expansion of each term P(s)/Q(s). This is
+/// equivalent to substituting s = 0.
+///
+/// The result is the sum of these constant terms over all i, as a
+/// quasipolynomial in the parameters of `gf`.
+///
+/// Verdoolaege, Sven, et al. "Counting integer points in parametric
+/// polytopes using Barvinok's rational functions." Algorithmica 48 (2007):
+/// 37-66.
+QuasiPolynomial
+mlir::presburger::detail::computeNumTerms(const GeneratingFunction &gf) {
+  // The getters of GeneratingFunction return by value, so we fetch each
+  // component once instead of copying it again on every use.
+  const std::vector<std::vector<Point>> ds = gf.getDenominators();
+  const SmallVector<int> signs = gf.getSigns();
+  const std::vector<ParamPoint> numerators = gf.getNumerators();
+  unsigned numParams = gf.getNumParams();
+
+  // Step (1) We need to find a μ such that we can substitute x_i =
+  // (s+1)^μ_i. After this substitution, the exponent of (s+1) in the
+  // denominator is (μ • d_{ij}) in each term. Clearly, this cannot become
+  // zero. Hence we find a vector μ that is not orthogonal to any of the
+  // d_{ij} and substitute x accordingly.
+  std::vector<Point> allDenominators;
+  for (const std::vector<Point> &den : ds)
+    allDenominators.insert(allDenominators.end(), den.begin(), den.end());
+  Point mu = getNonOrthogonalVector(allDenominators);
+
+  QuasiPolynomial totalTerm(numParams, 0);
+  for (unsigned i = 0, e = ds.size(); i < e; ++i) {
+    int sign = signs[i];
+
+    // Compute the new exponents of (s+1) for the numerator and the
+    // denominator after substituting μ.
+    auto [numExp, dens] =
+        substituteMuInTerm(numParams, numerators[i], ds[i], mu);
+    // Now the numerator is (s+1)^numExp
+    // and the denominator is \prod_j (1 - (s+1)^dens[j]).
+
+    // Step (2) We need to express the terms in the function as quotients of
+    // polynomials. Each term is now of the form
+    // sign_i * (s+1)^numExp / (\prod_j (1 - (s+1)^dens[j]))
+    // For the i'th term, we first normalize the denominator to have only
+    // positive exponents. The sign and the exponent in the numerator are
+    // adjusted here; the absolute values of the dens[j] are taken below.
+    normalizeDenominatorExponents(sign, numExp, dens);
+
+    // Then, using the formula for geometric series, we replace each (1 -
+    // (s+1)^(dens[j])) with
+    // (-s)(\sum_{0 ≤ k < dens[j]} (s+1)^k).
+    for (Fraction &den : dens)
+      den = abs(den) - 1;
+    // Note that at this point, the semantics of `dens[j]` changes to mean
+    // a term (\sum_{0 ≤ k ≤ dens[j]} (s+1)^k). The denominator is, as before,
+    // a product of these terms.
+
+    // Since the -s are taken out, the sign changes if there is an odd number
+    // of such terms.
+    unsigned r = dens.size();
+    if (r % 2 == 1)
+      sign = -sign;
+
+    // Thus the term overall now has the form
+    // sign'_i * (s+1)^numExp /
+    // (s^r * \prod_j (\sum_{0 ≤ k ≤ dens[j]} (s+1)^k)).
+    // This means that
+    // the numerator is a polynomial in s, with coefficients as
+    // quasipolynomials (given by binomial coefficients), and the denominator
+    // is a polynomial in s, with integral coefficients (given by taking the
+    // convolution over all j).
+
+    // Step (3) We need to find the constant term in the expansion of each
+    // term. Since each term has s^r as a factor in the denominator, we avoid
+    // substituting s = 0 directly; instead, we find the coefficient of s^r in
+    // sign'_i * (s+1)^numExp / (\prod_j (\sum_k (s+1)^k)),
+    // Letting P(s) = (s+1)^numExp and Q(s) = \prod_j (...),
+    // we need to find the coefficient of s^r in P(s)/Q(s),
+    // for which we use the `getCoefficientInRationalFunction()` function.
+
+    // First, we compute the coefficients of P(s), which are binomial
+    // coefficients.
+    // We only need the first r+1 of these, as higher-order terms do not
+    // contribute to the coefficient of s^r.
+    std::vector<QuasiPolynomial> numeratorCoefficients =
+        getBinomialCoefficients(numExp, r);
+
+    // Then we compute the coefficients of each individual term in Q(s),
+    // which are (dens[j]+1) C (k+1) for 0 ≤ k ≤ dens[j]. These are the
+    // binomial coefficients of dens[j]+1 with the leading one (k+1 = 0)
+    // dropped.
+    std::vector<std::vector<Fraction>> eachTermDenCoefficients;
+    eachTermDenCoefficients.reserve(r);
+    for (const Fraction &den : dens) {
+      std::vector<Fraction> singleTermDenCoefficients =
+          getBinomialCoefficients(den + 1, den + 1);
+      eachTermDenCoefficients.push_back(
+          ArrayRef<Fraction>(singleTermDenCoefficients).slice(1));
+    }
+
+    // Now we find the coefficients in Q(s) itself
+    // by taking the convolution of the coefficients
+    // of all the terms.
+    std::vector<Fraction> denominatorCoefficients;
+    if (eachTermDenCoefficients.empty()) {
+      // A term without denominator factors has Q(s) equal to the empty
+      // product, i.e. the constant polynomial 1. Handling it here avoids
+      // reading eachTermDenCoefficients[0] out of bounds.
+      denominatorCoefficients.push_back(Fraction(1, 1));
+    } else {
+      denominatorCoefficients = eachTermDenCoefficients[0];
+      for (unsigned j = 1, f = eachTermDenCoefficients.size(); j < f; ++j)
+        denominatorCoefficients = multiplyPolynomials(
+            denominatorCoefficients, eachTermDenCoefficients[j]);
+    }
+
+    totalTerm =
+        totalTerm + getCoefficientInRationalFunction(r, numeratorCoefficients,
+                                                     denominatorCoefficients) *
+                        QuasiPolynomial(numParams, sign);
+  }
+
+  return totalTerm.simplify();
+}
diff --git a/mlir/lib/Analysis/Presburger/QuasiPolynomial.cpp b/mlir/lib/Analysis/Presburger/QuasiPolynomial.cpp
@@ -97,10 +97,18 @@ QuasiPolynomial QuasiPolynomial::operator/(const Fraction x) const {
   return qp;
 }
 
-// Removes terms which evaluate to zero from the expression.
+// Removes terms which evaluate to zero from the expression and
+// folds affine functions which are constant into the
+// coefficients.
 QuasiPolynomial QuasiPolynomial::simplify() {
+  Fraction newCoeff = 0;
   SmallVector<Fraction> newCoeffs({});
+
+  std::vector<SmallVector<Fraction>> newAffineTerm({});
   std::vector<std::vector<SmallVector<Fraction>>> newAffine({});
+
+  unsigned numParam = getNumInputs();
+
   for (unsigned i = 0, e = coefficients.size(); i < e; i++) {
     // A term is zero if its coefficient is zero, or
     if (coefficients[i] == Fraction(0, 1))
@@ -114,9 +122,46 @@ QuasiPolynomial QuasiPolynomial::simplify() {
         });
     if (product_is_zero)
       continue;
+
+    // Now, we know the term is nonzero.
+
+    // We now eliminate the affine functions which are constant
+    // by merging them into the coefficients.
+    newAffineTerm = {};
+    newCoeff = coefficients[i];
+    for (ArrayRef<Fraction> term : affine[i]) {
+      bool allCoeffsZero = llvm::all_of(
+          term.slice(0, numParam), [](const Fraction c) { return c == 0; });
+      if (allCoeffsZero)
+        newCoeff *= term[numParam];
+      else
+        newAffineTerm.push_back(SmallVector<Fraction>(term));
+    }
+
+    newCoeffs.push_back(newCoeff);
+    newAffine.push_back(newAffineTerm);
+  }
+  return QuasiPolynomial(getNumInputs(), newCoeffs, newAffine);
+}
+
+// Groups together like terms in the expression: terms whose lists of
+// affine functions compare equal are merged into a single term whose
+// coefficient is the sum of the merged coefficients. The comparison is
+// element-wise, so two terms that list the same affine functions in a
+// different order are not merged. A merged term is kept even if its
+// coefficients sum to zero.
+QuasiPolynomial QuasiPolynomial::collectTerms() {
+  SmallVector<Fraction> newCoeffs({});
+  std::vector<std::vector<SmallVector<Fraction>>> newAffine({});
+
+  for (unsigned i = 0, e = affine.size(); i < e; i++) {
+    // Look for an already-collected term with the same affine functions.
+    // The entries of newAffine are pairwise distinct, so there is at most
+    // one match and the scan can stop as soon as it is found.
+    bool alreadyPresent = false;
+    for (unsigned j = 0, f = newAffine.size(); j < f; j++) {
+      if (affine[i] == newAffine[j]) {
+        newCoeffs[j] += coefficients[i];
+        alreadyPresent = true;
+        break;
+      }
+    }
+    if (alreadyPresent)
+      continue;
     newCoeffs.push_back(coefficients[i]);
     newAffine.push_back(affine[i]);
   }
+
   return QuasiPolynomial(getNumInputs(), newCoeffs, newAffine);
 }
 
diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp
diff --git a/mlir/unittests/Analysis/Presburger/BarvinokTest.cpp b/mlir/unittests/Analysis/Presburger/BarvinokTest.cpp
diff --git a/mlir/unittests/Analysis/Presburger/UtilsTest.cpp b/mlir/unittests/Analysis/Presburger/UtilsTest.cpp