[mlir][affine] Modify assertion into a user visible diagnostic (llvm#136474)

Prakhar-Dixit · GeorgeARM · commit b1cb86d1013c · 2025-05-07T16:29:27.000+01:00
Fixes llvm#122227

The loop’s induction variable (%i) is used to compute two different indices via affine.apply. This violates the vectorization assumption that each vectorized loop contributes at most one non-invariant index.

**Minimal crashing example:**

```
#map = affine_map<(d0)[s0] -> (d0 mod s0)>
#map1 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
func.func @single_loop_unrolling_2D_access_pattern(%arg0: index) -> memref<2x2xf32> {
  %c2 = arith.constant 2 : index
  %cst = arith.constant 1.0 : f32
  %alloc = memref.alloc() : memref<2x2xf32>
  affine.for %i = 0 to 4 {
    %row = affine.apply #map1(%i)[%c2]
    %col = affine.apply #map(%i)[%c2]
    affine.store %cst, %alloc[%row, %col] : memref<2x2xf32>
  }
  return %alloc : memref<2x2xf32>
}
```

The single loop %i contributes two indices (%row and %col) to the 2D memref access. The permutation map expects one index per vectorized loop dimension, but here one loop (%i) maps to two indices (dim=0 and dim=1). The code detects this when trying to assign the second index (dim=1) to the same vector dimension (perm[0]).
diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp
@@ -1185,6 +1185,32 @@ static Value vectorizeOperand(Value operand, VectorizationState &state) {
   return nullptr;
 }
 
+/// Returns true if some vectorized loop IV drives more than one entry of
+/// `indices` (i.e. more than one index is not invariant w.r.t. that IV).
+static bool isIVMappedToMultipleIndices(
+    ArrayRef<Value> indices,
+    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
+  for (const auto &kvp : loopToVectorDim) {
+    AffineForOp forOp = cast<AffineForOp>(kvp.first);
+    // Find which indices are invariant w.r.t. this loop IV.
+    llvm::DenseSet<Value> invariants =
+        affine::getInvariantAccesses(forOp.getInductionVar(), indices);
+    // Count how many vary (i.e. are not invariant); stop at the second one.
+    unsigned nonInvariant = 0;
+    for (Value idx : indices) {
+      if (invariants.count(idx))
+        continue;
+      if (++nonInvariant > 1) {
+        LLVM_DEBUG(dbgs() << "[early-vect] Bail out: IV "
+                          << forOp.getInductionVar() << " drives "
+                          << nonInvariant << " indices\n");
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 /// Vectorizes an affine load with the vectorization strategy in 'state' by
 /// generating a 'vector.transfer_read' op with the proper permutation map
 /// inferred from the indices of the load. The new 'vector.transfer_read' is
@@ -1217,6 +1243,9 @@ static Operation *vectorizeAffineLoad(AffineLoadOp loadOp,
     indices.append(mapOperands.begin(), mapOperands.end());
   }
 
+  if (isIVMappedToMultipleIndices(indices, state.vecLoopToVecDim))
+    return nullptr;
+
   // Compute permutation map using the information of new vector loops.
   auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                            indices, state.vecLoopToVecDim);
@@ -1262,6 +1291,9 @@ static Operation *vectorizeAffineStore(AffineStoreOp storeOp,
   else
     indices.append(mapOperands.begin(), mapOperands.end());
 
+  if (isIVMappedToMultipleIndices(indices, state.vecLoopToVecDim))
+    return nullptr;
+
   // Compute permutation map using the information of new vector loops.
   auto permutationMap = makePermutationMap(state.builder.getInsertionBlock(),
                                            indices, state.vecLoopToVecDim);
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir
@@ -9,3 +9,34 @@ func.func @unparallel_loop_reduction_unsupported(%in: memref<256x512xf32>, %out:
  }
  return
 }
+
+// -----
+
+#map = affine_map<(d0)[s0] -> (d0 mod s0)>
+#map1 = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+
+func.func @iv_mapped_to_multiple_indices_unsupported(%arg0: index) -> memref<2x2xf32> {
+  %c2 = arith.constant 2 : index
+  %cst = arith.constant 1.0 : f32
+  %alloc = memref.alloc() : memref<2x2xf32>
+  // The single IV %i drives both store indices (%row and %col).
+  affine.for %i = 0 to 4 {
+    %row = affine.apply #map1(%i)[%c2]
+    %col = affine.apply #map(%i)[%c2]
+    affine.store %cst, %alloc[%row, %col] : memref<2x2xf32>
+  }
+
+  return %alloc : memref<2x2xf32>
+}
+
+// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)>
+// CHECK: #[[$ATTR_1:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)>
+
+// CHECK-LABEL:   func.func @iv_mapped_to_multiple_indices_unsupported(
+// CHECK-SAME:      %[[VAL_0:.*]]: index) -> memref<2x2xf32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 2 : index
+// CHECK:           affine.for %[[VAL_4:.*]] = 0 to 4 {
+// CHECK:             %[[VAL_5:.*]] = affine.apply #[[$ATTR_0]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
+// CHECK:             %[[VAL_6:.*]] = affine.apply #[[$ATTR_1]](%[[VAL_4]]){{\[}}%[[VAL_1]]]
+// CHECK:           }
+// CHECK:         }