[VPlan] Simplify MUL operands at recipe construction.

fhahn · fhahn · commit 954ed05c1001 · 2024-09-02T21:25:31.000+01:00
This moves the logic that creates simplified operands using SCEV to MUL recipe creation. This is needed to match the behavior of the legacy cost model. The remaining TODOs are to extend this to other opcodes and to move it to a VPlan transform. Note that this also restricts the SCEV simplifications we apply, so that they more precisely match the cases handled by the legacy cost model. Fixes #107015.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8393,6 +8393,20 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
   case Instruction::Sub:
   case Instruction::Xor:
   case Instruction::Freeze:
+    if (I->getOpcode() == Instruction::Mul) {
+      // Simplify operands of multiplications using SCEV. This is needed at the
+      // moment to match the behavior of the legacy cost-model.
+      // TODO: Generalize to any opcode and move to VPlan transformation.
+      SmallVector<VPValue *> NewOps(Operands);
+      ScalarEvolution &SE = *PSE.getSE();
+      for (unsigned I = 0; I < Operands.size(); ++I) {
+        Value *V = NewOps[I]->getUnderlyingValue();
+        if (!isa<Constant>(V) && SE.isSCEVable(V->getType()))
+          if (auto *C = dyn_cast<SCEVConstant>(PSE.getSE()->getSCEV(V)))
+            NewOps[I] = Plan.getOrAddLiveIn(C->getValue());
+      }
+      return new VPWidenRecipe(*I, make_range(NewOps.begin(), NewOps.end()));
+    }
     return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end()));
   };
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -174,11 +174,6 @@ class VPRecipeBuilder {
       if (auto *R = Ingredient2Recipe.lookup(I))
         return R->getVPSingleValue();
     }
-    ScalarEvolution &SE = *PSE.getSE();
-    if (!isa<Constant>(V) && SE.isSCEVable(V->getType()))
-      if (auto *C = dyn_cast<SCEVConstant>(PSE.getSE()->getSCEV(V)))
-        return Plan.getOrAddLiveIn(C->getValue());
-
     return Plan.getOrAddLiveIn(V);
   }
 };
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll b/llvm/test/Transforms/LoopVectorize/AArch64/mul-simplification.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -p loop-vectorize -S %s | FileCheck %s
+
+target triple = "arm64-apple-macosx"
+
+; Test case for https://github.com/llvm/llvm-project/issues/107015.
+define i64 @mul_select_operand_known_1_via_scev() {
+; CHECK-LABEL: define i64 @mul_select_operand_known_1_via_scev() {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ <i64 12, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_PHI]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[VEC_PHI]])
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ 12, %[[ENTRY]] ]
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP1_I:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[NARROW_I:%.*]] = select i1 [[CMP1_I]], i32 1, i32 [[IV]]
+; CHECK-NEXT:    [[MUL:%.*]] = zext nneg i32 [[NARROW_I]] to i64
+; CHECK-NEXT:    [[RED_NEXT]] = mul nsw i64 [[RED]], [[MUL]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[RES:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP0]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %red = phi i64 [ 12, %entry ], [ %red.next, %loop ] ; running product, seeded with 12
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] ; induction variable, starts at 0
+  %0 = and i32 %iv, 1 ; low bit of %iv
+  %cmp1.i = icmp eq i32 %0, 0 ; true when %iv is even
+  %narrow.i = select i1 %cmp1.i, i32 1, i32 %iv ; 1 for even %iv, else %iv; with %iv in {0, 1} this is always 1
+  %mul = zext nneg i32 %narrow.i to i64 ; widened to the reduction type
+  %red.next = mul nsw i64 %red, %mul ; the multiply whose operand is not a literal constant in the IR
+  %iv.next = add nuw nsw i32 %iv, 1
+  %ec = icmp eq i32 %iv, 1 ; leave after the iteration with %iv == 1, so %iv only takes the values 0 and 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = phi i64 [ %red.next, %loop ] ; final product is the function result
+  ret i64 %res
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

Original file line number	Diff line number	Diff line change
`@@ -174,11 +174,6 @@ class VPRecipeBuilder {`
`174`	`174`	`if (auto *R = Ingredient2Recipe.lookup(I))`
`175`	`175`	`return R->getVPSingleValue();`
`176`	`176`	`}`
`177`		`- ScalarEvolution &SE = *PSE.getSE();`
`178`		`- if (!isa<Constant>(V) && SE.isSCEVable(V->getType()))`
`179`		`- if (auto *C = dyn_cast<SCEVConstant>(PSE.getSE()->getSCEV(V)))`
`180`		`- return Plan.getOrAddLiveIn(C->getValue());`
`181`		`-`
`182`	`177`	`return Plan.getOrAddLiveIn(V);`
`183`	`178`	`}`
`184`	`179`	`};`