[ValueTracking] Implement computeKnownBits for llvm.vector.reduce.{add,mul}

goldsteinn · goldsteinn · commit 9b0be08258cb · 2024-04-10T16:12:44.000-05:00
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1647,6 +1647,26 @@ static void computeKnownBitsFromOperator(const Operator *I,
         } else
           Known.One.clearAllBits();
         break;
+      case Intrinsic::vector_reduce_mul:
+      case Intrinsic::vector_reduce_add:
+        // We compute the bits common to all elements, then apply the reduce op
+        // NumElts - 1 times. This is mostly useful for known high zeros.
+        if (auto *VecTy =
+                dyn_cast<FixedVectorType>(I->getOperand(0)->getType())) {
+          computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+          KnownBits SingleKnown = Known;
+          for (unsigned i = 1, e = VecTy->getNumElements(); i < e; ++i) {
+            if (Known.isUnknown())
+              break;
+            if (II->getIntrinsicID() == Intrinsic::vector_reduce_add)
+              Known = KnownBits::computeForAddSub(
+                  /*Add=*/true, /*NSW=*/false, /*NUW=*/false, SingleKnown,
+                  Known);
+            else
+              Known = KnownBits::mul(SingleKnown, Known);
+          }
+        }
+        break;
       case Intrinsic::umin:
         computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
         computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -1126,10 +1126,7 @@ define i8 @known_reduce_xor_odd_fail(<3 x i8> %xx) {
 
 define i8 @known_reduce_add(<2 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_add(
-; CHECK-NEXT:    [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 8
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %x = and <2 x i8> %xx, <i8 3, i8 3>
   %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
@@ -1165,10 +1162,7 @@ define i8 @known_reduce_add_fail2(<4 x i8> %xx) {
 
 define i8 @known_reduce_add2(<4 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_add2(
-; CHECK-NEXT:    [[X:%.*]] = and <4 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 32
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %x = and <4 x i8> %xx, <i8 3, i8 3, i8 3, i8 3>
   %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
@@ -1178,10 +1172,7 @@ define i8 @known_reduce_add2(<4 x i8> %xx) {
 
 define i8 @known_reduce_add3(<2 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_add3(
-; CHECK-NEXT:    [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %x = or <2 x i8> %xx, <i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.add(<2 x i8> %x)
@@ -1191,10 +1182,7 @@ define i8 @known_reduce_add3(<2 x i8> %xx) {
 
 define i8 @known_reduce_add33(<3 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_add33(
-; CHECK-NEXT:    [[X:%.*]] = or <3 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 1
 ;
   %x = or <3 x i8> %xx, <i8 1, i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %x)
@@ -1204,10 +1192,7 @@ define i8 @known_reduce_add33(<3 x i8> %xx) {
 
 define i8 @known_reduce_add34(<4 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_add34(
-; CHECK-NEXT:    [[X:%.*]] = or <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %x = or <4 x i8> %xx, <i8 1, i8 1, i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %x)
@@ -1245,10 +1230,7 @@ define i8 @known_reduce_add4_fail(<2 x i8> %xx) {
 
 define i8 @known_reduce_mul(<2 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_mul(
-; CHECK-NEXT:    [[X:%.*]] = and <2 x i8> [[XX:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 16
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %x = and <2 x i8> %xx, <i8 3, i8 3>
   %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)
@@ -1284,10 +1266,7 @@ define i8 @known_reduce_mul_fail2(<3 x i8> %xx) {
 
 define i8 @known_reduce_mul2(<3 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_mul2(
-; CHECK-NEXT:    [[X:%.*]] = and <3 x i8> [[XX:%.*]], <i8 3, i8 3, i8 3>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 64
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 0
 ;
   %x = and <3 x i8> %xx, <i8 3, i8 3, i8 3>
   %v = call i8 @llvm.vector.reduce.mul.v3i8(<3 x i8> %x)
@@ -1297,10 +1276,7 @@ define i8 @known_reduce_mul2(<3 x i8> %xx) {
 
 define i8 @known_reduce_mul3(<2 x i8> %xx) {
 ; CHECK-LABEL: @known_reduce_mul3(
-; CHECK-NEXT:    [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.mul.v2i8(<2 x i8> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[V]], 1
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 1
 ;
   %x = or <2 x i8> %xx, <i8 1, i8 1>
   %v = call i8 @llvm.vector.reduce.mul(<2 x i8> %x)