[ValueTracking] ComputeNumSignBitsImpl - add basic handling of BITCAST nodes (#127218)

vortex73 · web-flow · commit 6311e3fcc853 · 2025-03-06T08:30:36.000Z
When a wider scalar/vector type containing all sign bits is bitcast to a narrower vector type, we can deduce that the resulting narrow elements will also be all sign bits. This matches existing behavior in SelectionDAG and helps optimize cases involving SSE intrinsics where sign-extended values are bitcast between different vector types.

The current implementation fails to recognize that an arithmetic right shift is redundant when applied to elements that are already known to be all sign bits. This PR improves ComputeNumSignBitsImpl to track this information through bitcasts, enabling the optimization of such cases.

```
%ext = sext <1 x i1> %cmp to <1 x i8>
%sub = bitcast <1 x i8> %ext to <4 x i2>
%sra = ashr <4 x i2> %sub, <i2 1, i2 1, i2 1, i2 1>
; Can be simplified to just:
%sub = bitcast <1 x i8> %ext to <4 x i2>
```

Closes #87624
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3987,6 +3987,31 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
   if (auto *U = dyn_cast<Operator>(V)) {
     switch (Operator::getOpcode(V)) {
     default: break;
+    case Instruction::BitCast: {
+      Value *Src = U->getOperand(0);
+      Type *SrcTy = Src->getType();
+
+      // Sign bits are tracked through a bitcast only when the source is an
+      // integer or integer vector; any other source type is left unhandled.
+      if (!SrcTy->isIntOrIntVectorTy())
+        break;
+
+      unsigned SrcBits = SrcTy->getScalarSizeInBits();
+
+      // Each 'large' source element must split evenly into TyBits-wide pieces.
+      if ((SrcBits % TyBits) != 0)
+        break;
+
+      // Only fixed-width vector destinations are handled; others just break.
+      if (isa<FixedVectorType>(Ty)) {
+        // Src is queried without DemandedElts (NOTE(review): presumably all
+        // source elements). All-sign-bit elements split into all-sign pieces.
+        Tmp = ComputeNumSignBits(Src, Depth + 1, Q);
+        if (Tmp == SrcBits)
+          return TyBits;
+      }
+      break;
+    }
     case Instruction::SExt:
       Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
       return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
diff --git a/llvm/test/Transforms/InstCombine/compute-sign-bits-bitcast.ll b/llvm/test/Transforms/InstCombine/compute-sign-bits-bitcast.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
+
+; Case 1: Vector to Vector bitcast - ashr of the all-sign-bit lanes is removed
+define <4 x i2> @test_vector_to_vector(<1 x i8> %a0, <1 x i8> %a1) {
+; CHECK-LABEL: define <4 x i2> @test_vector_to_vector(
+; CHECK-SAME: <1 x i8> [[A0:%.*]], <1 x i8> [[A1:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <1 x i8> [[A0]], [[A1]]
+; CHECK-NEXT:    [[EXT:%.*]] = sext <1 x i1> [[CMP]] to <1 x i8>
+; CHECK-NEXT:    [[SUB:%.*]] = bitcast <1 x i8> [[EXT]] to <4 x i2>
+; CHECK-NEXT:    ret <4 x i2> [[SUB]]
+;
+  %cmp = icmp sgt <1 x i8> %a0, %a1
+  %ext = sext <1 x i1> %cmp to <1 x i8>
+  %sub = bitcast <1 x i8> %ext to <4 x i2>
+  %sra = ashr <4 x i2> %sub, <i2 1, i2 1, i2 1, i2 1>
+  ret <4 x i2> %sra
+}
+
+; Case 2: Scalar to Vector bitcast - sext i1 to i32 split into <2 x i16>; ashr is removed
+define <2 x i16> @test_scalar_to_vector(i1 %cond) {
+; CHECK-LABEL: define <2 x i16> @test_scalar_to_vector(
+; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[EXT:%.*]] = sext i1 [[COND]] to i32
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i32 [[EXT]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[BC]]
+;
+  %ext = sext i1 %cond to i32
+  %bc = bitcast i32 %ext to <2 x i16>
+  %sra = ashr <2 x i16> %bc, <i16 8, i16 8>
+  ret <2 x i16> %sra
+}
+
+
+; Case 3: Multiple right shifts - two chained non-uniform ashr are both removed
+define <8 x i8> @test_multiple_shifts(i1 %cond) {
+; CHECK-LABEL: define <8 x i8> @test_multiple_shifts(
+; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[EXT:%.*]] = sext i1 [[COND]] to i64
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i64 [[EXT]] to <8 x i8>
+; CHECK-NEXT:    ret <8 x i8> [[BC]]
+;
+  %ext = sext i1 %cond to i64
+  %bc = bitcast i64 %ext to <8 x i8>
+  %sra1 = ashr <8 x i8> %bc, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 1>
+  %sra2 = ashr <8 x i8> %sra1, <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <8 x i8> %sra2
+}
+
+; (Negative) Case 4: Test with non-sign-extended source - the ashr must be kept
+define <4 x i8> @test_non_sign_extended(i32 %val) {
+; CHECK-LABEL: define <4 x i8> @test_non_sign_extended(
+; CHECK-SAME: i32 [[VAL:%.*]]) {
+; CHECK-NEXT:    [[BC:%.*]] = bitcast i32 [[VAL]] to <4 x i8>
+; CHECK-NEXT:    [[SRA:%.*]] = ashr <4 x i8> [[BC]], splat (i8 1)
+; CHECK-NEXT:    ret <4 x i8> [[SRA]]
+;
+  %bc = bitcast i32 %val to <4 x i8>
+  %sra = ashr <4 x i8> %bc, <i8 1, i8 1, i8 1, i8 1>
+  ret <4 x i8> %sra
+}