Revert "[ValueTracking] Improve Bitcast handling to match SDAG" #145191


Merged: 1 commit merged into main from revert-125935-tracking on Jun 21, 2025

Conversation

aeubanks (Contributor)

Reverts #125935

Causes miscompiles; see comments in #125935

aeubanks requested a review from nikic as a code owner on June 21, 2025 at 22:19
llvmbot added the llvm:instcombine (covers the InstCombine, InstSimplify and AggressiveInstCombine passes), llvm:analysis (includes value tracking, cost tables and constant folding), and llvm:transforms labels on Jun 21, 2025
llvmbot (Member) commented on Jun 21, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Arthur Eubanks (aeubanks)

Changes

Reverts llvm/llvm-project#125935

Causes miscompiles; see comments in #125935


Full diff: https://github.com/llvm/llvm-project/pull/145191.diff

3 Files Affected:

  • (modified) llvm/lib/Analysis/ValueTracking.cpp (+2-25)
  • (modified) llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll (+2-2)
  • (modified) llvm/test/Transforms/InstCombine/bitcast-known-bits.ll (+14-7)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 73320b556f825..a17417cb5189c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1346,8 +1346,6 @@ static void computeKnownBitsFromOperator(const Operator *I,
         isa<ScalableVectorType>(I->getType()))
       break;
 
-    unsigned NumElts = DemandedElts.getBitWidth();
-    bool IsLE = Q.DL.isLittleEndian();
     // Look through a cast from narrow vector elements to wider type.
     // Examples: v4i32 -> v2i64, v3i8 -> v24
     unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
@@ -1366,6 +1364,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
       //
       // The known bits of each sub-element are then inserted into place
       // (dependent on endian) to form the full result of known bits.
+      unsigned NumElts = DemandedElts.getBitWidth();
       unsigned SubScale = BitWidth / SubBitWidth;
       APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
       for (unsigned i = 0; i != NumElts; ++i) {
@@ -1377,32 +1376,10 @@ static void computeKnownBitsFromOperator(const Operator *I,
       for (unsigned i = 0; i != SubScale; ++i) {
         computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc, Q,
                          Depth + 1);
-        unsigned ShiftElt = IsLE ? i : SubScale - 1 - i;
+        unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
         Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
       }
     }
-    // Look through a cast from wider vector elements to narrow type.
-    // Examples: v2i64 -> v4i32
-    if (SubBitWidth % BitWidth == 0) {
-      unsigned SubScale = SubBitWidth / BitWidth;
-      KnownBits KnownSrc(SubBitWidth);
-      APInt SubDemandedElts =
-          APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
-      computeKnownBits(I->getOperand(0), SubDemandedElts, KnownSrc, Q,
-                       Depth + 1);
-
-      Known.Zero.setAllBits();
-      Known.One.setAllBits();
-      for (unsigned i = 0; i != SubScale; ++i) {
-        if (DemandedElts[i]) {
-          unsigned Shifts = IsLE ? i : NumElts - 1 - i;
-          unsigned Offset = (Shifts % SubScale) * BitWidth;
-          Known = Known.intersectWith(KnownSrc.extractBits(BitWidth, Offset));
-          if (Known.isUnknown())
-            break;
-        }
-      }
-    }
     break;
   }
   case Instruction::SExt: {
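
For context, here is a minimal IR sketch of the narrow-to-wide direction that the retained code above still handles. It is written for this summary rather than taken from the patch, and mirrors the knownbits_shuffle_masked_nibble_shift test further down: masking every i8 lane to its low nibble leaves the top four bits of each i16 lane of the bitcast known zero, which is what allows InstCombine to mark the shift nuw in the CHECK lines below.

; Hypothetical example (not from the patch): narrow-to-wide bitcast <16 x i8> -> <8 x i16>.
; Each byte is masked to 0x0F, so every i16 lane has bits 4-7 and 12-15 known zero;
; shifting left by 4 cannot shift out any set bits, so the shl can be inferred 'nuw'.
define <8 x i16> @narrow_to_wide_knownbits(<16 x i8> %v) {
  %masked = and <16 x i8> %v, splat (i8 15)
  %wide = bitcast <16 x i8> %masked to <8 x i16>
  %shl = shl <8 x i16> %wide, splat (i16 4)
  ret <8 x i16> %shl
}
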
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
index cc252ae53803b..db56080a3ea2b 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
@@ -3732,6 +3732,7 @@ define <4 x i64> @test_avx2_psrl_0() {
   ret <4 x i64> %16
 }
 
+; FIXME: Failure to peek through bitcasts to ensure psllq shift amount is within bounds.
 define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-LABEL: @PR125228(
 ; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i64> [[S:%.*]], splat (i64 63)
@@ -3740,8 +3741,7 @@ define <2 x i64> @PR125228(<2 x i64> %v, <2 x i64> %s) {
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[MASK]] to <16 x i8>
 ; CHECK-NEXT:    [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[CAST3:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i64> [[CAST3]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[SLL1:%.*]] = shl <2 x i64> [[V]], [[TMP2]]
+; CHECK-NEXT:    [[SLL1:%.*]] = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> [[V]], <2 x i64> [[CAST3]])
 ; CHECK-NEXT:    [[SHUFP_UNCASTED:%.*]] = shufflevector <2 x i64> [[SLL0]], <2 x i64> [[SLL1]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    ret <2 x i64> [[SHUFP_UNCASTED]]
 ;
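
To make the FIXME above concrete, the following is a hypothetical reduction of the PR125228 pattern, based only on the CHECK lines shown here (the test's input body is not part of this diff). The shift amount is masked to the range [0, 63] as a <2 x i64>, but reaches the psll.q intrinsic only after a round trip through <16 x i8>; proving that the amount is in range, so that psll.q can be rewritten as a plain shl, requires known bits to survive the wide-to-narrow bitcast that this revert removes from ValueTracking.

; Hypothetical reduction (assumed, not the actual test input).
; The second i64 lane of %mask (known <= 63) is moved into the low 8 bytes and
; bitcast back to <2 x i64>. With the reverted wide-to-narrow handling, InstCombine
; could prove the shift count is below 64 and fold the intrinsic to a shl; without
; it, the call remains, as the updated CHECK lines above show.
define <2 x i64> @psllq_amount_through_bitcast(<2 x i64> %v, <2 x i64> %s) {
  %mask = and <2 x i64> %s, splat (i64 63)
  %cast = bitcast <2 x i64> %mask to <16 x i8>
  %amt.bytes = shufflevector <16 x i8> %cast, <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %amt = bitcast <16 x i8> %amt.bytes to <2 x i64>
  %sll = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %amt)
  ret <2 x i64> %sll
}

declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>)
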
diff --git a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
index 65b43df752f76..3e47e775e3a28 100644
--- a/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-known-bits.ll
@@ -12,7 +12,8 @@ define <16 x i8> @knownbits_bitcast_masked_shift(<16 x i8> %arg1, <16 x i8> %arg
 ; CHECK-NEXT:    [[BITCAST4:%.*]] = bitcast <16 x i8> [[OR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL5:%.*]] = shl nuw <8 x i16> [[BITCAST4]], splat (i16 2)
 ; CHECK-NEXT:    [[BITCAST6:%.*]] = bitcast <8 x i16> [[SHL5]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST6]]
+; CHECK-NEXT:    [[AND7:%.*]] = and <16 x i8> [[BITCAST6]], splat (i8 -52)
+; CHECK-NEXT:    ret <16 x i8> [[AND7]]
 ;
   %and = and <16 x i8> %arg1, splat (i8 3)
   %and3 = and <16 x i8> %arg2, splat (i8 48)
@@ -32,7 +33,8 @@ define <16 x i8> @knownbits_shuffle_masked_nibble_shift(<16 x i8> %arg)  {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
+; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
+; CHECK-NEXT:    ret <16 x i8> [[AND3]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
@@ -51,7 +53,8 @@ define <16 x i8> @knownbits_reverse_shuffle_masked_shift(<16 x i8> %arg)  {
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <16 x i8> [[SHUFFLEVECTOR]] to <8 x i16>
 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <8 x i16> [[BITCAST1]], splat (i16 4)
 ; CHECK-NEXT:    [[BITCAST2:%.*]] = bitcast <8 x i16> [[SHL]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST2]]
+; CHECK-NEXT:    [[AND3:%.*]] = and <16 x i8> [[BITCAST2]], splat (i8 -16)
+; CHECK-NEXT:    ret <16 x i8> [[AND3]]
 ;
   %and = and <16 x i8> %arg, splat (i8 15)
   %shufflevector = shufflevector <16 x i8> %and, <16 x i8> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
@@ -67,7 +70,8 @@ define <16 x i8> @knownbits_extract_bit(<8 x i16> %arg)  {
 ; CHECK-SAME: <8 x i16> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <8 x i16> [[ARG]], splat (i16 15)
 ; CHECK-NEXT:    [[BITCAST1:%.*]] = bitcast <8 x i16> [[LSHR]] to <16 x i8>
-; CHECK-NEXT:    ret <16 x i8> [[BITCAST1]]
+; CHECK-NEXT:    [[AND:%.*]] = and <16 x i8> [[BITCAST1]], splat (i8 1)
+; CHECK-NEXT:    ret <16 x i8> [[AND]]
 ;
   %lshr = lshr <8 x i16> %arg, splat (i16 15)
   %bitcast1 = bitcast <8 x i16> %lshr to <16 x i8>
@@ -84,8 +88,7 @@ define { i32, i1 } @knownbits_popcount_add_with_overflow(<2 x i64> %arg1, <2 x i
 ; CHECK-NEXT:    [[CALL9:%.*]] = tail call range(i64 0, 65) <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[ARG2]])
 ; CHECK-NEXT:    [[BITCAST10:%.*]] = bitcast <2 x i64> [[CALL9]] to <4 x i32>
 ; CHECK-NEXT:    [[EXTRACTELEMENT11:%.*]] = extractelement <4 x i32> [[BITCAST10]], i64 0
-; CHECK-NEXT:    [[CALL12:%.*]] = add nuw nsw i32 [[EXTRACTELEMENT]], [[EXTRACTELEMENT11]]
-; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { i32, i1 } { i32 poison, i1 false }, i32 [[CALL12]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[EXTRACTELEMENT]], i32 [[EXTRACTELEMENT11]])
 ; CHECK-NEXT:    ret { i32, i1 } [[TMP1]]
 ;
   %call = tail call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %arg1)
@@ -107,7 +110,11 @@ define <16 x i8> @knownbits_shuffle_add_shift_v32i8(<16 x i8> %arg1, <8 x i16> %
 ; CHECK-NEXT:    [[BITCAST11:%.*]] = bitcast <8 x i16> [[SHL10]] to <16 x i8>
 ; CHECK-NEXT:    [[ADD12:%.*]] = add <16 x i8> [[BITCAST11]], [[BITCAST7]]
 ; CHECK-NEXT:    [[ADD14:%.*]] = add <16 x i8> [[ADD12]], [[ARG1]]
-; CHECK-NEXT:    ret <16 x i8> [[ADD14]]
+; CHECK-NEXT:    [[BITCAST14:%.*]] = bitcast <16 x i8> [[ADD12]] to <8 x i16>
+; CHECK-NEXT:    [[SHL15:%.*]] = shl <8 x i16> [[BITCAST14]], splat (i16 8)
+; CHECK-NEXT:    [[BITCAST16:%.*]] = bitcast <8 x i16> [[SHL15]] to <16 x i8>
+; CHECK-NEXT:    [[ADD13:%.*]] = add <16 x i8> [[ADD14]], [[BITCAST16]]
+; CHECK-NEXT:    ret <16 x i8> [[ADD13]]
 ;
   %shl6 = shl <8 x i16> %arg2, splat (i16 8)
   %bitcast7 = bitcast <8 x i16> %shl6 to <16 x i8>
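
The and instructions restored in the tests above all come from the same wide-to-narrow direction. As a minimal sketch of the knownbits_extract_bit case (assumed shape, not copied from the test file): each i16 lane of the lshr is 0 or 1, so the reverted handling let InstCombine prove that every i8 lane of the bitcast was already 0 or 1 and drop the trailing and; after this revert the mask is kept.

; Minimal sketch, not taken verbatim from the test file.
define <16 x i8> @extract_sign_bits(<8 x i16> %arg) {
  ; each i16 lane is 0 or 1 after the logical shift right by 15
  %lshr = lshr <8 x i16> %arg, splat (i16 15)
  ; wide-to-narrow bitcast: knowing every byte is <= 1 needed the reverted code
  %bitcast = bitcast <8 x i16> %lshr to <16 x i8>
  ; redundant when those known bits are available; kept after the revert
  %and = and <16 x i8> %bitcast, splat (i8 1)
  ret <16 x i8> %and
}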

aeubanks merged commit 5708851 into main on Jun 21, 2025
10 of 11 checks passed
aeubanks deleted the revert-125935-tracking branch on June 21, 2025 at 23:02
Jaddyen pushed a commit to Jaddyen/llvm-project that referenced this pull request Jun 23, 2025