Skip to content

Commit 6311e3f

Browse files
authored
[ValueTracking] ComputeNumSignBitsImpl - add basic handling of BITCAST nodes (#127218)
When a wider scalar/vector type containing all sign bits is bitcast to a narrower vector type, we can deduce that the resulting narrow elements will also be all sign bits. This matches existing behavior in SelectionDAG and helps optimize cases involving SSE intrinsics where sign-extended values are bitcast between different vector types. The current implementation fails to recognize that an arithmetic right shift is redundant when applied to elements that are already known to be all sign bits. This PR improves ComputeNumSignBitsImpl to track this information through bitcasts, enabling the optimization of such cases.

```
%ext = sext <1 x i1> %cmp to <1 x i8>
%sub = bitcast <1 x i8> %ext to <4 x i2>
%sra = ashr <4 x i2> %sub, <i2 1, i2 1, i2 1, i2 1>

; Can be simplified to just:
%sub = bitcast <1 x i8> %ext to <4 x i2>
```

Closes #87624
1 parent 1987d18 commit 6311e3f

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3987,6 +3987,31 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
39873987
if (auto *U = dyn_cast<Operator>(V)) {
39883988
switch (Operator::getOpcode(V)) {
39893989
default: break;
3990+
case Instruction::BitCast: {
3991+
Value *Src = U->getOperand(0);
3992+
Type *SrcTy = Src->getType();
3993+
3994+
// Skip if the source type is not an integer or integer vector type
3995+
// This ensures we only process integer-like types
3996+
if (!SrcTy->isIntOrIntVectorTy())
3997+
break;
3998+
3999+
unsigned SrcBits = SrcTy->getScalarSizeInBits();
4000+
4001+
// Bitcast 'large element' scalar/vector to 'small element' vector.
4002+
if ((SrcBits % TyBits) != 0)
4003+
break;
4004+
4005+
// Only proceed if the destination type is a fixed-size vector
4006+
if (isa<FixedVectorType>(Ty)) {
4007+
// Fast case - sign splat can be simply split across the small elements.
4008+
// This works for both vector and scalar sources
4009+
Tmp = ComputeNumSignBits(Src, Depth + 1, Q);
4010+
if (Tmp == SrcBits)
4011+
return TyBits;
4012+
}
4013+
break;
4014+
}
39904015
case Instruction::SExt:
39914016
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
39924017
return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
3+
4+
; Case 1: Vector to Vector bitcast
5+
define <4 x i2> @test_vector_to_vector(<1 x i8> %a0, <1 x i8> %a1) {
6+
; CHECK-LABEL: define <4 x i2> @test_vector_to_vector(
7+
; CHECK-SAME: <1 x i8> [[A0:%.*]], <1 x i8> [[A1:%.*]]) {
8+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <1 x i8> [[A0]], [[A1]]
9+
; CHECK-NEXT: [[EXT:%.*]] = sext <1 x i1> [[CMP]] to <1 x i8>
10+
; CHECK-NEXT: [[SUB:%.*]] = bitcast <1 x i8> [[EXT]] to <4 x i2>
11+
; CHECK-NEXT: ret <4 x i2> [[SUB]]
12+
;
13+
%cmp = icmp sgt <1 x i8> %a0, %a1
14+
%ext = sext <1 x i1> %cmp to <1 x i8>
15+
%sub = bitcast <1 x i8> %ext to <4 x i2>
16+
%sra = ashr <4 x i2> %sub, <i2 1, i2 1, i2 1, i2 1>
17+
ret <4 x i2> %sra
18+
}
19+
20+
; Case 2: Scalar to Vector bitcast
21+
define <2 x i16> @test_scalar_to_vector(i1 %cond) {
22+
; CHECK-LABEL: define <2 x i16> @test_scalar_to_vector(
23+
; CHECK-SAME: i1 [[COND:%.*]]) {
24+
; CHECK-NEXT: [[EXT:%.*]] = sext i1 [[COND]] to i32
25+
; CHECK-NEXT: [[BC:%.*]] = bitcast i32 [[EXT]] to <2 x i16>
26+
; CHECK-NEXT: ret <2 x i16> [[BC]]
27+
;
28+
%ext = sext i1 %cond to i32
29+
%bc = bitcast i32 %ext to <2 x i16>
30+
%sra = ashr <2 x i16> %bc, <i16 8, i16 8>
31+
ret <2 x i16> %sra
32+
}
33+
34+
35+
; Case 3: Multiple right shifts
36+
define <8 x i8> @test_multiple_shifts(i1 %cond) {
37+
; CHECK-LABEL: define <8 x i8> @test_multiple_shifts(
38+
; CHECK-SAME: i1 [[COND:%.*]]) {
39+
; CHECK-NEXT: [[EXT:%.*]] = sext i1 [[COND]] to i64
40+
; CHECK-NEXT: [[BC:%.*]] = bitcast i64 [[EXT]] to <8 x i8>
41+
; CHECK-NEXT: ret <8 x i8> [[BC]]
42+
;
43+
%ext = sext i1 %cond to i64
44+
%bc = bitcast i64 %ext to <8 x i8>
45+
%sra1 = ashr <8 x i8> %bc, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 1>
46+
%sra2 = ashr <8 x i8> %sra1, <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
47+
ret <8 x i8> %sra2
48+
}
49+
50+
; (Negative) Case 4: Test with non-sign-extended source
51+
define <4 x i8> @test_non_sign_extended(i32 %val) {
52+
; CHECK-LABEL: define <4 x i8> @test_non_sign_extended(
53+
; CHECK-SAME: i32 [[VAL:%.*]]) {
54+
; CHECK-NEXT: [[BC:%.*]] = bitcast i32 [[VAL]] to <4 x i8>
55+
; CHECK-NEXT: [[SRA:%.*]] = ashr <4 x i8> [[BC]], splat (i8 1)
56+
; CHECK-NEXT: ret <4 x i8> [[SRA]]
57+
;
58+
%bc = bitcast i32 %val to <4 x i8>
59+
%sra = ashr <4 x i8> %bc, <i8 1, i8 1, i8 1, i8 1>
60+
ret <4 x i8> %sra
61+
}

0 commit comments

Comments
 (0)