Skip to content

Commit 251377c

Browse files
authored
[InstCombine] Fold shift+cttz with power of 2 operands (#127055)
#121386 introduced cttz intrinsics, which caused a regression where vscale/vscale divisions could no longer be constant-folded. This fold was suggested as a fix in #126411. Alive2 proof: https://alive2.llvm.org/ce/z/gWbtPw
1 parent 77410f2 commit 251377c

File tree

2 files changed

+46
-0
lines changed

2 files changed

+46
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1613,6 +1613,22 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
16131613
if (Instruction *Overflow = foldLShrOverflowBit(I))
16141614
return Overflow;
16151615

1616+
// Transform ((pow2 << x) >> cttz(pow2 << y)) -> ((1 << x) >> y)
1617+
Value *Shl0_Op0, *Shl0_Op1, *Shl1_Op1;
1618+
BinaryOperator *Shl1;
1619+
if (match(Op0, m_Shl(m_Value(Shl0_Op0), m_Value(Shl0_Op1))) &&
1620+
match(Op1, m_Intrinsic<Intrinsic::cttz>(m_BinOp(Shl1))) &&
1621+
match(Shl1, m_Shl(m_Specific(Shl0_Op0), m_Value(Shl1_Op1))) &&
1622+
isKnownToBeAPowerOfTwo(Shl0_Op0, /*OrZero=*/true, 0, &I)) {
1623+
auto *Shl0 = cast<BinaryOperator>(Op0);
1624+
bool HasNUW = Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap();
1625+
bool HasNSW = Shl0->hasNoSignedWrap() && Shl1->hasNoSignedWrap();
1626+
if (HasNUW || HasNSW) {
1627+
Value *NewShl = Builder.CreateShl(ConstantInt::get(Shl1->getType(), 1),
1628+
Shl0_Op1, "", HasNUW, HasNSW);
1629+
return BinaryOperator::CreateLShr(NewShl, Shl1_Op1);
1630+
}
1631+
}
16161632
return nullptr;
16171633
}
16181634

llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,34 @@ entry:
103103
ret i32 %res
104104
}
105105

106+
; Test for the lshr-of-shl by cttz-of-shl fold on i64, with vscale as the
; shared shl base: (vscale << 4) >> cttz(vscale << 2) is expected to fold to
; the constant (1 << 4) >> 2 = 4, as the CHECK lines below require.
define i64 @fold_cttz_64() vscale_range(1,16) {
107+
; CHECK-LABEL: define i64 @fold_cttz_64(
108+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
109+
; CHECK-NEXT: entry:
110+
; CHECK-NEXT: ret i64 4
111+
;
112+
entry:
113+
%vscale = tail call i64 @llvm.vscale.i64()
114+
; Both shifts carry nuw and nsw and use the same base value (%vscale).
%shl0 = shl nuw nsw i64 %vscale, 4
115+
%shl1 = shl nuw nsw i64 %vscale, 2
116+
; The shift amount of the final lshr is the trailing-zero count of %shl1.
%cttz = tail call range(i64 2, 65) i64 @llvm.cttz.i64(i64 %shl1, i1 true)
117+
%div1 = lshr i64 %shl0, %cttz
118+
ret i64 %div1
119+
}
120+
121+
; i32 variant of the lshr-of-shl by cttz-of-shl fold test, with vscale as the
; shared shl base: (vscale << 4) >> cttz(vscale << 2) is expected to fold to
; the constant (1 << 4) >> 2 = 4, as the CHECK lines below require.
define i32 @fold_cttz_32() vscale_range(1,16) {
122+
; CHECK-LABEL: define i32 @fold_cttz_32(
123+
; CHECK-SAME: ) #[[ATTR0]] {
124+
; CHECK-NEXT: entry:
125+
; CHECK-NEXT: ret i32 4
126+
;
127+
entry:
128+
%vscale = tail call i32 @llvm.vscale.i32()
129+
; Both shifts carry nuw and nsw and use the same base value (%vscale).
%shl0 = shl nuw nsw i32 %vscale, 4
130+
%shl1 = shl nuw nsw i32 %vscale, 2
131+
; The shift amount of the final lshr is the trailing-zero count of %shl1.
; NOTE(review): the range upper bound 65 matches the i64 variant; for an i32
; cttz a bound of 33 would presumably be the tight one -- confirm whether this
; was intentional or copied over.
%cttz = tail call range(i32 2, 65) i32 @llvm.cttz.i32(i32 %shl1, i1 true)
132+
%div1 = lshr i32 %shl0, %cttz
133+
ret i32 %div1
134+
}
135+
106136
declare void @use(i32)

0 commit comments

Comments
 (0)