Skip to content

Commit c0645f1

Browse files
committed
[InstCombine] fold popcount of exactly one bit to shift
This is discussed in https://llvm.org/PR48999, but it does not solve that request. The difference in the vector test shows that some other logic transform is limited to scalar types.
1 parent f18efb7 commit c0645f1

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -522,18 +522,24 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
522522
return CallInst::Create(F, {X, IC.Builder.getFalse()});
523523
}
524524

525+
KnownBits Known(BitWidth);
526+
IC.computeKnownBits(Op0, Known, 0, &II);
527+
528+
// If all bits are zero except for exactly one fixed bit, then the result
529+
// must be 0 or 1, and we can get that answer by shifting to LSB:
530+
// ctpop (X & 32) --> (X & 32) >> 5
531+
if ((~Known.Zero).isPowerOf2())
532+
return BinaryOperator::CreateLShr(
533+
Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
534+
525535
// FIXME: Try to simplify vectors of integers.
526536
auto *IT = dyn_cast<IntegerType>(Ty);
527537
if (!IT)
528538
return nullptr;
529539

530-
KnownBits Known(BitWidth);
531-
IC.computeKnownBits(Op0, Known, 0, &II);
532-
540+
// Add range metadata since known bits can't completely reflect what we know.
533541
unsigned MinCount = Known.countMinPopulation();
534542
unsigned MaxCount = Known.countMaxPopulation();
535-
536-
// Add range metadata since known bits can't completely reflect what we know.
537543
if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
538544
Metadata *LowAndHigh[] = {
539545
ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),

llvm/test/Transforms/InstCombine/ctpop.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -96,8 +96,8 @@ define i1 @test6(i1 %arg) {
9696

9797
define i8 @mask_one_bit(i8 %x) {
9898
; CHECK-LABEL: @mask_one_bit(
99-
; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], 16
100-
; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ctpop.i8(i8 [[A]]), !range [[RNG1:![0-9]+]]
99+
; CHECK-NEXT: [[A:%.*]] = lshr i8 [[X:%.*]], 4
100+
; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], 1
101101
; CHECK-NEXT: ret i8 [[R]]
102102
;
103103
%a = and i8 %x, 16
@@ -109,7 +109,7 @@ define <2 x i32> @mask_one_bit_splat(<2 x i32> %x, <2 x i32>* %p) {
109109
; CHECK-LABEL: @mask_one_bit_splat(
110110
; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X:%.*]], <i32 2048, i32 2048>
111111
; CHECK-NEXT: store <2 x i32> [[A]], <2 x i32>* [[P:%.*]], align 8
112-
; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[A]])
112+
; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i32> [[A]], <i32 11, i32 11>
113113
; CHECK-NEXT: ret <2 x i32> [[R]]
114114
;
115115
%a = and <2 x i32> %x, <i32 2048, i32 2048>

0 commit comments

Comments
 (0)