Skip to content

Commit 80b897e

Browse files
[InstCombine] (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1 (PR50094)
Original pattern: (__builtin_parity(x) ^ __builtin_parity(y)).
LLVM rewrites it as: (__builtin_popcount(x) ^ __builtin_popcount(y)) & 1.
Optimized form: __builtin_popcount(x ^ y) & 1.
Alive proof: https://alive2.llvm.org/ce/z/-GdWFr
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D101802
1 parent c2e9baf commit 80b897e

File tree

2 files changed

+20
-12
lines changed

2 files changed

+20
-12
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,17 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
220220
if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) ||
221221
SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1))
222222
return I;
223+
Value *LHS, *RHS;
224+
if (DemandedMask == 1 &&
225+
match(I->getOperand(0), m_Intrinsic<Intrinsic::ctpop>(m_Value(LHS))) &&
226+
match(I->getOperand(1), m_Intrinsic<Intrinsic::ctpop>(m_Value(RHS)))) {
227+
// (ctpop(X) ^ ctpop(Y)) & 1 --> ctpop(X^Y) & 1
228+
IRBuilderBase::InsertPointGuard Guard(Builder);
229+
Builder.SetInsertPoint(I);
230+
auto *Xor = Builder.CreateXor(LHS, RHS);
231+
return Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Xor);
232+
}
233+
223234
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
224235
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
225236

llvm/test/Transforms/InstCombine/ctpop.ll

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,9 @@ define i32 @zext_ctpop_extra_use(i16 %x, i32* %q) {
386386

387387
define i32 @parity_xor(i32 %arg, i32 %arg1) {
388388
; CHECK-LABEL: @parity_xor(
389-
; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]]
390-
; CHECK-NEXT: [[I2:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]]
391-
; CHECK-NEXT: [[I3:%.*]] = xor i32 [[I2]], [[I]]
392-
; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1
389+
; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG:%.*]]
390+
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]]
391+
; CHECK-NEXT: [[I4:%.*]] = and i32 [[TMP2]], 1
393392
; CHECK-NEXT: ret i32 [[I4]]
394393
;
395394
%i = tail call i32 @llvm.ctpop.i32(i32 %arg)
@@ -401,10 +400,9 @@ define i32 @parity_xor(i32 %arg, i32 %arg1) {
401400

402401
define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) {
403402
; CHECK-LABEL: @parity_xor_trunc(
404-
; CHECK-NEXT: [[I:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG:%.*]]), !range [[RNG5:![0-9]+]]
405-
; CHECK-NEXT: [[I2:%.*]] = tail call i64 @llvm.ctpop.i64(i64 [[ARG1:%.*]]), !range [[RNG5]]
406-
; CHECK-NEXT: [[I3:%.*]] = xor i64 [[I2]], [[I]]
407-
; CHECK-NEXT: [[I4:%.*]] = trunc i64 [[I3]] to i32
403+
; CHECK-NEXT: [[TMP1:%.*]] = xor i64 [[ARG1:%.*]], [[ARG:%.*]]
404+
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP1]]), !range [[RNG5:![0-9]+]]
405+
; CHECK-NEXT: [[I4:%.*]] = trunc i64 [[TMP2]] to i32
408406
; CHECK-NEXT: [[I5:%.*]] = and i32 [[I4]], 1
409407
; CHECK-NEXT: ret i32 [[I5]]
410408
;
@@ -418,10 +416,9 @@ define i32 @parity_xor_trunc(i64 %arg, i64 %arg1) {
418416

419417
define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) {
420418
; CHECK-LABEL: @parity_xor_vec(
421-
; CHECK-NEXT: [[I:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG:%.*]])
422-
; CHECK-NEXT: [[I2:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[ARG1:%.*]])
423-
; CHECK-NEXT: [[I3:%.*]] = xor <2 x i32> [[I2]], [[I]]
424-
; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[I3]], <i32 1, i32 1>
419+
; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]]
420+
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
421+
; CHECK-NEXT: [[I4:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
425422
; CHECK-NEXT: ret <2 x i32> [[I4]]
426423
;
427424
%i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg)

0 commit comments

Comments
 (0)