-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[X86] Add i8 CTPOP lowering using i32 MUL #79989
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
a5f3837
to
64594e7
Compare
@llvm/pr-subscribers-llvm-selectiondag Author: Simon Pilgrim (RKSimon) Changes: Fixes #79823. Full diff: https://github.com/llvm/llvm-project/pull/79989.diff 5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d39094aa7fed7..f4e9d7baf82b6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3258,6 +3258,12 @@ class TargetLoweringBase {
return false;
}
+ /// Return true if CTPOP/CTTZ/CTLZ/PARITY expansions should try to use integer
+ /// multiples should the input value be suitable.
+ virtual bool shouldAllowMultiplyInBitCounts(EVT CntVT, EVT MulVT) const {
+ return false;
+ }
+
// Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
// A) where y has a single bit set?
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5828822e062b1..f224930bb5bb9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8639,6 +8639,24 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
return SDValue();
+ // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
+ if (VT == MVT::i8 && shouldAllowMultiplyInBitCounts(MVT::i8, MVT::i32) &&
+ isOperationLegal(ISD::AND, MVT::i32) &&
+ isOperationLegal(ISD::SRL, MVT::i32) &&
+ isOperationLegal(ISD::MUL, MVT::i32)) {
+ SDValue Mask11 = DAG.getConstant(0x11111111U, dl, MVT::i32);
+ Op = DAG.getZExtOrTrunc(Op, dl, MVT::i32);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op,
+ DAG.getConstant(0x08040201U, dl, MVT::i32));
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(3, MVT::i32, dl));
+ Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(28, MVT::i32, dl));
+ return DAG.getZExtOrTrunc(Op, dl, MVT::i8);
+ }
+
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
SDValue Mask55 =
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 32745400a38b7..c87e29dc46db9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1174,6 +1174,10 @@ namespace llvm {
bool shouldSplatInsEltVarIndex(EVT VT) const override;
+ bool shouldAllowMultiplyInBitCounts(EVT CntVT, EVT MulVT) const override {
+ return CntVT.isScalarInteger() && isOperationLegal(ISD::MUL, MulVT);
+ }
+
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
// Converting to sat variants holds little benefit on X86 as we will just
// need to saturate the value back using fp arithmatic.
diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index fba44218e0572..73152e9f909cf 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -88,20 +88,13 @@ define i8 @test4(i8 %x) nounwind readnone {
;
; NO-POPCOUNT-LABEL: test4:
; NO-POPCOUNT: # %bb.0:
-; NO-POPCOUNT-NEXT: movl %edi, %ecx
-; NO-POPCOUNT-NEXT: andb $127, %cl
-; NO-POPCOUNT-NEXT: shrb %dil
-; NO-POPCOUNT-NEXT: andb $21, %dil
-; NO-POPCOUNT-NEXT: subb %dil, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: andb $51, %al
-; NO-POPCOUNT-NEXT: shrb $2, %cl
-; NO-POPCOUNT-NEXT: andb $51, %cl
-; NO-POPCOUNT-NEXT: addb %al, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: shrb $4, %al
-; NO-POPCOUNT-NEXT: addb %cl, %al
-; NO-POPCOUNT-NEXT: andb $15, %al
+; NO-POPCOUNT-NEXT: andl $127, %edi
+; NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; NO-POPCOUNT-NEXT: shrl $3, %eax
+; NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: shrl $28, %eax
+; NO-POPCOUNT-NEXT: # kill: def $al killed $al killed $eax
; NO-POPCOUNT-NEXT: retq
%x2 = and i8 %x, 127
%count = tail call i8 @llvm.ctpop.i8(i8 %x2)
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index a9d77fd2c0a61..c8d060dfee182 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -10,37 +10,24 @@
define i8 @cnt8(i8 %x) nounwind readnone {
; X86-LABEL: cnt8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb %al
-; X86-NEXT: andb $85, %al
-; X86-NEXT: subb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andb $51, %al
-; X86-NEXT: shrb $2, %cl
-; X86-NEXT: andb $51, %cl
-; X86-NEXT: addb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb $4, %al
-; X86-NEXT: addb %cl, %al
-; X86-NEXT: andb $15, %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NEXT: shrl $28, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cnt8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrb %al
-; X64-NEXT: andb $85, %al
-; X64-NEXT: subb %al, %dil
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andb $51, %cl
-; X64-NEXT: shrb $2, %dil
-; X64-NEXT: andb $51, %dil
-; X64-NEXT: addb %dil, %cl
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: shrb $4, %al
-; X64-NEXT: addb %cl, %al
-; X64-NEXT: andb $15, %al
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NEXT: shrl $3, %eax
+; X64-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NEXT: shrl $28, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-POPCNT-LABEL: cnt8:
@@ -59,16 +46,13 @@ define i8 @cnt8(i8 %x) nounwind readnone {
;
; X64-NDD-LABEL: cnt8:
; X64-NDD: # %bb.0:
-; X64-NDD-NEXT: shrb %dil, %al
-; X64-NDD-NEXT: andb $85, %al
-; X64-NDD-NEXT: subb %al, %dil, %al
-; X64-NDD-NEXT: andb $51, %al, %cl
-; X64-NDD-NEXT: shrb $2, %al
-; X64-NDD-NEXT: andb $51, %al
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: shrb $4, %al, %cl
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: andb $15, %al
+; X64-NDD-NEXT: movzbl %dil, %eax
+; X64-NDD-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NDD-NEXT: shrl $3, %eax
+; X64-NDD-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NDD-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NDD-NEXT: shrl $28, %eax
+; X64-NDD-NEXT: # kill: def $al killed $al killed $eax
; X64-NDD-NEXT: retq
%cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
ret i8 %cnt
|
@llvm/pr-subscribers-backend-x86 Author: Simon Pilgrim (RKSimon) Changes: Fixes #79823. Full diff: https://github.com/llvm/llvm-project/pull/79989.diff 5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d39094aa7fed7..f4e9d7baf82b6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3258,6 +3258,12 @@ class TargetLoweringBase {
return false;
}
+ /// Return true if CTPOP/CTTZ/CTLZ/PARITY expansions should try to use integer
+ /// multiples should the input value be suitable.
+ virtual bool shouldAllowMultiplyInBitCounts(EVT CntVT, EVT MulVT) const {
+ return false;
+ }
+
// Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x))
// A) where y has a single bit set?
virtual bool shouldFoldSelectWithSingleBitTest(EVT VT,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5828822e062b1..f224930bb5bb9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8639,6 +8639,24 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
return SDValue();
+ // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
+ if (VT == MVT::i8 && shouldAllowMultiplyInBitCounts(MVT::i8, MVT::i32) &&
+ isOperationLegal(ISD::AND, MVT::i32) &&
+ isOperationLegal(ISD::SRL, MVT::i32) &&
+ isOperationLegal(ISD::MUL, MVT::i32)) {
+ SDValue Mask11 = DAG.getConstant(0x11111111U, dl, MVT::i32);
+ Op = DAG.getZExtOrTrunc(Op, dl, MVT::i32);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op,
+ DAG.getConstant(0x08040201U, dl, MVT::i32));
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(3, MVT::i32, dl));
+ Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(28, MVT::i32, dl));
+ return DAG.getZExtOrTrunc(Op, dl, MVT::i8);
+ }
+
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
SDValue Mask55 =
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 32745400a38b7..c87e29dc46db9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1174,6 +1174,10 @@ namespace llvm {
bool shouldSplatInsEltVarIndex(EVT VT) const override;
+ bool shouldAllowMultiplyInBitCounts(EVT CntVT, EVT MulVT) const override {
+ return CntVT.isScalarInteger() && isOperationLegal(ISD::MUL, MulVT);
+ }
+
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
// Converting to sat variants holds little benefit on X86 as we will just
// need to saturate the value back using fp arithmatic.
diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index fba44218e0572..73152e9f909cf 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -88,20 +88,13 @@ define i8 @test4(i8 %x) nounwind readnone {
;
; NO-POPCOUNT-LABEL: test4:
; NO-POPCOUNT: # %bb.0:
-; NO-POPCOUNT-NEXT: movl %edi, %ecx
-; NO-POPCOUNT-NEXT: andb $127, %cl
-; NO-POPCOUNT-NEXT: shrb %dil
-; NO-POPCOUNT-NEXT: andb $21, %dil
-; NO-POPCOUNT-NEXT: subb %dil, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: andb $51, %al
-; NO-POPCOUNT-NEXT: shrb $2, %cl
-; NO-POPCOUNT-NEXT: andb $51, %cl
-; NO-POPCOUNT-NEXT: addb %al, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: shrb $4, %al
-; NO-POPCOUNT-NEXT: addb %cl, %al
-; NO-POPCOUNT-NEXT: andb $15, %al
+; NO-POPCOUNT-NEXT: andl $127, %edi
+; NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; NO-POPCOUNT-NEXT: shrl $3, %eax
+; NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: shrl $28, %eax
+; NO-POPCOUNT-NEXT: # kill: def $al killed $al killed $eax
; NO-POPCOUNT-NEXT: retq
%x2 = and i8 %x, 127
%count = tail call i8 @llvm.ctpop.i8(i8 %x2)
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index a9d77fd2c0a61..c8d060dfee182 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -10,37 +10,24 @@
define i8 @cnt8(i8 %x) nounwind readnone {
; X86-LABEL: cnt8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb %al
-; X86-NEXT: andb $85, %al
-; X86-NEXT: subb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andb $51, %al
-; X86-NEXT: shrb $2, %cl
-; X86-NEXT: andb $51, %cl
-; X86-NEXT: addb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb $4, %al
-; X86-NEXT: addb %cl, %al
-; X86-NEXT: andb $15, %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NEXT: shrl $28, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cnt8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrb %al
-; X64-NEXT: andb $85, %al
-; X64-NEXT: subb %al, %dil
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andb $51, %cl
-; X64-NEXT: shrb $2, %dil
-; X64-NEXT: andb $51, %dil
-; X64-NEXT: addb %dil, %cl
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: shrb $4, %al
-; X64-NEXT: addb %cl, %al
-; X64-NEXT: andb $15, %al
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NEXT: shrl $3, %eax
+; X64-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NEXT: shrl $28, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-POPCNT-LABEL: cnt8:
@@ -59,16 +46,13 @@ define i8 @cnt8(i8 %x) nounwind readnone {
;
; X64-NDD-LABEL: cnt8:
; X64-NDD: # %bb.0:
-; X64-NDD-NEXT: shrb %dil, %al
-; X64-NDD-NEXT: andb $85, %al
-; X64-NDD-NEXT: subb %al, %dil, %al
-; X64-NDD-NEXT: andb $51, %al, %cl
-; X64-NDD-NEXT: shrb $2, %al
-; X64-NDD-NEXT: andb $51, %al
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: shrb $4, %al, %cl
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: andb $15, %al
+; X64-NDD-NEXT: movzbl %dil, %eax
+; X64-NDD-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NDD-NEXT: shrl $3, %eax
+; X64-NDD-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NDD-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NDD-NEXT: shrl $28, %eax
+; X64-NDD-NEXT: # kill: def $al killed $al killed $eax
; X64-NDD-NEXT: retq
%cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
ret i8 %cnt
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
llvm/test/CodeGen/X86/popcnt.ll
Outdated
; X64-NDD-NEXT: movzbl %dil, %eax | ||
; X64-NDD-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201 | ||
; X64-NDD-NEXT: shrl $3, %eax | ||
; X64-NDD-NEXT: andl $286331153, %eax # imm = 0x11111111 | ||
; X64-NDD-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111 | ||
; X64-NDD-NEXT: shrl $28, %eax | ||
; X64-NDD-NEXT: # kill: def $al killed $al killed $eax |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any chance to merge the three together?
DAG.getShiftAmountConstant(28, MVT::i32, dl)); | ||
return DAG.getZExtOrTrunc(Op, dl, MVT::i8); | ||
} | ||
|
||
// This is the "best" algorithm from |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So it is not the "best" algorithm now :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The algorithm is not the "best" in all cases, although it does work for the common cases of counting the bits in 32-bit and 64-bit integers. :)
@@ -8639,6 +8639,24 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { | |||
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT)) | |||
return SDValue(); | |||
|
|||
// i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply. | |||
if (VT == MVT::i8 && shouldAllowMultiplyInBitCounts(MVT::i8, MVT::i32) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really need yet another extremely specific target hook? I would expect the isOperationLegal checks to be the base implementation if so
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So far only x86 (and maybe arm) have benefited from this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would checking the immediate cost be able to distinguish targets where this is profitable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've started going down that route, but it's not looking great - I'm very tempted to just make this X86 only for now.
65b452e
to
a23c221
Compare
Limited this to X86 |
You can test this locally with the following command:
git-clang-format --diff ea2984287d91b96f5e2cc0aa66d146d6dbd1d1bb 618e28224775186af3e0d219eebeb8fae1c8acc8 -- llvm/lib/Target/X86/X86ISelLowering.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de2df5c036..8315745f57 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -427,7 +427,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// on the dest that popcntl hasn't had since Cannon Lake.
setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
- setOperationAction(ISD::CTPOP , MVT::i8 , Custom);
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget.is64Bit())
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
This is the first basic proposal in llvm#79823 - we can investigate improving support for other widths if we can find further use cases.
…less active bits Extend llvm#79989 slightly to use KnownBits on the CTPOP input - this should make it easier to add additional cases identified in llvm#79823
This is the first basic proposal in #79823 - we can investigate improving support for other widths if we can find further use cases.