Skip to content

Commit 2d1390e

Browse files
committed
[DAG] SimplifyDemandedBits - mul(x,x) - if only bit[1] is demanded then fold to zero
1 parent 48f45f6 commit 2d1390e

File tree

2 files changed

+9
-12
lines changed

2 files changed

+9
-12
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2247,8 +2247,12 @@ bool TargetLowering::SimplifyDemandedBits(
22472247
}
22482248
break;
22492249
}
2250-
case ISD::ADD:
22512250
case ISD::MUL:
2251+
// 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1] (x*x mod 4 is always 0 or 1, so bit[1] is never set)
2252+
if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
2253+
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
2254+
LLVM_FALLTHROUGH;
2255+
case ISD::ADD:
22522256
case ISD::SUB: {
22532257
// Add, Sub, and Mul don't demand any bits in positions beyond that
22542258
// of the highest bit demanded of them.

llvm/test/CodeGen/X86/combine-mul.ll

Lines changed: 4 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -366,16 +366,12 @@ define <2 x i64> @combine_mul_to_abs_v2i64(<2 x i64> %x) {
366366
define i64 @combine_mul_self_knownbits(i64 %x) {
367367
; SSE-LABEL: combine_mul_self_knownbits:
368368
; SSE: # %bb.0:
369-
; SSE-NEXT: movq %rdi, %rax
370-
; SSE-NEXT: imull %eax, %eax
371-
; SSE-NEXT: andl $2, %eax
369+
; SSE-NEXT: xorl %eax, %eax
372370
; SSE-NEXT: retq
373371
;
374372
; AVX-LABEL: combine_mul_self_knownbits:
375373
; AVX: # %bb.0:
376-
; AVX-NEXT: movq %rdi, %rax
377-
; AVX-NEXT: imull %eax, %eax
378-
; AVX-NEXT: andl $2, %eax
374+
; AVX-NEXT: xorl %eax, %eax
379375
; AVX-NEXT: retq
380376
%1 = mul i64 %x, %x
381377
%2 = and i64 %1, 2
@@ -385,15 +381,12 @@ define i64 @combine_mul_self_knownbits(i64 %x) {
385381
define <4 x i32> @combine_mul_self_knownbits_vector(<4 x i32> %x) {
386382
; SSE-LABEL: combine_mul_self_knownbits_vector:
387383
; SSE: # %bb.0:
388-
; SSE-NEXT: pmulld %xmm0, %xmm0
389-
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
384+
; SSE-NEXT: xorps %xmm0, %xmm0
390385
; SSE-NEXT: retq
391386
;
392387
; AVX-LABEL: combine_mul_self_knownbits_vector:
393388
; AVX: # %bb.0:
394-
; AVX-NEXT: vpmulld %xmm0, %xmm0, %xmm0
395-
; AVX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
396-
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
389+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
397390
; AVX-NEXT: retq
398391
%1 = mul <4 x i32> %x, %x
399392
%2 = and <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>

0 commit comments

Comments
 (0)