Skip to content

Commit fc6bee1

Browse files
committed
[SDAG] SimplifyDemandedBits - generalize fold for 2 LSB of X*X
This is translated from recent changes to the IR version of this function: D119060 and D119139. When only the low 2 bits of X * X are demanded, the multiply is replaced with X & 1.
1 parent e4e671c commit fc6bee1

File tree

2 files changed

+10
-14
lines changed

2 files changed

+10
-14
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2265,9 +2265,14 @@ bool TargetLowering::SimplifyDemandedBits(
22652265
break;
22662266
}
22672267
case ISD::MUL:
2268-
// 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1]
2269-
if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1))
2270-
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
2268+
// For a squared value "X * X", the bottom 2 bits are 0 (bit[1]) and X[0] (bit[0]) because:
2269+
// X * X is odd iff X is odd.
2270+
// 'Quadratic Reciprocity' (a square is always 0 or 1 mod 4): X * X -> 0 for bit[1]
2271+
if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2272+
SDValue One = TLO.DAG.getConstant(1, dl, VT);
2273+
SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2274+
return TLO.CombineTo(Op, And1);
2275+
}
22712276
LLVM_FALLTHROUGH;
22722277
case ISD::ADD:
22732278
case ISD::SUB: {

llvm/test/CodeGen/AArch64/combine-mul.ll

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,7 @@ define i32 @one_demanded_low_bit(i32 %x) {
108108
define i16 @squared_one_demanded_low_bit(i16 %x) {
109109
; CHECK-LABEL: squared_one_demanded_low_bit:
110110
; CHECK: // %bb.0:
111-
; CHECK-NEXT: mul w8, w0, w0
112-
; CHECK-NEXT: and w0, w8, #0x1
111+
; CHECK-NEXT: and w0, w0, #0x1
113112
; CHECK-NEXT: ret
114113
%mul = mul i16 %x, %x
115114
%and = and i16 %mul, 1
@@ -120,7 +119,6 @@ define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
120119
; CHECK-LABEL: squared_one_demanded_low_bit_splat:
121120
; CHECK: // %bb.0:
122121
; CHECK-NEXT: mvni v1.4s, #1
123-
; CHECK-NEXT: mul v0.4s, v0.4s, v0.4s
124122
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
125123
; CHECK-NEXT: ret
126124
%mul = mul <4 x i32> %x, %x
@@ -131,8 +129,7 @@ define <4 x i32> @squared_one_demanded_low_bit_splat(<4 x i32> %x) {
131129
define i32 @squared_demanded_2_low_bits(i32 %x) {
132130
; CHECK-LABEL: squared_demanded_2_low_bits:
133131
; CHECK: // %bb.0:
134-
; CHECK-NEXT: mul w8, w0, w0
135-
; CHECK-NEXT: and w0, w8, #0x3
132+
; CHECK-NEXT: and w0, w0, #0x1
136133
; CHECK-NEXT: ret
137134
%mul = mul i32 %x, %x
138135
%and = and i32 %mul, 3
@@ -142,13 +139,7 @@ define i32 @squared_demanded_2_low_bits(i32 %x) {
142139
define <2 x i64> @squared_demanded_2_low_bits_splat(<2 x i64> %x) {
143140
; CHECK-LABEL: squared_demanded_2_low_bits_splat:
144141
; CHECK: // %bb.0:
145-
; CHECK-NEXT: fmov x8, d0
146-
; CHECK-NEXT: mov x9, v0.d[1]
147-
; CHECK-NEXT: mul x8, x8, x8
148-
; CHECK-NEXT: mul x9, x9, x9
149-
; CHECK-NEXT: fmov d0, x8
150142
; CHECK-NEXT: mov x8, #-2
151-
; CHECK-NEXT: mov v0.d[1], x9
152143
; CHECK-NEXT: dup v1.2d, x8
153144
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
154145
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)