Skip to content

Commit 68c197f

Browse files
committed
[InstCombine] factor difference-of-squares to reduce multiplication
Transform: (X * X) - (Y * Y) --> (X + Y) * (X - Y)
Proof: https://alive2.llvm.org/ce/z/BAuRCf
The no-wrap flag propagation is conservative (all-or-nothing); it could be relaxed in some cases, but there does not seem to be an obvious rule for that.
1 parent bf44905 commit 68c197f

File tree

2 files changed

+45
-15
lines changed

2 files changed

+45
-15
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,6 +2372,22 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
23722372
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
23732373
{Builder.CreateNot(X)}));
23742374

2375+
// Reduce multiplies for difference-of-squares by factoring:
2376+
// (X * X) - (Y * Y) --> (X + Y) * (X - Y)
2377+
if (match(Op0, m_OneUse(m_Mul(m_Value(X), m_Deferred(X)))) &&
2378+
match(Op1, m_OneUse(m_Mul(m_Value(Y), m_Deferred(Y))))) {
2379+
auto *OBO0 = cast<OverflowingBinaryOperator>(Op0);
2380+
auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2381+
bool PropagateNSW = I.hasNoSignedWrap() && OBO0->hasNoSignedWrap() &&
2382+
OBO1->hasNoSignedWrap();
2383+
bool PropagateNUW = I.hasNoUnsignedWrap() && OBO0->hasNoUnsignedWrap() &&
2384+
OBO1->hasNoUnsignedWrap();
2385+
Value *Add = Builder.CreateAdd(X, Y, "add", PropagateNUW, PropagateNSW);
2386+
Value *Sub = Builder.CreateSub(X, Y, "sub", PropagateNUW, PropagateNSW);
2387+
Value *Mul = Builder.CreateMul(Add, Sub, "", PropagateNUW, PropagateNSW);
2388+
return replaceInstUsesWith(I, Mul);
2389+
}
2390+
23752391
return TryToNarrowDeduceFlags();
23762392
}
23772393

llvm/test/Transforms/InstCombine/sub.ll

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2390,11 +2390,13 @@ define <2 x i8> @sub_to_and_vector4(<2 x i8> %x) {
23902390
ret <2 x i8> %r
23912391
}
23922392

2393+
; (X * X) - (Y * Y) --> (X + Y) * (X - Y)
2394+
23932395
define i8 @diff_of_squares(i8 %x, i8 %y) {
23942396
; CHECK-LABEL: @diff_of_squares(
2395-
; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]]
2396-
; CHECK-NEXT: [[Y2:%.*]] = mul i8 [[Y:%.*]], [[Y]]
2397-
; CHECK-NEXT: [[R:%.*]] = sub i8 [[X2]], [[Y2]]
2397+
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], [[Y:%.*]]
2398+
; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]]
2399+
; CHECK-NEXT: [[R:%.*]] = mul i8 [[ADD]], [[SUB]]
23982400
; CHECK-NEXT: ret i8 [[R]]
23992401
;
24002402
%x2 = mul i8 %x, %x
@@ -2403,11 +2405,13 @@ define i8 @diff_of_squares(i8 %x, i8 %y) {
24032405
ret i8 %r
24042406
}
24052407

2408+
; All-or-nothing for propagation of no-wrap flags (possibly conservative)
2409+
24062410
define i5 @diff_of_squares_nuw(i5 %x, i5 %y) {
24072411
; CHECK-LABEL: @diff_of_squares_nuw(
2408-
; CHECK-NEXT: [[X2:%.*]] = mul nuw i5 [[X:%.*]], [[X]]
2409-
; CHECK-NEXT: [[Y2:%.*]] = mul nuw i5 [[Y:%.*]], [[Y]]
2410-
; CHECK-NEXT: [[R:%.*]] = sub nuw i5 [[X2]], [[Y2]]
2412+
; CHECK-NEXT: [[ADD:%.*]] = add nuw i5 [[X:%.*]], [[Y:%.*]]
2413+
; CHECK-NEXT: [[SUB:%.*]] = sub nuw i5 [[X]], [[Y]]
2414+
; CHECK-NEXT: [[R:%.*]] = mul nuw i5 [[ADD]], [[SUB]]
24112415
; CHECK-NEXT: ret i5 [[R]]
24122416
;
24132417
%x2 = mul nuw i5 %x, %x
@@ -2416,11 +2420,13 @@ define i5 @diff_of_squares_nuw(i5 %x, i5 %y) {
24162420
ret i5 %r
24172421
}
24182422

2423+
; All-or-nothing for propagation of no-wrap flags (possibly conservative)
2424+
24192425
define i5 @diff_of_squares_partial_nuw(i5 %x, i5 %y) {
24202426
; CHECK-LABEL: @diff_of_squares_partial_nuw(
2421-
; CHECK-NEXT: [[X2:%.*]] = mul nuw i5 [[X:%.*]], [[X]]
2422-
; CHECK-NEXT: [[Y2:%.*]] = mul nuw i5 [[Y:%.*]], [[Y]]
2423-
; CHECK-NEXT: [[R:%.*]] = sub i5 [[X2]], [[Y2]]
2427+
; CHECK-NEXT: [[ADD:%.*]] = add i5 [[X:%.*]], [[Y:%.*]]
2428+
; CHECK-NEXT: [[SUB:%.*]] = sub i5 [[X]], [[Y]]
2429+
; CHECK-NEXT: [[R:%.*]] = mul i5 [[ADD]], [[SUB]]
24242430
; CHECK-NEXT: ret i5 [[R]]
24252431
;
24262432
%x2 = mul nuw i5 %x, %x
@@ -2429,11 +2435,13 @@ define i5 @diff_of_squares_partial_nuw(i5 %x, i5 %y) {
24292435
ret i5 %r
24302436
}
24312437

2438+
; All-or-nothing for propagation of no-wrap flags (possibly conservative)
2439+
24322440
define <2 x i5> @diff_of_squares_nsw(<2 x i5> %x, <2 x i5> %y) {
24332441
; CHECK-LABEL: @diff_of_squares_nsw(
2434-
; CHECK-NEXT: [[X2:%.*]] = mul nsw <2 x i5> [[X:%.*]], [[X]]
2435-
; CHECK-NEXT: [[Y2:%.*]] = mul nsw <2 x i5> [[Y:%.*]], [[Y]]
2436-
; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i5> [[X2]], [[Y2]]
2442+
; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i5> [[X:%.*]], [[Y:%.*]]
2443+
; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i5> [[X]], [[Y]]
2444+
; CHECK-NEXT: [[R:%.*]] = mul nsw <2 x i5> [[ADD]], [[SUB]]
24372445
; CHECK-NEXT: ret <2 x i5> [[R]]
24382446
;
24392447
%x2 = mul nsw <2 x i5> %x, %x
@@ -2442,11 +2450,13 @@ define <2 x i5> @diff_of_squares_nsw(<2 x i5> %x, <2 x i5> %y) {
24422450
ret <2 x i5> %r
24432451
}
24442452

2453+
; All-or-nothing for propagation of no-wrap flags (possibly conservative)
2454+
24452455
define <2 x i5> @diff_of_squares_partial_nsw(<2 x i5> %x, <2 x i5> %y) {
24462456
; CHECK-LABEL: @diff_of_squares_partial_nsw(
2447-
; CHECK-NEXT: [[X2:%.*]] = mul nsw <2 x i5> [[X:%.*]], [[X]]
2448-
; CHECK-NEXT: [[Y2:%.*]] = mul <2 x i5> [[Y:%.*]], [[Y]]
2449-
; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i5> [[X2]], [[Y2]]
2457+
; CHECK-NEXT: [[ADD:%.*]] = add <2 x i5> [[X:%.*]], [[Y:%.*]]
2458+
; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i5> [[X]], [[Y]]
2459+
; CHECK-NEXT: [[R:%.*]] = mul <2 x i5> [[ADD]], [[SUB]]
24502460
; CHECK-NEXT: ret <2 x i5> [[R]]
24512461
;
24522462
%x2 = mul nsw <2 x i5> %x, %x
@@ -2455,6 +2465,8 @@ define <2 x i5> @diff_of_squares_partial_nsw(<2 x i5> %x, <2 x i5> %y) {
24552465
ret <2 x i5> %r
24562466
}
24572467

2468+
; negative test
2469+
24582470
define i8 @diff_of_squares_use1(i8 %x, i8 %y) {
24592471
; CHECK-LABEL: @diff_of_squares_use1(
24602472
; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]]
@@ -2470,6 +2482,8 @@ define i8 @diff_of_squares_use1(i8 %x, i8 %y) {
24702482
ret i8 %r
24712483
}
24722484

2485+
; negative test
2486+
24732487
define i8 @diff_of_squares_use2(i8 %x, i8 %y) {
24742488
; CHECK-LABEL: @diff_of_squares_use2(
24752489
; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]]

0 commit comments

Comments
 (0)