Skip to content

Commit 82040d4

Browse files
committed
[InstCombine] reduce right-shift-of-left-shifted constant via demanded bits
If we don't demand high bits (zeros) and it is valid to pre-shift a constant: (C2 << X) >> C1 --> (C2 >> C1) << X https://alive2.llvm.org/ce/z/P3dWDW There are a variety of related patterns, but I haven't found a single solution that gets all of the motivating examples - so pulling this piece out of D126617 along with more tests. We should also handle the case where we shift-right followed by shift-left, but I'll make that a follow-on patch assuming this one is ok. It seems likely that we would want to add this to the SDAG version of the code too to keep it on par with IR. Differential Revision: https://reviews.llvm.org/D127122
1 parent 8956f80 commit 82040d4

File tree

2 files changed

+46
-13
lines changed

2 files changed

+46
-13
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,21 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
630630
ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
631631
if (SignBits >= NumHiDemandedBits)
632632
return I->getOperand(0);
633+
634+
// If we can pre-shift a left-shifted constant to the right without
635+
// losing any low bits (we already know we don't demand the high bits),
636+
// then eliminate the right-shift:
637+
// (C << X) >> RightShiftAmtC --> (C >> RightShiftAmtC) << X
638+
Value *X;
639+
Constant *C;
640+
if (match(I->getOperand(0), m_Shl(m_ImmConstant(C), m_Value(X)))) {
641+
Constant *RightShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
642+
Constant *NewC = ConstantExpr::getLShr(C, RightShiftAmtC);
643+
if (ConstantExpr::getShl(NewC, RightShiftAmtC) == C) {
644+
Instruction *Shl = BinaryOperator::CreateShl(NewC, X);
645+
return InsertNewInstWith(Shl, *I);
646+
}
647+
}
633648
}
634649

635650
// Unsigned shift right.

llvm/test/Transforms/InstCombine/shift-shift.ll

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -421,11 +421,12 @@ define i32 @shl_lshr_constants(i32 %x) {
421421
ret i32 %r
422422
}
423423

424+
; Pre-shift a constant to eliminate lshr.
425+
424426
define i8 @shl_lshr_demand1(i8 %x) {
425427
; CHECK-LABEL: @shl_lshr_demand1(
426-
; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
427-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i8 [[SHL]], 3
428-
; CHECK-NEXT: [[R:%.*]] = or i8 [[LSHR]], -32
428+
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 5, [[X:%.*]]
429+
; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -32
429430
; CHECK-NEXT: ret i8 [[R]]
430431
;
431432
%shl = shl i8 40, %x ; 0b0010_1000
@@ -434,11 +435,13 @@ define i8 @shl_lshr_demand1(i8 %x) {
434435
ret i8 %r
435436
}
436437

438+
; Pre-shift a constant to eliminate disguised lshr.
439+
437440
define i8 @shl_ashr_demand2(i8 %x) {
438441
; CHECK-LABEL: @shl_ashr_demand2(
439442
; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
440443
; CHECK-NEXT: call void @use8(i8 [[SHL]])
441-
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i8 [[SHL]], 3
444+
; CHECK-NEXT: [[TMP1:%.*]] = shl i8 5, [[X]]
442445
; CHECK-NEXT: [[R:%.*]] = or i8 [[TMP1]], -32
443446
; CHECK-NEXT: ret i8 [[R]]
444447
;
@@ -449,6 +452,8 @@ define i8 @shl_ashr_demand2(i8 %x) {
449452
ret i8 %r
450453
}
451454

455+
; It is not safe to pre-shift because we demand an extra high bit.
456+
452457
define i8 @shl_lshr_demand3(i8 %x) {
453458
; CHECK-LABEL: @shl_lshr_demand3(
454459
; CHECK-NEXT: [[SHL:%.*]] = shl i8 40, [[X:%.*]]
@@ -462,6 +467,8 @@ define i8 @shl_lshr_demand3(i8 %x) {
462467
ret i8 %r
463468
}
464469

470+
; It is not valid to pre-shift because we lose the low bit of 44.
471+
465472
define i8 @shl_lshr_demand4(i8 %x) {
466473
; CHECK-LABEL: @shl_lshr_demand4(
467474
; CHECK-NEXT: [[SHL:%.*]] = shl i8 44, [[X:%.*]]
@@ -475,11 +482,12 @@ define i8 @shl_lshr_demand4(i8 %x) {
475482
ret i8 %r
476483
}
477484

485+
; Splat vectors work too, and we don't care what instruction reduces demand for high bits.
486+
478487
define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
479488
; CHECK-LABEL: @shl_lshr_demand5(
480-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
481-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact <2 x i8> [[SHL]], <i8 2, i8 2>
482-
; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
489+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 37>, [[X:%.*]]
490+
; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
483491
; CHECK-NEXT: ret <2 x i6> [[R]]
484492
;
485493
%shl = shl <2 x i8> <i8 148, i8 148>, %x ; 0b1001_0100
@@ -488,6 +496,8 @@ define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
488496
ret <2 x i6> %r
489497
}
490498

499+
; TODO: allow undef/poison elements for this transform.
500+
491501
define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
492502
; CHECK-LABEL: @shl_lshr_demand5_undef_left(
493503
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 undef, i8 -108>, [[X:%.*]]
@@ -501,6 +511,8 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
501511
ret <2 x i6> %r
502512
}
503513

514+
; TODO: allow undef/poison elements for this transform.
515+
504516
define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
505517
; CHECK-LABEL: @shl_lshr_demand5_undef_right(
506518
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -514,6 +526,8 @@ define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
514526
ret <2 x i6> %r
515527
}
516528

529+
; TODO: allow non-splat vector constants.
530+
517531
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
518532
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left(
519533
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
@@ -527,11 +541,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
527541
ret <2 x i6> %r
528542
}
529543

544+
; Non-splat shl constant is ok.
545+
530546
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
531547
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_right(
532-
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -112>, [[X:%.*]]
533-
; CHECK-NEXT: [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 2, i8 2>
534-
; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
548+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 36>, [[X:%.*]]
549+
; CHECK-NEXT: [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
535550
; CHECK-NEXT: ret <2 x i6> [[R]]
536551
;
537552
%shl = shl <2 x i8> <i8 148, i8 144>, %x ; 0b1001_0100, 0b1001_0000
@@ -540,6 +555,8 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_right(<2 x i8> %x) {
540555
ret <2 x i6> %r
541556
}
542557

558+
; This is possible, but may require significant changes to the demanded bits framework.
559+
543560
define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
544561
; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_both(
545562
; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> <i8 -104, i8 -108>, [[X:%.*]]
@@ -553,11 +570,12 @@ define <2 x i6> @shl_lshr_demand5_nonuniform_vec_both(<2 x i8> %x) {
553570
ret <2 x i6> %r
554571
}
555572

573+
; 'and' can reduce demand for high bits too.
574+
556575
define i16 @shl_lshr_demand6(i16 %x) {
557576
; CHECK-LABEL: @shl_lshr_demand6(
558-
; CHECK-NEXT: [[SHL:%.*]] = shl i16 -32624, [[X:%.*]]
559-
; CHECK-NEXT: [[LSHR:%.*]] = lshr exact i16 [[SHL]], 4
560-
; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR]], 4094
577+
; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2057, [[X:%.*]]
578+
; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 4094
561579
; CHECK-NEXT: ret i16 [[R]]
562580
;
563581
%shl = shl i16 32912, %x ; 0b1000_0000_1001_0000

0 commit comments

Comments
 (0)