Skip to content

Commit 9760b28

Browse files
topperctstellar
authored andcommitted
[DAGCombiner][X86] Don't peek through ANDs on the shift amount in matchRotateSub when called from MatchFunnelPosNeg.
Peeking through AND is only valid if the input to both shifts is the same. If the inputs are different, then the original pattern ORs the two values when the masked shift amount is 0. This is ok if the values are the same since the OR would be a NOP which is why its ok for rotate. Fixes PR49365 and reverts PR34641 Differential Revision: https://reviews.llvm.org/D97637 (cherry picked from commit 5de09ef)
1 parent 3442169 commit 9760b28

File tree

2 files changed

+40
-19
lines changed

2 files changed

+40
-19
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6517,8 +6517,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
65176517
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
65186518
// in direction shift1 by Neg. The range [0, EltSize) means that we only need
65196519
// to consider shift amounts with defined behavior.
6520+
//
6521+
// The IsRotate flag should be set when the LHS of both shifts is the same.
6522+
// Otherwise if matching a general funnel shift, it should be clear.
65206523
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6521-
SelectionDAG &DAG) {
6524+
SelectionDAG &DAG, bool IsRotate) {
65226525
// If EltSize is a power of 2 then:
65236526
//
65246527
// (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6550,8 +6553,11 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
65506553
// always invokes undefined behavior for 32-bit X.
65516554
//
65526555
// Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6556+
//
6557+
// NOTE: We can only do this when matching an AND and not a general
6558+
// funnel shift.
65536559
unsigned MaskLoBits = 0;
6554-
if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6560+
if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
65556561
if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
65566562
KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
65576563
unsigned Bits = Log2_64(EltSize);
@@ -6641,7 +6647,8 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
66416647
// (srl x, (*ext y))) ->
66426648
// (rotr x, y) or (rotl x, (sub 32, y))
66436649
EVT VT = Shifted.getValueType();
6644-
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6650+
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
6651+
/*IsRotate*/ true)) {
66456652
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
66466653
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
66476654
HasPos ? Pos : Neg);
@@ -6670,7 +6677,7 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
66706677
// fold (or (shl x0, (*ext (sub 32, y))),
66716678
// (srl x1, (*ext y))) ->
66726679
// (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
6673-
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
6680+
if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
66746681
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
66756682
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
66766683
HasPos ? Pos : Neg);

llvm/test/CodeGen/X86/shift-double.ll

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -480,23 +480,31 @@ define i32 @test18(i32 %hi, i32 %lo, i32 %bits) nounwind {
480480
ret i32 %sh
481481
}
482482

483-
; PR34641 - Masked Shift Counts
483+
; These are not valid shld/shrd patterns. When the shift amount modulo
484+
; the bitwidth is zero, the result should be an OR of both operands not a
485+
; shift.
484486

485-
define i32 @shld_safe_i32(i32, i32, i32) {
486-
; X86-LABEL: shld_safe_i32:
487+
define i32 @not_shld_i32(i32, i32, i32) {
488+
; X86-LABEL: not_shld_i32:
487489
; X86: # %bb.0:
490+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
488491
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
489492
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
490-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
491-
; X86-NEXT: shldl %cl, %edx, %eax
493+
; X86-NEXT: shll %cl, %edx
494+
; X86-NEXT: negb %cl
495+
; X86-NEXT: shrl %cl, %eax
496+
; X86-NEXT: orl %edx, %eax
492497
; X86-NEXT: retl
493498
;
494-
; X64-LABEL: shld_safe_i32:
499+
; X64-LABEL: not_shld_i32:
495500
; X64: # %bb.0:
496501
; X64-NEXT: movl %edx, %ecx
497-
; X64-NEXT: movl %edi, %eax
502+
; X64-NEXT: movl %esi, %eax
503+
; X64-NEXT: shll %cl, %edi
504+
; X64-NEXT: negb %cl
498505
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
499-
; X64-NEXT: shldl %cl, %esi, %eax
506+
; X64-NEXT: shrl %cl, %eax
507+
; X64-NEXT: orl %edi, %eax
500508
; X64-NEXT: retq
501509
%4 = and i32 %2, 31
502510
%5 = shl i32 %0, %4
@@ -507,21 +515,27 @@ define i32 @shld_safe_i32(i32, i32, i32) {
507515
ret i32 %9
508516
}
509517

510-
define i32 @shrd_safe_i32(i32, i32, i32) {
511-
; X86-LABEL: shrd_safe_i32:
518+
define i32 @not_shrd_i32(i32, i32, i32) {
519+
; X86-LABEL: not_shrd_i32:
512520
; X86: # %bb.0:
521+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
513522
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
514523
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
515-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
516-
; X86-NEXT: shrdl %cl, %edx, %eax
524+
; X86-NEXT: shrl %cl, %edx
525+
; X86-NEXT: negb %cl
526+
; X86-NEXT: shll %cl, %eax
527+
; X86-NEXT: orl %edx, %eax
517528
; X86-NEXT: retl
518529
;
519-
; X64-LABEL: shrd_safe_i32:
530+
; X64-LABEL: not_shrd_i32:
520531
; X64: # %bb.0:
521532
; X64-NEXT: movl %edx, %ecx
522-
; X64-NEXT: movl %edi, %eax
533+
; X64-NEXT: movl %esi, %eax
534+
; X64-NEXT: shrl %cl, %edi
535+
; X64-NEXT: negb %cl
523536
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
524-
; X64-NEXT: shrdl %cl, %esi, %eax
537+
; X64-NEXT: shll %cl, %eax
538+
; X64-NEXT: orl %edi, %eax
525539
; X64-NEXT: retq
526540
%4 = and i32 %2, 31
527541
%5 = lshr i32 %0, %4

0 commit comments

Comments
 (0)