Skip to content

Commit 05f9877

Browse files
committed
[X86] Add handling for shift_logical(select(icmp_uge(amt,BW),0,x),amt) -> avx2 shift(x,amt)
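We need to catch this pattern explicitly; otherwise pre-AVX512 targets will fold it to shift_logical(and(icmp_ult(amt,BW),x),amt) before the shift combine can see the select. The fold is safe because the AVX2 variable logical shifts (VPSLLV/VPSRLV) already produce zero for any element whose shift amount is >= the element bit width (BW), so the explicit zeroing select is redundant.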
1 parent 7918e62 commit 05f9877

File tree

3 files changed

+26
-28
lines changed

3 files changed

+26
-28
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48042,6 +48042,14 @@ static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG,
4804248042
SV == VT.getScalarSizeInBits()) {
4804348043
return DAG.getNode(X86ISD::VSHLV, DL, VT, N00, N1);
4804448044
}
48045+
// fold shl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psllv(x,amt)
48046+
if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
48047+
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
48048+
ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
48049+
ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
48050+
SV == VT.getScalarSizeInBits()) {
48051+
return DAG.getNode(X86ISD::VSHLV, DL, VT, N01, N1);
48052+
}
4804548053
}
4804648054

4804748055
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
@@ -48176,6 +48184,14 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
4817648184
SV == VT.getScalarSizeInBits()) {
4817748185
return DAG.getNode(X86ISD::VSRLV, DL, VT, N00, N1);
4817848186
}
48187+
// fold srl(select(icmp_uge(amt,BW),0,x),amt) -> avx2 psrlv(x,amt)
48188+
if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == N1 &&
48189+
cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUGE &&
48190+
ISD::isConstantSplatVector(Cond.getOperand(1).getNode(), SV) &&
48191+
ISD::isConstantSplatVectorAllZeros(N00.getNode()) &&
48192+
SV == VT.getScalarSizeInBits()) {
48193+
return DAG.getNode(X86ISD::VSRLV, DL, VT, N01, N1);
48194+
}
4817948195
}
4818048196

4818148197
// Only do this on the last DAG combine as it can interfere with other

llvm/test/CodeGen/X86/combine-shl.ll

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1044,19 +1044,10 @@ define <4 x i32> @combine_vec_shl_commuted_clamped(<4 x i32> %sh, <4 x i32> %amt
10441044
; SSE41-NEXT: pmulld %xmm1, %xmm0
10451045
; SSE41-NEXT: retq
10461046
;
1047-
; AVX2-LABEL: combine_vec_shl_commuted_clamped:
1048-
; AVX2: # %bb.0:
1049-
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
1050-
; AVX2-NEXT: vpminud %xmm2, %xmm1, %xmm2
1051-
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
1052-
; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0
1053-
; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
1054-
; AVX2-NEXT: retq
1055-
;
1056-
; AVX512-LABEL: combine_vec_shl_commuted_clamped:
1057-
; AVX512: # %bb.0:
1058-
; AVX512-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
1059-
; AVX512-NEXT: retq
1047+
; AVX-LABEL: combine_vec_shl_commuted_clamped:
1048+
; AVX: # %bb.0:
1049+
; AVX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
1050+
; AVX-NEXT: retq
10601051
%cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
10611052
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
10621053
%shl = shl <4 x i32> %1, %amt
@@ -1112,4 +1103,4 @@ define <4 x i32> @combine_vec_shl_commuted_clamped1(<4 x i32> %sh, <4 x i32> %am
11121103
%shl = shl <4 x i32> %sh, %amt
11131104
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shl
11141105
ret <4 x i32> %1
1115-
}
1106+
}

llvm/test/CodeGen/X86/combine-srl.ll

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -771,19 +771,10 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped(<4 x i32> %sh, <4 x i32> %am
771771
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm5[2,3],xmm0[4,5],xmm5[6,7]
772772
; SSE41-NEXT: retq
773773
;
774-
; AVX2-LABEL: combine_vec_lshr_commuted_clamped:
775-
; AVX2: # %bb.0:
776-
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31]
777-
; AVX2-NEXT: vpminud %xmm2, %xmm1, %xmm2
778-
; AVX2-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
779-
; AVX2-NEXT: vpand %xmm0, %xmm2, %xmm0
780-
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
781-
; AVX2-NEXT: retq
782-
;
783-
; AVX512-LABEL: combine_vec_lshr_commuted_clamped:
784-
; AVX512: # %bb.0:
785-
; AVX512-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
786-
; AVX512-NEXT: retq
774+
; AVX-LABEL: combine_vec_lshr_commuted_clamped:
775+
; AVX: # %bb.0:
776+
; AVX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0
777+
; AVX-NEXT: retq
787778
%cmp.i = icmp uge <4 x i32> %amt, <i32 32, i32 32, i32 32, i32 32>
788779
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %sh
789780
%shr = lshr <4 x i32> %1, %amt
@@ -854,4 +845,4 @@ define <4 x i32> @combine_vec_lshr_commuted_clamped1(<4 x i32> %sh, <4 x i32> %a
854845
%shr = lshr <4 x i32> %sh, %amt
855846
%1 = select <4 x i1> %cmp.i, <4 x i32> zeroinitializer, <4 x i32> %shr
856847
ret <4 x i32> %1
857-
}
848+
}

0 commit comments

Comments
 (0)