Skip to content

Commit 10c9032

Browse files
committed
[X86][SSE] detectAVGPattern - Match zext(or(x,y)) 'add like' patterns (PR41316)
Fixes PR41316, where the expanded PAVG intrinsic had one of its ADDs turned into an OR because the operands of that ADD have no common bits set. llvm-svn: 357351
1 parent b5498cb commit 10c9032

File tree

2 files changed

+28
-69
lines changed

2 files changed

+28
-69
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38203,10 +38203,19 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
3820338203
AVGBuilder);
3820438204
}
3820538205

38206-
// Matches 'add like' patterns.
38207-
// TODO: Extend this to include or/zext cases.
38206+
// Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
38207+
// Match the or case only if it's 'add-like' - can be replaced by an add.
3820838208
auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
38209-
if (ISD::ADD != V.getOpcode())
38209+
if (ISD::ADD == V.getOpcode()) {
38210+
Op0 = V.getOperand(0);
38211+
Op1 = V.getOperand(1);
38212+
return true;
38213+
}
38214+
if (ISD::ZERO_EXTEND != V.getOpcode())
38215+
return false;
38216+
V = V.getOperand(0);
38217+
if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
38218+
!DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
3821038219
return false;
3821138220
Op0 = V.getOperand(0);
3821238221
Op1 = V.getOperand(1);
@@ -38222,22 +38231,24 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
3822238231
Operands[1] = Op1;
3822338232

3822438233
// Now we have three operands of two additions. Check that one of them is a
38225-
// constant vector with ones, and the other two are promoted from i8/i16.
38234+
// constant vector with ones, and the other two can be promoted from i8/i16.
3822638235
for (int i = 0; i < 3; ++i) {
3822738236
if (!IsConstVectorInRange(Operands[i], 1, 1))
3822838237
continue;
3822938238
std::swap(Operands[i], Operands[2]);
3823038239

3823138240
// Check if Operands[0] and Operands[1] are results of type promotion.
3823238241
for (int j = 0; j < 2; ++j)
38233-
if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
38234-
Operands[j].getOperand(0).getValueType() != VT)
38235-
return SDValue();
38242+
if (Operands[j].getValueType() != VT) {
38243+
if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
38244+
Operands[j].getOperand(0).getValueType() != VT)
38245+
return SDValue();
38246+
Operands[j] = Operands[j].getOperand(0);
38247+
}
3823638248

3823738249
// The pattern is detected, emit X86ISD::AVG instruction(s).
38238-
return SplitOpsAndApply(DAG, Subtarget, DL, VT,
38239-
{ Operands[0].getOperand(0),
38240-
Operands[1].getOperand(0) }, AVGBuilder);
38250+
return SplitOpsAndApply(DAG, Subtarget, DL, VT, {Operands[0], Operands[1]},
38251+
AVGBuilder);
3824138252
}
3824238253

3824338254
return SDValue();

llvm/test/CodeGen/X86/avg.ll

Lines changed: 7 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2479,67 +2479,15 @@ define <2 x i64> @PR41316(<2 x i64>, <2 x i64>) {
24792479
; SSE2: # %bb.0:
24802480
; SSE2-NEXT: psllw $2, %xmm0
24812481
; SSE2-NEXT: psllw $2, %xmm1
2482-
; SSE2-NEXT: pxor %xmm2, %xmm2
2483-
; SSE2-NEXT: movdqa %xmm1, %xmm3
2484-
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
2485-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
2486-
; SSE2-NEXT: por {{.*}}(%rip), %xmm0
2487-
; SSE2-NEXT: movdqa %xmm0, %xmm4
2488-
; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
2489-
; SSE2-NEXT: paddd %xmm3, %xmm4
2490-
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
2491-
; SSE2-NEXT: paddd %xmm1, %xmm0
2492-
; SSE2-NEXT: pslld $15, %xmm4
2493-
; SSE2-NEXT: psrad $16, %xmm4
2494-
; SSE2-NEXT: pslld $15, %xmm0
2495-
; SSE2-NEXT: psrad $16, %xmm0
2496-
; SSE2-NEXT: packssdw %xmm4, %xmm0
2482+
; SSE2-NEXT: pavgw %xmm1, %xmm0
24972483
; SSE2-NEXT: retq
24982484
;
2499-
; AVX1-LABEL: PR41316:
2500-
; AVX1: # %bb.0:
2501-
; AVX1-NEXT: vpsllw $2, %xmm0, %xmm0
2502-
; AVX1-NEXT: vpsllw $2, %xmm1, %xmm1
2503-
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
2504-
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
2505-
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
2506-
; AVX1-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
2507-
; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
2508-
; AVX1-NEXT: vpaddd %xmm3, %xmm2, %xmm2
2509-
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
2510-
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
2511-
; AVX1-NEXT: vpsrld $1, %xmm2, %xmm1
2512-
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
2513-
; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2514-
; AVX1-NEXT: retq
2515-
;
2516-
; AVX2-LABEL: PR41316:
2517-
; AVX2: # %bb.0:
2518-
; AVX2-NEXT: vpsllw $2, %xmm0, %xmm0
2519-
; AVX2-NEXT: vpsllw $2, %xmm1, %xmm1
2520-
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2521-
; AVX2-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
2522-
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2523-
; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
2524-
; AVX2-NEXT: vpsrld $1, %ymm0, %ymm0
2525-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
2526-
; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
2527-
; AVX2-NEXT: vzeroupper
2528-
; AVX2-NEXT: retq
2529-
;
2530-
; AVX512-LABEL: PR41316:
2531-
; AVX512: # %bb.0:
2532-
; AVX512-NEXT: vpsllw $2, %xmm0, %xmm0
2533-
; AVX512-NEXT: vpsllw $2, %xmm1, %xmm1
2534-
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
2535-
; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
2536-
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
2537-
; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
2538-
; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0
2539-
; AVX512-NEXT: vpmovdw %zmm0, %ymm0
2540-
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
2541-
; AVX512-NEXT: vzeroupper
2542-
; AVX512-NEXT: retq
2485+
; AVX-LABEL: PR41316:
2486+
; AVX: # %bb.0:
2487+
; AVX-NEXT: vpsllw $2, %xmm0, %xmm0
2488+
; AVX-NEXT: vpsllw $2, %xmm1, %xmm1
2489+
; AVX-NEXT: vpavgw %xmm0, %xmm1, %xmm0
2490+
; AVX-NEXT: retq
25432491
%3 = bitcast <2 x i64> %0 to <8 x i16>
25442492
%4 = shl <8 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
25452493
%5 = bitcast <2 x i64> %1 to <8 x i16>

0 commit comments

Comments
 (0)