Skip to content

Commit 9a368d2

Browse files
committed
[X86][SSE] shuffle(hop,hop) - canonicalize unary hop(x,x) shuffle masks
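If a shuffle refers to both the lower and upper half-lanes of a unary horizontal op, hop(x,x), then canonicalize the mask to refer only to the lower half; because both operands are the same, the upper half of each 128-bit lane holds the same results as the lower half.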
1 parent 90f7214 commit 9a368d2

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35880,6 +35880,25 @@ static SDValue combineShuffleWithHorizOp(SDValue N, MVT VT, const SDLoc &DL,
3588035880
if (!isHoriz && !isPack)
3588135881
return SDValue();
3588235882

35883+
// Canonicalize unary horizontal ops to only refer to lower halves.
35884+
if (TargetMask.size() == VT0.getVectorNumElements()) {
35885+
int NumElts = VT0.getVectorNumElements();
35886+
int NumLanes = VT0.getSizeInBits() / 128;
35887+
int NumEltsPerLane = NumElts / NumLanes;
35888+
int NumHalfEltsPerLane = NumEltsPerLane / 2;
35889+
for (int i = 0; i != NumElts; ++i) {
35890+
int &M = TargetMask[i];
35891+
if (isUndefOrZero(M))
35892+
continue;
35893+
if (M < NumElts && BC0.getOperand(0) == BC0.getOperand(1) &&
35894+
(M % NumEltsPerLane) >= NumHalfEltsPerLane)
35895+
M -= NumHalfEltsPerLane;
35896+
if (NumElts <= M && BC1.getOperand(0) == BC1.getOperand(1) &&
35897+
((M - NumElts) % NumEltsPerLane) >= NumHalfEltsPerLane)
35898+
M -= NumHalfEltsPerLane;
35899+
}
35900+
}
35901+
3588335902
SmallVector<int, 16> TargetMask128, WideMask128;
3588435903
if (isRepeatedTargetShuffleMask(128, VT, TargetMask, TargetMask128) &&
3588535904
scaleShuffleElements(TargetMask128, 2, WideMask128)) {

llvm/test/CodeGen/X86/haddsub-undef.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,9 +1015,7 @@ define <4 x float> @PR34724_add_v4f32_0u23(<4 x float> %0, <4 x float> %1) {
10151015
;
10161016
; SSE-FAST-LABEL: PR34724_add_v4f32_0u23:
10171017
; SSE-FAST: # %bb.0:
1018-
; SSE-FAST-NEXT: haddps %xmm0, %xmm0
1019-
; SSE-FAST-NEXT: haddps %xmm1, %xmm1
1020-
; SSE-FAST-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,3]
1018+
; SSE-FAST-NEXT: haddps %xmm1, %xmm0
10211019
; SSE-FAST-NEXT: retq
10221020
;
10231021
; AVX-SLOW-LABEL: PR34724_add_v4f32_0u23:
@@ -1034,9 +1032,7 @@ define <4 x float> @PR34724_add_v4f32_0u23(<4 x float> %0, <4 x float> %1) {
10341032
;
10351033
; AVX-FAST-LABEL: PR34724_add_v4f32_0u23:
10361034
; AVX-FAST: # %bb.0:
1037-
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
1038-
; AVX-FAST-NEXT: vhaddps %xmm1, %xmm1, %xmm1
1039-
; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,3]
1035+
; AVX-FAST-NEXT: vhaddps %xmm1, %xmm0, %xmm0
10401036
; AVX-FAST-NEXT: retq
10411037
%3 = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
10421038
%4 = fadd <4 x float> %3, %0

0 commit comments

Comments
 (0)