Skip to content

Commit 1fce5a8

Browse files
committed
[X86][AVX] Support shuffle combining for VBROADCAST with smaller vector sources
getTargetShuffleMask can only do this safely if we're extracting the lowest subvector from a vector of the same result type.
llvm-svn: 352999
1 parent 837552f commit 1fce5a8

File tree

3 files changed

+34
-17
lines changed

3 files changed

+34
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6773,6 +6773,26 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
67736773
}
67746774
return true;
67756775
}
6776+
case X86ISD::VBROADCAST: {
6777+
SDValue Src = N.getOperand(0);
6778+
MVT SrcVT = Src.getSimpleValueType();
6779+
if (!SrcVT.isVector())
6780+
return false;
6781+
6782+
if (NumSizeInBits != SrcVT.getSizeInBits()) {
6783+
assert((NumSizeInBits % SrcVT.getSizeInBits()) == 0 &&
6784+
"Illegal broadcast type");
6785+
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
6786+
NumSizeInBits / SrcVT.getScalarSizeInBits());
6787+
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), SrcVT,
6788+
DAG.getUNDEF(SrcVT), Src,
6789+
DAG.getIntPtrConstant(0, SDLoc(N)));
6790+
}
6791+
6792+
Ops.push_back(Src);
6793+
Mask.append(NumElts, 0);
6794+
return true;
6795+
}
67766796
case ISD::ZERO_EXTEND_VECTOR_INREG:
67776797
case ISD::ZERO_EXTEND: {
67786798
SDValue Src = N.getOperand(0);

llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2174,8 +2174,7 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64
21742174
; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask7:
21752175
; CHECK: # %bb.0:
21762176
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
2177-
; CHECK-NEXT: vpbroadcastq %xmm3, %ymm3
2178-
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,7]
2177+
; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,4]
21792178
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm4
21802179
; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
21812180
; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
@@ -2189,9 +2188,8 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64
21892188
define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64> %mask) {
21902189
; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask7:
21912190
; CHECK: # %bb.0:
2192-
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm2
2193-
; CHECK-NEXT: vpbroadcastq %xmm2, %ymm3
2194-
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,7]
2191+
; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm3
2192+
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,4]
21952193
; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
21962194
; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm2 {%k1} {z}
21972195
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
@@ -3806,9 +3804,8 @@ define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask2(<8 x double>
38063804
define <4 x double> @test_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec) {
38073805
; CHECK-LABEL: test_8xdouble_to_4xdouble_perm_mask3:
38083806
; CHECK: # %bb.0:
3809-
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1
3810-
; CHECK-NEXT: vbroadcastsd %xmm1, %ymm2
3811-
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,7]
3807+
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
3808+
; CHECK-NEXT: vmovapd {{.*#+}} ymm1 = [0,2,1,4]
38123809
; CHECK-NEXT: vpermi2pd %ymm2, %ymm0, %ymm1
38133810
; CHECK-NEXT: vmovapd %ymm1, %ymm0
38143811
; CHECK-NEXT: retq
@@ -3819,8 +3816,7 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
38193816
; CHECK-LABEL: test_masked_8xdouble_to_4xdouble_perm_mask3:
38203817
; CHECK: # %bb.0:
38213818
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
3822-
; CHECK-NEXT: vbroadcastsd %xmm3, %ymm3
3823-
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,7]
3819+
; CHECK-NEXT: vmovapd {{.*#+}} ymm4 = [0,2,1,4]
38243820
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm4
38253821
; CHECK-NEXT: vxorpd %xmm0, %xmm0, %xmm0
38263822
; CHECK-NEXT: vcmpeqpd %ymm0, %ymm2, %k1
@@ -3835,9 +3831,8 @@ define <4 x double> @test_masked_8xdouble_to_4xdouble_perm_mask3(<8 x double> %v
38353831
define <4 x double> @test_masked_z_8xdouble_to_4xdouble_perm_mask3(<8 x double> %vec, <4 x double> %mask) {
38363832
; CHECK-LABEL: test_masked_z_8xdouble_to_4xdouble_perm_mask3:
38373833
; CHECK: # %bb.0:
3838-
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm2
3839-
; CHECK-NEXT: vbroadcastsd %xmm2, %ymm3
3840-
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,7]
3834+
; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm3
3835+
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [0,2,1,4]
38413836
; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
38423837
; CHECK-NEXT: vcmpeqpd %ymm4, %ymm1, %k1
38433838
; CHECK-NEXT: vpermi2pd %ymm3, %ymm0, %ymm2 {%k1} {z}

llvm/test/CodeGen/X86/vector-shuffle-avx512.ll

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,9 +190,10 @@ define <4 x i64> @expand4(<2 x i64> %a ) {
190190
define <8 x float> @expand5(<4 x float> %a ) {
191191
; SKX64-LABEL: expand5:
192192
; SKX64: # %bb.0:
193-
; SKX64-NEXT: vbroadcastss %xmm0, %ymm0
193+
; SKX64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
194194
; SKX64-NEXT: vxorps %xmm1, %xmm1, %xmm1
195-
; SKX64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
195+
; SKX64-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
196+
; SKX64-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
196197
; SKX64-NEXT: retq
197198
;
198199
; KNL64-LABEL: expand5:
@@ -204,9 +205,10 @@ define <8 x float> @expand5(<4 x float> %a ) {
204205
;
205206
; SKX32-LABEL: expand5:
206207
; SKX32: # %bb.0:
207-
; SKX32-NEXT: vbroadcastss %xmm0, %ymm0
208+
; SKX32-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
208209
; SKX32-NEXT: vxorps %xmm1, %xmm1, %xmm1
209-
; SKX32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
210+
; SKX32-NEXT: vmovaps {{.*#+}} ymm2 = [8,0,10,0,12,0,14,0]
211+
; SKX32-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
210212
; SKX32-NEXT: retl
211213
;
212214
; KNL32-LABEL: expand5:

0 commit comments

Comments
 (0)