Skip to content

Commit 19f657d

Browse files
committed
[X86] combineToExtendBoolVectorInReg - use broadcast on AVX2+ targets
Make use of AVX2 broadcasts to splat the source integer across all lanes, which simplifies the per-lane byte shuffles. This is prep work to avoid a regression in the fix for #66150.
1 parent e5e15f9 commit 19f657d

File tree

5 files changed

+25
-21
lines changed

5 files changed

+25
-21
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46183,11 +46183,17 @@ static SDValue combineToExtendBoolVectorInReg(
4618346183
assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
4618446184
unsigned Scale = NumElts / EltSizeInBits;
4618546185
EVT BroadcastVT = EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
46186-
Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
46186+
bool UseBroadcast = Subtarget.hasInt256() &&
46187+
(!BroadcastVT.is128BitVector() || isa<LoadSDNode>(N00));
46188+
Vec = UseBroadcast
46189+
? DAG.getSplat(BroadcastVT, DL, N00)
46190+
: DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
4618746191
Vec = DAG.getBitcast(VT, Vec);
4618846192

46189-
for (unsigned i = 0; i != Scale; ++i)
46190-
ShuffleMask.append(EltSizeInBits, i);
46193+
for (unsigned i = 0; i != Scale; ++i) {
46194+
int Offset = UseBroadcast ? (i * EltSizeInBits) : 0;
46195+
ShuffleMask.append(EltSizeInBits, i + Offset);
46196+
}
4619146197
Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
4619246198
} else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
4619346199
(SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -365,8 +365,8 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
365365
; AVX2-LABEL: ext_i32_32i8:
366366
; AVX2: # %bb.0:
367367
; AVX2-NEXT: vmovd %edi, %xmm0
368-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
369-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
368+
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
369+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
370370
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
371371
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
372372
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -635,12 +635,12 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
635635
; AVX2-LABEL: ext_i64_64i8:
636636
; AVX2: # %bb.0:
637637
; AVX2-NEXT: vmovq %rdi, %xmm0
638-
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
639-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
638+
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
639+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
640640
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
641641
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
642642
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
643-
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
643+
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,13,13,13,13,13,13,13,13,22,22,22,22,22,22,22,22,31,31,31,31,31,31,31,31]
644644
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
645645
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
646646
; AVX2-NEXT: retq

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -456,8 +456,8 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) {
456456
; AVX2-LABEL: ext_i32_32i8:
457457
; AVX2: # %bb.0:
458458
; AVX2-NEXT: vmovd %edi, %xmm0
459-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
460-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
459+
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
460+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
461461
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
462462
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
463463
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -806,14 +806,14 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) {
806806
; AVX2-LABEL: ext_i64_64i8:
807807
; AVX2: # %bb.0:
808808
; AVX2-NEXT: vmovq %rdi, %xmm0
809-
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,1,0,1]
810-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
809+
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm1
810+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
811811
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
812812
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
813813
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
814814
; AVX2-NEXT: vpbroadcastb {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
815815
; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0
816-
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,22,22,22,22,22,22,22,22,23,23,23,23,23,23,23,23]
816+
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[4,4,4,4,4,4,4,4,13,13,13,13,13,13,13,13,22,22,22,22,22,22,22,22,31,31,31,31,31,31,31,31]
817817
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
818818
; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
819819
; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1

llvm/test/CodeGen/X86/bitcast-int-to-vector-bool.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -256,8 +256,8 @@ define <32 x i1> @bitcast_i32_32i1(i32 %a0) {
256256
; AVX2-LABEL: bitcast_i32_32i1:
257257
; AVX2: # %bb.0:
258258
; AVX2-NEXT: vmovd %edi, %xmm0
259-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
260-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
259+
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
260+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
261261
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
262262
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
263263
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0

llvm/test/CodeGen/X86/vector-sext.ll

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2596,9 +2596,8 @@ define <16 x i8> @load_sext_16i1_to_16i8(ptr%ptr) nounwind readnone {
25962596
;
25972597
; AVX2-LABEL: load_sext_16i1_to_16i8:
25982598
; AVX2: # %bb.0: # %entry
2599-
; AVX2-NEXT: movzwl (%rdi), %eax
2600-
; AVX2-NEXT: vmovd %eax, %xmm0
2601-
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
2599+
; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
2600+
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3]
26022601
; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
26032602
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
26042603
; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -2804,9 +2803,8 @@ define <32 x i8> @load_sext_32i1_to_32i8(ptr%ptr) nounwind readnone {
28042803
;
28052804
; AVX2-LABEL: load_sext_32i1_to_32i8:
28062805
; AVX2: # %bb.0: # %entry
2807-
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
2808-
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
2809-
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19]
2806+
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
2807+
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,18,18,18,18,18,18,18,18,27,27,27,27,27,27,27,27]
28102808
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
28112809
; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
28122810
; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0

0 commit comments

Comments
 (0)