Skip to content

Commit 5b30587

Browse files
RKSimon authored and Jaddyen committed
[X86] IsElementEquivalent - add basic handling for X86ISD::SUBV_BROADCAST_LOAD (llvm#139727)
If we're broadcasting a subvector, then the elements at equal positions within each subvector are the same.
1 parent c231742 commit 5b30587

File tree

3 files changed, with 56 additions and 79 deletions.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10009,6 +10009,14 @@ static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
1000910009
case X86ISD::VBROADCAST_LOAD:
1001010010
// TODO: Handle MaskSize != VT.getVectorNumElements()?
1001110011
return (Op == ExpectedOp && (int)VT.getVectorNumElements() == MaskSize);
10012+
case X86ISD::SUBV_BROADCAST_LOAD:
10013+
// TODO: Handle MaskSize != VT.getVectorNumElements()?
10014+
if (Op == ExpectedOp && (int)VT.getVectorNumElements() == MaskSize) {
10015+
auto *MemOp = cast<MemSDNode>(Op);
10016+
unsigned NumMemElts = MemOp->getMemoryVT().getVectorNumElements();
10017+
return (Idx % NumMemElts) == (ExpectedIdx % NumMemElts);
10018+
}
10019+
break;
1001210020
case X86ISD::HADD:
1001310021
case X86ISD::HSUB:
1001410022
case X86ISD::FHADD:

llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 8 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1560,8 +1560,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15601560
;
15611561
; AVX2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15621562
; AVX2: # %bb.0:
1563-
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
1564-
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1563+
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
15651564
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15661565
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
15671566
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3381,15 +3380,13 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
33813380
;
33823381
; AVX2-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
33833382
; AVX2: # %bb.0:
3384-
; AVX2-NEXT: vpbroadcastd (%rdi), %xmm0
3385-
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
3386-
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
3387-
; AVX2-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
3388-
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
3389-
; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3390-
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3391-
; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
3392-
; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
3383+
; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
3384+
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm1
3385+
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
3386+
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3387+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3388+
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
3389+
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
33933390
; AVX2-NEXT: vzeroupper
33943391
; AVX2-NEXT: retq
33953392
;

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 40 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -1560,8 +1560,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15601560
;
15611561
; AVX2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15621562
; AVX2: # %bb.0:
1563-
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
1564-
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1563+
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
15651564
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15661565
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
15671566
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3723,53 +3722,49 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
37233722
;
37243723
; AVX2-SLOW-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
37253724
; AVX2-SLOW: # %bb.0:
3726-
; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %xmm0
3727-
; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3728-
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3729-
; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm1
3730-
; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
3731-
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
3732-
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
3725+
; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm0
3726+
; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %ymm1
3727+
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
37333728
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
3734-
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
3735-
; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3736-
; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3737-
; AVX2-SLOW-NEXT: vmovdqa %ymm0, 32(%rdx)
3738-
; AVX2-SLOW-NEXT: vmovdqa %ymm1, (%rdx)
3729+
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5],ymm0[6],ymm2[7]
3730+
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
3731+
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
3732+
; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3733+
; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3734+
; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
3735+
; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
37393736
; AVX2-SLOW-NEXT: vzeroupper
37403737
; AVX2-SLOW-NEXT: retq
37413738
;
37423739
; AVX2-FAST-PERLANE-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
37433740
; AVX2-FAST-PERLANE: # %bb.0:
37443741
; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm0
37453742
; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm1
3746-
; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3747-
; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
3748-
; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
3749-
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
3750-
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
3751-
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
3752-
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3753-
; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3754-
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, 32(%rdx)
3755-
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, (%rdx)
3743+
; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3744+
; AVX2-FAST-PERLANE-NEXT: vpbroadcastd %xmm0, %ymm0
3745+
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
3746+
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm1, %xmm1, %xmm1
3747+
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
3748+
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3749+
; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
3750+
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
3751+
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
37563752
; AVX2-FAST-PERLANE-NEXT: vzeroupper
37573753
; AVX2-FAST-PERLANE-NEXT: retq
37583754
;
37593755
; AVX2-FAST-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
37603756
; AVX2-FAST: # %bb.0:
37613757
; AVX2-FAST-NEXT: vmovdqa (%rdi), %xmm0
37623758
; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm1
3763-
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3764-
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
3765-
; AVX2-FAST-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
3766-
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
3767-
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
3768-
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
3769-
; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3770-
; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3771-
; AVX2-FAST-NEXT: vmovdqa %ymm0, 32(%rdx)
3772-
; AVX2-FAST-NEXT: vmovdqa %ymm1, (%rdx)
3759+
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3760+
; AVX2-FAST-NEXT: vpbroadcastd %xmm0, %ymm0
3761+
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
3762+
; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
3763+
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
3764+
; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3765+
; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
3766+
; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
3767+
; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
37733768
; AVX2-FAST-NEXT: vzeroupper
37743769
; AVX2-FAST-NEXT: retq
37753770
;
@@ -5317,40 +5312,17 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
53175312
; AVX-NEXT: vzeroupper
53185313
; AVX-NEXT: retq
53195314
;
5320-
; AVX2-SLOW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5321-
; AVX2-SLOW: # %bb.0:
5322-
; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
5323-
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
5324-
; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5325-
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5326-
; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5327-
; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5328-
; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
5329-
; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
5330-
; AVX2-SLOW-NEXT: vzeroupper
5331-
; AVX2-SLOW-NEXT: retq
5332-
;
5333-
; AVX2-FAST-PERLANE-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5334-
; AVX2-FAST-PERLANE: # %bb.0:
5335-
; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
5336-
; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
5337-
; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5338-
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5339-
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
5340-
; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
5341-
; AVX2-FAST-PERLANE-NEXT: vzeroupper
5342-
; AVX2-FAST-PERLANE-NEXT: retq
5343-
;
5344-
; AVX2-FAST-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5345-
; AVX2-FAST: # %bb.0:
5346-
; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
5347-
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
5348-
; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5349-
; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5350-
; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
5351-
; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
5352-
; AVX2-FAST-NEXT: vzeroupper
5353-
; AVX2-FAST-NEXT: retq
5315+
; AVX2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5316+
; AVX2: # %bb.0:
5317+
; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
5318+
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
5319+
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5320+
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5321+
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5322+
; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
5323+
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
5324+
; AVX2-NEXT: vzeroupper
5325+
; AVX2-NEXT: retq
53545326
;
53555327
; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
53565328
; AVX512F: # %bb.0:

0 commit comments

Comments
 (0)