Skip to content

Commit 9ec1c33

Browse files
RKSimon authored and Jaddyen committed
[X86] lowerShuffleAsSplitOrBlend - use isElementEquivalent to help identify hidden splat/broadcasts (llvm#141035)
Noticed while yak shaving llvm#139741
1 parent 7051ffe commit 9ec1c33

File tree

3 files changed

+22
-26
lines changed

3 files changed

+22
-26
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15221,12 +15221,14 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
1522115221
if (M >= Size) {
1522215222
if (V2BroadcastIdx < 0)
1522315223
V2BroadcastIdx = M - Size;
15224-
else if (M - Size != V2BroadcastIdx)
15224+
else if ((M - Size) != V2BroadcastIdx &&
15225+
!IsElementEquivalent(Size, V2, V2, M - Size, V2BroadcastIdx))
1522515226
return false;
1522615227
} else if (M >= 0) {
1522715228
if (V1BroadcastIdx < 0)
1522815229
V1BroadcastIdx = M;
15229-
else if (M != V1BroadcastIdx)
15230+
else if (M != V1BroadcastIdx &&
15231+
!IsElementEquivalent(Size, V1, V1, M, V1BroadcastIdx))
1523015232
return false;
1523115233
}
1523215234
return true;

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast.ll

Lines changed: 6 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2569,9 +2569,8 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
25692569
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
25702570
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
25712571
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
2572-
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm2
2573-
; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero
2574-
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2572+
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm2
2573+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
25752574
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
25762575
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,0,255,255,0,255,255,0,255,255,0,255,255,0,0,255,255,0,255,255,0,255,255,0,255,255,0,255,255,0]
25772576
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
@@ -2862,9 +2861,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28622861
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
28632862
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
28642863
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
2865-
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm2
2866-
; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,xmm1[0],zero,zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,xmm1[0],zero
2867-
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
2864+
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm2
2865+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
28682866
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
28692867
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,255,255,255,0,255,255,255,255,255,0,255,255,255,0,255,255,255,255,255,0,255,255,255,255,255,0,255,255,255]
28702868
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
@@ -3154,9 +3152,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31543152
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
31553153
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
31563154
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
3157-
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm2
3158-
; AVX2-NEXT: vpshufb {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,zero,zero
3159-
; AVX2-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
3155+
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm2
3156+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
31603157
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm2[2,3]
31613158
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
31623159
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]

llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2011,15 +2011,14 @@ define void @vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16(ptr %in
20112011
;
20122012
; AVX2-LABEL: vec384_i8_widen_to_i24_factor3_broadcast_to_v16i24_factor16:
20132013
; AVX2: # %bb.0:
2014-
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2015-
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
2016-
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero
2017-
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2014+
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
2015+
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm0[0],zero,zero,xmm0[0],zero,zero,xmm0[0],zero,zero,xmm0[0],zero,zero,xmm0[0],zero,zero
2016+
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
2017+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
20182018
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = mem[2,3],ymm0[2,3]
20192019
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,0,255,255,0,255,255,0,255,255,0,255,255,0,0,255,255,0,255,255,0,255,255,0,255,255,0,255,255,0]
20202020
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
20212021
; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
2022-
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero,xmm1[0],zero,zero
20232022
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
20242023
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
20252024
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2263,15 +2262,14 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
22632262
;
22642263
; AVX2-LABEL: vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8:
22652264
; AVX2: # %bb.0:
2266-
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2267-
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
2268-
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm1[0],zero,zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,xmm1[0],zero
2269-
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2265+
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
2266+
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero
2267+
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
2268+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
22702269
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = mem[2,3],ymm0[2,3]
22712270
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,255,255,255,0,255,255,255,255,255,0,255,255,255,0,255,255,255,255,255,0,255,255,255,255,255,0,255,255,255]
22722271
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
22732272
; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
2274-
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero
22752273
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
22762274
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
22772275
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
@@ -2515,15 +2513,14 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
25152513
;
25162514
; AVX2-LABEL: vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4:
25172515
; AVX2: # %bb.0:
2518-
; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
2519-
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
2520-
; AVX2-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,zero,zero
2521-
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
2516+
; AVX2-NEXT: vmovdqa (%rdi), %xmm0
2517+
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
2518+
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
2519+
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
25222520
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm2 = mem[2,3],ymm0[2,3]
25232521
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm3 = [0,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
25242522
; AVX2-NEXT: # ymm3 = mem[0,1,0,1]
25252523
; AVX2-NEXT: vpblendvb %ymm3, %ymm2, %ymm0, %ymm0
2526-
; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
25272524
; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
25282525
; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
25292526
; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)

0 commit comments

Comments
 (0)