Skip to content

Commit 14d60e9

Browse files
RKSimon authored and tstellar committed
[X86][AVX] Only share broadcasts of different widths from the same SDValue of the same SDNode (PR48215)
D57663 allowed us to reuse broadcasts of the same scalar value by extracting low subvectors from the widest type. Unfortunately, we weren't ensuring the broadcasts were from the same SDValue, just the same SDNode, which failed on multiple-value nodes like ISD::SDIVREM. FYI: I intend to request that this be merged into the 11.x release branch. Differential Revision: https://reviews.llvm.org/D91709. (Cherry picked from commit 14ae02f.) Signed-off-by: Warren Ristow <[email protected]>
1 parent a21e609 commit 14d60e9

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36018,8 +36018,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
3601836018
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
3601936019

3602036020
// Share broadcast with the longest vector and extract low subvector (free).
36021+
// Ensure the same SDValue from the SDNode use is being used.
3602136022
for (SDNode *User : Src->uses())
3602236023
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
36024+
Src == User->getOperand(0) &&
3602336025
User->getValueSizeInBits(0) > VT.getSizeInBits()) {
3602436026
return extractSubVector(SDValue(User, 0), 0, DAG, DL,
3602536027
VT.getSizeInBits());

llvm/test/CodeGen/X86/pr48215.ll

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -33,12 +33,14 @@ define i32 @PR48215(i32 %a0, i32 %a1) {
3333
; AVX2-NEXT: idivl %esi
3434
; AVX2-NEXT: vmovd %eax, %xmm0
3535
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
36-
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7]
36+
; AVX2-NEXT: vmovd %edx, %xmm1
37+
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
38+
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7]
39+
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
3740
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
38-
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm1
39-
; AVX2-NEXT: vmovmskps %ymm1, %ecx
40-
; AVX2-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
41-
; AVX2-NEXT: vmovmskps %xmm0, %eax
41+
; AVX2-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
42+
; AVX2-NEXT: vmovmskps %ymm0, %ecx
43+
; AVX2-NEXT: vmovmskps %xmm1, %eax
4244
; AVX2-NEXT: addl %ecx, %eax
4345
; AVX2-NEXT: vzeroupper
4446
; AVX2-NEXT: retq
@@ -49,8 +51,9 @@ define i32 @PR48215(i32 %a0, i32 %a1) {
4951
; AVX512-NEXT: cltd
5052
; AVX512-NEXT: idivl %esi
5153
; AVX512-NEXT: vpbroadcastd %eax, %ymm0
54+
; AVX512-NEXT: vpbroadcastd %edx, %xmm1
5255
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %ymm0, %k0
53-
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm0, %k1
56+
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm1, %k1
5457
; AVX512-NEXT: kmovw %k0, %eax
5558
; AVX512-NEXT: movzbl %al, %ecx
5659
; AVX512-NEXT: kmovw %k1, %eax

0 commit comments

Comments
 (0)