Skip to content

Commit 14ae02f

Browse files
committed
[X86][AVX] Only share broadcasts of different widths from the same SDValue of the same SDNode (PR48215)
D57663 allowed us to reuse broadcasts of the same scalar value by extracting low subvectors from the widest type. Unfortunately, we weren't ensuring the broadcasts were from the same SDValue, just the same SDNode, which failed on multiple-value nodes like ISD::SDIVREM. FYI: I intend to request that this be merged into the 11.x release branch. Differential Revision: https://reviews.llvm.org/D91709
1 parent 1e2da38 commit 14ae02f

File tree

2 files changed

+11
-6
lines changed

2 files changed

+11
-6
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36715,8 +36715,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
3671536715
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
3671636716

3671736717
// Share broadcast with the longest vector and extract low subvector (free).
36718+
// Ensure the same SDValue from the SDNode use is being used.
3671836719
for (SDNode *User : Src->uses())
3671936720
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
36721+
Src == User->getOperand(0) &&
3672036722
User->getValueSizeInBits(0).getFixedSize() >
3672136723
VT.getFixedSizeInBits()) {
3672236724
return extractSubVector(SDValue(User, 0), 0, DAG, DL,

llvm/test/CodeGen/X86/pr48215.ll

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,14 @@ define i32 @PR48215(i32 %a0, i32 %a1) {
3333
; AVX2-NEXT: idivl %esi
3434
; AVX2-NEXT: vmovd %eax, %xmm0
3535
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
36-
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7]
36+
; AVX2-NEXT: vmovd %edx, %xmm1
37+
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
38+
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7]
39+
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
3740
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
38-
; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm1
39-
; AVX2-NEXT: vmovmskps %ymm1, %ecx
40-
; AVX2-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
41-
; AVX2-NEXT: vmovmskps %xmm0, %eax
41+
; AVX2-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
42+
; AVX2-NEXT: vmovmskps %ymm0, %ecx
43+
; AVX2-NEXT: vmovmskps %xmm1, %eax
4244
; AVX2-NEXT: addl %ecx, %eax
4345
; AVX2-NEXT: vzeroupper
4446
; AVX2-NEXT: retq
@@ -49,8 +51,9 @@ define i32 @PR48215(i32 %a0, i32 %a1) {
4951
; AVX512-NEXT: cltd
5052
; AVX512-NEXT: idivl %esi
5153
; AVX512-NEXT: vpbroadcastd %eax, %ymm0
54+
; AVX512-NEXT: vpbroadcastd %edx, %xmm1
5255
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %ymm0, %k0
53-
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm0, %k1
56+
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm1, %k1
5457
; AVX512-NEXT: kmovw %k0, %eax
5558
; AVX512-NEXT: movzbl %al, %ecx
5659
; AVX512-NEXT: kmovw %k1, %eax

0 commit comments

Comments
 (0)