-
Notifications
You must be signed in to change notification settings - Fork 14.3k
Revert "[X86] combineBROADCAST_LOAD - merge across chains" #128380
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revert "[X86] combineBROADCAST_LOAD - merge across chains" #128380
Conversation
This reverts commit e21a173.
@llvm/pr-subscribers-backend-x86 Author: Vitaly Buka (vitalybuka) ChangesReverts llvm/llvm-project#128209 Introduces "AddressSanitizer: use-after-poison". Patch is 25.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/128380.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a4357197e2843..2bc76d3814792 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59360,14 +59360,21 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);
}
-// Try to find a larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD that we can extract.
+// Try to find a larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD that we can extract
+// from. Limit this to cases where the loads have the same input chain and the
+// output chains are unused. This avoids any memory ordering issues.
static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD ||
N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) &&
"Unknown broadcast load type");
+ // Only do this if the chain result is unused.
+ if (N->hasAnyUseOfValue(1))
+ return SDValue();
+
auto *MemIntrin = cast<MemIntrinsicSDNode>(N);
+
SDValue Ptr = MemIntrin->getBasePtr();
SDValue Chain = MemIntrin->getChain();
EVT VT = N->getSimpleValueType(0);
@@ -59381,15 +59388,12 @@ static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
MemVT.getSizeInBits() &&
+ !User->hasAnyUseOfValue(1) &&
User->getValueSizeInBits(0).getFixedValue() > VT.getFixedSizeInBits()) {
- assert(cast<MemIntrinsicSDNode>(User)->isSimple() &&
- MemIntrin->isSimple() && "Illegal broadcast load type");
SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
VT.getSizeInBits());
Extract = DAG.getBitcast(VT, Extract);
- Extract = DCI.CombineTo(N, Extract, SDValue(User, 1));
- DAG.makeEquivalentMemoryOrdering(SDValue(N, 1), Extract.getValue(1));
- return Extract;
+ return DCI.CombineTo(N, Extract, SDValue(User, 1));
}
return SDValue();
diff --git a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
index ca589e63b671a..d617cfb6aedee 100644
--- a/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/any_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1888,14 +1888,15 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
;
; AVX2-LABEL: vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX2-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
+; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23]
+; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -2111,14 +2112,15 @@ define void @vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12(ptr %in
;
; AVX2-LABEL: vec384_i8_widen_to_i32_factor4_broadcast_to_v12i32_factor12:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
+; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -2235,29 +2237,33 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
;
; AVX512F-LABEL: vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
-; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
+; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
+; AVX512F-NEXT: vpbroadcastb (%rdi), %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
-; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
+; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
+; AVX512DQ-NEXT: vpbroadcastb (%rdi), %xmm2
+; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
@@ -2266,8 +2272,9 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.e
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
-; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
+; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2332,14 +2339,15 @@ define void @vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6(ptr %in.e
;
; AVX2-LABEL: vec384_i8_widen_to_i64_factor8_broadcast_to_v6i64_factor6:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX2-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
-; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX2-NEXT: vpbroadcastb (%rdi), %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
+; AVX2-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -2454,29 +2462,33 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
;
; AVX512F-LABEL: vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512F-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
-; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT: vmovdqa (%rdi), %xmm1
+; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
+; AVX512F-NEXT: vpbroadcastb (%rdi), %xmm2
+; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
-; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
-; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm1
+; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
+; AVX512DQ-NEXT: vpbroadcastb (%rdi), %xmm2
+; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
@@ -2485,8 +2497,9 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
; AVX512BW-NEXT: vpalignr {{.*#+}} xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
-; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
+; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm1
; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm1
; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
@@ -2775,13 +2788,14 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
;
; AVX2-LABEL: vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX2-NEXT: vpbroadcastw (%rdi), %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
-; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX2-NEXT: vpbroadcastw (%rdi), %ymm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4],ymm1[5],ymm2[6],ymm1[7],ymm2[8],ymm1[9],ymm2[10],ymm1[11],ymm2[12],ymm1[13],ymm2[14],ymm1[15]
+; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -2976,13 +2990,14 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
;
; AVX2-LABEL: vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa 48(%rdi), %xmm0
-; AVX2-NEXT: vpbroadcastw (%rdi), %ymm1
-; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7],ymm1[8],ymm0[9,10,11],ymm1[12],ymm0[13,14,15]
-; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
-; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
-; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
+; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 48(%rdi), %xmm1
+; AVX2-NEXT: vpbroadcastw (%rdi), %ymm2
+; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7],ymm2[8],ymm1[9,10,11],ymm2[12],ymm1[13,14,15]
+; AVX2-NEXT: vpaddb (%rsi), %ymm1, %ymm1
+; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
+; AVX2-NEXT: vmovdqa %ymm0, 32(%rdx)
+; AVX2-NEXT: vmovdqa %ymm1, (%rdx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
@@ -3093,25 +3108,27 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
;
; AVX512F-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm0
+; AVX512F-NEXT: vpbroadcastw (%rdi), %xmm0
; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
-; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
-; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
-; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
-; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
-; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
+; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512F-NEXT: vpbroadcastw (%rdi), %ymm1
+; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
+; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
+; AVX512F-NEXT: vmovdqa %ymm1, 32(%rdx)
+; AVX512F-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm0
+; AVX512DQ-NEXT: vpbroadcastw (%rdi), %xmm0
; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1,2,3,4,5],xmm0[6],mem[7]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
-; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
-; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
-; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
+; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512DQ-NEXT: vpbroadcastw (%rdi), %ymm1
+; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
+; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
+; AVX512DQ-NEXT: vmovdqa %ymm1, 32(%rdx)
+; AVX512DQ-NEXT: vmovdqa %ymm0, (%rdx)
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
index 7f206fd3bf2aa..dbd9df36239bd 100644
--- a/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
+++ b/llvm/test/CodeGen/X86/zero_extend_vector_inreg_of_broadcast_from_memory.ll
@@ -1906,6 +1906,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; AVX512F-NEXT: vpbroadcastb (%rdi), %ymm1
; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512F-NEXT: vpbroadcastb (%rdi), %xmm1
; AVX512F-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512F-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512F-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
@@ -1921,6 +1922,7 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
; AVX512DQ-NEXT: vpbroadcastb (%rdi), %ymm1
; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
; AVX512DQ-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; AVX512DQ-NEXT: vpbroadcastb (%rdi), %xmm1
; AVX512DQ-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQ-NEXT: vpaddb (%rsi), %ymm0, %ymm0
; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
@@ -1931,13 +1933,14 @@ define void @vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24(ptr %in
;
; AVX512BW-LABEL: vec384_i8_widen_to_i16_factor2_broadcast_to_v24i16_factor24:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm0
-; AVX512BW-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512BW-NEXT: vmovdqa 48(%rdi), %xmm2
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[1],ymm2[1],ymm0[2],ymm2[2],ymm0[3],ymm2[3],ymm0[4],ymm2[4],ymm0[5],ymm2[5],ymm0[6],ymm2[6],ymm0[7],ymm2[7],ymm0[16],ymm2[16],ymm0[17],ymm2[17],ymm0[18],ymm2[18],ymm0[19],ymm2[19],ymm0[20],ymm2[20],...
[truncated]
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/13476 Here is the relevant piece of the build log for the reference
|
Remove the restriction when reusing wider BROADCAST_LOAD nodes that both nodes couldn't have uses of their load chains - use makeEquivalentMemoryOrdering to merge the chains instead. Reapplied - move makeEquivalentMemoryOrdering prior to the CombineTo call to ensure that the original node hasn't already been removed. Fixes asan use-after-poison error reported in #128380 / 50b0669.
Reverts #128209
Introduces "AddressSanitizer: use-after-poison".