Commit 88f1a2c

[X86] combineLoad - allow constant loads to share matching 'lower constant bits' with larger VBROADCAST_LOAD/SUBV_BROADCAST_LOAD nodes
We already had separate support for VBROADCAST_LOAD - merge this with the generic load handling and add SUBV_BROADCAST_LOAD support as well.
1 parent 5e38ba2 commit 88f1a2c
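
A minimal sketch of the kind of pattern this combine targets (a hypothetical function, not one of the commit's test cases; assumes an AVX x86 target): a 128-bit constant whose bytes match the lower half of a wider constant that is materialised with a broadcast load can now reuse that wider VBROADCAST_LOAD/SUBV_BROADCAST_LOAD instead of keeping its own constant-pool load, much like the f8xi64_i128 diff further down.

; Hypothetical example: <0, 1> (the 128-bit add constant) matches the lower
; 128 bits of <0, 1, 0, 1> (the 256-bit and constant), which is typically
; lowered as a broadcast of a 128-bit constant-pool entry. With this combine
; the narrow constant load can be folded into an extract of that broadcast.
define <4 x i64> @reuse_lower_constant_bits(<2 x i64> %a, <4 x i64> %b) {
  %add = add <2 x i64> %a, <i64 0, i64 1>
  %and = and <4 x i64> %b, <i64 0, i64 1, i64 0, i64 1>
  %wide = shufflevector <2 x i64> %add, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %res = add <4 x i64> %and, %wide
  ret <4 x i64> %res
}

Whether the fold actually fires depends on how the two constants end up in the constant pool, so treat this only as an illustration of the 'lower constant bits' relationship.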

10 files changed: +25535, -25876 lines

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 27 deletions

@@ -49963,33 +49963,15 @@ static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
           }
           return true;
         };
-        if (User->getOpcode() == X86ISD::VBROADCAST_LOAD &&
-            getTargetConstantFromBasePtr(Ptr)) {
-          // See if we are loading a constant that has also been broadcast.
-          APInt Undefs, UserUndefs;
-          SmallVector<APInt> Bits, UserBits;
-          if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&
-              getTargetConstantBitsFromNode(SDValue(User, 0), 8, UserUndefs,
-                                            UserBits)) {
-            UserUndefs = UserUndefs.trunc(Undefs.getBitWidth());
-            UserBits.truncate(Bits.size());
-            if (MatchingBits(Undefs, UserUndefs, Bits, UserBits)) {
-              SDValue Extract = extractSubVector(
-                  SDValue(User, 0), 0, DAG, SDLoc(N), RegVT.getSizeInBits());
-              Extract = DAG.getBitcast(RegVT, Extract);
-              return DCI.CombineTo(N, Extract, SDValue(User, 1));
-            }
-          }
-        }
-        if (ISD::isNormalLoad(User)) {
-          // See if we are loading a constant that matches in the lower
-          // bits of a longer constant (but from a different constant pool ptr).
-          SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr();
-          const Constant *LdC = getTargetConstantFromBasePtr(Ptr);
-          const Constant *UserC = getTargetConstantFromBasePtr(UserPtr);
-          if (LdC && UserC && UserPtr != Ptr &&
-              LdC->getType()->getPrimitiveSizeInBits() <
-                  UserC->getType()->getPrimitiveSizeInBits()) {
+        // See if we are loading a constant that matches in the lower
+        // bits of a longer constant (but from a different constant pool ptr).
+        SDValue UserPtr = cast<MemSDNode>(User)->getBasePtr();
+        const Constant *LdC = getTargetConstantFromBasePtr(Ptr);
+        const Constant *UserC = getTargetConstantFromBasePtr(UserPtr);
+        if (LdC && UserC && UserPtr != Ptr) {
+          unsigned LdSize = LdC->getType()->getPrimitiveSizeInBits();
+          unsigned UserSize = UserC->getType()->getPrimitiveSizeInBits();
+          if (LdSize < UserSize || !ISD::isNormalLoad(User)) {
             APInt Undefs, UserUndefs;
             SmallVector<APInt> Bits, UserBits;
             if (getTargetConstantBitsFromNode(SDValue(N, 0), 8, Undefs, Bits) &&

llvm/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll

Lines changed: 9 additions & 10 deletions

@@ -1459,17 +1459,16 @@ define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
 ;
 ; AVX-64-LABEL: f8xi64_i128:
 ; AVX-64:       # %bb.0:
-; AVX-64-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX-64-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
-; AVX-64-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
-; AVX-64-NEXT:    vpaddq %xmm3, %xmm1, %xmm1
-; AVX-64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX-64-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX-64-NEXT:    vpaddq %xmm3, %xmm2, %xmm2
-; AVX-64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0
-; AVX-64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX-64-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0]
+; AVX-64-NEXT:    vbroadcastf128 {{.*#+}} ymm2 = [0,1,0,1]
 ; AVX-64-NEXT:    # ymm2 = mem[0,1,0,1]
+; AVX-64-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX-64-NEXT:    vpaddq %xmm2, %xmm3, %xmm3
+; AVX-64-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
+; AVX-64-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX-64-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX-64-NEXT:    vpaddq %xmm2, %xmm3, %xmm3
+; AVX-64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; AVX-64-NEXT:    vandps %ymm2, %ymm0, %ymm0
 ; AVX-64-NEXT:    vandps %ymm2, %ymm1, %ymm1
 ; AVX-64-NEXT:    retq

The remaining large diffs are not rendered by default:

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-8.ll (247 additions, 318 deletions)
llvm/test/CodeGen/X86/vector-interleaved-load-i32-stride-8.ll (1581 additions, 1755 deletions)
llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-5.ll (1290 additions, 1286 deletions)
llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-7.ll (3094 additions, 2977 deletions)
llvm/test/CodeGen/X86/vector-interleaved-load-i64-stride-8.ll (3102 additions, 3104 deletions)
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-6.ll (2062 additions, 2032 deletions)
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-7.ll (11267 additions, 11359 deletions)
llvm/test/CodeGen/X86/vector-interleaved-store-i64-stride-8.ll (2874 additions, 3008 deletions)
