Skip to content

Commit fc2152b

Browse files
RKSimon authored and yuxuanchen1997 committed
[X86] combineConcatVectorOps - IsConcatFree - peek through bitcasts to find inplace subvectors.
The EXTRACT_SUBVECTOR nodes don't have to be the same type, they just need to be at the correct bit offsets when concatenated back together. This reapplies d43ec97 (after being reverted in 68cb903) now that 65e86a8 has landed to address a downstream issue.
1 parent 0dfff02 commit fc2152b

File tree

2 files changed

+12
-22
lines changed

2 files changed

+12
-22
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56183,18 +56183,19 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5618356183
};
5618456184
auto IsConcatFree = [](MVT VT, ArrayRef<SDValue> SubOps, unsigned Op) {
5618556185
bool AllConstants = true;
56186-
bool AllSubVectors = true;
56186+
bool AllSubs = true;
56187+
unsigned VecSize = VT.getSizeInBits();
5618756188
for (unsigned I = 0, E = SubOps.size(); I != E; ++I) {
56188-
SDValue Sub = SubOps[I].getOperand(Op);
56189-
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
56190-
SDValue BC = peekThroughBitcasts(Sub);
56189+
SDValue BC = peekThroughBitcasts(SubOps[I].getOperand(Op));
56190+
unsigned SubSize = BC.getValueSizeInBits();
56191+
unsigned EltSize = BC.getScalarValueSizeInBits();
5619156192
AllConstants &= ISD::isBuildVectorOfConstantSDNodes(BC.getNode()) ||
5619256193
ISD::isBuildVectorOfConstantFPSDNodes(BC.getNode());
56193-
AllSubVectors &= Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
56194-
Sub.getOperand(0).getValueType() == VT &&
56195-
Sub.getConstantOperandAPInt(1) == (I * NumSubElts);
56194+
AllSubs &= BC.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
56195+
BC.getOperand(0).getValueSizeInBits() == VecSize &&
56196+
(BC.getConstantOperandVal(1) * EltSize) == (I * SubSize);
5619656197
}
56197-
return AllConstants || AllSubVectors;
56198+
return AllConstants || AllSubs;
5619856199
};
5619956200

5620056201
switch (Op0.getOpcode()) {

llvm/test/CodeGen/X86/vselect-avx.ll

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ define void @blendv_split(ptr %p, <8 x i32> %cond, <8 x i32> %a, <8 x i32> %x, <
259259
ret void
260260
}
261261

262-
; TODO: Concatenate 128-bit pblendvb back together on AVX2+ targets (hidden by SSE __m128i bitcasts)
262+
; Concatenate 128-bit pblendvb back together on AVX2+ targets (hidden by SSE __m128i bitcasts)
263263
define <4 x i64> @vselect_concat_split_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) {
264264
; AVX1-LABEL: vselect_concat_split_v16i8:
265265
; AVX1: ## %bb.0:
@@ -277,24 +277,13 @@ define <4 x i64> @vselect_concat_split_v16i8(<4 x i64> %a, <4 x i64> %b, <4 x i6
277277
; AVX2-LABEL: vselect_concat_split_v16i8:
278278
; AVX2: ## %bb.0:
279279
; AVX2-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
280-
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm3
281-
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
282-
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
283-
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
284-
; AVX2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
285-
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
280+
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
286281
; AVX2-NEXT: retq
287282
;
288283
; AVX512-LABEL: vselect_concat_split_v16i8:
289284
; AVX512: ## %bb.0:
290285
; AVX512-NEXT: vpcmpgtb %ymm2, %ymm3, %ymm2
291-
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
292-
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm4
293-
; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 killed $ymm1 def $ymm1
294-
; AVX512-NEXT: vpternlogq $226, %xmm0, %xmm2, %xmm1
295-
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
296-
; AVX512-NEXT: vpternlogq $226, %xmm0, %xmm3, %xmm4
297-
; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm0
286+
; AVX512-NEXT: vpternlogq $216, %ymm2, %ymm1, %ymm0
298287
; AVX512-NEXT: retq
299288
%a.bc = bitcast <4 x i64> %a to <32 x i8>
300289
%b.bc = bitcast <4 x i64> %b to <32 x i8>

0 commit comments

Comments
 (0)