Skip to content

Commit 5340434

Browse files
committed
[X86][SSE] combineExtractWithShuffle - extract(bitcast(broadcast(x))) --> x
Removes some unnecessary gpr<-->fpu traffic
1 parent 58991ba commit 5340434

File tree

2 files changed

+15
-12
lines changed

2 files changed

+15
-12
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37102,11 +37102,24 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
3710237102

3710337103
SDValue SrcBC = peekThroughBitcasts(Src);
3710437104

37105-
// Handle extract(broadcast(scalar_value)), it doesn't matter what index is.
37105+
// Handle extract(bitcast(broadcast(scalar_value))).
3710637106
if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
3710737107
SDValue SrcOp = SrcBC.getOperand(0);
3710837108
if (SrcOp.getValueSizeInBits() == VT.getSizeInBits())
3710937109
return DAG.getBitcast(VT, SrcOp);
37110+
37111+
EVT SrcOpVT = SrcOp.getValueType();
37112+
if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
37113+
(SrcOpVT.getSizeInBits() % SrcSVT.getSizeInBits()) == 0) {
37114+
unsigned Scale = SrcOpVT.getSizeInBits() / SrcSVT.getSizeInBits();
37115+
unsigned Offset = IdxC.urem(Scale) * SrcSVT.getSizeInBits();
37116+
// TODO support non-zero offsets.
37117+
if (Offset == 0) {
37118+
SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
37119+
SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
37120+
return SrcOp;
37121+
}
37122+
}
3711037123
}
3711137124

3711237125
// If we're extracting a single element from a broadcast load and there are
@@ -37126,7 +37139,7 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
3712637139
}
3712737140
}
3712837141

37129-
// Handle extract(scalar_to_vector(scalar_value)) for integers.
37142+
// Handle extract(bitcast(scalar_to_vector(scalar_value))) for integers.
3713037143
// TODO: Move to DAGCombine?
3713137144
if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
3713237145
SrcBC.getValueType().isInteger() &&

llvm/test/CodeGen/X86/bitcast-vector-bool.ll

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -77,9 +77,7 @@ define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
7777
; AVX512-NEXT: movzbl %cl, %eax
7878
; AVX512-NEXT: shrl $2, %eax
7979
; AVX512-NEXT: andl $3, %eax
80-
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
8180
; AVX512-NEXT: andl $3, %ecx
82-
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
8381
; AVX512-NEXT: addb %cl, %al
8482
; AVX512-NEXT: # kill: def $al killed $al killed $eax
8583
; AVX512-NEXT: retq
@@ -124,9 +122,7 @@ define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
124122
; AVX512-NEXT: kmovd %k0, %ecx
125123
; AVX512-NEXT: movzbl %cl, %eax
126124
; AVX512-NEXT: shrl $4, %eax
127-
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
128125
; AVX512-NEXT: andl $15, %ecx
129-
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
130126
; AVX512-NEXT: addb %cl, %al
131127
; AVX512-NEXT: # kill: def $al killed $al killed $eax
132128
; AVX512-NEXT: retq
@@ -214,9 +210,7 @@ define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
214210
; AVX512-NEXT: movzbl %cl, %eax
215211
; AVX512-NEXT: shrl $2, %eax
216212
; AVX512-NEXT: andl $3, %eax
217-
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
218213
; AVX512-NEXT: andl $3, %ecx
219-
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
220214
; AVX512-NEXT: addb %cl, %al
221215
; AVX512-NEXT: # kill: def $al killed $al killed $eax
222216
; AVX512-NEXT: vzeroupper
@@ -264,9 +258,7 @@ define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
264258
; AVX512-NEXT: kmovd %k0, %ecx
265259
; AVX512-NEXT: movzbl %cl, %eax
266260
; AVX512-NEXT: shrl $4, %eax
267-
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
268261
; AVX512-NEXT: andl $15, %ecx
269-
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
270262
; AVX512-NEXT: addb %cl, %al
271263
; AVX512-NEXT: # kill: def $al killed $al killed $eax
272264
; AVX512-NEXT: vzeroupper
@@ -451,9 +443,7 @@ define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
451443
; AVX512-NEXT: kmovd %k0, %ecx
452444
; AVX512-NEXT: movzbl %cl, %eax
453445
; AVX512-NEXT: shrl $4, %eax
454-
; AVX512-NEXT: vpbroadcastq %rax, %xmm0
455446
; AVX512-NEXT: andl $15, %ecx
456-
; AVX512-NEXT: vpextrb $8, %xmm0, %eax
457447
; AVX512-NEXT: addb %cl, %al
458448
; AVX512-NEXT: # kill: def $al killed $al killed $eax
459449
; AVX512-NEXT: vzeroupper

0 commit comments

Comments
 (0)