
Commit 7ff3f97

[X86] getFauxShuffleMask - handle insert_vector_elt(bitcast(extract_vector_elt(x))) shuffle patterns
If the bitcast is between types of equal scalar size (e.g. fp<->int bitcasts), then we can safely peek through them.

Fixes llvm#83289
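
As an illustration, a minimal LLVM IR sketch of the kind of pattern this now handles (hypothetical function, not taken from the commit's test files): the scalar round-trips through a bitcast of equal width (half <-> i16), so the extract/insert pair can be recognised as a shuffle of the source vector.

define <8 x i16> @insert_bitcast_extract(<8 x half> %src, <8 x i16> %dst) {
  ; extract lane 0 of %src, bitcast half -> i16 (same 16-bit width), reinsert into lane 0 of %dst
  %elt = extractelement <8 x half> %src, i64 0
  %cast = bitcast half %elt to i16
  %res = insertelement <8 x i16> %dst, i16 %cast, i64 0
  ret <8 x i16> %res
}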
1 parent 30b63de commit 7ff3f97

11 files changed: +200 -411 lines

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 2 deletions
@@ -5878,13 +5878,16 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
       }
     }
 
-    // Peek through trunc/aext/zext.
+    // Peek through trunc/aext/zext/bitcast.
     // TODO: aext shouldn't require SM_SentinelZero padding.
     // TODO: handle shift of scalars.
     unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits();
     while (Scl.getOpcode() == ISD::TRUNCATE ||
            Scl.getOpcode() == ISD::ANY_EXTEND ||
-           Scl.getOpcode() == ISD::ZERO_EXTEND) {
+           Scl.getOpcode() == ISD::ZERO_EXTEND ||
+           (Scl.getOpcode() == ISD::BITCAST &&
+            Scl.getScalarValueSizeInBits() ==
+                Scl.getOperand(0).getScalarValueSizeInBits())) {
       Scl = Scl.getOperand(0);
       MinBitsPerElt =
           std::min<unsigned>(MinBitsPerElt, Scl.getScalarValueSizeInBits());

llvm/test/CodeGen/X86/avx512-insert-extract.ll

Lines changed: 4 additions & 14 deletions
@@ -2171,19 +2171,14 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
 ; KNL-LABEL: test_concat_v2i1:
 ; KNL: ## %bb.0:
 ; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; KNL-NEXT: vpextrw $0, %xmm0, %eax
-; KNL-NEXT: movzwl %ax, %eax
-; KNL-NEXT: vmovd %eax, %xmm1
+; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; KNL-NEXT: vcvtph2ps %xmm1, %xmm1
 ; KNL-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; KNL-NEXT: vucomiss %xmm2, %xmm1
 ; KNL-NEXT: setb %al
 ; KNL-NEXT: andl $1, %eax
 ; KNL-NEXT: kmovw %eax, %k0
-; KNL-NEXT: vpsrld $16, %xmm0, %xmm0
-; KNL-NEXT: vpextrw $0, %xmm0, %eax
-; KNL-NEXT: movzwl %ax, %eax
-; KNL-NEXT: vmovd %eax, %xmm0
+; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
 ; KNL-NEXT: vucomiss %xmm2, %xmm0
 ; KNL-NEXT: setb %al
@@ -2212,19 +2207,14 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
 ; SKX-LABEL: test_concat_v2i1:
 ; SKX: ## %bb.0:
 ; SKX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; SKX-NEXT: vpsrld $16, %xmm0, %xmm1
-; SKX-NEXT: vpextrw $0, %xmm1, %eax
-; SKX-NEXT: movzwl %ax, %eax
-; SKX-NEXT: vmovd %eax, %xmm1
+; SKX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
 ; SKX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; SKX-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; SKX-NEXT: vucomiss %xmm2, %xmm1
 ; SKX-NEXT: setb %al
 ; SKX-NEXT: kmovd %eax, %k0
 ; SKX-NEXT: kshiftlb $1, %k0, %k0
-; SKX-NEXT: vpextrw $0, %xmm0, %eax
-; SKX-NEXT: movzwl %ax, %eax
-; SKX-NEXT: vmovd %eax, %xmm0
+; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; SKX-NEXT: vucomiss %xmm2, %xmm0
 ; SKX-NEXT: setb %al

llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Lines changed: 15 additions & 21 deletions
@@ -1436,10 +1436,9 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; KNL: ## %bb.0: ## %entry
 ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; KNL-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
-; KNL-NEXT: vpextrw $0, %xmm1, %eax ## encoding: [0xc5,0xf9,0xc5,0xc1,0x00]
-; KNL-NEXT: movzwl %ax, %eax ## encoding: [0x0f,0xb7,0xc0]
-; KNL-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
+; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; KNL-NEXT: ## encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A]
+; KNL-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
 ; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
@@ -1449,9 +1448,8 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; KNL-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
 ; KNL-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
 ; KNL-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
-; KNL-NEXT: vpextrw $0, %xmm0, %edi ## encoding: [0xc5,0xf9,0xc5,0xf8,0x00]
-; KNL-NEXT: movzwl %di, %edi ## encoding: [0x0f,0xb7,0xff]
-; KNL-NEXT: vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; KNL-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
+; KNL-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; KNL-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
 ; KNL-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]
@@ -1468,10 +1466,9 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; AVX512BW: ## %bb.0: ## %entry
 ; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; AVX512BW-NEXT: vpsrld $16, %xmm0, %xmm1 ## encoding: [0xc5,0xf1,0x72,0xd0,0x10]
-; AVX512BW-NEXT: vpextrw $0, %xmm1, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc1,0x00]
-; AVX512BW-NEXT: movzwl %ax, %eax ## encoding: [0x0f,0xb7,0xc0]
-; AVX512BW-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
+; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: ## encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A]
+; AVX512BW-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
 ; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
 ; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]
@@ -1481,9 +1478,8 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; AVX512BW-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
 ; AVX512BW-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
 ; AVX512BW-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
-; AVX512BW-NEXT: vpextrw $0, %xmm0, %edi ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xf8,0x00]
-; AVX512BW-NEXT: movzwl %di, %edi ## encoding: [0x0f,0xb7,0xff]
-; AVX512BW-NEXT: vmovd %edi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
+; AVX512BW-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
+; AVX512BW-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; AVX512BW-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
 ; AVX512BW-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]
@@ -1500,10 +1496,9 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; SKX: ## %bb.0: ## %entry
 ; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; SKX-NEXT: vpsrld $16, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x72,0xd0,0x10]
-; SKX-NEXT: vpextrw $0, %xmm1, %eax ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc1,0x00]
-; SKX-NEXT: movzwl %ax, %eax ## encoding: [0x0f,0xb7,0xc0]
-; SKX-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8]
+; SKX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A]
+; SKX-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
 ; SKX-NEXT: vcvtph2ps %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
 ; SKX-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca]
@@ -1512,9 +1507,8 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
 ; SKX-NEXT: testb %cl, %cl ## encoding: [0x84,0xc9]
 ; SKX-NEXT: setne %al ## encoding: [0x0f,0x95,0xc0]
-; SKX-NEXT: vpextrw $0, %xmm0, %ecx ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc8,0x00]
-; SKX-NEXT: movzwl %cx, %ecx ## encoding: [0x0f,0xb7,0xc9]
-; SKX-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
+; SKX-NEXT: vpmovzxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
+; SKX-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; SKX-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
 ; SKX-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
