Commit 139bcda

[X86] SimplifyDemandedVectorEltsForTargetNode - add basic CVTPH2PS/CVTPS2PH handling
Allows us to peek through the F16 conversion nodes, mainly to simplify shuffles.

An easy part of #83414.
1 parent b50b50b commit 139bcda

15 files changed: 160 additions & 227 deletions
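
For orientation, a minimal sketch of the kind of pattern that benefits (hypothetical IR, not copied from the updated tests; the function name is made up): under F16C a scalar half round-trip lowers to a vcvtps2ph/vcvtph2ps pair, and with this change the demanded-elements analysis looks through CVTPH2PS/CVTPS2PH, so the intermediate vpmovzxwq zero-extend shuffle between them can be dropped, as in the cvt16.ll and fold-int-pow2-with-fmul-or-fdiv.ll diffs below.

; Hypothetical example: only the low half/float lane is ever demanded.
define float @half_roundtrip(float %src) nounwind {
  %h = fptrunc float %src to half   ; lowers to vcvtps2ph under F16C
  %f = fpext half %h to float       ; lowers to vcvtph2ps
  ret float %f
}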

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion

@@ -41402,7 +41402,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
     break;
   }
   case X86ISD::CVTSI2P:
-  case X86ISD::CVTUI2P: {
+  case X86ISD::CVTUI2P:
+  case X86ISD::CVTPH2PS:
+  case X86ISD::CVTPS2PH: {
     SDValue Src = Op.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
     APInt SrcUndef, SrcZero;

llvm/test/CodeGen/X86/avx512-insert-extract.ll

Lines changed: 3 additions & 5 deletions

@@ -2171,14 +2171,13 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
 ; KNL-LABEL: test_concat_v2i1:
 ; KNL: ## %bb.0:
 ; KNL-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; KNL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; KNL-NEXT: vcvtph2ps %xmm1, %xmm1
+; KNL-NEXT: vcvtph2ps %xmm0, %xmm1
 ; KNL-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; KNL-NEXT: vucomiss %xmm2, %xmm1
 ; KNL-NEXT: setb %al
 ; KNL-NEXT: andl $1, %eax
 ; KNL-NEXT: kmovw %eax, %k0
-; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
 ; KNL-NEXT: vucomiss %xmm2, %xmm0
 ; KNL-NEXT: setb %al

@@ -2207,14 +2206,13 @@ define void @test_concat_v2i1(ptr %arg, ptr %arg1, ptr %arg2) nounwind {
 ; SKX-LABEL: test_concat_v2i1:
 ; SKX: ## %bb.0:
 ; SKX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; SKX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
+; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[1,1,1,1,4,5,6,7]
 ; SKX-NEXT: vcvtph2ps %xmm1, %xmm1
 ; SKX-NEXT: vmovss {{.*#+}} xmm2 = [6.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; SKX-NEXT: vucomiss %xmm2, %xmm1
 ; SKX-NEXT: setb %al
 ; SKX-NEXT: kmovd %eax, %k0
 ; SKX-NEXT: kshiftlb $1, %k0, %k0
-; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0
 ; SKX-NEXT: vucomiss %xmm2, %xmm0
 ; SKX-NEXT: setb %al

llvm/test/CodeGen/X86/avx512-vec-cmp.ll

Lines changed: 6 additions & 15 deletions

@@ -1436,9 +1436,8 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; KNL: ## %bb.0: ## %entry
 ; KNL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; KNL-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; KNL-NEXT: ## encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A]
-; KNL-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; KNL-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
+; KNL-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
 ; KNL-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; KNL-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
 ; KNL-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]

@@ -1448,8 +1447,6 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; KNL-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
 ; KNL-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
 ; KNL-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
-; KNL-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
-; KNL-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; KNL-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
 ; KNL-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]

@@ -1466,9 +1463,8 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; AVX512BW: ## %bb.0: ## %entry
 ; AVX512BW-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX512BW-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; AVX512BW-NEXT: ## encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A]
-; AVX512BW-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; AVX512BW-NEXT: vpshuflw $85, %xmm0, %xmm1 ## encoding: [0xc5,0xfb,0x70,0xc8,0x55]
+; AVX512BW-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
 ; AVX512BW-NEXT: vcvtph2ps %xmm1, %xmm1 ## encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; AVX512BW-NEXT: xorl %eax, %eax ## encoding: [0x31,0xc0]
 ; AVX512BW-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## encoding: [0xc5,0xe8,0x57,0xd2]

@@ -1478,8 +1474,6 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; AVX512BW-NEXT: movl $0, %edx ## encoding: [0xba,0x00,0x00,0x00,0x00]
 ; AVX512BW-NEXT: cmovnel %ecx, %edx ## encoding: [0x0f,0x45,0xd1]
 ; AVX512BW-NEXT: cmovpl %ecx, %edx ## encoding: [0x0f,0x4a,0xd1]
-; AVX512BW-NEXT: vpmovzxwq %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x34,0xc0]
-; AVX512BW-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; AVX512BW-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
 ; AVX512BW-NEXT: cmovnel %ecx, %eax ## encoding: [0x0f,0x45,0xc1]

@@ -1496,9 +1490,8 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; SKX: ## %bb.0: ## %entry
 ; SKX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x07]
-; SKX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3],zero,zero,zero,zero,zero,zero,xmm0[u,u,u,u,u,u,u,u]
-; SKX-NEXT: ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0x0d,A,A,A,A]
-; SKX-NEXT: ## fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
+; SKX-NEXT: vpshuflw $85, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc8,0x55]
+; SKX-NEXT: ## xmm1 = xmm0[1,1,1,1,4,5,6,7]
 ; SKX-NEXT: vcvtph2ps %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc9]
 ; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xe8,0x57,0xd2]
 ; SKX-NEXT: vucomiss %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xca]

@@ -1507,8 +1500,6 @@ define void @half_vec_compare(ptr %x, ptr %y) {
 ; SKX-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
 ; SKX-NEXT: testb %cl, %cl ## encoding: [0x84,0xc9]
 ; SKX-NEXT: setne %al ## encoding: [0x0f,0x95,0xc0]
-; SKX-NEXT: vpmovzxwq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
-; SKX-NEXT: ## xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; SKX-NEXT: vcvtph2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; SKX-NEXT: vucomiss %xmm2, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc2]
 ; SKX-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]

llvm/test/CodeGen/X86/cvt16.ll

Lines changed: 0 additions & 1 deletion

@@ -89,7 +89,6 @@ define float @test3(float %src) nounwind uwtable readnone {
 ; F16C-LABEL: test3:
 ; F16C: # %bb.0:
 ; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
 ; F16C-NEXT: retq
 ;

llvm/test/CodeGen/X86/f16c-intrinsics-fast-isel.ll

Lines changed: 1 addition & 6 deletions

@@ -18,8 +18,7 @@ define float @test_cvtsh_ss(i16 %a0) nounwind {
 ;
 ; X64-LABEL: test_cvtsh_ss:
 ; X64: # %bb.0:
-; X64-NEXT: movzwl %di, %eax
-; X64-NEXT: vmovd %eax, %xmm0
+; X64-NEXT: vmovd %edi, %xmm0
 ; X64-NEXT: vcvtph2ps %xmm0, %xmm0
 ; X64-NEXT: retq
   %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0

@@ -41,17 +40,13 @@ define i16 @test_cvtss_sh(float %a0) nounwind {
 ; X86-LABEL: test_cvtss_sh:
 ; X86: # %bb.0:
 ; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X86-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; X86-NEXT: vcvtps2ph $0, %xmm0, %xmm0
 ; X86-NEXT: vmovd %xmm0, %eax
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_cvtss_sh:
 ; X64: # %bb.0:
-; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; X64-NEXT: vcvtps2ph $0, %xmm0, %xmm0
 ; X64-NEXT: vmovd %xmm0, %eax
 ; X64-NEXT: # kill: def $ax killed $ax killed $eax
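
The dropped vxorps/vblendps above reads the same way. A hypothetical sketch of the pattern (not the actual test body; @llvm.x86.vcvtps2ph.128 is the existing 128-bit CVTPS2PH intrinsic): only element 0 of the CVTPS2PH result is extracted, so only source lane 0 is demanded and explicitly zeroing the upper lanes becomes unnecessary.

define i16 @cvtss_sh_sketch(float %a0) nounwind {
  ; Zero-fill then insert the scalar; previously kept as vxorps+vblendps.
  %v = insertelement <4 x float> zeroinitializer, float %a0, i32 0
  ; Round to half; only lane 0 of the result is used below.
  %h = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %v, i32 0)
  %r = extractelement <8 x i16> %h, i32 0
  ret i16 %r
}
declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32)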

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 0 additions & 4 deletions

@@ -1432,7 +1432,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind {
 ; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
 ; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
 ; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0

@@ -1447,7 +1446,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind {
 ; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
 ; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
 ; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-FMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
 ; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0

@@ -1550,7 +1548,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
 ; CHECK-NO-FASTFMA-NEXT: movzwl %ax, %eax
 ; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
 ; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-NO-FASTFMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
 ; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0

@@ -1566,7 +1563,6 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
 ; CHECK-FMA-NEXT: movzwl %ax, %eax
 ; CHECK-FMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
 ; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; CHECK-FMA-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
 ; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
 ; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
 ; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0

llvm/test/CodeGen/X86/fp-roundeven.ll

Lines changed: 0 additions & 1 deletion

@@ -51,7 +51,6 @@ define half @roundeven_f16(half %h) {
 ; AVX512F-LABEL: roundeven_f16:
 ; AVX512F: ## %bb.0: ## %entry
 ; AVX512F-NEXT: vpextrw $0, %xmm0, %eax
-; AVX512F-NEXT: movzwl %ax, %eax
 ; AVX512F-NEXT: vmovd %eax, %xmm0
 ; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
 ; AVX512F-NEXT: vroundss $8, %xmm0, %xmm0, %xmm0
