Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 91284ba

Browse files
committed
[X86][AVX] Use extract_subvector to reduce vector op widths (PR36761)
We have a number of cases where we fail to reduce vector op widths: the op is performed in a larger vector and a subvector is then extracted from the result. This is often because the narrower op would, by default, create illegal types. This peephole patch handles a few common cases detailed in PR36761, which typically involve an extension followed by a conversion to vX2f64 types.

Differential Revision: https://reviews.llvm.org/D49556

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@337500 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent e05bab2 commit 91284ba

File tree

5 files changed

+74
-124
lines changed

5 files changed

+74
-124
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39275,6 +39275,31 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG,
3927539275
OpVT, SDLoc(N),
3927639276
InVec.getNode()->ops().slice(IdxVal, OpVT.getVectorNumElements()));
3927739277

39278+
// If we're extracting the lowest subvector and we're the only user,
39279+
// we may be able to perform this with a smaller vector width.
39280+
if (IdxVal == 0 && InVec.hasOneUse()) {
39281+
unsigned InOpcode = InVec.getOpcode();
39282+
if (OpVT == MVT::v2f64 && InVec.getValueType() == MVT::v4f64) {
39283+
// v2f64 CVTDQ2PD(v4i32).
39284+
if (InOpcode == ISD::SINT_TO_FP &&
39285+
InVec.getOperand(0).getValueType() == MVT::v4i32) {
39286+
return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), OpVT, InVec.getOperand(0));
39287+
}
39288+
// v2f64 CVTPS2PD(v4f32).
39289+
if (InOpcode == ISD::FP_EXTEND &&
39290+
InVec.getOperand(0).getValueType() == MVT::v4f32) {
39291+
return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), OpVT, InVec.getOperand(0));
39292+
}
39293+
}
39294+
if ((InOpcode == X86ISD::VZEXT || InOpcode == X86ISD::VSEXT) &&
39295+
OpVT.is128BitVector() &&
39296+
InVec.getOperand(0).getSimpleValueType().is128BitVector()) {
39297+
unsigned ExtOp = InOpcode == X86ISD::VZEXT ? ISD::ZERO_EXTEND_VECTOR_INREG
39298+
: ISD::SIGN_EXTEND_VECTOR_INREG;
39299+
return DAG.getNode(ExtOp, SDLoc(N), OpVT, InVec.getOperand(0));
39300+
}
39301+
}
39302+
3927839303
return SDValue();
3927939304
}
3928039305

test/CodeGen/X86/prefer-avx256-mask-shuffle.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
131131
; AVX256VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
132132
; AVX256VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
133133
; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
134-
; AVX256VL-NEXT: vpmovsxbw %xmm1, %ymm1
134+
; AVX256VL-NEXT: vpmovsxbw %xmm1, %xmm1
135135
; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
136136
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1
137137
; AVX256VL-NEXT: vpmovsxbw %xmm0, %ymm0

test/CodeGen/X86/trunc-subvector.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ define <2 x i32> @test3(<8 x i32> %v) {
4949
; AVX2-LABEL: test3:
5050
; AVX2: # %bb.0:
5151
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
52-
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
53-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
52+
; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0
5453
; AVX2-NEXT: vzeroupper
5554
; AVX2-NEXT: retq
5655
;
@@ -76,8 +75,7 @@ define <2 x i32> @test4(<8 x i32> %v) {
7675
;
7776
; AVX2-LABEL: test4:
7877
; AVX2: # %bb.0:
79-
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
80-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
78+
; AVX2-NEXT: vpmovsxdq %xmm0, %xmm0
8179
; AVX2-NEXT: vzeroupper
8280
; AVX2-NEXT: retq
8381
;
@@ -175,8 +173,7 @@ define <2 x i32> @test8(<8 x i32> %v) {
175173
; AVX2-LABEL: test8:
176174
; AVX2: # %bb.0:
177175
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
178-
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
179-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
176+
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
180177
; AVX2-NEXT: vzeroupper
181178
; AVX2-NEXT: retq
182179
;
@@ -201,8 +198,7 @@ define <2 x i32> @test9(<8 x i32> %v) {
201198
;
202199
; AVX2-LABEL: test9:
203200
; AVX2: # %bb.0:
204-
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
205-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
201+
; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
206202
; AVX2-NEXT: vzeroupper
207203
; AVX2-NEXT: retq
208204
;

test/CodeGen/X86/vec_fpext.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,12 @@ define <2 x double> @fpext_4f32_to_2f64(<4 x float> %a) {
1414
;
1515
; AVX-LABEL: fpext_4f32_to_2f64:
1616
; AVX: # %bb.0:
17-
; AVX-NEXT: vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
18-
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
19-
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
17+
; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
2018
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2119
;
2220
; AVX512VL-LABEL: fpext_4f32_to_2f64:
2321
; AVX512VL: # %bb.0:
24-
; AVX512VL-NEXT: vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
25-
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
26-
; AVX512VL-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
22+
; AVX512VL-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
2723
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
2824
%cvt = fpext <4 x float> %a to <4 x double>
2925
%shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -38,8 +34,7 @@ define <2 x double> @fpext_8f32_to_2f64(<8 x float> %a) {
3834
;
3935
; AVX-LABEL: fpext_8f32_to_2f64:
4036
; AVX: # %bb.0:
41-
; AVX-NEXT: vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
42-
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
37+
; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
4338
; AVX-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
4439
; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
4540
;

test/CodeGen/X86/vec_int_to_fp.ll

Lines changed: 41 additions & 107 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,7 @@ define <2 x double> @sitofp_4i32_to_2f64(<4 x i32> %a) {
9595
;
9696
; AVX-LABEL: sitofp_4i32_to_2f64:
9797
; AVX: # %bb.0:
98-
; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0
99-
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
100-
; AVX-NEXT: vzeroupper
98+
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
10199
; AVX-NEXT: retq
102100
%cvt = sitofp <4 x i32> %a to <4 x double>
103101
%shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1>
@@ -130,21 +128,11 @@ define <2 x double> @sitofp_8i16_to_2f64(<8 x i16> %a) {
130128
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
131129
; SSE-NEXT: retq
132130
;
133-
; AVX1-LABEL: sitofp_8i16_to_2f64:
134-
; AVX1: # %bb.0:
135-
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
136-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
137-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
138-
; AVX1-NEXT: vzeroupper
139-
; AVX1-NEXT: retq
140-
;
141-
; AVX2-LABEL: sitofp_8i16_to_2f64:
142-
; AVX2: # %bb.0:
143-
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
144-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
145-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
146-
; AVX2-NEXT: vzeroupper
147-
; AVX2-NEXT: retq
131+
; VEX-LABEL: sitofp_8i16_to_2f64:
132+
; VEX: # %bb.0:
133+
; VEX-NEXT: vpmovsxwd %xmm0, %xmm0
134+
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
135+
; VEX-NEXT: retq
148136
;
149137
; AVX512-LABEL: sitofp_8i16_to_2f64:
150138
; AVX512: # %bb.0:
@@ -186,21 +174,11 @@ define <2 x double> @sitofp_16i8_to_2f64(<16 x i8> %a) {
186174
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
187175
; SSE-NEXT: retq
188176
;
189-
; AVX1-LABEL: sitofp_16i8_to_2f64:
190-
; AVX1: # %bb.0:
191-
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
192-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
193-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
194-
; AVX1-NEXT: vzeroupper
195-
; AVX1-NEXT: retq
196-
;
197-
; AVX2-LABEL: sitofp_16i8_to_2f64:
198-
; AVX2: # %bb.0:
199-
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
200-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
201-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
202-
; AVX2-NEXT: vzeroupper
203-
; AVX2-NEXT: retq
177+
; VEX-LABEL: sitofp_16i8_to_2f64:
178+
; VEX: # %bb.0:
179+
; VEX-NEXT: vpmovsxbd %xmm0, %xmm0
180+
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
181+
; VEX-NEXT: retq
204182
;
205183
; AVX512-LABEL: sitofp_16i8_to_2f64:
206184
; AVX512: # %bb.0:
@@ -361,17 +339,11 @@ define <4 x double> @sitofp_8i16_to_4f64(<8 x i16> %a) {
361339
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
362340
; SSE-NEXT: retq
363341
;
364-
; AVX1-LABEL: sitofp_8i16_to_4f64:
365-
; AVX1: # %bb.0:
366-
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
367-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
368-
; AVX1-NEXT: retq
369-
;
370-
; AVX2-LABEL: sitofp_8i16_to_4f64:
371-
; AVX2: # %bb.0:
372-
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
373-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
374-
; AVX2-NEXT: retq
342+
; VEX-LABEL: sitofp_8i16_to_4f64:
343+
; VEX: # %bb.0:
344+
; VEX-NEXT: vpmovsxwd %xmm0, %xmm0
345+
; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
346+
; VEX-NEXT: retq
375347
;
376348
; AVX512-LABEL: sitofp_8i16_to_4f64:
377349
; AVX512: # %bb.0:
@@ -416,17 +388,11 @@ define <4 x double> @sitofp_16i8_to_4f64(<16 x i8> %a) {
416388
; SSE-NEXT: cvtdq2pd %xmm1, %xmm1
417389
; SSE-NEXT: retq
418390
;
419-
; AVX1-LABEL: sitofp_16i8_to_4f64:
420-
; AVX1: # %bb.0:
421-
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
422-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
423-
; AVX1-NEXT: retq
424-
;
425-
; AVX2-LABEL: sitofp_16i8_to_4f64:
426-
; AVX2: # %bb.0:
427-
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
428-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
429-
; AVX2-NEXT: retq
391+
; VEX-LABEL: sitofp_16i8_to_4f64:
392+
; VEX: # %bb.0:
393+
; VEX-NEXT: vpmovsxbd %xmm0, %xmm0
394+
; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
395+
; VEX-NEXT: retq
430396
;
431397
; AVX512-LABEL: sitofp_16i8_to_4f64:
432398
; AVX512: # %bb.0:
@@ -658,21 +624,11 @@ define <2 x double> @uitofp_8i16_to_2f64(<8 x i16> %a) {
658624
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
659625
; SSE-NEXT: retq
660626
;
661-
; AVX1-LABEL: uitofp_8i16_to_2f64:
662-
; AVX1: # %bb.0:
663-
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
664-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
665-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
666-
; AVX1-NEXT: vzeroupper
667-
; AVX1-NEXT: retq
668-
;
669-
; AVX2-LABEL: uitofp_8i16_to_2f64:
670-
; AVX2: # %bb.0:
671-
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
672-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
673-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
674-
; AVX2-NEXT: vzeroupper
675-
; AVX2-NEXT: retq
627+
; VEX-LABEL: uitofp_8i16_to_2f64:
628+
; VEX: # %bb.0:
629+
; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
630+
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
631+
; VEX-NEXT: retq
676632
;
677633
; AVX512-LABEL: uitofp_8i16_to_2f64:
678634
; AVX512: # %bb.0:
@@ -714,21 +670,11 @@ define <2 x double> @uitofp_16i8_to_2f64(<16 x i8> %a) {
714670
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
715671
; SSE-NEXT: retq
716672
;
717-
; AVX1-LABEL: uitofp_16i8_to_2f64:
718-
; AVX1: # %bb.0:
719-
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
720-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
721-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
722-
; AVX1-NEXT: vzeroupper
723-
; AVX1-NEXT: retq
724-
;
725-
; AVX2-LABEL: uitofp_16i8_to_2f64:
726-
; AVX2: # %bb.0:
727-
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
728-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
729-
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
730-
; AVX2-NEXT: vzeroupper
731-
; AVX2-NEXT: retq
673+
; VEX-LABEL: uitofp_16i8_to_2f64:
674+
; VEX: # %bb.0:
675+
; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
676+
; VEX-NEXT: vcvtdq2pd %xmm0, %xmm0
677+
; VEX-NEXT: retq
732678
;
733679
; AVX512-LABEL: uitofp_16i8_to_2f64:
734680
; AVX512: # %bb.0:
@@ -939,17 +885,11 @@ define <4 x double> @uitofp_8i16_to_4f64(<8 x i16> %a) {
939885
; SSE-NEXT: movaps %xmm2, %xmm0
940886
; SSE-NEXT: retq
941887
;
942-
; AVX1-LABEL: uitofp_8i16_to_4f64:
943-
; AVX1: # %bb.0:
944-
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
945-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
946-
; AVX1-NEXT: retq
947-
;
948-
; AVX2-LABEL: uitofp_8i16_to_4f64:
949-
; AVX2: # %bb.0:
950-
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
951-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
952-
; AVX2-NEXT: retq
888+
; VEX-LABEL: uitofp_8i16_to_4f64:
889+
; VEX: # %bb.0:
890+
; VEX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
891+
; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
892+
; VEX-NEXT: retq
953893
;
954894
; AVX512-LABEL: uitofp_8i16_to_4f64:
955895
; AVX512: # %bb.0:
@@ -996,17 +936,11 @@ define <4 x double> @uitofp_16i8_to_4f64(<16 x i8> %a) {
996936
; SSE-NEXT: movaps %xmm2, %xmm0
997937
; SSE-NEXT: retq
998938
;
999-
; AVX1-LABEL: uitofp_16i8_to_4f64:
1000-
; AVX1: # %bb.0:
1001-
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
1002-
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
1003-
; AVX1-NEXT: retq
1004-
;
1005-
; AVX2-LABEL: uitofp_16i8_to_4f64:
1006-
; AVX2: # %bb.0:
1007-
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
1008-
; AVX2-NEXT: vcvtdq2pd %xmm0, %ymm0
1009-
; AVX2-NEXT: retq
939+
; VEX-LABEL: uitofp_16i8_to_4f64:
940+
; VEX: # %bb.0:
941+
; VEX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
942+
; VEX-NEXT: vcvtdq2pd %xmm0, %ymm0
943+
; VEX-NEXT: retq
1010944
;
1011945
; AVX512-LABEL: uitofp_16i8_to_4f64:
1012946
; AVX512: # %bb.0:

0 commit comments

Comments
 (0)