Skip to content

Commit a277dd8

Browse files
committed
[X86] vector-half-conversions.ll - add v4f16->v4i32 fptosi/fptoui test coverage
1 parent b3f98df commit a277dd8

File tree

1 file changed

+254
-0
lines changed

1 file changed

+254
-0
lines changed

llvm/test/CodeGen/X86/vector-half-conversions.ll

Lines changed: 254 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4989,3 +4989,257 @@ define <4 x i32> @fptosi_2f16_to_4i32(<2 x half> %a) nounwind {
49894989
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
49904990
ret <4 x i32> %ext
49914991
}
4992+
4993+
; Signed conversion (fptosi) of a full <4 x half> vector to <4 x i32>.
; Expected codegen per check prefix, as recorded in the lines below
;  * AVX prefix - four calls to __extendhfsf2 (one per half element); the
;    results are paired with vinsertps, each pair is converted with
;    vcvttps2dq, and the two halves are joined with vunpcklpd.
;  * F16C and AVX512 prefixes - a single vcvtph2ps into ymm0 followed by one
;    vcvttps2dq; no libcalls are expected.
; NOTE(review) - the check lines look tool-generated; presumably they should be
; regenerated rather than hand-edited. Confirm against the file header.
define <4 x i32> @fptosi_4f16_to_4i32(<4 x half> %a) nounwind {
4994+
; AVX-LABEL: fptosi_4f16_to_4i32:
4995+
; AVX: # %bb.0:
4996+
; AVX-NEXT: subq $72, %rsp
4997+
; AVX-NEXT: vmovdqa %xmm0, %xmm1
4998+
; AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
4999+
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
5000+
; AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5001+
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
5002+
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
5003+
; AVX-NEXT: vpsrlq $48, %xmm1, %xmm0
5004+
; AVX-NEXT: callq __extendhfsf2@PLT
5005+
; AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5006+
; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
5007+
; AVX-NEXT: callq __extendhfsf2@PLT
5008+
; AVX-NEXT: vinsertps $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
5009+
; AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
5010+
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
5011+
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
5012+
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5013+
; AVX-NEXT: callq __extendhfsf2@PLT
5014+
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5015+
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5016+
; AVX-NEXT: callq __extendhfsf2@PLT
5017+
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
5018+
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
5019+
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
5020+
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
5021+
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
5022+
; AVX-NEXT: addq $72, %rsp
5023+
; AVX-NEXT: retq
5024+
;
5025+
; F16C-LABEL: fptosi_4f16_to_4i32:
5026+
; F16C: # %bb.0:
5027+
; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
5028+
; F16C-NEXT: vcvttps2dq %ymm0, %ymm0
5029+
; F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
5030+
; F16C-NEXT: vzeroupper
5031+
; F16C-NEXT: retq
5032+
;
5033+
; AVX512-LABEL: fptosi_4f16_to_4i32:
5034+
; AVX512: # %bb.0:
5035+
; AVX512-NEXT: vcvtph2ps %xmm0, %ymm0
5036+
; AVX512-NEXT: vcvttps2dq %ymm0, %ymm0
5037+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
5038+
; AVX512-NEXT: vzeroupper
5039+
; AVX512-NEXT: retq
5040+
%cvt = fptosi <4 x half> %a to <4 x i32>
5041+
ret <4 x i32> %cvt
5042+
}
5043+
5044+
; Unsigned conversion (fptoui) of <2 x half> to <2 x i32>, then widened to
; <4 x i32> by a shufflevector whose upper two lanes come from zeroinitializer.
; Expected codegen per check prefix, as recorded in the lines below
;  * AVX1 / AVX2 prefixes - two __extendhfsf2 calls, then the unsigned
;    conversion is built from vcvttps2dq, vpsrad $31, vsubps, a second
;    vcvttps2dq, vpand and vpor. AVX1 takes the vsubps operand from a
;    constant-pool (LCPI) memory operand; AVX2 materialises it with
;    vbroadcastss into xmm3.
;  * F16C prefix - vcvtph2ps replaces the libcalls; the same
;    vcvttps2dq / vpsrad / vsubps / vcvttps2dq / vpand / vpor sequence follows.
;  * AVX512F prefix - vcvttps2udq on zmm0, followed by vzeroupper.
;  * AVX512-FASTLANE prefix - vcvttps2udq directly on xmm0.
; Every variant finishes with vmovq (xmm0 = xmm0[0],zero) to clear the upper
; half of the result.
define <4 x i32> @fptoui_2f16_to_4i32(<2 x half> %a) nounwind {
5045+
; AVX1-LABEL: fptoui_2f16_to_4i32:
5046+
; AVX1: # %bb.0:
5047+
; AVX1-NEXT: subq $40, %rsp
5048+
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1
5049+
; AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5050+
; AVX1-NEXT: callq __extendhfsf2@PLT
5051+
; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
5052+
; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5053+
; AVX1-NEXT: callq __extendhfsf2@PLT
5054+
; AVX1-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
5055+
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
5056+
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
5057+
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
5058+
; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5059+
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
5060+
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
5061+
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
5062+
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
5063+
; AVX1-NEXT: addq $40, %rsp
5064+
; AVX1-NEXT: retq
5065+
;
5066+
; AVX2-LABEL: fptoui_2f16_to_4i32:
5067+
; AVX2: # %bb.0:
5068+
; AVX2-NEXT: subq $40, %rsp
5069+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1
5070+
; AVX2-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5071+
; AVX2-NEXT: callq __extendhfsf2@PLT
5072+
; AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
5073+
; AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5074+
; AVX2-NEXT: callq __extendhfsf2@PLT
5075+
; AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
5076+
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
5077+
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm1
5078+
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
5079+
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
5080+
; AVX2-NEXT: vsubps %xmm3, %xmm0, %xmm0
5081+
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
5082+
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
5083+
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
5084+
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
5085+
; AVX2-NEXT: addq $40, %rsp
5086+
; AVX2-NEXT: retq
5087+
;
5088+
; F16C-LABEL: fptoui_2f16_to_4i32:
5089+
; F16C: # %bb.0:
5090+
; F16C-NEXT: vpsrld $16, %xmm0, %xmm1
5091+
; F16C-NEXT: vcvtph2ps %xmm1, %xmm1
5092+
; F16C-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5093+
; F16C-NEXT: vcvtph2ps %xmm0, %xmm0
5094+
; F16C-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5095+
; F16C-NEXT: vcvttps2dq %xmm0, %xmm1
5096+
; F16C-NEXT: vpsrad $31, %xmm1, %xmm2
5097+
; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5098+
; F16C-NEXT: vcvttps2dq %xmm0, %xmm0
5099+
; F16C-NEXT: vpand %xmm2, %xmm0, %xmm0
5100+
; F16C-NEXT: vpor %xmm0, %xmm1, %xmm0
5101+
; F16C-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
5102+
; F16C-NEXT: retq
5103+
;
5104+
; AVX512F-LABEL: fptoui_2f16_to_4i32:
5105+
; AVX512F: # %bb.0:
5106+
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
5107+
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm1
5108+
; AVX512F-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5109+
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
5110+
; AVX512F-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5111+
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
5112+
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
5113+
; AVX512F-NEXT: vzeroupper
5114+
; AVX512F-NEXT: retq
5115+
;
5116+
; AVX512-FASTLANE-LABEL: fptoui_2f16_to_4i32:
5117+
; AVX512-FASTLANE: # %bb.0:
5118+
; AVX512-FASTLANE-NEXT: vpsrld $16, %xmm0, %xmm1
5119+
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm1, %xmm1
5120+
; AVX512-FASTLANE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
5121+
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %xmm0
5122+
; AVX512-FASTLANE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
5123+
; AVX512-FASTLANE-NEXT: vcvttps2udq %xmm0, %xmm0
5124+
; AVX512-FASTLANE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
5125+
; AVX512-FASTLANE-NEXT: retq
5126+
%cvt = fptoui <2 x half> %a to <2 x i32>
5127+
%ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
5128+
ret <4 x i32> %ext
5129+
}
5130+
5131+
; Unsigned conversion (fptoui) of a full <4 x half> vector to <4 x i32>.
; Expected codegen per check prefix, as recorded in the lines below
;  * AVX1 / AVX2 prefixes - four __extendhfsf2 calls; each pair of results
;    goes through the vcvttps2dq / vpsrad $31 / vsubps / vcvttps2dq / vpand /
;    vpor sequence, and the two halves are joined with vpunpcklqdq. AVX1 takes
;    the vsubps operand from a constant-pool (LCPI) memory operand; AVX2
;    materialises it with vbroadcastss before each vsubps.
;  * F16C prefix - vcvtph2ps into ymm0, then vcvttps2dq, vsubps, vcvttps2dq,
;    vorps and vblendvps on ymm registers; no libcalls are expected.
;  * AVX512F prefix - vcvtph2ps then vcvttps2udq on zmm0.
;  * AVX512-FASTLANE prefix - vcvtph2ps then vcvttps2udq on ymm0.
define <4 x i32> @fptoui_4f16_to_4i32(<4 x half> %a) nounwind {
5132+
; AVX1-LABEL: fptoui_4f16_to_4i32:
5133+
; AVX1: # %bb.0:
5134+
; AVX1-NEXT: subq $72, %rsp
5135+
; AVX1-NEXT: vmovdqa %xmm0, %xmm1
5136+
; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5137+
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
5138+
; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5139+
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
5140+
; AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
5141+
; AVX1-NEXT: vpsrlq $48, %xmm1, %xmm0
5142+
; AVX1-NEXT: callq __extendhfsf2@PLT
5143+
; AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5144+
; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
5145+
; AVX1-NEXT: callq __extendhfsf2@PLT
5146+
; AVX1-NEXT: vinsertps $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
5147+
; AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
5148+
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
5149+
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
5150+
; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5151+
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
5152+
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
5153+
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
5154+
; AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
5155+
; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5156+
; AVX1-NEXT: callq __extendhfsf2@PLT
5157+
; AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5158+
; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5159+
; AVX1-NEXT: callq __extendhfsf2@PLT
5160+
; AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
5161+
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
5162+
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
5163+
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
5164+
; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
5165+
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
5166+
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
5167+
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
5168+
; AVX1-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
5169+
; AVX1-NEXT: # xmm0 = xmm0[0],mem[0]
5170+
; AVX1-NEXT: addq $72, %rsp
5171+
; AVX1-NEXT: retq
5172+
;
5173+
; AVX2-LABEL: fptoui_4f16_to_4i32:
5174+
; AVX2: # %bb.0:
5175+
; AVX2-NEXT: subq $72, %rsp
5176+
; AVX2-NEXT: vmovdqa %xmm0, %xmm1
5177+
; AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5178+
; AVX2-NEXT: vpsrld $16, %xmm0, %xmm0
5179+
; AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5180+
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
5181+
; AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
5182+
; AVX2-NEXT: vpsrlq $48, %xmm1, %xmm0
5183+
; AVX2-NEXT: callq __extendhfsf2@PLT
5184+
; AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5185+
; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
5186+
; AVX2-NEXT: callq __extendhfsf2@PLT
5187+
; AVX2-NEXT: vinsertps $16, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
5188+
; AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
5189+
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm1
5190+
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
5191+
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
5192+
; AVX2-NEXT: vsubps %xmm3, %xmm0, %xmm0
5193+
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
5194+
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
5195+
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
5196+
; AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
5197+
; AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5198+
; AVX2-NEXT: callq __extendhfsf2@PLT
5199+
; AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
5200+
; AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
5201+
; AVX2-NEXT: callq __extendhfsf2@PLT
5202+
; AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
5203+
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
5204+
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm1
5205+
; AVX2-NEXT: vpsrad $31, %xmm1, %xmm2
5206+
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm3 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
5207+
; AVX2-NEXT: vsubps %xmm3, %xmm0, %xmm0
5208+
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
5209+
; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
5210+
; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
5211+
; AVX2-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
5212+
; AVX2-NEXT: # xmm0 = xmm0[0],mem[0]
5213+
; AVX2-NEXT: addq $72, %rsp
5214+
; AVX2-NEXT: retq
5215+
;
5216+
; F16C-LABEL: fptoui_4f16_to_4i32:
5217+
; F16C: # %bb.0:
5218+
; F16C-NEXT: vcvtph2ps %xmm0, %ymm0
5219+
; F16C-NEXT: vcvttps2dq %ymm0, %ymm1
5220+
; F16C-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
5221+
; F16C-NEXT: vcvttps2dq %ymm0, %ymm0
5222+
; F16C-NEXT: vorps %ymm0, %ymm1, %ymm0
5223+
; F16C-NEXT: vblendvps %ymm1, %ymm0, %ymm1, %ymm0
5224+
; F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
5225+
; F16C-NEXT: vzeroupper
5226+
; F16C-NEXT: retq
5227+
;
5228+
; AVX512F-LABEL: fptoui_4f16_to_4i32:
5229+
; AVX512F: # %bb.0:
5230+
; AVX512F-NEXT: vcvtph2ps %xmm0, %ymm0
5231+
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
5232+
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
5233+
; AVX512F-NEXT: vzeroupper
5234+
; AVX512F-NEXT: retq
5235+
;
5236+
; AVX512-FASTLANE-LABEL: fptoui_4f16_to_4i32:
5237+
; AVX512-FASTLANE: # %bb.0:
5238+
; AVX512-FASTLANE-NEXT: vcvtph2ps %xmm0, %ymm0
5239+
; AVX512-FASTLANE-NEXT: vcvttps2udq %ymm0, %ymm0
5240+
; AVX512-FASTLANE-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
5241+
; AVX512-FASTLANE-NEXT: vzeroupper
5242+
; AVX512-FASTLANE-NEXT: retq
5243+
%cvt = fptoui <4 x half> %a to <4 x i32>
5244+
ret <4 x i32> %cvt
5245+
}

0 commit comments

Comments
 (0)