Commit f471f6f

[X86] combineTruncateWithSat - relax minimum truncation size for PACKSS/PACKUS
truncateVectorWithPACK handling of sub-128-bit result types was improved some time ago, so remove the old 64-bit limit.

Fixes #68466
1 parent 39dfaf0 commit f471f6f
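
To make the effect concrete, here is a minimal IR sketch of the kind of sub-64-bit saturating truncation the relaxed guard now accepts. It is modeled on the stest_f64i16_mm test updated below; the function name is illustrative and not part of the commit.

; Illustrative only: mirrors the clamp-then-trunc pattern in fpclamptosat_vec.ll.
; The <2 x i16> result is only 32 bits wide, which the old
; VT.getSizeInBits() >= 64 check used to reject.
define <2 x i16> @ssat_trunc_v2i32_v2i16(<2 x double> %x) nounwind {
entry:
  %conv = fptosi <2 x double> %x to <2 x i32>
  %lo = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
  %hi = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %lo, <2 x i32> <i32 -32768, i32 -32768>)
  %t = trunc <2 x i32> %hi to <2 x i16>
  ret <2 x i16> %t
}
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)

With the limit removed, the updated SSE checks for stest_f64i16/stest_f64i16_mm below reduce this to cvttpd2dq followed by a single packssdw.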

File tree

5 files changed, +159 -599 lines changed


llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 4 deletions
@@ -49604,14 +49604,12 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
                       (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
                       !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
 
-  if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
-      VT.getSizeInBits() >= 64 &&
+  if (!PreferAVX512 && VT.getVectorNumElements() > 1 &&
+      isPowerOf2_32(VT.getVectorNumElements()) &&
       (SVT == MVT::i8 || SVT == MVT::i16) &&
       (InSVT == MVT::i16 || InSVT == MVT::i32)) {
     if (SDValue USatVal = detectSSatPattern(In, VT, true)) {
       // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
-      // Only do this when the result is at least 64 bits or we'll leaving
-      // dangling PACKSSDW nodes.
       if (SVT == MVT::i8 && InSVT == MVT::i32) {
         EVT MidVT = VT.changeVectorElementType(MVT::i16);
         SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
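
The comment retained above notes that vXi32 -> vXi8 must go through an i16 midpoint (PACKSSDW feeding PACKUSWB). Below is a hedged IR sketch of such a pattern, modeled on the ustest_f64i8 test updated further down (the function name is illustrative); it now qualifies for the combine even though the <2 x i8> result is only 16 bits wide.

; Illustrative only: clamp to [0, 255] then truncate, as in ustest_f64i8 below.
define <2 x i8> @usat_trunc_v2i32_v2i8(<2 x double> %x) nounwind {
entry:
  %conv = fptosi <2 x double> %x to <2 x i32>
  %lo = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 255, i32 255>)
  %hi = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %lo, <2 x i32> <i32 0, i32 0>)
  %t = trunc <2 x i32> %hi to <2 x i8>
  ret <2 x i8> %t
}
declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>)
declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>)

On AVX targets the updated checks below lower this to vpackssdw followed by vpackuswb.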

llvm/test/CodeGen/X86/fpclamptosat_vec.ll

Lines changed: 48 additions & 211 deletions
@@ -1092,38 +1092,14 @@ define <2 x i16> @stest_f64i16(<2 x double> %x) nounwind {
 ; SSE-LABEL: stest_f64i16:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <32767,32767,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = <4294934528,4294934528,u,u>
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE-NEXT: pand %xmm1, %xmm2
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: por %xmm2, %xmm1
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: stest_f64i16:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f64i16:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f64i16:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
   %0 = icmp slt <2 x i32> %conv, <i32 32767, i32 32767>
@@ -1198,24 +1174,11 @@ define <2 x i16> @ustest_f64i16(<2 x double> %x) nounwind {
 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: ustest_f64i16:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f64i16:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f64i16:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
   %0 = icmp slt <2 x i32> %conv, <i32 65535, i32 65535>
@@ -1652,40 +1615,16 @@ define <2 x i8> @stest_f64i8(<2 x double> %x) nounwind {
 ; SSE-LABEL: stest_f64i8:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <127,127,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <4294967168,4294967168,u,u>
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm0, %xmm2
-; SSE-NEXT: pandn %xmm1, %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
 ; SSE-NEXT: packssdw %xmm0, %xmm0
 ; SSE-NEXT: packsswb %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: stest_f64i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f64i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f64i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
   %0 = icmp slt <2 x i32> %conv, <i32 127, i32 127>
@@ -1748,39 +1687,16 @@ define <2 x i8> @ustest_f64i8(<2 x double> %x) nounwind {
 ; SSE-LABEL: ustest_f64i8:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <255,255,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
 ; SSE-NEXT: packuswb %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: ustest_f64i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f64i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f64i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
   %0 = icmp slt <2 x i32> %conv, <i32 255, i32 255>
@@ -1795,37 +1711,16 @@ define <4 x i8> @stest_f32i8(<4 x float> %x) nounwind {
 ; SSE-LABEL: stest_f32i8:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [127,127,127,127]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm0, %xmm2
-; SSE-NEXT: pandn %xmm1, %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
 ; SSE-NEXT: packssdw %xmm0, %xmm0
 ; SSE-NEXT: packsswb %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: stest_f32i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f32i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512-NEXT: vpmovsdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f32i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %0 = icmp slt <4 x i32> %conv, <i32 127, i32 127, i32 127, i32 127>
@@ -1888,37 +1783,16 @@ define <4 x i8> @ustest_f32i8(<4 x float> %x) nounwind {
 ; SSE-LABEL: ustest_f32i8:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = [255,255,255,255]
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: pxor %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm2, %xmm0
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
 ; SSE-NEXT: packuswb %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: ustest_f32i8:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,0,4,8,12,0,4,8,12,0,4,8,12]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f32i8:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovusdb %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f32i8:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <4 x float> %x to <4 x i32>
   %0 = icmp slt <4 x i32> %conv, <i32 255, i32 255, i32 255, i32 255>
@@ -3863,38 +3737,14 @@ define <2 x i16> @stest_f64i16_mm(<2 x double> %x) nounwind {
 ; SSE-LABEL: stest_f64i16_mm:
 ; SSE: # %bb.0: # %entry
 ; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: movdqa {{.*#+}} xmm1 = <32767,32767,u,u>
-; SSE-NEXT: movdqa %xmm1, %xmm2
-; SSE-NEXT: pcmpgtd %xmm0, %xmm2
-; SSE-NEXT: pand %xmm2, %xmm0
-; SSE-NEXT: pandn %xmm1, %xmm2
-; SSE-NEXT: por %xmm0, %xmm2
-; SSE-NEXT: movdqa {{.*#+}} xmm0 = <4294934528,4294934528,u,u>
-; SSE-NEXT: movdqa %xmm2, %xmm1
-; SSE-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE-NEXT: pand %xmm1, %xmm2
-; SSE-NEXT: pandn %xmm0, %xmm1
-; SSE-NEXT: por %xmm2, %xmm1
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: stest_f64i16_mm:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [32767,32767,32767,32767]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294934528,4294934528,4294934528,4294934528]
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: stest_f64i16_mm:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: stest_f64i16_mm:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
   %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 32767, i32 32767>)
@@ -3966,24 +3816,11 @@ define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) nounwind {
 ; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
 ; SSE-NEXT: retq
 ;
-; AVX2-LABEL: ustest_f64i16_mm:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX2-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: ustest_f64i16_mm:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512-NEXT: vpminsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovdw %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: ustest_f64i16_mm:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
 entry:
   %conv = fptosi <2 x double> %x to <2 x i32>
   %spec.store.select = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %conv, <2 x i32> <i32 65535, i32 65535>)
