Skip to content

Commit 821dfc3

Browse files
author
Bogdan Graur
committed
Revert "[X86] Change target of __builtin_ia32_cmp[p|s][s|d] from avx into sse/sse2 (#67410)"
Does not respect `__attribute__((target("avx")))`. This reverts commit ccd5b8d.
1 parent baecc9e commit 821dfc3

File tree

11 files changed

+353
-393
lines changed

11 files changed

+353
-393
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,6 @@ TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse")
233233
TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse")
234234
TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse")
235235
TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse")
236-
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse")
237-
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse")
238236

239237
TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2")
240238
TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2")
@@ -252,8 +250,6 @@ TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2")
252250
TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2")
253251
TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2")
254252
TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2")
255-
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2")
256-
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2")
257253
TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2")
258254
TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
259255
TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
@@ -473,8 +469,12 @@ TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
473469
TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
474470
TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
475471
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
472+
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx")
476473
TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
474+
TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx")
477475
TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
476+
TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx")
477+
TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx")
478478
TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
479479
TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
480480
TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")

clang/lib/Headers/avxintrin.h

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,15 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
15691569
((__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
15701570
(__v4df)(__m256d)(b), (int)(mask)))
15711571

1572+
/* Compare */
1573+
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
1574+
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
1575+
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
1576+
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
1577+
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
1578+
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
1579+
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
1580+
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
15721581
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
15731582
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */
15741583
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
@@ -1594,6 +1603,126 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
15941603
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
15951604
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
15961605

1606+
/// Compares each of the corresponding double-precision values of two
1607+
/// 128-bit vectors of [2 x double], using the operation specified by the
1608+
/// immediate integer operand.
1609+
///
1610+
/// Returns a [2 x double] vector consisting of two doubles corresponding to
1611+
/// the two comparison results: zero if the comparison is false, and all 1's
1612+
/// if the comparison is true.
1613+
///
1614+
/// \headerfile <x86intrin.h>
1615+
///
1616+
/// \code
1617+
/// __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c);
1618+
/// \endcode
1619+
///
1620+
/// This intrinsic corresponds to the <c> VCMPPD </c> instruction.
1621+
///
1622+
/// \param a
1623+
/// A 128-bit vector of [2 x double].
1624+
/// \param b
1625+
/// A 128-bit vector of [2 x double].
1626+
/// \param c
1627+
/// An immediate integer operand, with bits [4:0] specifying which comparison
1628+
/// operation to use: \n
1629+
/// 0x00: Equal (ordered, non-signaling) \n
1630+
/// 0x01: Less-than (ordered, signaling) \n
1631+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
1632+
/// 0x03: Unordered (non-signaling) \n
1633+
/// 0x04: Not-equal (unordered, non-signaling) \n
1634+
/// 0x05: Not-less-than (unordered, signaling) \n
1635+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
1636+
/// 0x07: Ordered (non-signaling) \n
1637+
/// 0x08: Equal (unordered, non-signaling) \n
1638+
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
1639+
/// 0x0A: Not-greater-than (unordered, signaling) \n
1640+
/// 0x0B: False (ordered, non-signaling) \n
1641+
/// 0x0C: Not-equal (ordered, non-signaling) \n
1642+
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
1643+
/// 0x0E: Greater-than (ordered, signaling) \n
1644+
/// 0x0F: True (unordered, non-signaling) \n
1645+
/// 0x10: Equal (ordered, signaling) \n
1646+
/// 0x11: Less-than (ordered, non-signaling) \n
1647+
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
1648+
/// 0x13: Unordered (signaling) \n
1649+
/// 0x14: Not-equal (unordered, signaling) \n
1650+
/// 0x15: Not-less-than (unordered, non-signaling) \n
1651+
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
1652+
/// 0x17: Ordered (signaling) \n
1653+
/// 0x18: Equal (unordered, signaling) \n
1654+
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
1655+
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
1656+
/// 0x1B: False (ordered, signaling) \n
1657+
/// 0x1C: Not-equal (ordered, signaling) \n
1658+
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
1659+
/// 0x1E: Greater-than (ordered, non-signaling) \n
1660+
/// 0x1F: True (unordered, signaling)
1661+
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
1662+
#define _mm_cmp_pd(a, b, c) \
1663+
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
1664+
(__v2df)(__m128d)(b), (c)))
1665+
1666+
/// Compares each of the corresponding values of two 128-bit vectors of
1667+
/// [4 x float], using the operation specified by the immediate integer
1668+
/// operand.
1669+
///
1670+
/// Returns a [4 x float] vector consisting of four floats corresponding to
1671+
/// the four comparison results: zero if the comparison is false, and all 1's
1672+
/// if the comparison is true.
1673+
///
1674+
/// \headerfile <x86intrin.h>
1675+
///
1676+
/// \code
1677+
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
1678+
/// \endcode
1679+
///
1680+
/// This intrinsic corresponds to the <c> VCMPPS </c> instruction.
1681+
///
1682+
/// \param a
1683+
/// A 128-bit vector of [4 x float].
1684+
/// \param b
1685+
/// A 128-bit vector of [4 x float].
1686+
/// \param c
1687+
/// An immediate integer operand, with bits [4:0] specifying which comparison
1688+
/// operation to use: \n
1689+
/// 0x00: Equal (ordered, non-signaling) \n
1690+
/// 0x01: Less-than (ordered, signaling) \n
1691+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
1692+
/// 0x03: Unordered (non-signaling) \n
1693+
/// 0x04: Not-equal (unordered, non-signaling) \n
1694+
/// 0x05: Not-less-than (unordered, signaling) \n
1695+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
1696+
/// 0x07: Ordered (non-signaling) \n
1697+
/// 0x08: Equal (unordered, non-signaling) \n
1698+
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
1699+
/// 0x0A: Not-greater-than (unordered, signaling) \n
1700+
/// 0x0B: False (ordered, non-signaling) \n
1701+
/// 0x0C: Not-equal (ordered, non-signaling) \n
1702+
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
1703+
/// 0x0E: Greater-than (ordered, signaling) \n
1704+
/// 0x0F: True (unordered, non-signaling) \n
1705+
/// 0x10: Equal (ordered, signaling) \n
1706+
/// 0x11: Less-than (ordered, non-signaling) \n
1707+
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
1708+
/// 0x13: Unordered (signaling) \n
1709+
/// 0x14: Not-equal (unordered, signaling) \n
1710+
/// 0x15: Not-less-than (unordered, non-signaling) \n
1711+
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
1712+
/// 0x17: Ordered (signaling) \n
1713+
/// 0x18: Equal (unordered, signaling) \n
1714+
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
1715+
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
1716+
/// 0x1B: False (ordered, signaling) \n
1717+
/// 0x1C: Not-equal (ordered, signaling) \n
1718+
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
1719+
/// 0x1E: Greater-than (ordered, non-signaling) \n
1720+
/// 0x1F: True (unordered, signaling)
1721+
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
1722+
#define _mm_cmp_ps(a, b, c) \
1723+
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
1724+
(__v4sf)(__m128)(b), (c)))
1725+
15971726
/// Compares each of the corresponding double-precision values of two
15981727
/// 256-bit vectors of [4 x double], using the operation specified by the
15991728
/// immediate integer operand.
@@ -1714,6 +1843,124 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
17141843
((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
17151844
(__v8sf)(__m256)(b), (c)))
17161845

1846+
/// Compares each of the corresponding scalar double-precision values of
1847+
/// two 128-bit vectors of [2 x double], using the operation specified by the
1848+
/// immediate integer operand.
1849+
///
1850+
/// If the result is true, all 64 bits of the destination vector are set;
1851+
/// otherwise they are cleared.
1852+
///
1853+
/// \headerfile <x86intrin.h>
1854+
///
1855+
/// \code
1856+
/// __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c);
1857+
/// \endcode
1858+
///
1859+
/// This intrinsic corresponds to the <c> VCMPSD </c> instruction.
1860+
///
1861+
/// \param a
1862+
/// A 128-bit vector of [2 x double].
1863+
/// \param b
1864+
/// A 128-bit vector of [2 x double].
1865+
/// \param c
1866+
/// An immediate integer operand, with bits [4:0] specifying which comparison
1867+
/// operation to use: \n
1868+
/// 0x00: Equal (ordered, non-signaling) \n
1869+
/// 0x01: Less-than (ordered, signaling) \n
1870+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
1871+
/// 0x03: Unordered (non-signaling) \n
1872+
/// 0x04: Not-equal (unordered, non-signaling) \n
1873+
/// 0x05: Not-less-than (unordered, signaling) \n
1874+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
1875+
/// 0x07: Ordered (non-signaling) \n
1876+
/// 0x08: Equal (unordered, non-signaling) \n
1877+
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
1878+
/// 0x0A: Not-greater-than (unordered, signaling) \n
1879+
/// 0x0B: False (ordered, non-signaling) \n
1880+
/// 0x0C: Not-equal (ordered, non-signaling) \n
1881+
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
1882+
/// 0x0E: Greater-than (ordered, signaling) \n
1883+
/// 0x0F: True (unordered, non-signaling) \n
1884+
/// 0x10: Equal (ordered, signaling) \n
1885+
/// 0x11: Less-than (ordered, non-signaling) \n
1886+
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
1887+
/// 0x13: Unordered (signaling) \n
1888+
/// 0x14: Not-equal (unordered, signaling) \n
1889+
/// 0x15: Not-less-than (unordered, non-signaling) \n
1890+
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
1891+
/// 0x17: Ordered (signaling) \n
1892+
/// 0x18: Equal (unordered, signaling) \n
1893+
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
1894+
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
1895+
/// 0x1B: False (ordered, signaling) \n
1896+
/// 0x1C: Not-equal (ordered, signaling) \n
1897+
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
1898+
/// 0x1E: Greater-than (ordered, non-signaling) \n
1899+
/// 0x1F: True (unordered, signaling)
1900+
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
1901+
#define _mm_cmp_sd(a, b, c) \
1902+
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
1903+
(__v2df)(__m128d)(b), (c)))
1904+
1905+
/// Compares each of the corresponding scalar values of two 128-bit
1906+
/// vectors of [4 x float], using the operation specified by the immediate
1907+
/// integer operand.
1908+
///
1909+
/// If the result is true, all 32 bits of the destination vector are set;
1910+
/// otherwise they are cleared.
1911+
///
1912+
/// \headerfile <x86intrin.h>
1913+
///
1914+
/// \code
1915+
/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
1916+
/// \endcode
1917+
///
1918+
/// This intrinsic corresponds to the <c> VCMPSS </c> instruction.
1919+
///
1920+
/// \param a
1921+
/// A 128-bit vector of [4 x float].
1922+
/// \param b
1923+
/// A 128-bit vector of [4 x float].
1924+
/// \param c
1925+
/// An immediate integer operand, with bits [4:0] specifying which comparison
1926+
/// operation to use: \n
1927+
/// 0x00: Equal (ordered, non-signaling) \n
1928+
/// 0x01: Less-than (ordered, signaling) \n
1929+
/// 0x02: Less-than-or-equal (ordered, signaling) \n
1930+
/// 0x03: Unordered (non-signaling) \n
1931+
/// 0x04: Not-equal (unordered, non-signaling) \n
1932+
/// 0x05: Not-less-than (unordered, signaling) \n
1933+
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
1934+
/// 0x07: Ordered (non-signaling) \n
1935+
/// 0x08: Equal (unordered, non-signaling) \n
1936+
/// 0x09: Not-greater-than-or-equal (unordered, signaling) \n
1937+
/// 0x0A: Not-greater-than (unordered, signaling) \n
1938+
/// 0x0B: False (ordered, non-signaling) \n
1939+
/// 0x0C: Not-equal (ordered, non-signaling) \n
1940+
/// 0x0D: Greater-than-or-equal (ordered, signaling) \n
1941+
/// 0x0E: Greater-than (ordered, signaling) \n
1942+
/// 0x0F: True (unordered, non-signaling) \n
1943+
/// 0x10: Equal (ordered, signaling) \n
1944+
/// 0x11: Less-than (ordered, non-signaling) \n
1945+
/// 0x12: Less-than-or-equal (ordered, non-signaling) \n
1946+
/// 0x13: Unordered (signaling) \n
1947+
/// 0x14: Not-equal (unordered, signaling) \n
1948+
/// 0x15: Not-less-than (unordered, non-signaling) \n
1949+
/// 0x16: Not-less-than-or-equal (unordered, non-signaling) \n
1950+
/// 0x17: Ordered (signaling) \n
1951+
/// 0x18: Equal (unordered, signaling) \n
1952+
/// 0x19: Not-greater-than-or-equal (unordered, non-signaling) \n
1953+
/// 0x1A: Not-greater-than (unordered, non-signaling) \n
1954+
/// 0x1B: False (ordered, signaling) \n
1955+
/// 0x1C: Not-equal (ordered, signaling) \n
1956+
/// 0x1D: Greater-than-or-equal (ordered, non-signaling) \n
1957+
/// 0x1E: Greater-than (ordered, non-signaling) \n
1958+
/// 0x1F: True (unordered, signaling)
1959+
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
1960+
#define _mm_cmp_ss(a, b, c) \
1961+
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
1962+
(__v4sf)(__m128)(b), (c)))
1963+
17171964
/// Takes a [8 x i32] vector and returns the vector element value
17181965
/// indexed by the immediate constant operand.
17191966
///

0 commit comments

Comments
 (0)