29
29
#define __F16CINTRIN_H
30
30
31
31
/* Attribute set attached to every inline function defined in this header:
 * __always_inline__, __nodebug__, and the "f16c" target feature. The macro is
 * private to this file and is #undef'd again at the end of the header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
34
34
35
+ /// \brief Converts a 16-bit half-precision float value into a 32-bit float
36
+ /// value.
37
+ ///
38
+ /// \headerfile <x86intrin.h>
39
+ ///
40
+ /// This intrinsic corresponds to the \c VCVTPH2PS instruction.
41
+ ///
42
+ /// \param a
43
+ /// A 16-bit half-precision float value.
44
+ /// \returns The converted 32-bit float value.
35
45
static __inline float __DEFAULT_FN_ATTRS
36
46
_cvtsh_ss (unsigned short a )
37
47
{
@@ -40,17 +50,73 @@ _cvtsh_ss(unsigned short a)
40
50
return r [0 ];
41
51
}
42
52
53
/// \brief Converts one 32-bit single-precision float value into a 16-bit
///    half-precision float value.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// unsigned short _cvtss_sh(float a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VCVTPS2PH instruction.
///
/// The scalar \a a is placed in element 0 of a 4-element float vector whose
/// remaining elements are zero; that vector is converted, and element 0 of
/// the result is returned as an unsigned short.
///
/// \param a
///    The 32-bit single-precision float value to convert.
/// \param imm
///    An immediate value selecting the rounding mode through bits [2:0]:
///    000: Nearest
///    001: Down
///    010: Up
///    011: Truncate
///    1XX: Use MXCSR.RC for rounding
/// \returns The converted 16-bit half-precision float value.
#define _cvtss_sh(a, imm) \
  ((unsigned short)(((__v8hi)__builtin_ia32_vcvtps2ph( \
      (__v4sf){a, 0, 0, 0}, (imm)))[0]))
46
78
79
/// \brief Converts the 32-bit float values held in a 128-bit vector into
///    16-bit half-precision float values, returned in a 128-bit vector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cvtps_ph(__m128 a, const int imm);
/// \endcode
///
/// This intrinsic corresponds to the \c VCVTPS2PH instruction.
///
/// \param a
///    A 128-bit vector containing 32-bit float values.
/// \param imm
///    An immediate value selecting the rounding mode through bits [2:0]:
///    000: Nearest
///    001: Down
///    010: Up
///    011: Truncate
///    1XX: Use MXCSR.RC for rounding
/// \returns A 128-bit vector whose lower 64 bits hold the converted 16-bit
///    half-precision floating-point values.
#define _mm_cvtps_ph(a, imm) \
  ((__m128i)__builtin_ia32_vcvtps2ph((__v4sf)(__m128)(a), (imm)))
49
104
105
+ /// \brief Converts a 128-bit vector containing 16-bit half-precision float
106
+ /// values into a 128-bit vector containing 32-bit float values.
107
+ ///
108
+ /// \headerfile <x86intrin.h>
109
+ ///
110
+ /// This intrinsic corresponds to the \c VCVTPH2PS instruction.
111
+ ///
112
+ /// \param a
113
+ /// A 128-bit vector containing 16-bit half-precision float values. The lower
114
+ /// 64 bits are used in the conversion.
115
+ /// \returns A 128-bit vector of [4 x float] containing converted float values.
50
116
static __inline __m128 __DEFAULT_FN_ATTRS
51
- _mm_cvtph_ps (__m128i __a )
117
+ _mm_cvtph_ps (__m128i a )
52
118
{
53
- return (__m128 )__builtin_ia32_vcvtph2ps ((__v8hi )__a );
119
+ return (__m128 )__builtin_ia32_vcvtph2ps ((__v8hi )a );
54
120
}
55
121
56
122
#undef __DEFAULT_FN_ATTRS
0 commit comments