1
+ **TIP**: Use the following command to generate a section in this list for
2
+ Intel intrinsics. Replace ` SSE4.2 ` with the name of the intended instruction set (e.g. ` SSE3 `).
3
+
4
+ ```
5
+ rg '^<intrinsic' intel-intrinsics-3.3.15.xml | rg "'SSE4.2'" | rg '^.*name=\x27([^\x27]+)\x27.*$' -r '* [ ] `$1`' >> TODO.md
6
+ ```
7
+
8
+
9
+ sse
10
+ ---
11
+ * [ ] ` _MM_TRANSPOSE4_PS `
12
+ * [ ] ` _mm_getcsr `
13
+ * [ ] ` _mm_setcsr `
14
+ * [ ] ` _MM_GET_EXCEPTION_STATE `
15
+ * [ ] ` _MM_SET_EXCEPTION_STATE `
16
+ * [ ] ` _MM_GET_EXCEPTION_MASK `
17
+ * [ ] ` _MM_SET_EXCEPTION_MASK `
18
+ * [ ] ` _MM_GET_ROUNDING_MODE `
19
+ * [ ] ` _MM_SET_ROUNDING_MODE `
20
+ * [ ] ` _MM_GET_FLUSH_ZERO_MODE `
21
+ * [ ] ` _MM_SET_FLUSH_ZERO_MODE `
22
+ * [ ] ` _mm_prefetch `
23
+ * [ ] ` _mm_sfence `
24
+ * [ ] ` _mm_max_pi16 `
25
+ * [ ] ` _m_pmaxsw `
26
+ * [ ] ` _mm_max_pu8 `
27
+ * [ ] ` _m_pmaxub `
28
+ * [ ] ` _mm_min_pi16 `
29
+ * [ ] ` _m_pminsw `
30
+ * [ ] ` _mm_min_pu8 `
31
+ * [ ] ` _m_pminub `
32
+ * [ ] ` _mm_mulhi_pu16 `
33
+ * [ ] ` _m_pmulhuw `
34
+ * [ ] ` _mm_avg_pu8 `
35
+ * [ ] ` _m_pavgb `
36
+ * [ ] ` _mm_avg_pu16 `
37
+ * [ ] ` _m_pavgw `
38
+ * [ ] ` _mm_sad_pu8 `
39
+ * [ ] ` _m_psadbw `
40
+ * [ ] ` _mm_cvtsi32_ss `
41
+ * [ ] ` _mm_cvt_si2ss `
42
+ * [ ] ` _mm_cvtsi64_ss `
43
+ * [ ] ` _mm_cvtpi32_ps `
44
+ * [ ] ` _mm_cvt_pi2ps `
45
+ * [ ] ` _mm_cvtpi16_ps `
46
+ * [ ] ` _mm_cvtpu16_ps `
47
+ * [ ] ` _mm_cvtpi8_ps `
48
+ * [ ] ` _mm_cvtpu8_ps `
49
+ * [ ] ` _mm_cvtpi32x2_ps `
50
+ * [ ] ` _mm_stream_pi `
51
+ * [ ] ` _mm_maskmove_si64 `
52
+ * [ ] ` _m_maskmovq `
53
+ * [ ] ` _mm_extract_pi16 `
54
+ * [ ] ` _m_pextrw `
55
+ * [ ] ` _mm_insert_pi16 `
56
+ * [ ] ` _m_pinsrw `
57
+ * [ ] ` _mm_movemask_pi8 `
58
+ * [ ] ` _m_pmovmskb `
59
+ * [ ] ` _mm_shuffle_pi16 `
60
+ * [ ] ` _m_pshufw `
61
+ * [ ] ` _mm_add_ss `
62
+ * [ ] ` _mm_add_ps `
63
+ * [ ] ` _mm_sub_ss `
64
+ * [ ] ` _mm_sub_ps `
65
+ * [ ] ` _mm_mul_ss `
66
+ * [ ] ` _mm_mul_ps `
67
+ * [ ] ` _mm_div_ss `
68
+ * [ ] ` _mm_div_ps `
69
+ * [ ] ` _mm_sqrt_ss `
70
+ * [x] ` _mm_sqrt_ps `
71
+ * [ ] ` _mm_rcp_ss `
72
+ * [x] ` _mm_rcp_ps `
73
+ * [ ] ` _mm_rsqrt_ss `
74
+ * [x] ` _mm_rsqrt_ps `
75
+ * [ ] ` _mm_min_ss `
76
+ * [x] ` _mm_min_ps `
77
+ * [ ] ` _mm_max_ss `
78
+ * [x] ` _mm_max_ps `
79
+ * [ ] ` _mm_and_ps `
80
+ * [ ] ` _mm_andnot_ps `
81
+ * [ ] ` _mm_or_ps `
82
+ * [ ] ` _mm_xor_ps `
83
+ * [ ] ` _mm_cmpeq_ss `
84
+ * [ ] ` _mm_cmpeq_ps `
85
+ * [ ] ` _mm_cmplt_ss `
86
+ * [ ] ` _mm_cmplt_ps `
87
+ * [ ] ` _mm_cmple_ss `
88
+ * [ ] ` _mm_cmple_ps `
89
+ * [ ] ` _mm_cmpgt_ss `
90
+ * [ ] ` _mm_cmpgt_ps `
91
+ * [ ] ` _mm_cmpge_ss `
92
+ * [ ] ` _mm_cmpge_ps `
93
+ * [ ] ` _mm_cmpneq_ss `
94
+ * [ ] ` _mm_cmpneq_ps `
95
+ * [ ] ` _mm_cmpnlt_ss `
96
+ * [ ] ` _mm_cmpnlt_ps `
97
+ * [ ] ` _mm_cmpnle_ss `
98
+ * [ ] ` _mm_cmpnle_ps `
99
+ * [ ] ` _mm_cmpngt_ss `
100
+ * [ ] ` _mm_cmpngt_ps `
101
+ * [ ] ` _mm_cmpnge_ss `
102
+ * [ ] ` _mm_cmpnge_ps `
103
+ * [ ] ` _mm_cmpord_ss `
104
+ * [ ] ` _mm_cmpord_ps `
105
+ * [ ] ` _mm_cmpunord_ss `
106
+ * [ ] ` _mm_cmpunord_ps `
107
+ * [ ] ` _mm_comieq_ss `
108
+ * [ ] ` _mm_comilt_ss `
109
+ * [ ] ` _mm_comile_ss `
110
+ * [ ] ` _mm_comigt_ss `
111
+ * [ ] ` _mm_comige_ss `
112
+ * [ ] ` _mm_comineq_ss `
113
+ * [ ] ` _mm_ucomieq_ss `
114
+ * [ ] ` _mm_ucomilt_ss `
115
+ * [ ] ` _mm_ucomile_ss `
116
+ * [ ] ` _mm_ucomigt_ss `
117
+ * [ ] ` _mm_ucomige_ss `
118
+ * [ ] ` _mm_ucomineq_ss `
119
+ * [ ] ` _mm_cvtss_si32 `
120
+ * [ ] ` _mm_cvt_ss2si `
121
+ * [ ] ` _mm_cvtss_si64 `
122
+ * [ ] ` _mm_cvtss_f32 `
123
+ * [ ] ` _mm_cvtps_pi32 `
124
+ * [ ] ` _mm_cvt_ps2pi `
125
+ * [ ] ` _mm_cvttss_si32 `
126
+ * [ ] ` _mm_cvtt_ss2si `
127
+ * [ ] ` _mm_cvttss_si64 `
128
+ * [ ] ` _mm_cvttps_pi32 `
129
+ * [ ] ` _mm_cvtt_ps2pi `
130
+ * [ ] ` _mm_cvtps_pi16 `
131
+ * [ ] ` _mm_cvtps_pi8 `
132
+ * [ ] ` _mm_set_ss `
133
+ * [ ] ` _mm_set1_ps `
134
+ * [ ] ` _mm_set_ps1 `
135
+ * [ ] ` _mm_set_ps `
136
+ * [ ] ` _mm_setr_ps `
137
+ * [ ] ` _mm_setzero_ps `
138
+ * [ ] ` _mm_loadh_pi `
139
+ * [ ] ` _mm_loadl_pi `
140
+ * [ ] ` _mm_load_ss `
141
+ * [ ] ` _mm_load1_ps `
142
+ * [ ] ` _mm_load_ps1 `
143
+ * [ ] ` _mm_load_ps `
144
+ * [ ] ` _mm_loadu_ps `
145
+ * [ ] ` _mm_loadr_ps `
146
+ * [ ] ` _mm_stream_ps `
147
+ * [ ] ` _mm_storeh_pi `
148
+ * [ ] ` _mm_storel_pi `
149
+ * [ ] ` _mm_store_ss `
150
+ * [ ] ` _mm_store1_ps `
151
+ * [ ] ` _mm_store_ps1 `
152
+ * [ ] ` _mm_store_ps `
153
+ * [ ] ` _mm_storeu_ps `
154
+ * [ ] ` _mm_storer_ps `
155
+ * [ ] ` _mm_move_ss `
156
+ * [ ] ` _mm_shuffle_ps `
157
+ * [ ] ` _mm_unpackhi_ps `
158
+ * [ ] ` _mm_unpacklo_ps `
159
+ * [ ] ` _mm_movehl_ps `
160
+ * [ ] ` _mm_movelh_ps `
161
+ * [x] ` _mm_movemask_ps `
162
+ * [ ] ` _mm_undefined_ps `
163
+
164
+
1
165
sse2
2
166
----
3
167
* [x] ` _mm_pause `
221
385
* [ ] ` _mm_storel_pd `
222
386
* [ ] ` _mm_unpackhi_pd `
223
387
* [ ] ` _mm_unpacklo_pd `
224
- * [ ] ` _mm_movemask_pd `
388
+ * [x] ` _mm_movemask_pd `
225
389
* [ ] ` _mm_shuffle_pd `
226
390
* [ ] ` _mm_move_sd `
227
391
* [ ] ` _mm_castpd_ps `
@@ -234,6 +398,21 @@ sse2
234
398
* [ ] ` _mm_undefined_si128 `
235
399
236
400
401
+ sse3
402
+ ----
403
+ * [ ] ` _mm_addsub_ps `
404
+ * [ ] ` _mm_addsub_pd `
405
+ * [ ] ` _mm_hadd_pd `
406
+ * [ ] ` _mm_hadd_ps `
407
+ * [ ] ` _mm_hsub_pd `
408
+ * [ ] ` _mm_hsub_ps `
409
+ * [ ] ` _mm_lddqu_si128 `
410
+ * [ ] ` _mm_movedup_pd `
411
+ * [ ] ` _mm_loaddup_pd `
412
+ * [ ] ` _mm_movehdup_ps `
413
+ * [ ] ` _mm_moveldup_ps `
414
+
415
+
237
416
ssse3
238
417
-----
239
418
* [ ] ` _mm_abs_pi8 `
@@ -268,3 +447,91 @@ ssse3
268
447
* [ ] ` _mm_sign_pi8 `
269
448
* [ ] ` _mm_sign_pi16 `
270
449
* [ ] ` _mm_sign_pi32 `
450
+
451
+
452
+ sse4.1
453
+ ------
454
+ * [ ] ` _mm_blend_pd `
455
+ * [ ] ` _mm_blend_ps `
456
+ * [ ] ` _mm_blendv_pd `
457
+ * [ ] ` _mm_blendv_ps `
458
+ * [ ] ` _mm_blendv_epi8 `
459
+ * [ ] ` _mm_blend_epi16 `
460
+ * [ ] ` _mm_dp_pd `
461
+ * [ ] ` _mm_dp_ps `
462
+ * [ ] ` _mm_extract_ps `
463
+ * [ ] ` _mm_extract_epi8 `
464
+ * [ ] ` _mm_extract_epi32 `
465
+ * [ ] ` _mm_extract_epi64 `
466
+ * [ ] ` _mm_insert_ps `
467
+ * [ ] ` _mm_insert_epi8 `
468
+ * [ ] ` _mm_insert_epi32 `
469
+ * [ ] ` _mm_insert_epi64 `
470
+ * [ ] ` _mm_max_epi8 `
471
+ * [ ] ` _mm_max_epi32 `
472
+ * [ ] ` _mm_max_epu32 `
473
+ * [ ] ` _mm_max_epu16 `
474
+ * [ ] ` _mm_min_epi8 `
475
+ * [ ] ` _mm_min_epi32 `
476
+ * [ ] ` _mm_min_epu32 `
477
+ * [ ] ` _mm_min_epu16 `
478
+ * [ ] ` _mm_packus_epi32 `
479
+ * [ ] ` _mm_cmpeq_epi64 `
480
+ * [ ] ` _mm_cvtepi8_epi16 `
481
+ * [ ] ` _mm_cvtepi8_epi32 `
482
+ * [ ] ` _mm_cvtepi8_epi64 `
483
+ * [ ] ` _mm_cvtepi16_epi32 `
484
+ * [ ] ` _mm_cvtepi16_epi64 `
485
+ * [ ] ` _mm_cvtepi32_epi64 `
486
+ * [ ] ` _mm_cvtepu8_epi16 `
487
+ * [ ] ` _mm_cvtepu8_epi32 `
488
+ * [ ] ` _mm_cvtepu8_epi64 `
489
+ * [ ] ` _mm_cvtepu16_epi32 `
490
+ * [ ] ` _mm_cvtepu16_epi64 `
491
+ * [ ] ` _mm_cvtepu32_epi64 `
492
+ * [ ] ` _mm_mul_epi32 `
493
+ * [ ] ` _mm_mullo_epi32 `
494
+ * [ ] ` _mm_testz_si128 `
495
+ * [ ] ` _mm_testc_si128 `
496
+ * [ ] ` _mm_testnzc_si128 `
497
+ * [ ] ` _mm_test_all_zeros `
498
+ * [ ] ` _mm_test_mix_ones_zeros `
499
+ * [ ] ` _mm_test_all_ones `
500
+ * [ ] ` _mm_round_pd `
501
+ * [ ] ` _mm_floor_pd `
502
+ * [ ] ` _mm_ceil_pd `
503
+ * [ ] ` _mm_round_ps `
504
+ * [ ] ` _mm_floor_ps `
505
+ * [ ] ` _mm_ceil_ps `
506
+ * [ ] ` _mm_round_sd `
507
+ * [ ] ` _mm_floor_sd `
508
+ * [ ] ` _mm_ceil_sd `
509
+ * [ ] ` _mm_round_ss `
510
+ * [ ] ` _mm_floor_ss `
511
+ * [ ] ` _mm_ceil_ss `
512
+ * [ ] ` _mm_minpos_epu16 `
513
+ * [ ] ` _mm_mpsadbw_epu8 `
514
+ * [ ] ` _mm_stream_load_si128 `
515
+
516
+
517
+ sse4.2
518
+ ------
519
+ * [ ] ` _mm_cmpistrm `
520
+ * [ ] ` _mm_cmpistri `
521
+ * [ ] ` _mm_cmpistrz `
522
+ * [ ] ` _mm_cmpistrc `
523
+ * [ ] ` _mm_cmpistrs `
524
+ * [ ] ` _mm_cmpistro `
525
+ * [ ] ` _mm_cmpistra `
526
+ * [ ] ` _mm_cmpestrm `
527
+ * [ ] ` _mm_cmpestri `
528
+ * [ ] ` _mm_cmpestrz `
529
+ * [ ] ` _mm_cmpestrc `
530
+ * [ ] ` _mm_cmpestrs `
531
+ * [ ] ` _mm_cmpestro `
532
+ * [ ] ` _mm_cmpestra `
533
+ * [ ] ` _mm_cmpgt_epi64 `
534
+ * [ ] ` _mm_crc32_u8 `
535
+ * [ ] ` _mm_crc32_u16 `
536
+ * [ ] ` _mm_crc32_u32 `
537
+ * [ ] ` _mm_crc32_u64 `
0 commit comments