@@ -55,7 +55,7 @@ define half @round_f16(half %h) {
55
55
; AVX512F-NEXT: vmovd %eax, %xmm0
56
56
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
57
57
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
58
- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
58
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
59
59
; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm0
60
60
; AVX512F-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
61
61
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@@ -67,7 +67,7 @@ define half @round_f16(half %h) {
67
67
; AVX512FP16: ## %bb.0: ## %entry
68
68
; AVX512FP16-NEXT: vpbroadcastw {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
69
69
; AVX512FP16-NEXT: vpbroadcastw {{.*#+}} xmm2 = [4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1,4.9976E-1]
70
- ; AVX512FP16-NEXT: vpternlogq $248, %xmm1, %xmm0, %xmm2
70
+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm2 = xmm2 | (xmm0 & xmm1)
71
71
; AVX512FP16-NEXT: vaddsh %xmm2, %xmm0, %xmm0
72
72
; AVX512FP16-NEXT: vrndscalesh $11, %xmm0, %xmm0, %xmm0
73
73
; AVX512FP16-NEXT: retq
@@ -103,15 +103,15 @@ define float @round_f32(float %x) {
103
103
; AVX512F-LABEL: round_f32:
104
104
; AVX512F: # %bb.0:
105
105
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
106
- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
106
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
107
107
; AVX512F-NEXT: vaddss %xmm1, %xmm0, %xmm0
108
108
; AVX512F-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
109
109
; AVX512F-NEXT: retq
110
110
;
111
111
; AVX512FP16-LABEL: round_f32:
112
112
; AVX512FP16: ## %bb.0:
113
113
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
114
- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
114
+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
115
115
; AVX512FP16-NEXT: vaddss %xmm1, %xmm0, %xmm0
116
116
; AVX512FP16-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
117
117
; AVX512FP16-NEXT: retq
@@ -147,15 +147,15 @@ define double @round_f64(double %x) {
147
147
; AVX512F-LABEL: round_f64:
148
148
; AVX512F: # %bb.0:
149
149
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
150
- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
150
+ ; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
151
151
; AVX512F-NEXT: vaddsd %xmm1, %xmm0, %xmm0
152
152
; AVX512F-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
153
153
; AVX512F-NEXT: retq
154
154
;
155
155
; AVX512FP16-LABEL: round_f64:
156
156
; AVX512FP16: ## %bb.0:
157
157
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
158
- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
158
+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
159
159
; AVX512FP16-NEXT: vaddsd %xmm1, %xmm0, %xmm0
160
160
; AVX512FP16-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
161
161
; AVX512FP16-NEXT: retq
@@ -213,15 +213,15 @@ define <4 x float> @round_v4f32(<4 x float> %x) {
213
213
; AVX512F-LABEL: round_v4f32:
214
214
; AVX512F: # %bb.0:
215
215
; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
216
- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
216
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
217
217
; AVX512F-NEXT: vaddps %xmm1, %xmm0, %xmm0
218
218
; AVX512F-NEXT: vroundps $11, %xmm0, %xmm0
219
219
; AVX512F-NEXT: retq
220
220
;
221
221
; AVX512FP16-LABEL: round_v4f32:
222
222
; AVX512FP16: ## %bb.0:
223
223
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
224
- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
224
+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
225
225
; AVX512FP16-NEXT: vaddps %xmm1, %xmm0, %xmm0
226
226
; AVX512FP16-NEXT: vroundps $11, %xmm0, %xmm0
227
227
; AVX512FP16-NEXT: retq
@@ -267,15 +267,15 @@ define <2 x double> @round_v2f64(<2 x double> %x) {
267
267
; AVX512F-LABEL: round_v2f64:
268
268
; AVX512F: # %bb.0:
269
269
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
270
- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
270
+ ; AVX512F-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
271
271
; AVX512F-NEXT: vaddpd %xmm1, %xmm0, %xmm0
272
272
; AVX512F-NEXT: vroundpd $11, %xmm0, %xmm0
273
273
; AVX512F-NEXT: retq
274
274
;
275
275
; AVX512FP16-LABEL: round_v2f64:
276
276
; AVX512FP16: ## %bb.0:
277
277
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4.9999999999999994E-1,4.9999999999999994E-1]
278
- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm1
278
+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} xmm1 = xmm1 | (xmm0 & mem)
279
279
; AVX512FP16-NEXT: vaddpd %xmm1, %xmm0, %xmm0
280
280
; AVX512FP16-NEXT: vroundpd $11, %xmm0, %xmm0
281
281
; AVX512FP16-NEXT: retq
@@ -361,15 +361,15 @@ define <8 x float> @round_v8f32(<8 x float> %x) {
361
361
; AVX512F-LABEL: round_v8f32:
362
362
; AVX512F: # %bb.0:
363
363
; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
364
- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
364
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
365
365
; AVX512F-NEXT: vaddps %ymm1, %ymm0, %ymm0
366
366
; AVX512F-NEXT: vroundps $11, %ymm0, %ymm0
367
367
; AVX512F-NEXT: retq
368
368
;
369
369
; AVX512FP16-LABEL: round_v8f32:
370
370
; AVX512FP16: ## %bb.0:
371
371
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
372
- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm1
372
+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
373
373
; AVX512FP16-NEXT: vaddps %ymm1, %ymm0, %ymm0
374
374
; AVX512FP16-NEXT: vroundps $11, %ymm0, %ymm0
375
375
; AVX512FP16-NEXT: retq
@@ -431,15 +431,15 @@ define <4 x double> @round_v4f64(<4 x double> %x) {
431
431
; AVX512F-LABEL: round_v4f64:
432
432
; AVX512F: # %bb.0:
433
433
; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
434
- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm1
434
+ ; AVX512F-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
435
435
; AVX512F-NEXT: vaddpd %ymm1, %ymm0, %ymm0
436
436
; AVX512F-NEXT: vroundpd $11, %ymm0, %ymm0
437
437
; AVX512F-NEXT: retq
438
438
;
439
439
; AVX512FP16-LABEL: round_v4f64:
440
440
; AVX512FP16: ## %bb.0:
441
441
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
442
- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm1
442
+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} ymm1 = ymm1 | (ymm0 & mem)
443
443
; AVX512FP16-NEXT: vaddpd %ymm1, %ymm0, %ymm0
444
444
; AVX512FP16-NEXT: vroundpd $11, %ymm0, %ymm0
445
445
; AVX512FP16-NEXT: retq
@@ -587,15 +587,15 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
587
587
; AVX512F-LABEL: round_v16f32:
588
588
; AVX512F: # %bb.0:
589
589
; AVX512F-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
590
- ; AVX512F-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
590
+ ; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
591
591
; AVX512F-NEXT: vaddps %zmm1, %zmm0, %zmm0
592
592
; AVX512F-NEXT: vrndscaleps $11, %zmm0, %zmm0
593
593
; AVX512F-NEXT: retq
594
594
;
595
595
; AVX512FP16-LABEL: round_v16f32:
596
596
; AVX512FP16: ## %bb.0:
597
597
; AVX512FP16-NEXT: vpbroadcastd {{.*#+}} zmm1 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
598
- ; AVX512FP16-NEXT: vpternlogd $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm1
598
+ ; AVX512FP16-NEXT: vpternlogd {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
599
599
; AVX512FP16-NEXT: vaddps %zmm1, %zmm0, %zmm0
600
600
; AVX512FP16-NEXT: vrndscaleps $11, %zmm0, %zmm0
601
601
; AVX512FP16-NEXT: retq
@@ -695,15 +695,15 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
695
695
; AVX512F-LABEL: round_v8f64:
696
696
; AVX512F: # %bb.0:
697
697
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
698
- ; AVX512F-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
698
+ ; AVX512F-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
699
699
; AVX512F-NEXT: vaddpd %zmm1, %zmm0, %zmm0
700
700
; AVX512F-NEXT: vrndscalepd $11, %zmm0, %zmm0
701
701
; AVX512F-NEXT: retq
702
702
;
703
703
; AVX512FP16-LABEL: round_v8f64:
704
704
; AVX512FP16: ## %bb.0:
705
705
; AVX512FP16-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
706
- ; AVX512FP16-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
706
+ ; AVX512FP16-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
707
707
; AVX512FP16-NEXT: vaddpd %zmm1, %zmm0, %zmm0
708
708
; AVX512FP16-NEXT: vrndscalepd $11, %zmm0, %zmm0
709
709
; AVX512FP16-NEXT: retq
0 commit comments