Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 933aa2e

Browse files
committed
[X86][SSE] Added extract to scalar nontemporal store tests
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@272577 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 204d455 commit 933aa2e

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

test/CodeGen/X86/nontemporal-2.ll

Lines changed: 131 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -461,6 +461,137 @@ define void @test_arg_i64(i64 %arg, i64* %dst) {
461461
ret void
462462
}
463463

464+
; Extract versions: scalar nontemporal stores of an element extracted from a vector argument.
465+
466+
; Nontemporal store of element 1 extracted from a <4 x float> argument.
; The scalar is written to %dst with align 1 and !nontemporal metadata (node !1
; is defined outside this view). Per the checks below, the SSE2 and SSE4A
; configurations are expected to shuffle element 1 into lane 0 and emit a plain
; movss store, while SSE41, AVX and VLX are expected to move the element into
; %eax (extractps/vextractps) and store it with movntil.
define void @test_extract_f32(<4 x float> %arg, float* %dst) {
467+
; SSE2-LABEL: test_extract_f32:
468+
; SSE2: # BB#0:
469+
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
470+
; SSE2-NEXT: movss %xmm0, (%rdi)
471+
; SSE2-NEXT: retq
472+
;
473+
; SSE4A-LABEL: test_extract_f32:
474+
; SSE4A: # BB#0:
475+
; SSE4A-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
476+
; SSE4A-NEXT: movss %xmm0, (%rdi)
477+
; SSE4A-NEXT: retq
478+
;
479+
; SSE41-LABEL: test_extract_f32:
480+
; SSE41: # BB#0:
481+
; SSE41-NEXT: extractps $1, %xmm0, %eax
482+
; SSE41-NEXT: movntil %eax, (%rdi)
483+
; SSE41-NEXT: retq
484+
;
485+
; AVX-LABEL: test_extract_f32:
486+
; AVX: # BB#0:
487+
; AVX-NEXT: vextractps $1, %xmm0, %eax
488+
; AVX-NEXT: movntil %eax, (%rdi)
489+
; AVX-NEXT: retq
490+
;
491+
; VLX-LABEL: test_extract_f32:
492+
; VLX: # BB#0:
493+
; VLX-NEXT: vextractps $1, %xmm0, %eax
494+
; VLX-NEXT: movntil %eax, (%rdi)
495+
; VLX-NEXT: retq
496+
; Extract lane 1 and store it with the nontemporal hint at byte alignment.
%1 = extractelement <4 x float> %arg, i32 1
497+
store float %1, float* %dst, align 1, !nontemporal !1
498+
ret void
499+
}
500+
501+
; Nontemporal store of element 1 extracted from a <4 x i32> argument.
; The scalar is written to %dst with align 1 and !nontemporal metadata (node !1
; is defined outside this view). Every configuration checked below is expected
; to end with a movntil store from %eax; they differ only in how element 1
; reaches %eax (pshufd + movd for SSE2 and SSE4A, pextrd/vpextrd for SSE41,
; AVX and VLX).
define void @test_extract_i32(<4 x i32> %arg, i32* %dst) {
502+
; SSE2-LABEL: test_extract_i32:
503+
; SSE2: # BB#0:
504+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
505+
; SSE2-NEXT: movd %xmm0, %eax
506+
; SSE2-NEXT: movntil %eax, (%rdi)
507+
; SSE2-NEXT: retq
508+
;
509+
; SSE4A-LABEL: test_extract_i32:
510+
; SSE4A: # BB#0:
511+
; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
512+
; SSE4A-NEXT: movd %xmm0, %eax
513+
; SSE4A-NEXT: movntil %eax, (%rdi)
514+
; SSE4A-NEXT: retq
515+
;
516+
; SSE41-LABEL: test_extract_i32:
517+
; SSE41: # BB#0:
518+
; SSE41-NEXT: pextrd $1, %xmm0, %eax
519+
; SSE41-NEXT: movntil %eax, (%rdi)
520+
; SSE41-NEXT: retq
521+
;
522+
; AVX-LABEL: test_extract_i32:
523+
; AVX: # BB#0:
524+
; AVX-NEXT: vpextrd $1, %xmm0, %eax
525+
; AVX-NEXT: movntil %eax, (%rdi)
526+
; AVX-NEXT: retq
527+
;
528+
; VLX-LABEL: test_extract_i32:
529+
; VLX: # BB#0:
530+
; VLX-NEXT: vpextrd $1, %xmm0, %eax
531+
; VLX-NEXT: movntil %eax, (%rdi)
532+
; VLX-NEXT: retq
533+
; Extract lane 1 and store it with the nontemporal hint at byte alignment.
%1 = extractelement <4 x i32> %arg, i32 1
534+
store i32 %1, i32* %dst, align 1, !nontemporal !1
535+
ret void
536+
}
537+
538+
; Nontemporal store of element 1 extracted from a <2 x double> argument.
; The scalar is written to %dst with align 1 and !nontemporal metadata (node !1
; is defined outside this view). The checks below expect the high half of
; %xmm0 to be stored straight to memory with movhpd/vmovhpd; no movnt*
; instruction appears in the expected output for any checked configuration.
define void @test_extract_f64(<2 x double> %arg, double* %dst) {
539+
; SSE-LABEL: test_extract_f64:
540+
; SSE: # BB#0:
541+
; SSE-NEXT: movhpd %xmm0, (%rdi)
542+
; SSE-NEXT: retq
543+
;
544+
; AVX-LABEL: test_extract_f64:
545+
; AVX: # BB#0:
546+
; AVX-NEXT: vmovhpd %xmm0, (%rdi)
547+
; AVX-NEXT: retq
548+
;
549+
; VLX-LABEL: test_extract_f64:
550+
; VLX: # BB#0:
551+
; VLX-NEXT: vmovhpd %xmm0, (%rdi)
552+
; VLX-NEXT: retq
553+
; Extract lane 1 (the upper double) and store it with the nontemporal hint.
%1 = extractelement <2 x double> %arg, i32 1
554+
store double %1, double* %dst, align 1, !nontemporal !1
555+
ret void
556+
}
557+
558+
; Nontemporal store of element 1 extracted from a <2 x i64> argument.
; The scalar is written to %dst with align 1 and !nontemporal metadata (node !1
; is defined outside this view). Every configuration checked below is expected
; to end with a movntiq store from %rax; they differ only in how element 1
; reaches %rax (pshufd + movd for SSE2 and SSE4A, pextrq/vpextrq for SSE41,
; AVX and VLX).
define void @test_extract_i64(<2 x i64> %arg, i64* %dst) {
559+
; SSE2-LABEL: test_extract_i64:
560+
; SSE2: # BB#0:
561+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
562+
; SSE2-NEXT: movd %xmm0, %rax
563+
; SSE2-NEXT: movntiq %rax, (%rdi)
564+
; SSE2-NEXT: retq
565+
;
566+
; SSE4A-LABEL: test_extract_i64:
567+
; SSE4A: # BB#0:
568+
; SSE4A-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
569+
; SSE4A-NEXT: movd %xmm0, %rax
570+
; SSE4A-NEXT: movntiq %rax, (%rdi)
571+
; SSE4A-NEXT: retq
572+
;
573+
; SSE41-LABEL: test_extract_i64:
574+
; SSE41: # BB#0:
575+
; SSE41-NEXT: pextrq $1, %xmm0, %rax
576+
; SSE41-NEXT: movntiq %rax, (%rdi)
577+
; SSE41-NEXT: retq
578+
;
579+
; AVX-LABEL: test_extract_i64:
580+
; AVX: # BB#0:
581+
; AVX-NEXT: vpextrq $1, %xmm0, %rax
582+
; AVX-NEXT: movntiq %rax, (%rdi)
583+
; AVX-NEXT: retq
584+
;
585+
; VLX-LABEL: test_extract_i64:
586+
; VLX: # BB#0:
587+
; VLX-NEXT: vpextrq $1, %xmm0, %rax
588+
; VLX-NEXT: movntiq %rax, (%rdi)
589+
; VLX-NEXT: retq
590+
; Extract lane 1 (the upper quadword) and store it with the nontemporal hint.
%1 = extractelement <2 x i64> %arg, i32 1
591+
store i64 %1, i64* %dst, align 1, !nontemporal !1
592+
ret void
593+
}
594+
464595
; And now XMM versions.
465596

466597
define void @test_arg_v4f32(<4 x float> %arg, <4 x float>* %dst) {

0 commit comments

Comments
 (0)