@@ -789,12 +789,55 @@ define double @load_double_seq_cst(ptr %fptr) {
789
789
}
790
790
791
791
define void @store_bfloat (ptr %fptr , bfloat %v ) {
792
- ; X86-LABEL: store_bfloat:
793
- ; X86: # %bb.0:
794
- ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
795
- ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
796
- ; X86-NEXT: movw %cx, (%eax)
797
- ; X86-NEXT: retl
792
+ ; X86-SSE1-LABEL: store_bfloat:
793
+ ; X86-SSE1: # %bb.0:
794
+ ; X86-SSE1-NEXT: pushl %esi
795
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
796
+ ; X86-SSE1-NEXT: subl $8, %esp
797
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
798
+ ; X86-SSE1-NEXT: .cfi_offset %esi, -8
799
+ ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
800
+ ; X86-SSE1-NEXT: movss %xmm0, (%esp)
801
+ ; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
802
+ ; X86-SSE1-NEXT: calll __truncsfbf2
803
+ ; X86-SSE1-NEXT: movw %ax, (%esi)
804
+ ; X86-SSE1-NEXT: addl $8, %esp
805
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
806
+ ; X86-SSE1-NEXT: popl %esi
807
+ ; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
808
+ ; X86-SSE1-NEXT: retl
809
+ ;
810
+ ; X86-SSE2-LABEL: store_bfloat:
811
+ ; X86-SSE2: # %bb.0:
812
+ ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
813
+ ; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
814
+ ; X86-SSE2-NEXT: movw %cx, (%eax)
815
+ ; X86-SSE2-NEXT: retl
816
+ ;
817
+ ; X86-AVX-LABEL: store_bfloat:
818
+ ; X86-AVX: # %bb.0:
819
+ ; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
820
+ ; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
821
+ ; X86-AVX-NEXT: movw %cx, (%eax)
822
+ ; X86-AVX-NEXT: retl
823
+ ;
824
+ ; X86-NOSSE-LABEL: store_bfloat:
825
+ ; X86-NOSSE: # %bb.0:
826
+ ; X86-NOSSE-NEXT: pushl %esi
827
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
828
+ ; X86-NOSSE-NEXT: subl $8, %esp
829
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
830
+ ; X86-NOSSE-NEXT: .cfi_offset %esi, -8
831
+ ; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
832
+ ; X86-NOSSE-NEXT: fstps (%esp)
833
+ ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
834
+ ; X86-NOSSE-NEXT: calll __truncsfbf2
835
+ ; X86-NOSSE-NEXT: movw %ax, (%esi)
836
+ ; X86-NOSSE-NEXT: addl $8, %esp
837
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
838
+ ; X86-NOSSE-NEXT: popl %esi
839
+ ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
840
+ ; X86-NOSSE-NEXT: retl
798
841
;
799
842
; X64-SSE-LABEL: store_bfloat:
800
843
; X64-SSE: # %bb.0:
@@ -811,8 +854,7 @@ define void @store_bfloat(ptr %fptr, bfloat %v) {
811
854
ret void
812
855
}
813
856
814
- ; Work around issue #92899 by casting to float
815
- define float @load_bfloat (ptr %fptr ) {
857
+ define bfloat @load_bfloat (ptr %fptr ) {
816
858
; X86-SSE1-LABEL: load_bfloat:
817
859
; X86-SSE1: # %bb.0:
818
860
; X86-SSE1-NEXT: pushl %eax
@@ -828,30 +870,16 @@ define float @load_bfloat(ptr %fptr) {
828
870
;
829
871
; X86-SSE2-LABEL: load_bfloat:
830
872
; X86-SSE2: # %bb.0:
831
- ; X86-SSE2-NEXT: pushl %eax
832
- ; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
833
873
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
834
874
; X86-SSE2-NEXT: movzwl (%eax), %eax
835
- ; X86-SSE2-NEXT: shll $16, %eax
836
- ; X86-SSE2-NEXT: movd %eax, %xmm0
837
- ; X86-SSE2-NEXT: movd %xmm0, (%esp)
838
- ; X86-SSE2-NEXT: flds (%esp)
839
- ; X86-SSE2-NEXT: popl %eax
840
- ; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
875
+ ; X86-SSE2-NEXT: pinsrw $0, %eax, %xmm0
841
876
; X86-SSE2-NEXT: retl
842
877
;
843
878
; X86-AVX-LABEL: load_bfloat:
844
879
; X86-AVX: # %bb.0:
845
- ; X86-AVX-NEXT: pushl %eax
846
- ; X86-AVX-NEXT: .cfi_def_cfa_offset 8
847
880
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
848
881
; X86-AVX-NEXT: movzwl (%eax), %eax
849
- ; X86-AVX-NEXT: shll $16, %eax
850
- ; X86-AVX-NEXT: vmovd %eax, %xmm0
851
- ; X86-AVX-NEXT: vmovd %xmm0, (%esp)
852
- ; X86-AVX-NEXT: flds (%esp)
853
- ; X86-AVX-NEXT: popl %eax
854
- ; X86-AVX-NEXT: .cfi_def_cfa_offset 4
882
+ ; X86-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
855
883
; X86-AVX-NEXT: retl
856
884
;
857
885
; X86-NOSSE-LABEL: load_bfloat:
@@ -870,17 +898,14 @@ define float @load_bfloat(ptr %fptr) {
870
898
; X64-SSE-LABEL: load_bfloat:
871
899
; X64-SSE: # %bb.0:
872
900
; X64-SSE-NEXT: movzwl (%rdi), %eax
873
- ; X64-SSE-NEXT: shll $16, %eax
874
- ; X64-SSE-NEXT: movd %eax, %xmm0
901
+ ; X64-SSE-NEXT: pinsrw $0, %eax, %xmm0
875
902
; X64-SSE-NEXT: retq
876
903
;
877
904
; X64-AVX-LABEL: load_bfloat:
878
905
; X64-AVX: # %bb.0:
879
906
; X64-AVX-NEXT: movzwl (%rdi), %eax
880
- ; X64-AVX-NEXT: shll $16, %eax
881
- ; X64-AVX-NEXT: vmovd %eax, %xmm0
907
+ ; X64-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
882
908
; X64-AVX-NEXT: retq
883
909
%v = load atomic bfloat, ptr %fptr unordered , align 2
884
- %ext = fpext bfloat %v to float
885
- ret float %ext
910
+ ret bfloat %v
886
911
}
0 commit comments