@@ -854,8 +854,7 @@ define void @store_bfloat(ptr %fptr, bfloat %v) {
  ret void
}

- ; Work around issue #92899 by casting to float
- define float @load_bfloat(ptr %fptr) {
+ define bfloat @load_bfloat(ptr %fptr) {
; X86-SSE1-LABEL: load_bfloat:
; X86-SSE1:       # %bb.0:
; X86-SSE1-NEXT:    pushl %eax
@@ -871,30 +870,16 @@ define float @load_bfloat(ptr %fptr) {
;
; X86-SSE2-LABEL: load_bfloat:
; X86-SSE2:       # %bb.0:
- ; X86-SSE2-NEXT:    pushl %eax
- ; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl (%eax), %eax
- ; X86-SSE2-NEXT:    shll $16, %eax
- ; X86-SSE2-NEXT:    movd %eax, %xmm0
- ; X86-SSE2-NEXT:    movd %xmm0, (%esp)
- ; X86-SSE2-NEXT:    flds (%esp)
- ; X86-SSE2-NEXT:    popl %eax
- ; X86-SSE2-NEXT:    .cfi_def_cfa_offset 4
+ ; X86-SSE2-NEXT:    pinsrw $0, %eax, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: load_bfloat:
; X86-AVX:       # %bb.0:
- ; X86-AVX-NEXT:    pushl %eax
- ; X86-AVX-NEXT:    .cfi_def_cfa_offset 8
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movzwl (%eax), %eax
- ; X86-AVX-NEXT:    shll $16, %eax
- ; X86-AVX-NEXT:    vmovd %eax, %xmm0
- ; X86-AVX-NEXT:    vmovd %xmm0, (%esp)
- ; X86-AVX-NEXT:    flds (%esp)
- ; X86-AVX-NEXT:    popl %eax
- ; X86-AVX-NEXT:    .cfi_def_cfa_offset 4
+ ; X86-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X86-NOSSE-LABEL: load_bfloat:
@@ -913,17 +898,14 @@ define float @load_bfloat(ptr %fptr) {
; X64-SSE-LABEL: load_bfloat:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movzwl (%rdi), %eax
- ; X64-SSE-NEXT:    shll $16, %eax
- ; X64-SSE-NEXT:    movd %eax, %xmm0
+ ; X64-SSE-NEXT:    pinsrw $0, %eax, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: load_bfloat:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    movzwl (%rdi), %eax
- ; X64-AVX-NEXT:    shll $16, %eax
- ; X64-AVX-NEXT:    vmovd %eax, %xmm0
+ ; X64-AVX-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %v = load atomic bfloat, ptr %fptr unordered, align 2
- %ext = fpext bfloat %v to float
- ret float %ext
+ ret bfloat %v
}
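
For reference, a minimal sketch of the test function as it reads after this change, reconstructed from the diff above: the float workaround for issue #92899 is gone, and the atomic bfloat load is returned directly.

define bfloat @load_bfloat(ptr %fptr) {
  %v = load atomic bfloat, ptr %fptr unordered, align 2
  ret bfloat %v
}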