Skip to content

Commit 9177e81

Browse files
authored
X86: Fix asserting on bfloat argument/return without sse2 (#93146)
When f16 is not a legal type (e.g. without SSE2), bfloat arguments and return values now get the default promote-to-float behavior, as half already does. Fixes #92899
1 parent f2ad39b commit 9177e81

File tree

3 files changed

+1309
-37
lines changed

3 files changed

+1309
-37
lines changed

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -124,12 +124,14 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
124124
!Subtarget.hasX87())
125125
return MVT::i32;
126126

127-
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
128-
return getRegisterTypeForCallingConv(Context, CC,
129-
VT.changeVectorElementType(MVT::f16));
127+
if (isTypeLegal(MVT::f16)) {
128+
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
129+
return getRegisterTypeForCallingConv(
130+
Context, CC, VT.changeVectorElementType(MVT::f16));
130131

131-
if (VT == MVT::bf16)
132-
return MVT::f16;
132+
if (VT == MVT::bf16)
133+
return MVT::f16;
134+
}
133135

134136
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
135137
}
@@ -162,7 +164,8 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
162164
return 3;
163165
}
164166

165-
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
167+
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
168+
isTypeLegal(MVT::f16))
166169
return getNumRegistersForCallingConv(Context, CC,
167170
VT.changeVectorElementType(MVT::f16));
168171

@@ -194,7 +197,8 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
194197
}
195198

196199
// Split vNbf16 vectors according to vNf16.
197-
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
200+
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
201+
isTypeLegal(MVT::f16))
198202
VT = VT.changeVectorElementType(MVT::f16);
199203

200204
return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,

llvm/test/CodeGen/X86/atomic-non-integer.ll

Lines changed: 55 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -789,12 +789,55 @@ define double @load_double_seq_cst(ptr %fptr) {
789789
}
790790

791791
define void @store_bfloat(ptr %fptr, bfloat %v) {
792-
; X86-LABEL: store_bfloat:
793-
; X86: # %bb.0:
794-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
795-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
796-
; X86-NEXT: movw %cx, (%eax)
797-
; X86-NEXT: retl
792+
; X86-SSE1-LABEL: store_bfloat:
793+
; X86-SSE1: # %bb.0:
794+
; X86-SSE1-NEXT: pushl %esi
795+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
796+
; X86-SSE1-NEXT: subl $8, %esp
797+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 16
798+
; X86-SSE1-NEXT: .cfi_offset %esi, -8
799+
; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
800+
; X86-SSE1-NEXT: movss %xmm0, (%esp)
801+
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %esi
802+
; X86-SSE1-NEXT: calll __truncsfbf2
803+
; X86-SSE1-NEXT: movw %ax, (%esi)
804+
; X86-SSE1-NEXT: addl $8, %esp
805+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 8
806+
; X86-SSE1-NEXT: popl %esi
807+
; X86-SSE1-NEXT: .cfi_def_cfa_offset 4
808+
; X86-SSE1-NEXT: retl
809+
;
810+
; X86-SSE2-LABEL: store_bfloat:
811+
; X86-SSE2: # %bb.0:
812+
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
813+
; X86-SSE2-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
814+
; X86-SSE2-NEXT: movw %cx, (%eax)
815+
; X86-SSE2-NEXT: retl
816+
;
817+
; X86-AVX-LABEL: store_bfloat:
818+
; X86-AVX: # %bb.0:
819+
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
820+
; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
821+
; X86-AVX-NEXT: movw %cx, (%eax)
822+
; X86-AVX-NEXT: retl
823+
;
824+
; X86-NOSSE-LABEL: store_bfloat:
825+
; X86-NOSSE: # %bb.0:
826+
; X86-NOSSE-NEXT: pushl %esi
827+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
828+
; X86-NOSSE-NEXT: subl $8, %esp
829+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 16
830+
; X86-NOSSE-NEXT: .cfi_offset %esi, -8
831+
; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
832+
; X86-NOSSE-NEXT: fstps (%esp)
833+
; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
834+
; X86-NOSSE-NEXT: calll __truncsfbf2
835+
; X86-NOSSE-NEXT: movw %ax, (%esi)
836+
; X86-NOSSE-NEXT: addl $8, %esp
837+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8
838+
; X86-NOSSE-NEXT: popl %esi
839+
; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4
840+
; X86-NOSSE-NEXT: retl
798841
;
799842
; X64-SSE-LABEL: store_bfloat:
800843
; X64-SSE: # %bb.0:
@@ -811,8 +854,7 @@ define void @store_bfloat(ptr %fptr, bfloat %v) {
811854
ret void
812855
}
813856

814-
; Work around issue #92899 by casting to float
815-
define float @load_bfloat(ptr %fptr) {
857+
define bfloat @load_bfloat(ptr %fptr) {
816858
; X86-SSE1-LABEL: load_bfloat:
817859
; X86-SSE1: # %bb.0:
818860
; X86-SSE1-NEXT: pushl %eax
@@ -828,30 +870,16 @@ define float @load_bfloat(ptr %fptr) {
828870
;
829871
; X86-SSE2-LABEL: load_bfloat:
830872
; X86-SSE2: # %bb.0:
831-
; X86-SSE2-NEXT: pushl %eax
832-
; X86-SSE2-NEXT: .cfi_def_cfa_offset 8
833873
; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
834874
; X86-SSE2-NEXT: movzwl (%eax), %eax
835-
; X86-SSE2-NEXT: shll $16, %eax
836-
; X86-SSE2-NEXT: movd %eax, %xmm0
837-
; X86-SSE2-NEXT: movd %xmm0, (%esp)
838-
; X86-SSE2-NEXT: flds (%esp)
839-
; X86-SSE2-NEXT: popl %eax
840-
; X86-SSE2-NEXT: .cfi_def_cfa_offset 4
875+
; X86-SSE2-NEXT: pinsrw $0, %eax, %xmm0
841876
; X86-SSE2-NEXT: retl
842877
;
843878
; X86-AVX-LABEL: load_bfloat:
844879
; X86-AVX: # %bb.0:
845-
; X86-AVX-NEXT: pushl %eax
846-
; X86-AVX-NEXT: .cfi_def_cfa_offset 8
847880
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
848881
; X86-AVX-NEXT: movzwl (%eax), %eax
849-
; X86-AVX-NEXT: shll $16, %eax
850-
; X86-AVX-NEXT: vmovd %eax, %xmm0
851-
; X86-AVX-NEXT: vmovd %xmm0, (%esp)
852-
; X86-AVX-NEXT: flds (%esp)
853-
; X86-AVX-NEXT: popl %eax
854-
; X86-AVX-NEXT: .cfi_def_cfa_offset 4
882+
; X86-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
855883
; X86-AVX-NEXT: retl
856884
;
857885
; X86-NOSSE-LABEL: load_bfloat:
@@ -870,17 +898,14 @@ define float @load_bfloat(ptr %fptr) {
870898
; X64-SSE-LABEL: load_bfloat:
871899
; X64-SSE: # %bb.0:
872900
; X64-SSE-NEXT: movzwl (%rdi), %eax
873-
; X64-SSE-NEXT: shll $16, %eax
874-
; X64-SSE-NEXT: movd %eax, %xmm0
901+
; X64-SSE-NEXT: pinsrw $0, %eax, %xmm0
875902
; X64-SSE-NEXT: retq
876903
;
877904
; X64-AVX-LABEL: load_bfloat:
878905
; X64-AVX: # %bb.0:
879906
; X64-AVX-NEXT: movzwl (%rdi), %eax
880-
; X64-AVX-NEXT: shll $16, %eax
881-
; X64-AVX-NEXT: vmovd %eax, %xmm0
907+
; X64-AVX-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
882908
; X64-AVX-NEXT: retq
883909
%v = load atomic bfloat, ptr %fptr unordered, align 2
884-
%ext = fpext bfloat %v to float
885-
ret float %ext
910+
ret bfloat %v
886911
}

0 commit comments

Comments
 (0)