Skip to content

Commit 04527f1

Browse files
committed
[X86][BF16] Customize INSERT_VECTOR_ELT for bf16 when feature BF16 is on
Fixes the root cause of #63017. The reason is similar to BUILD_VECTOR: the vector type is legal, but the scalar bf16 type is still soft-promoted, so we need to custom-lower these scalar-to-vector nodes. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D155961
1 parent 9b2dfff commit 04527f1

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2276,9 +2276,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
22762276
addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
22772277
addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
22782278
// We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
2279-
// provide the method to promote BUILD_VECTOR. Set the operation action
2280-
// Custom to do the customization later.
2279+
// provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT.
2280+
// Set the operation action Custom to do the customization later.
22812281
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
2282+
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
22822283
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
22832284
setF16Action(VT, Expand);
22842285
setOperationAction(ISD::FADD, VT, Expand);
@@ -20751,6 +20752,14 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
2075120752
SDValue N2 = Op.getOperand(2);
2075220753
auto *N2C = dyn_cast<ConstantSDNode>(N2);
2075320754

20755+
if (EltVT == MVT::bf16) {
20756+
MVT IVT = VT.changeVectorElementTypeToInteger();
20757+
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT,
20758+
DAG.getBitcast(IVT, N0),
20759+
DAG.getBitcast(MVT::i16, N1), N2);
20760+
return DAG.getBitcast(VT, Res);
20761+
}
20762+
2075420763
if (!N2C) {
2075520764
// Variable insertion indices, usually we're better off spilling to stack,
2075620765
// but AVX512 can use a variable compare+select by comparing against all

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1158,4 +1158,29 @@ define <32 x bfloat> @pr63017_2() nounwind {
11581158
ret <32 x bfloat> %1
11591159
}
11601160

1161+
define <32 x bfloat> @pr62997_3(<32 x bfloat> %0, bfloat %1) {
1162+
; SSE2-LABEL: pr62997_3:
1163+
; SSE2: # %bb.0:
1164+
; SSE2-NEXT: movq %xmm0, %rax
1165+
; SSE2-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
1166+
; SSE2-NEXT: andq %rax, %rcx
1167+
; SSE2-NEXT: movzwl %ax, %eax
1168+
; SSE2-NEXT: movd %xmm4, %edx
1169+
; SSE2-NEXT: shll $16, %edx
1170+
; SSE2-NEXT: orl %eax, %edx
1171+
; SSE2-NEXT: orq %rcx, %rdx
1172+
; SSE2-NEXT: movq %rdx, %xmm4
1173+
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1]
1174+
; SSE2-NEXT: retq
1175+
;
1176+
; BF16-LABEL: pr62997_3:
1177+
; BF16: # %bb.0:
1178+
; BF16-NEXT: vmovd %xmm1, %eax
1179+
; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm1
1180+
; BF16-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
1181+
; BF16-NEXT: retq
1182+
%3 = insertelement <32 x bfloat> %0, bfloat %1, i64 1
1183+
ret <32 x bfloat> %3
1184+
}
1185+
11611186
declare <32 x bfloat> @llvm.masked.load.v32bf16.p0(ptr, i32, <32 x i1>, <32 x bfloat>)

0 commit comments

Comments
 (0)