Skip to content

Commit a384cd5

Browse files
authored
[X86][BF16] Add subvec_zero_lowering patterns (#76507)
1 parent c664a51 commit a384cd5

File tree

3 files changed

+26
-1
lines changed

3 files changed

+26
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3741,9 +3741,11 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
37413741
// type. This ensures they get CSE'd. But if the integer type is not
37423742
// available, use a floating-point +0.0 instead.
37433743
SDValue Vec;
3744+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
37443745
if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
37453746
Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
3746-
} else if (VT.isFloatingPoint()) {
3747+
} else if (VT.isFloatingPoint() &&
3748+
TLI.isTypeLegal(VT.getVectorElementType())) {
37473749
Vec = DAG.getConstantFP(+0.0, dl, VT);
37483750
} else if (VT.getVectorElementType() == MVT::i1) {
37493751
assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&

llvm/lib/Target/X86/X86InstrVecCompiler.td

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,9 @@ let Predicates = [HasAVX, NoVLX] in {
130130
defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>;
131131
}
132132

133+
let Predicates = [HasAVXNECONVERT, NoVLX] in
134+
defm : subvec_zero_lowering<"DQA", VR128, v16bf16, v8bf16, sub_xmm>;
135+
133136
let Predicates = [HasVLX] in {
134137
defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
135138
defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
@@ -175,6 +178,12 @@ let Predicates = [HasFP16, HasVLX] in {
175178
defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
176179
}
177180

181+
let Predicates = [HasBF16, HasVLX] in {
182+
defm : subvec_zero_lowering<"APSZ128", VR128X, v16bf16, v8bf16, sub_xmm>;
183+
defm : subvec_zero_lowering<"APSZ128", VR128X, v32bf16, v8bf16, sub_xmm>;
184+
defm : subvec_zero_lowering<"APSZ256", VR256X, v32bf16, v16bf16, sub_ymm>;
185+
}
186+
178187
class maskzeroupper<ValueType vt, RegisterClass RC> :
179188
PatLeaf<(vt RC:$src), [{
180189
return isMaskZeroExtended(N);

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2529,3 +2529,17 @@ define <8 x bfloat> @extract_v32bf16_v8bf16(<32 x bfloat> %x) {
25292529
%a = shufflevector <32 x bfloat> %x, <32 x bfloat> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
25302530
ret <8 x bfloat> %a
25312531
}
2532+
2533+
define <16 x bfloat> @concat_zero_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y) {
2534+
; SSE2-LABEL: concat_zero_v8bf16:
2535+
; SSE2: # %bb.0:
2536+
; SSE2-NEXT: xorps %xmm1, %xmm1
2537+
; SSE2-NEXT: retq
2538+
;
2539+
; AVX-LABEL: concat_zero_v8bf16:
2540+
; AVX: # %bb.0:
2541+
; AVX-NEXT: vmovaps %xmm0, %xmm0
2542+
; AVX-NEXT: retq
2543+
%a = shufflevector <8 x bfloat> %x, <8 x bfloat> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2544+
ret <16 x bfloat> %a
2545+
}

0 commit comments

Comments
 (0)