Skip to content

Commit 55c466d

Browse files
committed
[X86][AVX512BF16] Add a few missing insert/extract patterns
These select the same instructions as the existing f16 (and i16) patterns, but a pattern is needed for every vector type that can occur, so add the missing bf16 variants.
1 parent 6f11c95 commit 55c466d

File tree

2 files changed

+33
-0
lines changed

2 files changed

+33
-0
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,20 +494,26 @@ defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
494494
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
495495
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
496496
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
497+
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8bf16x_info, v16bf16x_info,
498+
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
497499
// Codegen pattern with the alternative types insert VEC128 into VEC512
498500
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
499501
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
500502
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
501503
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
502504
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
503505
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
506+
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8bf16x_info, v32bf16_info,
507+
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
504508
// Codegen pattern with the alternative types insert VEC256 into VEC512
505509
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
506510
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
507511
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
508512
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
509513
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
510514
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
515+
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16bf16x_info, v32bf16_info,
516+
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
511517

512518

513519
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
@@ -795,6 +801,8 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
795801
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
796802
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
797803
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
804+
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16bf16x_info, v8bf16x_info,
805+
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
798806

799807
// Codegen pattern with the alternative types extract VEC128 from VEC512
800808
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
@@ -803,13 +811,17 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
803811
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
804812
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
805813
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
814+
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32bf16_info, v8bf16x_info,
815+
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
806816
// Codegen pattern with the alternative types extract VEC256 from VEC512
807817
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
808818
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
809819
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
810820
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
811821
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
812822
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
823+
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32bf16_info, v16bf16x_info,
824+
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
813825

814826

815827
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2805,3 +2805,24 @@ define <16 x bfloat> @concat_zero_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y) {
28052805
%a = shufflevector <8 x bfloat> %x, <8 x bfloat> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
28062806
ret <16 x bfloat> %a
28072807
}
2808+
2809+
; Shuffle whose result is elements 0-3 of %x repeated twice, followed by
; all eight elements of %y (mask indices 8-15 select the second operand).
define <16 x bfloat> @concat_dup_v8bf16(<8 x bfloat> %x, <8 x bfloat> %y) {
2810+
; X86-LABEL: concat_dup_v8bf16:
2811+
; X86: # %bb.0:
2812+
; X86-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2813+
; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2814+
; X86-NEXT: retl
2815+
;
2816+
; SSE2-LABEL: concat_dup_v8bf16:
2817+
; SSE2: # %bb.0:
2818+
; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
2819+
; SSE2-NEXT: retq
2820+
;
2821+
; AVX-LABEL: concat_dup_v8bf16:
2822+
; AVX: # %bb.0:
2823+
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
2824+
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
2825+
; AVX-NEXT: retq
2826+
%a = shufflevector <8 x bfloat> %x, <8 x bfloat> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2827+
ret <16 x bfloat> %a
2828+
}

0 commit comments

Comments
 (0)