Skip to content

Commit 3081bac

Browse files
authored
[X86][BF16] Add X86SubVBroadcastld patterns (#76479)
1 parent 19e7846 commit 3081bac

File tree

3 files changed

+39
-0
lines changed

3 files changed

+39
-0
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,6 +1447,17 @@ def : Pat<(vselect_mask VK8WM:$mask,
14471447
(VBROADCASTI32X4Z256rmk VR256X:$src0, VK8WM:$mask, addr:$src)>;
14481448
}
14491449

1450+
// Selection patterns for X86SubVBroadcastld nodes that produce a v32bf16
// result, enabled only under the HasBF16 predicate:
//   - X86SubVBroadcastld256 selects VBROADCASTF64X4rm
//   - X86SubVBroadcastld128 selects VBROADCASTF32X4rm
// Both reuse existing floating-point broadcast instructions with the same
// addr:$src memory operand.
let Predicates = [HasBF16] in {
1451+
def : Pat<(v32bf16 (X86SubVBroadcastld256 addr:$src)),
1452+
(VBROADCASTF64X4rm addr:$src)>;
1453+
def : Pat<(v32bf16 (X86SubVBroadcastld128 addr:$src)),
1454+
(VBROADCASTF32X4rm addr:$src)>;
1455+
}
1456+
1457+
// Selection pattern for an X86SubVBroadcastld128 node that produces a v16bf16
// result: selects VBROADCASTF32X4Z256rm. Requires both HasBF16 and HasVLX.
let Predicates = [HasBF16, HasVLX] in
1458+
def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
1459+
(VBROADCASTF32X4Z256rm addr:$src)>;
1460+
14501461
let Predicates = [HasVLX, HasDQI] in {
14511462
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
14521463
X86SubVBroadcastld128, v4i64x_info, v2i64x_info>, VEX_W1X,

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7160,6 +7160,10 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
71607160
(VBROADCASTF128rm addr:$src)>;
71617161
}
71627162

7163+
// Selection pattern for an X86SubVBroadcastld128 node that produces a v16bf16
// result when HasAVXNECONVERT holds and VLX is not available (NoVLX): selects
// VBROADCASTF128rm.
// NOTE(review): NoVLX presumably keeps this from overlapping with a
// VLX-gated v16bf16 pattern defined elsewhere -- confirm.
let Predicates = [HasAVXNECONVERT, NoVLX] in
7164+
def : Pat<(v16bf16 (X86SubVBroadcastld128 addr:$src)),
7165+
(VBROADCASTF128rm addr:$src)>;
7166+
71637167
//===----------------------------------------------------------------------===//
71647168
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
71657169
//

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2461,3 +2461,27 @@ define <8 x bfloat> @fptrunc_v8f64(<8 x double> %a) nounwind {
24612461
%b = fptrunc <8 x double> %a to <8 x bfloat>
24622462
ret <8 x bfloat> %b
24632463
}
2464+
2465+
; Loads one <8 x bfloat> from %0 and widens it to <32 x bfloat> with a
; shufflevector whose two operands are both the loaded value, so mask
; indices 0-7 and 8-15 select the same eight elements; the result is the
; loaded vector repeated four times. The CHECK lines below give the expected
; codegen for the SSE2, F16 and AVXNC check prefixes.
define <32 x bfloat> @test_v8bf16_v32bf16(ptr %0) {
2466+
; SSE2-LABEL: test_v8bf16_v32bf16:
2467+
; SSE2: # %bb.0:
2468+
; SSE2-NEXT: movaps (%rdi), %xmm0
2469+
; SSE2-NEXT: movaps %xmm0, %xmm1
2470+
; SSE2-NEXT: movaps %xmm0, %xmm2
2471+
; SSE2-NEXT: movaps %xmm0, %xmm3
2472+
; SSE2-NEXT: retq
2473+
;
2474+
; F16-LABEL: test_v8bf16_v32bf16:
2475+
; F16: # %bb.0:
2476+
; F16-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
2477+
; F16-NEXT: retq
2478+
;
2479+
; AVXNC-LABEL: test_v8bf16_v32bf16:
2480+
; AVXNC: # %bb.0:
2481+
; AVXNC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
2482+
; AVXNC-NEXT: vmovaps %ymm0, %ymm1
2483+
; AVXNC-NEXT: retq
2484+
%2 = load <8 x bfloat>, ptr %0, align 16
2485+
%3 = shufflevector <8 x bfloat> %2, <8 x bfloat> %2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
2486+
ret <32 x bfloat> %3
2487+
}

0 commit comments

Comments
 (0)