Skip to content

Commit fd85761

Browse files
authored
[X86][BF16] Customize VSELECT for BF16 under AVX-NECONVERT (#113322)
Fixes: https://godbolt.org/z/9abGnE8zs
1 parent 5155c38 commit fd85761

File tree

2 files changed

+116
-0
lines changed

2 files changed

+116
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2393,6 +2393,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
2393 2393
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
2394 2394
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
2395 2395
setF16Action(VT, Expand);
2396+
if (!Subtarget.hasBF16())
2397+
setOperationAction(ISD::VSELECT, VT, Custom);
2396 2398
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
2397 2399
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
2398 2400
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);

llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll

Lines changed: 114 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -215,3 +215,117 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
215 215
}
216 216
declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
217 217

218+
define <8 x bfloat> @select(i8 %x, <8 x bfloat> %y) nounwind {
219+
; X64-LABEL: select:
220+
; X64: # %bb.0:
221+
; X64-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
222+
; X64-NEXT: movb %dil, %al # encoding: [0x40,0x88,0xf8]
223+
; X64-NEXT: movb %al, -{{[0-9]+}}(%rsp) # encoding: [0x88,0x44,0x24,0xff]
224+
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0xff]
225+
; X64-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1]
226+
; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
227+
; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
228+
; X64-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
229+
; X64-NEXT: # kill: def $al killed $al killed $eax
230+
; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
231+
; X64-NEXT: shrb %cl # encoding: [0xd0,0xe9]
232+
; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
233+
; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
234+
; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
235+
; X64-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
236+
; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
237+
; X64-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
238+
; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
239+
; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
240+
; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
241+
; X64-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
242+
; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
243+
; X64-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
244+
; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
245+
; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
246+
; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
247+
; X64-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
248+
; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
249+
; X64-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
250+
; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
251+
; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
252+
; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
253+
; X64-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
254+
; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
255+
; X64-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
256+
; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
257+
; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
258+
; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
259+
; X64-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
260+
; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
261+
; X64-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
262+
; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
263+
; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
264+
; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
265+
; X64-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
266+
; X64-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07]
267+
; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
268+
; X64-NEXT: negl %eax # encoding: [0xf7,0xd8]
269+
; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
270+
; X64-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
271+
; X64-NEXT: retq # encoding: [0xc3]
272+
;
273+
; X86-LABEL: select:
274+
; X86: # %bb.0:
275+
; X86-NEXT: pushl %eax # encoding: [0x50]
276+
; X86-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
277+
; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08]
278+
; X86-NEXT: movb %al, {{[0-9]+}}(%esp) # encoding: [0x88,0x44,0x24,0x03]
279+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x03]
280+
; X86-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1]
281+
; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
282+
; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
283+
; X86-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
284+
; X86-NEXT: # kill: def $al killed $al killed $eax
285+
; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
286+
; X86-NEXT: shrb %cl # encoding: [0xd0,0xe9]
287+
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
288+
; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
289+
; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
290+
; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
291+
; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
292+
; X86-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
293+
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
294+
; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
295+
; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
296+
; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
297+
; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
298+
; X86-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
299+
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
300+
; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
301+
; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
302+
; X86-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
303+
; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
304+
; X86-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
305+
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
306+
; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
307+
; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
308+
; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
309+
; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
310+
; X86-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
311+
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
312+
; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
313+
; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
314+
; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
315+
; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
316+
; X86-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
317+
; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
318+
; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
319+
; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
320+
; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
321+
; X86-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07]
322+
; X86-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
323+
; X86-NEXT: negl %eax # encoding: [0xf7,0xd8]
324+
; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
325+
; X86-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
326+
; X86-NEXT: popl %eax # encoding: [0x58]
327+
; X86-NEXT: retl # encoding: [0xc3]
328+
%1 = bitcast i8 %x to <8 x i1>
329+
%2 = select <8 x i1> %1, <8 x bfloat> zeroinitializer, <8 x bfloat> %y
330+
ret <8 x bfloat> %2
331+
}

0 commit comments

Comments
 (0)