-
Notifications
You must be signed in to change notification settings - Fork 14.2k
[X86][BF16] Customize VSELECT for BF16 under AVX-NECONVERT #113322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang). Changes: Fixes: https://godbolt.org/z/9abGnE8zs — Full diff: https://github.com/llvm/llvm-project/pull/113322.diff — 2 files affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bcb84add65d83e..c453d7ae1d3889 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2393,6 +2393,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
setF16Action(VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
index b311c8831457b8..ef87ac31fcf48c 100644
--- a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
@@ -215,3 +215,117 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
}
declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
+define <8 x bfloat> @select(i8 %x, <8 x bfloat> %y) nounwind {
+; X64-LABEL: select:
+; X64: # %bb.0:
+; X64-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X64-NEXT: movb %dil, %al # encoding: [0x40,0x88,0xf8]
+; X64-NEXT: movb %al, -{{[0-9]+}}(%rsp) # encoding: [0x88,0x44,0x24,0xff]
+; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0xff]
+; X64-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb %cl # encoding: [0xd0,0xe9]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X64-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X64-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X64-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X64-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT: negl %eax # encoding: [0xf7,0xd8]
+; X64-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X64-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+;
+; X86-LABEL: select:
+; X86: # %bb.0:
+; X86-NEXT: pushl %eax # encoding: [0x50]
+; X86-NEXT: vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08]
+; X86-NEXT: movb %al, {{[0-9]+}}(%esp) # encoding: [0x88,0x44,0x24,0x03]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x03]
+; X86-NEXT: movl %eax, %ecx # encoding: [0x89,0xc1]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb %cl # encoding: [0xd0,0xe9]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X86-NEXT: movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT: shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X86-NEXT: movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT: andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT: negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X86-NEXT: shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X86-NEXT: movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X86-NEXT: negl %eax # encoding: [0xf7,0xd8]
+; X86-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X86-NEXT: vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X86-NEXT: popl %eax # encoding: [0x58]
+; X86-NEXT: retl # encoding: [0xc3]
+ %1 = bitcast i8 %x to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x bfloat> zeroinitializer, <8 x bfloat> %y
+ ret <8 x bfloat> %2
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/30/builds/8981 — Here is the relevant piece of the build log for reference.
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/134/builds/7555 — Here is the relevant piece of the build log for reference.
|
Fixes: https://godbolt.org/z/9abGnE8zs