[X86] Add missing vNbf16 handling in X86CallingConv.td file #127102

Merged: 6 commits (Feb 19, 2025)
llvm/lib/Target/X86/X86CallingConv.td: 48 changes (24 additions, 24 deletions)
@@ -267,19 +267,19 @@ def RetCC_X86Common : CallingConv<[
// Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
// registers, it won't have vector types.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,

// 256-bit vectors are returned in YMM0 and YMM1, when they fit. YMM2 and YMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX target feature.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,

// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX-512 target feature.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,

// Long double types are always returned in FP0 (even with SSE),
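To make the effect of the new RetCC_X86Common entries concrete, here is a minimal IR sketch (my own illustration, not a test from this patch; the function name is made up). Previously none of the CCIfType entries above listed bf16 vector types; with v8bf16 added, a 128-bit bf16 vector return value is expected to be assigned to XMM0/XMM1 like the other 128-bit vector types.

```llvm
; Hypothetical example (not from this PR): return a 128-bit bf16 vector.
; With v8bf16 listed in RetCC_X86Common, this return value is expected to be
; placed in XMM0 on an x86-64 target with SSE, matching v8f16/v4f32/v2f64.
define <8 x bfloat> @ret_v8bf16(<8 x bfloat> %a) {
  ret <8 x bfloat> %a
}
```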
@@ -565,7 +565,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v64i1], CCPromoteToType<v64i8>>,

// The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f16, f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCIfType<[f16, f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,

@@ -574,13 +574,13 @@ def CC_X86_64_C : CallingConv<[
// FIXME: This isn't precisely correct; the x86-64 ABI document says that
// fixed arguments to vararg functions are supposed to be passed in
// registers. Actually modeling that would be a lot of work, though.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
YMM4, YMM5, YMM6, YMM7]>>>>,

// The first 8 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()",
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,

@@ -593,14 +593,14 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[f80, f128], CCAssignToStack<0, 0>>,

// Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64], CCAssignToStack<16, 16>>,

// 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
CCAssignToStack<32, 32>>,

// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
CCAssignToStack<64, 64>>
]>;

@@ -631,13 +631,13 @@ def CC_X86_Win64_C : CallingConv<[
CCIfCFGuardTarget<CCAssignToReg<[RAX]>>,

// 128 bit vectors are passed by pointer
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCPassIndirect<i64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64], CCPassIndirect<i64>>,

// 256 bit vectors are passed by pointer
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64], CCPassIndirect<i64>>,
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64], CCPassIndirect<i64>>,

// 512 bit vectors are passed by pointer
- CCIfType<[v64i8, v32i16, v16i32, v32f16, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
+ CCIfType<[v64i8, v32i16, v16i32, v32f16, v32bf16, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,

// Long doubles are passed by pointer
CCIfType<[f80], CCPassIndirect<i64>>,
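As a hedged sketch of the Win64 rules just above (again my own illustration, not from the PR; the function name is made up): with the bf16 vector types added to these lists, a bf16 vector argument under the Win64 C calling convention is expected to be passed indirectly, the same way the other 128/256/512-bit vector types are.

```llvm
; Hypothetical Win64 example (not from this PR). CCPassIndirect<i64> means the
; caller stores the vector to memory and passes its 64-bit address instead of
; passing the vector itself in an XMM register.
define win64cc void @take_v8bf16(<8 x bfloat> %v) {
  ret void
}
```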
@@ -734,48 +734,48 @@ def CC_X86_64_AnyReg : CallingConv<[
/// values are spilled on the stack.
def CC_X86_32_Vector_Common : CallingConv<[
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
CCAssignToStack<16, 16>>,

// 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
CCAssignToStack<32, 32>>,

// 512-bit AVX-512 vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
CCAssignToStack<64, 64>>
]>;

/// CC_X86_Win32_Vector - In X86 Win32 calling conventions, extra vector
/// values are spilled on the stack.
def CC_X86_Win32_Vector : CallingConv<[
// Other SSE vectors get 16-byte stack slots that are 4-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
CCAssignToStack<16, 4>>,

// 256-bit AVX vectors get 32-byte stack slots that are 4-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
CCAssignToStack<32, 4>>,

// 512-bit AVX-512 vectors get 64-byte stack slots that are 4-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
CCAssignToStack<64, 4>>
]>;

// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
// vector registers
def CC_X86_32_Vector_Standard : CallingConv<[
// SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2]>>>,

// AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2]>>>>,

// AVX 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,

CCIfIsVarArgOnWin<CCDelegateTo<CC_X86_Win32_Vector>>,
@@ -786,16 +786,16 @@ def CC_X86_32_Vector_Standard : CallingConv<[
// vector registers.
def CC_X86_32_Vector_Darwin : CallingConv<[
// SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,

// AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,

// AVX 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,

CCDelegateTo<CC_X86_32_Vector_Common>
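Finally, a sketch of the argument-passing side under CC_X86_64_C (illustration only; the function names and the +avx target feature are assumptions, not part of this patch). With v16bf16 added to the 256-bit lists, a <16 x bfloat> argument is expected to go in one of YMM0..YMM7 when AVX is available and a register is free, and otherwise into a 32-byte, 32-byte-aligned stack slot, exactly like v16f16 or v8f32.

```llvm
; Hypothetical caller (not from this PR): pass a 256-bit bf16 vector using the
; default C calling convention on x86-64. Assuming AVX, CC_X86_64_C now assigns
; v16bf16 to a YMM register; once the eight vector registers are used up it
; falls back to a 32-byte stack slot.
declare void @consume(<16 x bfloat>)

define void @pass_v16bf16(<16 x bfloat> %v) "target-features"="+avx" {
  call void @consume(<16 x bfloat> %v)
  ret void
}
```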