Skip to content

[Offload] Implement the remaining initial Offload API #122106

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Apr 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion offload/liboffload/API/APIDefs.td
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ class Typedef : APIObject { string value; }

class FptrTypedef : APIObject {
list<Param> params;
list<Return> returns;
string return;
}

class Macro : APIObject {
Expand Down
28 changes: 24 additions & 4 deletions offload/liboffload/API/Common.td
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,39 @@ def : Handle {
let desc = "Handle of context object";
}

// Handle record that names the queue handle type `ol_queue_handle_t`.
def : Handle {
  let name = "ol_queue_handle_t";
  let desc = "Handle of queue object";
}

// Handle record that names the event handle type `ol_event_handle_t`.
def : Handle {
  let name = "ol_event_handle_t";
  let desc = "Handle of event object";
}

// Handle record that names the program handle type `ol_program_handle_t`.
def : Handle {
  let name = "ol_program_handle_t";
  let desc = "Handle of program object";
}

// Typedef record for `ol_kernel_handle_t`. It is declared through the Typedef
// class with an explicit underlying value of `void *`.
def : Typedef {
  let name = "ol_kernel_handle_t";
  let desc = "Handle of kernel object";
  let value = "void *";
}

def : Enum {
let name = "ol_errc_t";
let desc = "Defines Return/Error codes";
let etors =[
Etor<"SUCCESS", "Success">,
Etor<"INVALID_VALUE", "Invalid Value">,
Etor<"INVALID_PLATFORM", "Invalid platform">,
Etor<"DEVICE_NOT_FOUND", "Device not found">,
Etor<"INVALID_DEVICE", "Invalid device">,
Etor<"DEVICE_LOST", "Device hung, reset, was removed, or driver update occurred">,
Etor<"UNINITIALIZED", "plugin is not initialized or specific entry-point is not implemented">,
Etor<"INVALID_QUEUE", "Invalid queue">,
Etor<"INVALID_EVENT", "Invalid event">,
Etor<"INVALID_KERNEL_NAME", "Named kernel not found in the program binary">,
Etor<"OUT_OF_RESOURCES", "Out of resources">,
Etor<"UNSUPPORTED_VERSION", "generic error code for unsupported versions">,
Etor<"UNSUPPORTED_FEATURE", "generic error code for unsupported features">,
Etor<"INVALID_ARGUMENT", "generic error code for invalid arguments">,
Etor<"INVALID_NULL_HANDLE", "handle argument is not valid">,
Expand Down
37 changes: 16 additions & 21 deletions offload/liboffload/API/Device.td
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

def : Enum {
let name = "ol_device_type_t";
let desc = "Supported device types";
let desc = "Supported device types.";
let etors =[
Etor<"DEFAULT", "The default device type as preferred by the runtime">,
Etor<"ALL", "Devices of all types">,
Expand All @@ -23,7 +23,7 @@ def : Enum {

def : Enum {
let name = "ol_device_info_t";
let desc = "Supported device info";
let desc = "Supported device info.";
let is_typed = 1;
let etors =[
TaggedEtor<"TYPE", "ol_device_type_t", "type of the device">,
Expand All @@ -34,39 +34,34 @@ def : Enum {
];
}

def : Function {
let name = "olGetDeviceCount";
let desc = "Retrieves the number of available devices within a platform";
def : FptrTypedef {
let name = "ol_device_iterate_cb_t";
let desc = "User-provided function to be used with `olIterateDevices`";
let params = [
Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>,
Param<"uint32_t*", "NumDevices", "pointer to the number of devices.", PARAM_OUT>
Param<"ol_device_handle_t", "Device", "the device handle of the current iteration", PARAM_IN>,
Param<"void*", "UserData", "optional user data", PARAM_IN_OPTIONAL>
];
let returns = [];
let return = "bool";
}

def : Function {
let name = "olGetDevice";
let desc = "Retrieves devices within a platform";
let name = "olIterateDevices";
let desc = "Iterates over all available devices, calling the callback for each device.";
let details = [
"Multiple calls to this function will return identical device handles, in the same order.",
"If the user-provided callback returns `false`, the iteration is stopped."
];
let params = [
Param<"ol_platform_handle_t", "Platform", "handle of the platform instance", PARAM_IN>,
Param<"uint32_t", "NumEntries", "the number of devices to be added to phDevices, which must be greater than zero", PARAM_IN>,
RangedParam<"ol_device_handle_t*", "Devices", "Array of device handles. "
"If NumEntries is less than the number of devices available, then this function shall only retrieve that number of devices.", PARAM_OUT,
Range<"0", "NumEntries">>
Param<"ol_device_iterate_cb_t", "Callback", "User-provided function called for each available device", PARAM_IN>,
Param<"void*", "UserData", "Optional user data to pass to the callback", PARAM_IN_OPTIONAL>
];
let returns = [
Return<"OL_ERRC_INVALID_SIZE", [
"`NumEntries == 0`"
]>
Return<"OL_ERRC_INVALID_DEVICE">
];
}

def : Function {
let name = "olGetDeviceInfo";
let desc = "Queries the given property of the device";
let desc = "Queries the given property of the device.";
let details = [];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>,
Expand All @@ -90,7 +85,7 @@ def : Function {

def : Function {
let name = "olGetDeviceInfoSize";
let desc = "Returns the storage size of the given device query";
let desc = "Returns the storage size of the given device query.";
let details = [];
let params = [
Param<"ol_device_handle_t", "Device", "handle of the device instance", PARAM_IN>,
Expand Down
31 changes: 31 additions & 0 deletions offload/liboffload/API/Event.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//===-- Event.td - Event definitions for Offload -----------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains Offload API definitions related to the event handle
//
//===----------------------------------------------------------------------===//

// olDestroyEvent: Function record taking a single required event handle.
// It declares no extra detail strings and no function-specific return codes.
def : Function {
  let name = "olDestroyEvent";
  let desc = "Destroy the event and free all underlying resources.";
  let details = [];
  let params = [
    Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
  ];
  let returns = [];
}

// olWaitEvent: Function record taking a single required event handle.
// It declares no extra detail strings and no function-specific return codes.
def : Function {
  let name = "olWaitEvent";
  let desc = "Wait for the event to be complete.";
  let details = [];
  let params = [
    Param<"ol_event_handle_t", "Event", "handle of the event", PARAM_IN>
  ];
  let returns = [];
}
61 changes: 61 additions & 0 deletions offload/liboffload/API/Kernel.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
//===-- Kernel.td - Kernel definitions for Offload ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains Offload API definitions related to the kernel handle
//
//===----------------------------------------------------------------------===//

// olGetKernel: Function record with two inputs (a program handle and a kernel
// name string) and one output (a pointer receiving the kernel handle).
// No function-specific return codes are declared.
def : Function {
  let name = "olGetKernel";
  let desc = "Get a kernel from the function identified by `KernelName` in the given program.";
  let details = [
    "The kernel handle returned is owned by the device so does not need to be destroyed."
  ];
  let params = [
    Param<"ol_program_handle_t", "Program",
          "handle of the program",
          PARAM_IN>,
    Param<"const char*", "KernelName",
          "name of the kernel entry point in the program",
          PARAM_IN>,
    Param<"ol_kernel_handle_t*", "Kernel",
          "output pointer for the fetched kernel",
          PARAM_OUT>
  ];
  let returns = [];
}

// Struct record `ol_kernel_launch_size_args_t`. All eight members are
// `size_t`: a dimension count, the work-group count per axis (X/Y/Z), the
// work-group size per axis (X/Y/Z), and the dynamic shared memory size.
def : Struct {
  let name = "ol_kernel_launch_size_args_t";
  let desc = "Size-related arguments for a kernel launch.";
  let members = [
    // Number of dimensions in use.
    StructMember<"size_t", "Dimensions", "Number of work dimensions">,

    // Work-group counts, one per axis.
    StructMember<"size_t", "NumGroupsX", "Number of work groups on the X dimension">,
    StructMember<"size_t", "NumGroupsY", "Number of work groups on the Y dimension">,
    StructMember<"size_t", "NumGroupsZ", "Number of work groups on the Z dimension">,

    // Work-group sizes, one per axis.
    StructMember<"size_t", "GroupSizeX", "Size of a work group on the X dimension.">,
    StructMember<"size_t", "GroupSizeY", "Size of a work group on the Y dimension.">,
    StructMember<"size_t", "GroupSizeZ", "Size of a work group on the Z dimension.">,

    // Dynamic shared memory, in bytes.
    StructMember<"size_t", "DynSharedMemory", "Size of dynamic shared memory in bytes.">
  ];
}

// olLaunchKernel: Function record for enqueueing a kernel launch.
// `Queue` is an optional input and `EventOut` is an optional output; every
// other parameter is a required input. Two function-specific error returns
// are declared, each with its triggering condition.
def : Function {
  let name = "olLaunchKernel";
  let desc = "Enqueue a kernel launch with the specified size and parameters.";
  let details = [
    "If a queue is not specified, kernel execution happens synchronously"
  ];
  let params = [
    Param<"ol_queue_handle_t", "Queue",
          "handle of the queue",
          PARAM_IN_OPTIONAL>,
    Param<"ol_device_handle_t", "Device",
          "handle of the device to execute on",
          PARAM_IN>,
    Param<"ol_kernel_handle_t", "Kernel",
          "handle of the kernel",
          PARAM_IN>,
    Param<"const void*", "ArgumentsData",
          "pointer to the kernel argument struct",
          PARAM_IN>,
    Param<"size_t", "ArgumentsSize",
          "size of the kernel argument struct",
          PARAM_IN>,
    Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs",
          "pointer to the struct containing launch size parameters",
          PARAM_IN>,
    Param<"ol_event_handle_t*", "EventOut",
          "optional recorded event for the enqueued operation",
          PARAM_OUT_OPTIONAL>
  ];
  let returns = [
    Return<"OL_ERRC_INVALID_ARGUMENT", [
      "`Queue == NULL && EventOut != NULL`"
    ]>,
    Return<"OL_ERRC_INVALID_DEVICE", [
      "If Queue is non-null but does not belong to Device"
    ]>
  ];
}
68 changes: 68 additions & 0 deletions offload/liboffload/API/Memory.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//===-- Memory.td - Memory definitions for Offload ---------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains Offload API definitions related to memory allocations
//
//===----------------------------------------------------------------------===//

def : Enum {
let name = "ol_alloc_type_t";
let desc = "Represents the type of allocation made with olMemAlloc.";
let etors = [
Etor<"HOST", "Host allocation">,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These names suck, they roughly equate to CUDA's host, managed, and device memory. Honestly we should take this opportunity to change them to something more understandable.

In this context, I believe host is 'pinned' memory that always resides on the host, while managed is memory that can migrate in the unified memory context. Device is just memory that only exists on the GPU. HSA has coarse-grained and fine-grained memory, with coarse-grained being accessible to only one 'agent' (i.e. GPU) while fine-grained is likely pinned. They also have their SVM API, which I believe is closer to managed. Naming things is hard, unfortunately.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't disagree but I'm struggling to come up with better names. Maybe we can discuss this in the next call and come to a decision.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd probably just go with pinned, migratable, and device if nothing else. But not going to bikeshed here.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it should be called pinned. The memory is allocated on the host, is accessible by the device, and does not migrate to the device, i.e. it stays on the host. In non-offload terms, "pinned" could mean pinned on the host and not paged out.
Also, the "migratable" memory is memory that is accessible by both host and device; the implementation may not migrate the memory at all, but instead allocate it in a location that both host and device can access.

Etor<"DEVICE", "Device allocation">,
Etor<"MANAGED", "Managed allocation">
];
}

// olMemAlloc: Function record with three required inputs (device handle,
// allocation type, size in bytes) and one output pointer for the allocation.
// Declares OL_ERRC_INVALID_SIZE for a zero `Size`.
def : Function {
  let name = "olMemAlloc";
  let desc = "Creates a memory allocation on the specified device.";
  let params = [
    Param<"ol_device_handle_t", "Device",
          "handle of the device to allocate on",
          PARAM_IN>,
    Param<"ol_alloc_type_t", "Type",
          "type of the allocation",
          PARAM_IN>,
    Param<"size_t", "Size",
          "size of the allocation in bytes",
          PARAM_IN>,
    Param<"void**", "AllocationOut",
          "output for the allocated pointer",
          PARAM_OUT>
  ];
  let returns = [
    Return<"OL_ERRC_INVALID_SIZE", ["`Size == 0`"]>
  ];
}

// olMemFree: Function record taking the address of an allocation as its only
// (required) input. No function-specific return codes are declared.
def : Function {
  let name = "olMemFree";
  let desc = "Frees a memory allocation previously made by olMemAlloc.";
  let params = [
    Param<"void*", "Address", "address of the allocation to free", PARAM_IN>
  ];
  let returns = [];
}

// olMemcpy: Function record for enqueueing a copy between two pointers, each
// paired with the device it belongs to. `Queue` is an optional input and
// `EventOut` is an optional output; the remaining parameters are required
// inputs. Declares OL_ERRC_INVALID_ARGUMENT for an event requested without a
// queue.
def : Function {
  let name = "olMemcpy";
  let desc = "Enqueue a memcpy operation.";
  let details = [
    "For host pointers, use the host device belonging to the OL_PLATFORM_BACKEND_HOST platform.",
    "If a queue is specified, at least one device must be a non-host device",
    "If a queue is not specified, the memcpy happens synchronously"
  ];
  let params = [
    Param<"ol_queue_handle_t", "Queue",
          "handle of the queue.",
          PARAM_IN_OPTIONAL>,

    // Destination pointer and the device that owns it.
    Param<"void*", "DstPtr",
          "pointer to copy to",
          PARAM_IN>,
    Param<"ol_device_handle_t", "DstDevice",
          "device that DstPtr belongs to",
          PARAM_IN>,

    // Source pointer and the device that owns it.
    Param<"void*", "SrcPtr",
          "pointer to copy from",
          PARAM_IN>,
    Param<"ol_device_handle_t", "SrcDevice",
          "device that SrcPtr belongs to",
          PARAM_IN>,

    Param<"size_t", "Size",
          "size in bytes of data to copy",
          PARAM_IN>,
    Param<"ol_event_handle_t*", "EventOut",
          "optional recorded event for the enqueued operation",
          PARAM_OUT_OPTIONAL>
  ];
  let returns = [
    Return<"OL_ERRC_INVALID_ARGUMENT", [
      "`Queue == NULL && EventOut != NULL`"
    ]>
  ];
}
5 changes: 5 additions & 0 deletions offload/liboffload/API/OffloadAPI.td
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ include "APIDefs.td"
include "Common.td"
include "Platform.td"
include "Device.td"
include "Memory.td"
include "Queue.td"
include "Event.td"
include "Program.td"
include "Kernel.td"
43 changes: 5 additions & 38 deletions offload/liboffload/API/Platform.td
Original file line number Diff line number Diff line change
Expand Up @@ -9,44 +9,10 @@
// This file contains Offload API definitions related to the Platform handle
//
//===----------------------------------------------------------------------===//
def : Function {
let name = "olGetPlatform";
let desc = "Retrieves all available platforms";
let details = [
"Multiple calls to this function will return identical platforms handles, in the same order.",
];
let params = [
Param<"uint32_t", "NumEntries",
"The number of platforms to be added to Platforms. NumEntries must be "
"greater than zero.",
PARAM_IN>,
RangedParam<"ol_platform_handle_t*", "Platforms",
"Array of handle of platforms. If NumEntries is less than the number of "
"platforms available, then olGetPlatform shall only retrieve that "
"number of platforms.",
PARAM_OUT, Range<"0", "NumEntries">>
];
let returns = [
Return<"OL_ERRC_INVALID_SIZE", [
"`NumEntries == 0`"
]>
];
}

def : Function {
let name = "olGetPlatformCount";
let desc = "Retrieves the number of available platforms";
let params = [
Param<"uint32_t*",
"NumPlatforms", "returns the total number of platforms available.",
PARAM_OUT>
];
let returns = [];
}

def : Enum {
let name = "ol_platform_info_t";
let desc = "Supported platform info";
let desc = "Supported platform info.";
let is_typed = 1;
let etors = [
TaggedEtor<"NAME", "char[]", "The string denoting name of the platform. The size of the info needs to be dynamically queried.">,
Expand All @@ -58,17 +24,18 @@ def : Enum {

def : Enum {
let name = "ol_platform_backend_t";
let desc = "Identifies the native backend of the platform";
let desc = "Identifies the native backend of the platform.";
let etors =[
Etor<"UNKNOWN", "The backend is not recognized">,
Etor<"CUDA", "The backend is CUDA">,
Etor<"AMDGPU", "The backend is AMDGPU">,
Etor<"HOST", "The backend is the host">,
];
}

def : Function {
let name = "olGetPlatformInfo";
let desc = "Queries the given property of the platform";
let desc = "Queries the given property of the platform.";
let details = [
"`olGetPlatformInfoSize` can be used to query the storage size "
"required for the given query."
Expand Down Expand Up @@ -96,7 +63,7 @@ def : Function {

def : Function {
let name = "olGetPlatformInfoSize";
let desc = "Returns the storage size of the given platform query";
let desc = "Returns the storage size of the given platform query.";
let details = [];
let params = [
Param<"ol_platform_handle_t", "Platform", "handle of the platform", PARAM_IN>,
Expand Down
Loading
Loading