-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[libc] Explicitly pin memory for the client symbol lookup #73988
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Summary: Previously, we determined that coarse grained memory cannot be used in the general case. That removed the buffer used to transfer the memory, however we still had this lookup. Though we do not access the symbol directly, it still conflicts with the agents apparently. Pin this as well. This resolves the problems @lntue was having with the `libc` GPU build.
@llvm/pr-subscribers-backend-amdgpu Author: Joseph Huber (jhuber6) ChangesSummary: This resolves the problems @lntue was having with the Full diff: https://github.com/llvm/llvm-project/pull/73988.diff 1 Files Affected:
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 5c28607b2f290a9..a9a687656efcb39 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -464,18 +464,20 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
executable, rpc_client_symbol_name, &dev_agent, &rpc_client_sym))
handle_error(err);
- void *rpc_client_host;
- if (hsa_status_t err =
- hsa_amd_memory_pool_allocate(coarsegrained_pool, sizeof(void *),
- /*flags=*/0, &rpc_client_host))
- handle_error(err);
-
void *rpc_client_dev;
if (hsa_status_t err = hsa_executable_symbol_get_info(
rpc_client_sym, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
&rpc_client_dev))
handle_error(err);
+ // Pin some memory we can use to obtain the address of the rpc client.
+ void *rpc_client_storage = nullptr;
+ void *rpc_client_host = nullptr;
+ if (hsa_status_t err =
+ hsa_amd_memory_lock(&rpc_client_storage, sizeof(void *),
+ /*agents=*/nullptr, 0, &rpc_client_host))
+ handle_error(err);
+
// Copy the address of the client buffer from the device to the host.
if (hsa_status_t err = hsa_memcpy(rpc_client_host, host_agent, rpc_client_dev,
dev_agent, sizeof(void *)))
@@ -497,7 +499,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
if (hsa_status_t err = hsa_amd_memory_unlock(
const_cast<void *>(rpc_get_client_buffer(device_id))))
handle_error(err);
- if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_client_host))
+ if (hsa_status_t err = hsa_amd_memory_unlock(rpc_client_host))
handle_error(err);
// Obtain the GPU's fixed-frequency clock rate and copy it to the GPU.
|
@llvm/pr-subscribers-libc Author: Joseph Huber (jhuber6) ChangesSummary: This resolves the problems @lntue was having with the Full diff: https://github.com/llvm/llvm-project/pull/73988.diff 1 Files Affected:
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 5c28607b2f290a9..a9a687656efcb39 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -464,18 +464,20 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
executable, rpc_client_symbol_name, &dev_agent, &rpc_client_sym))
handle_error(err);
- void *rpc_client_host;
- if (hsa_status_t err =
- hsa_amd_memory_pool_allocate(coarsegrained_pool, sizeof(void *),
- /*flags=*/0, &rpc_client_host))
- handle_error(err);
-
void *rpc_client_dev;
if (hsa_status_t err = hsa_executable_symbol_get_info(
rpc_client_sym, HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS,
&rpc_client_dev))
handle_error(err);
+ // Pin some memory we can use to obtain the address of the rpc client.
+ void *rpc_client_storage = nullptr;
+ void *rpc_client_host = nullptr;
+ if (hsa_status_t err =
+ hsa_amd_memory_lock(&rpc_client_storage, sizeof(void *),
+ /*agents=*/nullptr, 0, &rpc_client_host))
+ handle_error(err);
+
// Copy the address of the client buffer from the device to the host.
if (hsa_status_t err = hsa_memcpy(rpc_client_host, host_agent, rpc_client_dev,
dev_agent, sizeof(void *)))
@@ -497,7 +499,7 @@ int load(int argc, char **argv, char **envp, void *image, size_t size,
if (hsa_status_t err = hsa_amd_memory_unlock(
const_cast<void *>(rpc_get_client_buffer(device_id))))
handle_error(err);
- if (hsa_status_t err = hsa_amd_memory_pool_free(rpc_client_host))
+ if (hsa_status_t err = hsa_amd_memory_unlock(rpc_client_host))
handle_error(err);
// Obtain the GPU's fixed-frequency clock rate and copy it to the GPU.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tested and confirmed it works on my system: Ubuntu 22.04 + RTX 6800 + ROCm 5.6.1
Summary:
Previously, we determined that coarse grained memory cannot be used in
the general case. That removed the buffer used to transfer the memory,
however we still had this lookup. Though we do not access the symbol
directly, it still conflicts with the agents apparently. Pin this as
well.
This resolves the problems @lntue was having with the
libc
GPU build.