Skip to content

Commit 8cf93a3

Browse files
[libomptarget][amdgpu] Destruct HSA queues
Store queues in unique_ptr so they are destroyed when the global DeviceInfo is destroyed. Currently they leak, which raises an assert in debug builds of HSA.
Reviewed By: pdhaliwal
Differential Revision: https://reviews.llvm.org/D109511
1 parent 6063e6b commit 8cf93a3

File tree

3 files changed

+27
-12
lines changed

3 files changed

+27
-12
lines changed

openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ DLWRAP(hsa_signal_store_relaxed, 2);
2929
DLWRAP(hsa_signal_store_screlease, 2);
3030
DLWRAP(hsa_signal_wait_scacquire, 5);
3131
DLWRAP(hsa_queue_create, 8);
32+
DLWRAP(hsa_queue_destroy, 1);
3233
DLWRAP(hsa_queue_load_read_index_scacquire, 1);
3334
DLWRAP(hsa_queue_add_write_index_relaxed, 2);
3435
DLWRAP(hsa_memory_copy, 3);

openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,8 @@ hsa_status_t hsa_queue_create(hsa_agent_t agent, uint32_t size,
142142
void *data, uint32_t private_segment_size,
143143
uint32_t group_segment_size, hsa_queue_t **queue);
144144

145+
hsa_status_t hsa_queue_destroy(hsa_queue_t *queue);
146+
145147
uint64_t hsa_queue_load_read_index_scacquire(const hsa_queue_t *queue);
146148

147149
uint64_t hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue,

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -451,6 +451,17 @@ class RTLDeviceInfoTy {
451451
HSALifetime HSA; // First field => constructed first and destructed last
452452
std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
453453

454+
struct QueueDeleter {
455+
void operator()(hsa_queue_t *Q) {
456+
if (Q) {
457+
hsa_status_t Err = hsa_queue_destroy(Q);
458+
if (Err != HSA_STATUS_SUCCESS) {
459+
DP("Error destroying hsa queue: %s\n", get_error_string(Err));
460+
}
461+
}
462+
}
463+
};
464+
454465
public:
455466
// load binary populates symbol tables and mutates various global state
456467
// run uses those symbol tables
@@ -460,7 +471,8 @@ class RTLDeviceInfoTy {
460471

461472
// GPU devices
462473
std::vector<hsa_agent_t> HSAAgents;
463-
std::vector<hsa_queue_t *> HSAQueues; // one per gpu
474+
std::vector<std::unique_ptr<hsa_queue_t, QueueDeleter>>
475+
HSAQueues; // one per gpu
464476

465477
// CPUs
466478
std::vector<hsa_agent_t> CPUAgents;
@@ -773,10 +785,6 @@ class RTLDeviceInfoTy {
773785
return;
774786
}
775787

776-
for (int i = 0; i < NumberOfDevices; i++) {
777-
HSAQueues[i] = nullptr;
778-
}
779-
780788
for (int i = 0; i < NumberOfDevices; i++) {
781789
uint32_t queue_size = 0;
782790
{
@@ -792,12 +800,16 @@ class RTLDeviceInfoTy {
792800
}
793801
}
794802

795-
hsa_status_t rc = hsa_queue_create(
796-
HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI, callbackQueue, NULL,
797-
UINT32_MAX, UINT32_MAX, &HSAQueues[i]);
798-
if (rc != HSA_STATUS_SUCCESS) {
799-
DP("Failed to create HSA queue %d\n", i);
800-
return;
803+
{
804+
hsa_queue_t *Q = nullptr;
805+
hsa_status_t rc =
806+
hsa_queue_create(HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI,
807+
callbackQueue, NULL, UINT32_MAX, UINT32_MAX, &Q);
808+
if (rc != HSA_STATUS_SUCCESS) {
809+
DP("Failed to create HSA queue %d\n", i);
810+
return;
811+
}
812+
HSAQueues[i].reset(Q);
801813
}
802814

803815
deviceStateStore[i] = {nullptr, 0};
@@ -2149,7 +2161,7 @@ int32_t __tgt_rtl_run_target_team_region_locked(
21492161

21502162
// Run on the device.
21512163
{
2152-
hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id];
2164+
hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id].get();
21532165
if (!queue) {
21542166
return OFFLOAD_FAIL;
21552167
}

0 commit comments

Comments
 (0)