Skip to content

Commit f628eef

Browse files
[libomptarget][amdgpu] Fix latent race in load binary
1 parent 4fa0dbd commit f628eef

File tree

1 file changed

+21
-8
lines changed
  • openmp/libomptarget/plugins/amdgpu/src

1 file changed

+21
-8
lines changed

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Lines changed: 21 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -925,6 +925,26 @@ __tgt_target_table *__tgt_rtl_load_binary(int32_t device_id,
925925
return res;
926926
}
927927

928+
// Allocate at least `size` bytes at `place` with atmi_malloc and zero-fill
// the allocation before handing it back.
//
// The request is rounded up to a multiple of 4 bytes because the fill count
// given to hsa_amd_memory_fill below is expressed in 4-byte units
// (rounded / 4).
//
// ret_ptr: receives the allocation on success; left untouched on failure.
// size:    requested size in bytes.
// place:   memory place forwarded unchanged to atmi_malloc.
//
// Returns ATMI_STATUS_SUCCESS on success, the atmi_malloc status if the
// allocation fails, and ATMI_STATUS_ERROR if the size cannot be rounded up
// without wrapping or if the zero fill fails (the allocation is released
// with atmi_free in the latter case).
static atmi_status_t atmi_calloc(void **ret_ptr, size_t size,
                                 atmi_mem_place_t place) {
  // Guard the round-up: for sizes within 3 of the size_t maximum, size + 3
  // wraps and would silently produce a tiny (or zero-length) allocation
  // that the caller believes holds `size` bytes.
  const size_t padded = size + 3;
  if (padded < size) {
    return ATMI_STATUS_ERROR;
  }
  const uint64_t rounded = 4 * (uint64_t)(padded / 4);

  void *ptr = NULL;
  atmi_status_t err = atmi_malloc(&ptr, rounded, place);
  if (err != ATMI_STATUS_SUCCESS) {
    return err;
  }

  hsa_status_t rc = hsa_amd_memory_fill(ptr, 0, rounded / 4);
  if (rc != HSA_STATUS_SUCCESS) {
    // Cast so the argument type matches the %u conversion regardless of the
    // enum's underlying type.
    fprintf(stderr, "zero fill device_state failed with %u\n", (unsigned)rc);
    // Do not leak the allocation when the fill fails.
    atmi_free(ptr);
    return ATMI_STATUS_ERROR;
  }

  *ret_ptr = ptr;
  return ATMI_STATUS_SUCCESS;
}
947+
928948
__tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
929949
__tgt_device_image *image) {
930950
// This function loads the device image onto gpu[device_id] and does other
@@ -1024,7 +1044,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
10241044
assert(dss.second == 0);
10251045
void *ptr = NULL;
10261046
atmi_status_t err =
1027-
atmi_malloc(&ptr, device_State_bytes, get_gpu_mem_place(device_id));
1047+
atmi_calloc(&ptr, device_State_bytes, get_gpu_mem_place(device_id));
10281048
if (err != ATMI_STATUS_SUCCESS) {
10291049
fprintf(stderr, "Failed to allocate device_state array\n");
10301050
return NULL;
@@ -1062,13 +1082,6 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
10621082
fprintf(stderr, "memcpy install of state_ptr failed\n");
10631083
return NULL;
10641084
}
1065-
1066-
assert((device_State_bytes & 0x3) == 0); // known >= 4 byte aligned
1067-
hsa_status_t rc = hsa_amd_memory_fill(ptr, 0, device_State_bytes / 4);
1068-
if (rc != HSA_STATUS_SUCCESS) {
1069-
fprintf(stderr, "zero fill device_state failed with %u\n", rc);
1070-
return NULL;
1071-
}
10721085
}
10731086

10741087
// TODO: Check with Guansong to understand the below comment more thoroughly.

0 commit comments

Comments
 (0)