|
36 | 36 | #include "internal.h"
|
37 | 37 |
|
38 | 38 | #include "Debug.h"
|
| 39 | +#include "get_elf_mach_gfx_name.h" |
39 | 40 | #include "omptargetplugin.h"
|
40 | 41 |
|
41 | 42 | #include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
@@ -92,14 +93,6 @@ uint32_t TgtStackItemSize = 0;
|
92 | 93 |
|
93 | 94 | #include "../../common/elf_common.c"
|
94 | 95 |
|
95 |
| -static bool elf_machine_id_is_amdgcn(__tgt_device_image *image) { |
96 |
| - const uint16_t amdgcnMachineID = 224; |
97 |
| - int32_t r = elf_check_machine(image, amdgcnMachineID); |
98 |
| - if (!r) { |
99 |
| - DP("Supported machine ID not found\n"); |
100 |
| - } |
101 |
| - return r; |
102 |
| -} |
103 | 96 |
|
104 | 97 | /// Keep entries table per device
|
105 | 98 | struct FuncOrGblEntryTy {
|
@@ -319,6 +312,7 @@ class RTLDeviceInfoTy {
|
319 | 312 | std::vector<int> GroupsPerDevice;
|
320 | 313 | std::vector<int> ThreadsPerGroup;
|
321 | 314 | std::vector<int> WarpSize;
|
| 315 | + std::vector<std::string> GPUName; |
322 | 316 |
|
323 | 317 | // OpenMP properties
|
324 | 318 | std::vector<int> NumTeams;
|
@@ -472,6 +466,7 @@ class RTLDeviceInfoTy {
|
472 | 466 | FuncGblEntries.resize(NumberOfDevices);
|
473 | 467 | ThreadsPerGroup.resize(NumberOfDevices);
|
474 | 468 | ComputeUnits.resize(NumberOfDevices);
|
| 469 | + GPUName.resize(NumberOfDevices); |
475 | 470 | GroupsPerDevice.resize(NumberOfDevices);
|
476 | 471 | WarpSize.resize(NumberOfDevices);
|
477 | 472 | NumTeams.resize(NumberOfDevices);
|
@@ -642,6 +637,40 @@ void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
|
642 | 637 | assert(async_info_ptr->Queue);
|
643 | 638 | async_info_ptr->Queue = 0;
|
644 | 639 | }
|
| 640 | + |
| 641 | +bool elf_machine_id_is_amdgcn(__tgt_device_image *image) { |
| 642 | + const uint16_t amdgcnMachineID = EM_AMDGPU; |
| 643 | + int32_t r = elf_check_machine(image, amdgcnMachineID); |
| 644 | + if (!r) { |
| 645 | + DP("Supported machine ID not found\n"); |
| 646 | + } |
| 647 | + return r; |
| 648 | +} |
| 649 | + |
| 650 | +uint32_t elf_e_flags(__tgt_device_image *image) { |
| 651 | + char *img_begin = (char *)image->ImageStart; |
| 652 | + size_t img_size = (char *)image->ImageEnd - img_begin; |
| 653 | + |
| 654 | + Elf *e = elf_memory(img_begin, img_size); |
| 655 | + if (!e) { |
| 656 | + DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1)); |
| 657 | + return 0; |
| 658 | + } |
| 659 | + |
| 660 | + Elf64_Ehdr *eh64 = elf64_getehdr(e); |
| 661 | + |
| 662 | + if (!eh64) { |
| 663 | + DP("Unable to get machine ID from ELF file!\n"); |
| 664 | + elf_end(e); |
| 665 | + return 0; |
| 666 | + } |
| 667 | + |
| 668 | + uint32_t Flags = eh64->e_flags; |
| 669 | + |
| 670 | + elf_end(e); |
| 671 | + DP("ELF Flags: 0x%x\n", Flags); |
| 672 | + return Flags; |
| 673 | +} |
645 | 674 | } // namespace
|
646 | 675 |
|
647 | 676 | int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
|
@@ -676,9 +705,20 @@ int32_t __tgt_rtl_init_device(int device_id) {
|
676 | 705 | DeviceInfo.ComputeUnits[device_id] = compute_units;
|
677 | 706 | DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
|
678 | 707 | }
|
| 708 | + |
| 709 | + char GetInfoName[64]; // 64 max size returned by get info |
| 710 | + err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME, |
| 711 | + (void *) GetInfoName); |
| 712 | + if (err) |
| 713 | + DeviceInfo.GPUName[device_id] = "--unknown gpu--"; |
| 714 | + else { |
| 715 | + DeviceInfo.GPUName[device_id] = GetInfoName; |
| 716 | + } |
| 717 | + |
679 | 718 | if (print_kernel_trace == 4)
|
680 |
| - fprintf(stderr, "Device#%-2d CU's: %2d\n", device_id, |
681 |
| - DeviceInfo.ComputeUnits[device_id]); |
| 719 | + fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id, |
| 720 | + DeviceInfo.ComputeUnits[device_id], |
| 721 | + DeviceInfo.GPUName[device_id].c_str()); |
682 | 722 |
|
683 | 723 | // Query attributes to determine number of threads/block and blocks/grid.
|
684 | 724 | uint16_t workgroup_max_dim[3];
|
@@ -1038,22 +1078,18 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
1038 | 1078 | return ATMI_STATUS_SUCCESS;
|
1039 | 1079 | };
|
1040 | 1080 |
|
1041 |
| - atmi_status_t err; |
1042 | 1081 | {
|
1043 |
| - err = module_register_from_memory_to_place( |
| 1082 | + atmi_status_t err = module_register_from_memory_to_place( |
1044 | 1083 | (void *)image->ImageStart, img_size, get_gpu_place(device_id),
|
1045 | 1084 | on_deserialized_data);
|
1046 | 1085 |
|
1047 | 1086 | check("Module registering", err);
|
1048 | 1087 | if (err != ATMI_STATUS_SUCCESS) {
|
1049 |
| - char GPUName[64] = "--unknown gpu--"; |
1050 |
| - hsa_agent_t agent = DeviceInfo.HSAAgents[device_id]; |
1051 |
| - (void)hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME, |
1052 |
| - (void *)GPUName); |
1053 | 1088 | fprintf(stderr,
|
1054 |
| - "Possible gpu arch mismatch: %s, please check" |
1055 |
| - " compiler: -march=<gpu> flag\n", |
1056 |
| - GPUName); |
| 1089 | + "Possible gpu arch mismatch: device:%s, image:%s please check" |
| 1090 | + " compiler flag: -march=<gpu>\n", |
| 1091 | + DeviceInfo.GPUName[device_id].c_str(), |
| 1092 | + get_elf_mach_gfx_name(elf_e_flags(image))); |
1057 | 1093 | return NULL;
|
1058 | 1094 | }
|
1059 | 1095 | }
|
@@ -1149,8 +1185,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
1149 | 1185 | void *varptr;
|
1150 | 1186 | uint32_t varsize;
|
1151 | 1187 |
|
1152 |
| - err = atmi_interop_hsa_get_symbol_info(get_gpu_mem_place(device_id), |
1153 |
| - e->name, &varptr, &varsize); |
| 1188 | + atmi_status_t err = atmi_interop_hsa_get_symbol_info( |
| 1189 | + get_gpu_mem_place(device_id), e->name, &varptr, &varsize); |
1154 | 1190 |
|
1155 | 1191 | if (err != ATMI_STATUS_SUCCESS) {
|
1156 | 1192 | DP("Loading global '%s' (Failed)\n", e->name);
|
@@ -1192,7 +1228,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
|
1192 | 1228 |
|
1193 | 1229 | atmi_mem_place_t place = get_gpu_mem_place(device_id);
|
1194 | 1230 | uint32_t kernarg_segment_size;
|
1195 |
| - err = atmi_interop_hsa_get_kernel_info( |
| 1231 | + atmi_status_t err = atmi_interop_hsa_get_kernel_info( |
1196 | 1232 | place, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
|
1197 | 1233 | &kernarg_segment_size);
|
1198 | 1234 |
|
|
0 commit comments