Skip to content

Commit 666ef0d

Browse files
author
git apple-llvm automerger
committed
Merge commit '67a8af9427d9' from apple/main into swift/next
2 parents eaf58ca + 67a8af9 commit 666ef0d

File tree

4 files changed

+120
-22
lines changed

4 files changed

+120
-22
lines changed

openmp/libomptarget/plugins/amdgpu/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ add_library(omptarget.rtl.amdgpu SHARED
5757
impl/atmi.cpp
5858
impl/atmi_interop_hsa.cpp
5959
impl/data.cpp
60+
impl/get_elf_mach_gfx_name.cpp
6061
impl/machine.cpp
6162
impl/system.cpp
6263
impl/utils.cpp
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#include "get_elf_mach_gfx_name.h"
2+
3+
// This header conflicts with the system elf.h (macros vs enums of the same
4+
// identifier) and contains more up to date values for the enum checked here.
5+
// rtl.cpp uses the system elf.h.
6+
#include "llvm/BinaryFormat/ELF.h"
7+
8+
const char *get_elf_mach_gfx_name(uint32_t EFlags) {
9+
using namespace llvm::ELF;
10+
uint32_t Gfx = (EFlags & EF_AMDGPU_MACH);
11+
switch (Gfx) {
12+
case EF_AMDGPU_MACH_AMDGCN_GFX801:
13+
return "gfx801";
14+
case EF_AMDGPU_MACH_AMDGCN_GFX802:
15+
return "gfx802";
16+
case EF_AMDGPU_MACH_AMDGCN_GFX803:
17+
return "gfx803";
18+
case EF_AMDGPU_MACH_AMDGCN_GFX805:
19+
return "gfx805";
20+
case EF_AMDGPU_MACH_AMDGCN_GFX810:
21+
return "gfx810";
22+
case EF_AMDGPU_MACH_AMDGCN_GFX900:
23+
return "gfx900";
24+
case EF_AMDGPU_MACH_AMDGCN_GFX902:
25+
return "gfx902";
26+
case EF_AMDGPU_MACH_AMDGCN_GFX904:
27+
return "gfx904";
28+
case EF_AMDGPU_MACH_AMDGCN_GFX906:
29+
return "gfx906";
30+
case EF_AMDGPU_MACH_AMDGCN_GFX908:
31+
return "gfx908";
32+
case EF_AMDGPU_MACH_AMDGCN_GFX909:
33+
return "gfx909";
34+
case EF_AMDGPU_MACH_AMDGCN_GFX90C:
35+
return "gfx90c";
36+
case EF_AMDGPU_MACH_AMDGCN_GFX1010:
37+
return "gfx1010";
38+
case EF_AMDGPU_MACH_AMDGCN_GFX1011:
39+
return "gfx1011";
40+
case EF_AMDGPU_MACH_AMDGCN_GFX1012:
41+
return "gfx1012";
42+
case EF_AMDGPU_MACH_AMDGCN_GFX1030:
43+
return "gfx1030";
44+
case EF_AMDGPU_MACH_AMDGCN_GFX1031:
45+
return "gfx1031";
46+
case EF_AMDGPU_MACH_AMDGCN_GFX1032:
47+
return "gfx1032";
48+
case EF_AMDGPU_MACH_AMDGCN_GFX1033:
49+
return "gfx1033";
50+
default:
51+
return "--unknown gfx";
52+
}
53+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#ifndef GET_ELF_MACH_GFX_NAME_H_INCLUDED
2+
#define GET_ELF_MACH_GFX_NAME_H_INCLUDED
3+
4+
#include <stdint.h>
5+
6+
/// Return the gfx architecture name (for example "gfx906") for the AMDGPU
/// machine bits of an ELF header's e_flags value. Returns "--unknown gfx"
/// when the machine value is not recognised.
const char *get_elf_mach_gfx_name(uint32_t EFlags);
7+
8+
#endif

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "internal.h"
3737

3838
#include "Debug.h"
39+
#include "get_elf_mach_gfx_name.h"
3940
#include "omptargetplugin.h"
4041

4142
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
@@ -92,14 +93,6 @@ uint32_t TgtStackItemSize = 0;
9293

9394
#include "../../common/elf_common.c"
9495

95-
/// Check whether \p image passes elf_check_machine for the AMDGCN machine ID.
/// Emits a DP message when the check does not succeed.
static bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
  // 224 is the ELF machine ID this plugin accepts for AMDGCN images.
  const uint16_t ExpectedMachine = 224;
  const int32_t Matched = elf_check_machine(image, ExpectedMachine);
  if (Matched)
    return true;

  DP("Supported machine ID not found\n");
  return false;
}
10396

10497
/// Keep entries table per device
10598
struct FuncOrGblEntryTy {
@@ -319,6 +312,7 @@ class RTLDeviceInfoTy {
319312
std::vector<int> GroupsPerDevice;
320313
std::vector<int> ThreadsPerGroup;
321314
std::vector<int> WarpSize;
315+
std::vector<std::string> GPUName;
322316

323317
// OpenMP properties
324318
std::vector<int> NumTeams;
@@ -472,6 +466,7 @@ class RTLDeviceInfoTy {
472466
FuncGblEntries.resize(NumberOfDevices);
473467
ThreadsPerGroup.resize(NumberOfDevices);
474468
ComputeUnits.resize(NumberOfDevices);
469+
GPUName.resize(NumberOfDevices);
475470
GroupsPerDevice.resize(NumberOfDevices);
476471
WarpSize.resize(NumberOfDevices);
477472
NumTeams.resize(NumberOfDevices);
@@ -642,6 +637,40 @@ void finiAsyncInfoPtr(__tgt_async_info *async_info_ptr) {
642637
assert(async_info_ptr->Queue);
643638
async_info_ptr->Queue = 0;
644639
}
640+
641+
/// Check whether \p image passes elf_check_machine for EM_AMDGPU.
/// Emits a DP message when the check does not succeed.
bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
  const uint16_t ExpectedMachine = EM_AMDGPU;
  const int32_t Matched = elf_check_machine(image, ExpectedMachine);
  if (Matched)
    return true;

  DP("Supported machine ID not found\n");
  return false;
}
649+
650+
/// Read the e_flags field from the 64-bit ELF header of \p image.
///
/// \param image  device image; the bytes between ImageStart and ImageEnd are
///               handed to libelf as an in-memory ELF object.
/// \returns the header's e_flags, or 0 when libelf cannot be initialised,
///          cannot open the image, or the image has no 64-bit ELF header.
uint32_t elf_e_flags(__tgt_device_image *image) {
  // libelf requires elf_version() to be called before any other libelf entry
  // point. Do it here so this helper does not depend on some other function
  // having touched libelf first.
  if (elf_version(EV_CURRENT) == EV_NONE) {
    DP("Incompatible ELF library!\n");
    return 0;
  }

  char *img_begin = (char *)image->ImageStart;
  size_t img_size = (char *)image->ImageEnd - img_begin;

  Elf *e = elf_memory(img_begin, img_size);
  if (!e) {
    DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
    return 0;
  }

  Elf64_Ehdr *eh64 = elf64_getehdr(e);
  if (!eh64) {
    // Report what actually failed: the header lookup, not a machine ID check.
    DP("Unable to get 64-bit ELF header: %s!\n", elf_errmsg(-1));
    elf_end(e);
    return 0;
  }

  // Copy the value out before releasing the handle that owns the header.
  uint32_t Flags = eh64->e_flags;
  elf_end(e);

  DP("ELF Flags: 0x%x\n", Flags);
  return Flags;
}
645674
} // namespace
646675

647676
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) {
@@ -676,9 +705,20 @@ int32_t __tgt_rtl_init_device(int device_id) {
676705
DeviceInfo.ComputeUnits[device_id] = compute_units;
677706
DP("Using %d compute unis per grid\n", DeviceInfo.ComputeUnits[device_id]);
678707
}
708+
709+
char GetInfoName[64]; // 64 max size returned by get info
710+
err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
711+
(void *) GetInfoName);
712+
if (err)
713+
DeviceInfo.GPUName[device_id] = "--unknown gpu--";
714+
else {
715+
DeviceInfo.GPUName[device_id] = GetInfoName;
716+
}
717+
679718
if (print_kernel_trace == 4)
680-
fprintf(stderr, "Device#%-2d CU's: %2d\n", device_id,
681-
DeviceInfo.ComputeUnits[device_id]);
719+
fprintf(stderr, "Device#%-2d CU's: %2d %s\n", device_id,
720+
DeviceInfo.ComputeUnits[device_id],
721+
DeviceInfo.GPUName[device_id].c_str());
682722

683723
// Query attributes to determine number of threads/block and blocks/grid.
684724
uint16_t workgroup_max_dim[3];
@@ -1038,22 +1078,18 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
10381078
return ATMI_STATUS_SUCCESS;
10391079
};
10401080

1041-
atmi_status_t err;
10421081
{
1043-
err = module_register_from_memory_to_place(
1082+
atmi_status_t err = module_register_from_memory_to_place(
10441083
(void *)image->ImageStart, img_size, get_gpu_place(device_id),
10451084
on_deserialized_data);
10461085

10471086
check("Module registering", err);
10481087
if (err != ATMI_STATUS_SUCCESS) {
1049-
char GPUName[64] = "--unknown gpu--";
1050-
hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
1051-
(void)hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
1052-
(void *)GPUName);
10531088
fprintf(stderr,
1054-
"Possible gpu arch mismatch: %s, please check"
1055-
" compiler: -march=<gpu> flag\n",
1056-
GPUName);
1089+
"Possible gpu arch mismatch: device:%s, image:%s please check"
1090+
" compiler flag: -march=<gpu>\n",
1091+
DeviceInfo.GPUName[device_id].c_str(),
1092+
get_elf_mach_gfx_name(elf_e_flags(image)));
10571093
return NULL;
10581094
}
10591095
}
@@ -1149,8 +1185,8 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
11491185
void *varptr;
11501186
uint32_t varsize;
11511187

1152-
err = atmi_interop_hsa_get_symbol_info(get_gpu_mem_place(device_id),
1153-
e->name, &varptr, &varsize);
1188+
atmi_status_t err = atmi_interop_hsa_get_symbol_info(
1189+
get_gpu_mem_place(device_id), e->name, &varptr, &varsize);
11541190

11551191
if (err != ATMI_STATUS_SUCCESS) {
11561192
DP("Loading global '%s' (Failed)\n", e->name);
@@ -1192,7 +1228,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
11921228

11931229
atmi_mem_place_t place = get_gpu_mem_place(device_id);
11941230
uint32_t kernarg_segment_size;
1195-
err = atmi_interop_hsa_get_kernel_info(
1231+
atmi_status_t err = atmi_interop_hsa_get_kernel_info(
11961232
place, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
11971233
&kernarg_segment_size);
11981234

0 commit comments

Comments
 (0)