Skip to content

Commit a8e3f61

Browse files
authored
[SYCL][E2E] Enable llvm-lit to accept device architecture (#18197)
Allow for selection of specific devices via `level_zero:arch-intel_gpu_bmg_g21`, which accepts the device architectures listed under `sycl-ls --verbose`. On machines with multiple GPUs, the device selection style of `backend:*device type*`, such as `level_zero:gpu`, can cause aspects to be incorrectly marked as unsupported in the tests run through `llvm-lit` using `// REQUIRES: aspect-`. In addition, only the first device listed via `sycl-ls` under the label is run. This occurs because, with the device selection style of `level_zero:gpu`, there is currently no way to choose a specific device that falls under that category, for example a Battlemage GPU rather than an Intel iGPU. `sycl/test-e2e/lit.cfg.py` takes all available device aspects and marks as supported only those aspects that are available on all the devices. This is problematic for aspects such as `ext_oneapi_bindless_images`, which is supported on Battlemage but not on Intel iGPUs. Even if Battlemage, which supports bindless images, is used to run all the tests because it comes first in the device list shown under `level_zero:gpu`, tests requiring that aspect will not run.
1 parent a0880df commit a8e3f61

File tree

2 files changed

+108
-35
lines changed

2 files changed

+108
-35
lines changed

sycl/test-e2e/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,13 @@ separated from comma-separated list of target devices with colon. Example:
177177
-DSYCL_TEST_E2E_TARGETS="opencl:cpu;level_zero:gpu;cuda:gpu;hip:gpu"
178178
```
179179
180+
In addition, a device architecture as shown by `sycl-ls --verbose` is accepted with the
181+
"arch-" prefix. Example:
182+
183+
```bash
184+
-DSYCL_TEST_E2E_TARGETS="cuda:arch-nvidia_gpu_sm_61;level_zero:arch-intel_gpu_bmg_g21"
185+
```
186+
180187
***OpenCL_LIBRARY*** - path to OpenCL ICD loader library. OpenCL
181188
interoperability tests require OpenCL ICD loader to be linked with. For such
182189
tests OpenCL ICD loader library should be installed in the system or available

sycl/test-e2e/lit.cfg.py

Lines changed: 101 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@
191191
# Disable the UR logger callback sink during test runs as output to SYCL RT can interfere with some tests relying on standard input/output
192192
llvm_config.with_environment("UR_LOG_CALLBACK", "disabled")
193193

194+
194195
# Temporarily modify environment to be the same that we use when running tests
195196
class test_env:
196197
def __enter__(self):
@@ -276,6 +277,7 @@ def quote_path(path):
276277
return f'"{path}"'
277278
return shlex.quote(path)
278279

280+
279281
# Call the function to perform the check and add the feature
280282
check_igc_tag_and_add_feature()
281283

@@ -295,6 +297,7 @@ def quote_path(path):
295297
if lit_config.params.get("spirv-backend", False):
296298
config.available_features.add("spirv-backend")
297299

300+
298301
# Use this to make sure that any dynamic checks below are done in the build
299302
# directory and not where the sources are located. This is important for the
300303
# in-tree configuration (as opposite to the standalone one).
@@ -634,22 +637,25 @@ def open_check_file(file_name):
634637
if "amdgcn" in sp[1]:
635638
config.sycl_build_targets.add("target-amd")
636639

637-
cmd = "{} {}".format(config.run_launcher, sycl_ls) if config.run_launcher else sycl_ls
638-
sycl_ls_output = subprocess.check_output(cmd, text=True, shell=True)
639-
640-
# In contrast to `cpu` feature this is a compile-time feature, which is needed
641-
# to check if we can build cpu AOT tests.
642-
if "opencl:cpu" in sycl_ls_output:
643-
config.available_features.add("opencl-cpu-rt")
644-
645-
if len(config.sycl_devices) == 1 and config.sycl_devices[0] == "all":
646-
devices = set()
647-
for line in sycl_ls_output.splitlines():
648-
if not line.startswith("["):
649-
continue
650-
(backend, device) = line[1:].split("]")[0].split(":")
651-
devices.add("{}:{}".format(backend, device))
652-
config.sycl_devices = list(devices)
640+
with test_env():
641+
cmd = (
642+
"{} {}".format(config.run_launcher, sycl_ls) if config.run_launcher else sycl_ls
643+
)
644+
sycl_ls_output = subprocess.check_output(cmd, text=True, shell=True)
645+
646+
# In contrast to `cpu` feature this is a compile-time feature, which is needed
647+
# to check if we can build cpu AOT tests.
648+
if "opencl:cpu" in sycl_ls_output:
649+
config.available_features.add("opencl-cpu-rt")
650+
651+
if len(config.sycl_devices) == 1 and config.sycl_devices[0] == "all":
652+
devices = set()
653+
for line in sycl_ls_output.splitlines():
654+
if not line.startswith("["):
655+
continue
656+
(backend, device) = line[1:].split("]")[0].split(":")
657+
devices.add("{}:{}".format(backend, device))
658+
config.sycl_devices = list(devices)
653659

654660
if len(config.sycl_devices) > 1:
655661
lit_config.note(
@@ -670,7 +676,11 @@ def remove_level_zero_suffix(devices):
670676
}
671677
for d in remove_level_zero_suffix(config.sycl_devices):
672678
be, dev = d.split(":")
673-
if be not in available_devices or dev not in available_devices[be]:
679+
# Verify platform
680+
if be not in available_devices:
681+
lit_config.error("Unsupported device {}".format(d))
682+
# Verify device from available_devices or accept if contains "arch-"
683+
if dev not in available_devices[be] and not "arch-" in dev:
674684
lit_config.error("Unsupported device {}".format(d))
675685

676686
if "cuda:gpu" in config.sycl_devices:
@@ -828,6 +838,79 @@ def remove_level_zero_suffix(devices):
828838
if config.test_mode != "build-only":
829839
config.sycl_build_targets = set()
830840

841+
842+
def get_sycl_ls_verbose(sycl_device, env):
    """Run `sycl-ls --verbose` with environment `env` and return its stdout lines.

    `sycl_device` is used only to label the fatal error that is reported when
    the command exits with a non-zero status.
    """
    with test_env():
        # With ONEAPI_DEVICE_SELECTOR set, sycl-ls emits warnings that could
        # make a user believe their test run is broken. They are only a side
        # effect of our own filtering, so the output is captured and shown
        # solely when the command fails.
        try:
            # The launcher slot is left empty when no run launcher is set.
            listing = subprocess.run(
                f"{config.run_launcher or ''} {sycl_ls} --verbose",
                env=env,
                text=True,
                shell=True,
                capture_output=True,
            )
            listing.check_returncode()
        except subprocess.CalledProcessError as failure:
            # Including the exception exposes path resolution errors, system
            # permission errors and similar problems.
            lit_config.fatal(
                f"Cannot find devices under {sycl_device}\n"
                f"{failure}\n"
                f"stdout:{listing.stdout}\n"
                f"stderr:{listing.stderr}\n"
            )
        return listing.stdout.splitlines()
864+
865+
866+
# A device filter such as level_zero:gpu can have multiple devices under it and
# the order is not guaranteed. The aspects enabled are also restricted to what
# is supported on all devices under the label. It is possible for level_zero:gpu
# and level_zero:0 to select different devices on different machines with the
# same hardware. It is not currently possible to pass the device architecture to
# ONEAPI_DEVICE_SELECTOR. Instead, if "BACKEND:arch-DEVICE_ARCH" is provided to
# "sycl_devices", giving the desired device architecture, select a device that
# matches that architecture using the backend:device-num device selection
# scheme.
arch_prefix = "arch-"
# Architectures reported by `sycl-ls --verbose` for each backend, in listing
# order. Cached so that several arch requests on one backend query it once.
detected_architectures_by_backend = {}
filtered_sycl_devices = []
for sycl_device in remove_level_zero_suffix(config.sycl_devices):
    backend, device_arch = sycl_device.split(":", 1)

    # Only a leading "arch-" marks an architecture request; every other
    # device string is passed through unchanged.
    if not device_arch.startswith(arch_prefix):
        filtered_sycl_devices.append(sycl_device)
        continue

    backend_selector = backend + ":*"
    if backend not in detected_architectures_by_backend:
        env = copy.copy(llvm_config.config.environment)
        # Find all available devices under the backend
        env["ONEAPI_DEVICE_SELECTOR"] = backend_selector
        detected_architectures_by_backend[backend] = [
            line.strip().split(":", 1)[1].strip()
            for line in get_sycl_ls_verbose(backend_selector, env)
            if re.match(r" *Architecture:", line)
        ]
    detected_architectures = detected_architectures_by_backend[backend]

    # Strip the prefix only; str.replace would also remove any later
    # occurrence of "arch-" inside the architecture name.
    device = device_arch[len(arch_prefix):]

    if device in detected_architectures:
        # The position of the first match in the listing is used as the
        # device number of the backend:device-num selector.
        device_num = detected_architectures.index(device)
        filtered_sycl_devices.append(backend + ":" + str(device_num))
    else:
        lit_config.warning(
            "Couldn't find device with architecture {}"
            " under {} device selector! Skipping device "
            "{}".format(device, backend_selector, sycl_device)
        )

# In build-only mode an empty device list is not reported as an error.
if not filtered_sycl_devices and config.test_mode != "build-only":
    lit_config.error(
        "No sycl devices selected! Check your device architecture filters."
    )

config.sycl_devices = filtered_sycl_devices
913+
831914
for sycl_device in remove_level_zero_suffix(config.sycl_devices):
832915
be, dev = sycl_device.split(":")
833916
config.available_features.add("any-device-is-" + dev)
@@ -856,31 +939,14 @@ def remove_level_zero_suffix(devices):
856939
env["ONEAPI_DEVICE_SELECTOR"] = sycl_device
857940
if sycl_device.startswith("cuda:"):
858941
env["SYCL_UR_CUDA_ENABLE_IMAGE_SUPPORT"] = "1"
859-
# When using the ONEAPI_DEVICE_SELECTOR environment variable, sycl-ls
860-
# prints warnings that might derail a user thinking something is wrong
861-
# with their test run. It's just us filtering here, so silence them unless
862-
# we get an exit status.
863-
try:
864-
cmd = "{} {} --verbose".format(config.run_launcher or "", sycl_ls)
865-
sp = subprocess.run(cmd, env=env, text=True, shell=True, capture_output=True)
866-
sp.check_returncode()
867-
except subprocess.CalledProcessError as e:
868-
# capturing e allows us to see path resolution errors / system
869-
# permissions errors etc
870-
lit_config.fatal(
871-
f"Cannot list device aspects for {sycl_device}\n"
872-
f"{e}\n"
873-
f"stdout:{sp.stdout}\n"
874-
f"stderr:{sp.stderr}\n"
875-
)
876942

877943
dev_aspects = []
878944
dev_sg_sizes = []
879945
architectures = set()
880946
# See format.py's parse_min_intel_driver_req for explanation.
881947
is_intel_driver = False
882948
intel_driver_ver = {}
883-
for line in sp.stdout.splitlines():
949+
for line in get_sycl_ls_verbose(sycl_device, env):
884950
if re.match(r" *Vendor *: Intel\(R\) Corporation", line):
885951
is_intel_driver = True
886952
if re.match(r" *Driver *:", line):

0 commit comments

Comments
 (0)