|
1 | 1 | ; Test the NVVM reflect pass functionality: verify that reflect calls are replaced with
|
2 | 2 | ; appropriate values based on command-line options. Verify that we can handle custom reflect arguments
|
3 |
| -; that aren't __CUDA_ARCH or __CUDA_FTZ. If that argument is given a value on the command-line, the reflect call should be replaced with that value. |
4 |
| -; Otherwise, the reflect call should be replaced with 0. |
| 3 | +; that aren't __CUDA_ARCH or __CUDA_FTZ. If that argument is given a value on the command-line, |
| 4 | +; the reflect call should be replaced with that value. Otherwise, the reflect call should be replaced with 0. |
| 5 | + |
| 6 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda \ |
| 7 | +; RUN: -nvvm-reflect-add __CUDA_FTZ=1 -nvvm-reflect-add __CUDA_ARCH=350 %s -S \ |
| 8 | +; RUN: | FileCheck %s --check-prefixes=COMMON,FTZ1,ARCH350,CUSTOM-ABSENT |
| 9 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda \ |
| 10 | +; RUN: -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %s -S \ |
| 11 | +; RUN: | FileCheck %s --check-prefixes=COMMON,FTZ0,ARCH520,CUSTOM-ABSENT |
| 12 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda \ |
| 13 | +; RUN: -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 \ |
| 14 | +; RUN: -nvvm-reflect-add __CUSTOM_VALUE=42 %s -S \ |
| 15 | +; RUN: | FileCheck %s --check-prefixes=COMMON,CUSTOM-PRESENT |
| 16 | + |
| 17 | +; To ensure that command-line options override module options, create a copy of this test file |
| 18 | +; with module options appended and rerun some tests. |
5 | 19 |
|
6 |
| -; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=1 -nvvm-reflect-add __CUDA_ARCH=350 %s -S | FileCheck %s --check-prefix=CHECK-FTZ1 --check-prefix=CHECK-ARCH350 --check-prefix=CHECK-CUSTOM-ABSENT |
7 |
| -; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %s -S | FileCheck %s --check-prefix=CHECK-FTZ0 --check-prefix=CHECK-ARCH520 --check-prefix=CHECK-CUSTOM-ABSENT |
8 |
| -; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 -nvvm-reflect-add __CUSTOM_VALUE=42 %s -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PRESENT |
9 |
| - |
10 |
| -; To ensure that command line options override module options, create a copy of this test file with module options appended and rerun some tests. |
11 |
| -; |
12 | 20 | ; RUN: cat %s > %t.options
|
13 | 21 | ; RUN: echo '!llvm.module.flags = !{!0}' >> %t.options
|
14 | 22 | ; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.options
|
15 |
| -; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %t.options -S | FileCheck %s --check-prefix=CHECK-FTZ0 --check-prefix=CHECK-ARCH520 |
| 23 | +; RUN: opt -passes=nvvm-reflect -mtriple=nvptx-nvidia-cuda \ |
| 24 | +; RUN: -nvvm-reflect-add __CUDA_FTZ=0 -nvvm-reflect-add __CUDA_ARCH=520 %t.options -S \ |
| 25 | +; RUN: | FileCheck %s --check-prefixes=COMMON,FTZ0,ARCH520 |
16 | 26 |
|
17 | 27 | declare i32 @__nvvm_reflect(ptr)
|
18 | 28 | @ftz = private unnamed_addr addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
|
19 | 29 | @arch = private unnamed_addr addrspace(1) constant [12 x i8] c"__CUDA_ARCH\00"
|
20 | 30 | @custom = private unnamed_addr addrspace(1) constant [15 x i8] c"__CUSTOM_VALUE\00"
|
21 | 31 |
|
22 |
| -; Test handling of __CUDA_FTZ reflect value |
| 32 | +; COMMON-LABEL: define i32 @test_ftz() |
| 33 | +; FTZ1: ret i32 1 |
| 34 | +; FTZ0: ret i32 0 |
23 | 35 | define i32 @test_ftz() {
|
24 | 36 | %1 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @ftz to ptr))
|
25 | 37 | ret i32 %1
|
26 | 38 | }
|
27 | 39 |
|
28 |
| -; CHECK-FTZ1: define i32 @test_ftz() |
29 |
| -; CHECK-FTZ1: ret i32 1 |
30 |
| -; CHECK-FTZ0: define i32 @test_ftz() |
31 |
| -; CHECK-FTZ0: ret i32 0 |
32 |
| - |
33 |
| -; Test handling of __CUDA_ARCH reflect value |
| 40 | +; COMMON-LABEL: define i32 @test_arch() |
| 41 | +; ARCH350: ret i32 350 |
| 42 | +; ARCH520: ret i32 520 |
34 | 43 | define i32 @test_arch() {
|
35 | 44 | %1 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @arch to ptr))
|
36 | 45 | ret i32 %1
|
37 | 46 | }
|
38 | 47 |
|
39 |
| -; CHECK-ARCH350: define i32 @test_arch() |
40 |
| -; CHECK-ARCH350: ret i32 350 |
41 |
| -; CHECK-ARCH520: define i32 @test_arch() |
42 |
| -; CHECK-ARCH520: ret i32 520 |
43 |
| - |
44 |
| -; Test handling of a custom reflect value that's not built into the pass |
| 48 | +; COMMON-LABEL: define i32 @test_custom() |
| 49 | +; CUSTOM-ABSENT: ret i32 0 |
| 50 | +; CUSTOM-PRESENT: ret i32 42 |
45 | 51 | define i32 @test_custom() {
|
46 | 52 | %1 = call i32 @__nvvm_reflect(ptr addrspacecast (ptr addrspace(1) @custom to ptr))
|
47 | 53 | ret i32 %1
|
48 | 54 | }
|
49 |
| - |
50 |
| -; CHECK-CUSTOM-ABSENT: define i32 @test_custom() |
51 |
| -; CHECK-CUSTOM-ABSENT: ret i32 0 |
52 |
| -; CHECK-CUSTOM-PRESENT: define i32 @test_custom() |
53 |
| -; CHECK-CUSTOM-PRESENT: ret i32 42 |
|
0 commit comments