|
; 32 storage locations is sufficient for all current-generation NVIDIA GPUs
|
2 |
; Per-type scratch arrays used by the group collective (scan/reduce)
; algorithms. 32 storage locations is sufficient for all
; current-generation NVIDIA GPUs (see header comment above).
; addrspace(3) is CUDA shared memory; contents are per-block transient,
; hence `poison` initializers.
@__clc__group_scratch_i1 = internal addrspace(3) global [32 x i1] poison, align 1
@__clc__group_scratch_i8 = internal addrspace(3) global [32 x i8] poison, align 1
@__clc__group_scratch_i16 = internal addrspace(3) global [32 x i16] poison, align 2
@__clc__group_scratch_i32 = internal addrspace(3) global [32 x i32] poison, align 4
@__clc__group_scratch_i64 = internal addrspace(3) global [32 x i64] poison, align 8
; NOTE(review): the NVPTX datalayout declares i128:128, i.e. 16-byte natural
; alignment, so align 16 here rather than under-aligning to 8. Raising
; alignment is always safe for existing users of this symbol.
@__clc__group_scratch_i128 = internal addrspace(3) global [32 x i128] poison, align 16
6 | 8 |
|
7 |
; Returns the shared-memory scratch area used for `bool` group operations;
; backed by the dedicated i1 array.
define ptr addrspace(3) @__clc__get_group_scratch_bool() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i1
}
|
13 | 13 |
|
14 |
; Returns the shared-memory scratch area used for `char` group operations;
; backed by the dedicated i8 array.
define ptr addrspace(3) @__clc__get_group_scratch_char() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i8
}
|
20 | 18 |
|
21 |
; Returns the shared-memory scratch area used for `short` group operations;
; backed by the dedicated i16 array.
define ptr addrspace(3) @__clc__get_group_scratch_short() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i16
}
|
27 | 23 |
|
28 |
; Returns the shared-memory scratch area used for `int` group operations;
; backed by the dedicated i32 array.
define ptr addrspace(3) @__clc__get_group_scratch_int() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i32
}
|
34 | 28 |
|
35 |
; Returns the shared-memory scratch area used for `long` group operations;
; backed by the dedicated i64 array.
define ptr addrspace(3) @__clc__get_group_scratch_long() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i64
}
|
41 | 33 |
|
42 |
; Returns the shared-memory scratch area used for `half` group operations.
; half is 16 bits wide, so it shares the i16 array; with opaque pointers no
; bitcast is needed.
define ptr addrspace(3) @__clc__get_group_scratch_half() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i16
}
|
48 | 38 |
|
49 |
; Returns the shared-memory scratch area used for `float` group operations.
; float is 32 bits wide, so it shares the i32 array.
define ptr addrspace(3) @__clc__get_group_scratch_float() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i32
}
|
55 | 43 |
|
56 |
; Returns the shared-memory scratch area used for `double` group operations.
; double is 64 bits wide, so it shares the i64 array.
define ptr addrspace(3) @__clc__get_group_scratch_double() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i64
}
|
62 | 48 |
|
63 |
; Returns the shared-memory scratch area for complex-half group operations.
; A complex half is two 16-bit halves (32 bits total), so it fits one i32
; slot per location; the former %complex_* named types and bitcasts are
; unnecessary with opaque pointers.
define ptr addrspace(3) @__clc__get_group_scratch_complex_half() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i32
}
|
84 | 53 |
|
85 |
; Returns the shared-memory scratch area for complex-float group operations.
; A complex float is two 32-bit floats (64 bits total), so it shares the
; i64 array.
define ptr addrspace(3) @__clc__get_group_scratch_complex_float() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i64
}
|
91 | 58 |
|
92 |
; Returns the shared-memory scratch area for complex-double group operations.
; A complex double is two 64-bit doubles (128 bits total), so it uses the
; i128 array.
define ptr addrspace(3) @__clc__get_group_scratch_complex_double() nounwind alwaysinline {
entry:
  ret ptr addrspace(3) @__clc__group_scratch_i128
}
|