38
38
// AMDGPU-NEXT: [[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR7]]
39
39
// AMDGPU-NEXT: call void @__gpu_sync_threads() #[[ATTR7]]
40
40
// AMDGPU-NEXT: call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR7]]
41
- // AMDGPU-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1) #[[ATTR7]]
41
+ // AMDGPU-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR7]]
42
42
// AMDGPU-NEXT: [[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR7]]
43
43
// AMDGPU-NEXT: [[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR7]]
44
44
// AMDGPU-NEXT: call void @__gpu_exit() #[[ATTR8:[0-9]+]]
70
70
// NVPTX-NEXT: [[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR6]]
71
71
// NVPTX-NEXT: call void @__gpu_sync_threads() #[[ATTR6]]
72
72
// NVPTX-NEXT: call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR6]]
73
- // NVPTX-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1) #[[ATTR6]]
73
+ // NVPTX-NEXT: [[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR6]]
74
74
// NVPTX-NEXT: [[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR6]]
75
75
// NVPTX-NEXT: [[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR6]]
76
76
// NVPTX-NEXT: call void @__gpu_exit() #[[ATTR7:[0-9]+]]
@@ -90,6 +90,68 @@ __gpu_kernel void foo() {
90
90
__gpu_num_threads_z ();
91
91
__gpu_num_threads (0 );
92
92
__gpu_thread_id_x ();
93
+ // AMDGPU-LABEL: define internal i32 @__gpu_thread_id(
94
+ // AMDGPU-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
95
+ // AMDGPU-NEXT: [[ENTRY:.*:]]
96
+ // AMDGPU-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
97
+ // AMDGPU-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
98
+ // AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
99
+ // AMDGPU-NEXT: [[__DIM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__DIM_ADDR]] to ptr
100
+ // AMDGPU-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR_ASCAST]], align 4
101
+ // AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR_ASCAST]], align 4
102
+ // AMDGPU-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
103
+ // AMDGPU-NEXT: i32 0, label %[[SW_BB:.*]]
104
+ // AMDGPU-NEXT: i32 1, label %[[SW_BB1:.*]]
105
+ // AMDGPU-NEXT: i32 2, label %[[SW_BB3:.*]]
106
+ // AMDGPU-NEXT: ]
107
+ // AMDGPU: [[SW_BB]]:
108
+ // AMDGPU-NEXT: [[CALL:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR7]]
109
+ // AMDGPU-NEXT: store i32 [[CALL]], ptr [[RETVAL_ASCAST]], align 4
110
+ // AMDGPU-NEXT: br label %[[RETURN:.*]]
111
+ // AMDGPU: [[SW_BB1]]:
112
+ // AMDGPU-NEXT: [[CALL2:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR7]]
113
+ // AMDGPU-NEXT: store i32 [[CALL2]], ptr [[RETVAL_ASCAST]], align 4
114
+ // AMDGPU-NEXT: br label %[[RETURN]]
115
+ // AMDGPU: [[SW_BB3]]:
116
+ // AMDGPU-NEXT: [[CALL4:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR7]]
117
+ // AMDGPU-NEXT: store i32 [[CALL4]], ptr [[RETVAL_ASCAST]], align 4
118
+ // AMDGPU-NEXT: br label %[[RETURN]]
119
+ // AMDGPU: [[SW_DEFAULT]]:
120
+ // AMDGPU-NEXT: unreachable
121
+ // AMDGPU: [[RETURN]]:
122
+ // AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL_ASCAST]], align 4
123
+ // AMDGPU-NEXT: ret i32 [[TMP1]]
124
+ //
125
+ // NVPTX-LABEL: define internal i32 @__gpu_thread_id(
126
+ // NVPTX-SAME: i32 noundef [[__DIM:%.*]]) #[[ATTR0]] {
127
+ // NVPTX-NEXT: [[ENTRY:.*:]]
128
+ // NVPTX-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
129
+ // NVPTX-NEXT: [[__DIM_ADDR:%.*]] = alloca i32, align 4
130
+ // NVPTX-NEXT: store i32 [[__DIM]], ptr [[__DIM_ADDR]], align 4
131
+ // NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[__DIM_ADDR]], align 4
132
+ // NVPTX-NEXT: switch i32 [[TMP0]], label %[[SW_DEFAULT:.*]] [
133
+ // NVPTX-NEXT: i32 0, label %[[SW_BB:.*]]
134
+ // NVPTX-NEXT: i32 1, label %[[SW_BB1:.*]]
135
+ // NVPTX-NEXT: i32 2, label %[[SW_BB3:.*]]
136
+ // NVPTX-NEXT: ]
137
+ // NVPTX: [[SW_BB]]:
138
+ // NVPTX-NEXT: [[CALL:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR6]]
139
+ // NVPTX-NEXT: store i32 [[CALL]], ptr [[RETVAL]], align 4
140
+ // NVPTX-NEXT: br label %[[RETURN:.*]]
141
+ // NVPTX: [[SW_BB1]]:
142
+ // NVPTX-NEXT: [[CALL2:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR6]]
143
+ // NVPTX-NEXT: store i32 [[CALL2]], ptr [[RETVAL]], align 4
144
+ // NVPTX-NEXT: br label %[[RETURN]]
145
+ // NVPTX: [[SW_BB3]]:
146
+ // NVPTX-NEXT: [[CALL4:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR6]]
147
+ // NVPTX-NEXT: store i32 [[CALL4]], ptr [[RETVAL]], align 4
148
+ // NVPTX-NEXT: br label %[[RETURN]]
149
+ // NVPTX: [[SW_DEFAULT]]:
150
+ // NVPTX-NEXT: unreachable
151
+ // NVPTX: [[RETURN]]:
152
+ // NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4
153
+ // NVPTX-NEXT: ret i32 [[TMP1]]
154
+ //
93
155
__gpu_thread_id_y ();
94
156
__gpu_thread_id_z ();
95
157
__gpu_thread_id (0 );
@@ -100,7 +162,7 @@ __gpu_kernel void foo() {
100
162
__gpu_ballot (-1 , 1 );
101
163
__gpu_sync_threads ();
102
164
__gpu_sync_lane (-1 );
103
- __gpu_shuffle_idx_u32 (-1 , -1 , -1 );
165
+ __gpu_shuffle_idx_u32 (-1 , -1 , -1 , 0 );
104
166
__gpu_first_lane_id (-1 );
105
167
__gpu_is_first_in_lane (-1 );
106
168
__gpu_exit ();
0 commit comments