Skip to content

Commit 2e39533

Browse files
authored
AMDGPU: Fix broken check prefix and degraded cov4 test coverage (#131757)
1 parent 6f44be9 commit 2e39533

File tree

2 files changed

+116
-9
lines changed

2 files changed

+116
-9
lines changed
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -passes=amdgpu-attributor -o %t.gfx7.bc %s
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=amdgpu-attributor -o %t.gfx8.bc %s
3+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor -o %t.gfx9.bc %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %t.gfx7.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %t.gfx8.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
6+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %t.gfx9.bc | FileCheck --check-prefixes=CHECK,GFX9 %s
7+
8+
; CHECK: addrspacecast_requires_queue_ptr
9+
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
10+
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
11+
12+
; CHECK: is_shared_requires_queue_ptr
13+
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
14+
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
15+
16+
; CHECK: is_private_requires_queue_ptr
17+
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
18+
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
19+
20+
; CHECK: trap_requires_queue_ptr
21+
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
22+
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
23+
24+
; CHECK: amdgcn_queue_ptr_requires_queue_ptr
25+
; CHECK: .amdhsa_user_sgpr_queue_ptr 1
26+
27+
28+
; On gfx8, the queue ptr is required for this addrspacecast.
29+
; CHECK: - .args:
30+
; CHECK-NOT: hidden_queue_ptr
31+
; CHECK-LABEL: .name: addrspacecast_requires_queue_ptr
32+
; Kernel input: casts one private (addrspace 5) and one local (addrspace 3)
; pointer to flat pointers and performs a volatile store through each.
define amdgpu_kernel void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
33+
%flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
34+
%flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
35+
; The stores are volatile, so both casted pointers are actually used.
store volatile i32 1, ptr %flat.private
36+
store volatile i32 2, ptr %flat.local
37+
ret void
38+
}
39+
40+
41+
; CHECK: - .args:
42+
; CHECK-NOT: hidden_shared_base
43+
; CHECK-LABEL: .name: is_shared_requires_queue_ptr
44+
; Kernel input: queries llvm.amdgcn.is.shared on a flat pointer, widens the
; i1 result to i32 and writes it out with a volatile store.
define amdgpu_kernel void @is_shared_requires_queue_ptr(ptr %ptr) {
45+
%is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
46+
%zext = zext i1 %is.shared to i32
47+
; The destination is a poison global pointer; the volatile store is what keeps
; the query result in use.
store volatile i32 %zext, ptr addrspace(1) poison
48+
ret void
49+
}
50+
51+
; CHECK: - .args:
52+
; CHECK-NOT: hidden_shared_base
53+
; CHECK-NOT: hidden_private_base
54+
; CHECK-NOT: hidden_queue_ptr
55+
; CHECK-LABEL: .name: is_private_requires_queue_ptr
56+
; Kernel input: queries llvm.amdgcn.is.private on a flat pointer, widens the
; i1 result to i32 and writes it out with a volatile store.
define amdgpu_kernel void @is_private_requires_queue_ptr(ptr %ptr) {
57+
%is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
58+
%zext = zext i1 %is.private to i32
59+
; The destination is a poison global pointer; the volatile store is what keeps
; the query result in use.
store volatile i32 %zext, ptr addrspace(1) poison
60+
ret void
61+
}
62+
63+
; CHECK: - .args:
64+
65+
; CHECK-NOT: hidden_shared_base
66+
; CHECK-NOT: hidden_private_base
67+
; CHECK-NOT: hidden_queue_ptr
68+
; CHECK-LABEL: .name: trap_requires_queue_ptr
69+
; Kernel input: takes no arguments and only calls llvm.trap, followed by
; unreachable.
define amdgpu_kernel void @trap_requires_queue_ptr() {
70+
call void @llvm.trap()
71+
unreachable
72+
}
73+
74+
; CHECK: - .args:
75+
; CHECK-NOT: hidden_queue_ptr
76+
; CHECK-NOT: hidden_shared_base
77+
; CHECK-NOT: hidden_private_base
78+
; CHECK-LABEL: .name: amdgcn_queue_ptr_requires_queue_ptr
79+
; Kernel input: calls the queue.ptr, implicitarg.ptr, dispatch.ptr and
; dispatch.id intrinsics and uses every result.
define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(ptr addrspace(1) %ptr) {
80+
%queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
81+
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
82+
%dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
83+
%dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
84+
; One volatile byte load through each of the three returned pointers.
%queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
85+
%implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
86+
%dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
87+
; The dispatch id is written to the kernel's global pointer argument.
store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
88+
ret void
89+
}
90+
91+
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
92+
declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
93+
declare i64 @llvm.amdgcn.dispatch.id()
94+
declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
95+
declare i1 @llvm.amdgcn.is.shared(ptr)
96+
declare i1 @llvm.amdgcn.is.private(ptr)
97+
declare void @llvm.trap()
98+
declare void @llvm.debugtrap()
99+
100+
!llvm.module.flags = !{!0}
101+
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}

llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
11
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -passes=amdgpu-attributor -o %t.gfx7.bc %s
22
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=amdgpu-attributor -o %t.gfx8.bc %s
33
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor -o %t.gfx9.bc %s
4-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj < %t.gfx7.bc | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
5-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj < %t.gfx8.bc | llvm-readelf --notes - | FileCheck --check-prefix=CHECK %s
6-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %t.gfx9.bc | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9 %s
7-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %t.gfx7.bc | FileCheck --check-prefix=CHECK %s
8-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %t.gfx8.bc | FileCheck --check-prefix=CHECK %s
9-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %t.gfx9.bc | FileCheck --check-prefixes=CHECK,GFX9 %s
4+
; Check both the object-file notes (via llvm-readelf) and the textual assembly
; output, each on gfx700, gfx803 and gfx900. All six configurations must stay
; enabled; otherwise the GFX9 prefix and the object-file path go untested.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj < %t.gfx7.bc | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj < %t.gfx8.bc | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
6+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %t.gfx9.bc | llvm-readelf --notes - | FileCheck --check-prefixes=CHECK,GFX9 %s
7+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %t.gfx7.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
8+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %t.gfx8.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
9+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %t.gfx9.bc | FileCheck --check-prefixes=CHECK,GFX9 %s
1010

1111

12-
; On gfx8, the queue ptr is required for this addrspacecast.
12+
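; On gfx8, the queue ptr is required for this addrspacecast on cov4. This cov5 test instead expects the hidden_private_base and hidden_shared_base kernel arguments before gfx9.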
1313
; CHECK: - .args:
14-
; PRE-GFX9: hidden_queue_ptr
15-
; GFX9-NOT: hidden_queue_ptr
14+
15+
; PRE-GFX9: .value_kind: hidden_private_base
16+
; PRE-GFX9: .value_kind: hidden_shared_base
17+
18+
; GFX9-NOT: hidden_queue_ptr
19+
; GFX9-NOT: hidden_private_base
20+
; GFX9-NOT: hidden_shared_base
21+
1622
; CHECK-LABEL: .name: addrspacecast_requires_queue_ptr
1723
define amdgpu_kernel void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
1824
%flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr

0 commit comments

Comments
 (0)