Skip to content

Commit 8c7188a

Browse files
authored
Update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE (#98968)
Dead calls to these intrinsics were not being deleted at the IR level because they were not marked `IntrWillReturn`, even though they were being deleted later when building the SelectionDAG (SDAG). This commit marks them `IntrWillReturn` and adds a test confirming that dead calls are now deleted by `opt`.
1 parent 2e3ee31 commit 8c7188a

File tree

2 files changed

+193
-6
lines changed

2 files changed

+193
-6
lines changed

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count :
15291529
// pointer's alignment.
15301530
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
15311531
[llvm_anyptr_ty, llvm_i32_ty],
1532-
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
1532+
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
15331533
"llvm.nvvm.ldu.global.i">;
15341534
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
15351535
[llvm_anyptr_ty, llvm_i32_ty],
1536-
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
1536+
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
15371537
"llvm.nvvm.ldu.global.f">;
15381538
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
15391539
[llvm_anyptr_ty, llvm_i32_ty],
1540-
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
1540+
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
15411541
"llvm.nvvm.ldu.global.p">;
15421542

15431543
// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
15441544
// pointer's alignment.
15451545
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
15461546
[llvm_anyptr_ty, llvm_i32_ty],
1547-
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
1547+
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
15481548
"llvm.nvvm.ldg.global.i">;
15491549
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
15501550
[llvm_anyptr_ty, llvm_i32_ty],
1551-
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
1551+
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
15521552
"llvm.nvvm.ldg.global.f">;
15531553
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
15541554
[llvm_anyptr_ty, llvm_i32_ty],
1555-
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
1555+
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
15561556
"llvm.nvvm.ldg.global.p">;
15571557

15581558
// Use for generic pointers
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S < %s -passes=dce | FileCheck %s
3+
4+
; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing
5+
; them from being eliminated at IR level when dead.
6+
7+
declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
8+
declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
9+
declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
10+
declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
11+
declare ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
12+
declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
13+
declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
14+
declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
15+
declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
16+
17+
declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align)
18+
declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align)
19+
declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align)
20+
declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align)
21+
declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 %align)
22+
declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align)
23+
declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align)
24+
declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align)
25+
declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align)
26+
27+
define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) {
28+
; CHECK-LABEL: define void @test_ldu_i8_dead(
29+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
30+
; CHECK-NEXT: ret void
31+
;
32+
%val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
33+
ret void
34+
}
35+
36+
define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) {
37+
; CHECK-LABEL: define void @test_ldu_i16_dead(
38+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
39+
; CHECK-NEXT: ret void
40+
;
41+
%val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
42+
ret void
43+
}
44+
45+
define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) {
46+
; CHECK-LABEL: define void @test_ldu_i32_dead(
47+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
48+
; CHECK-NEXT: ret void
49+
;
50+
%val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
51+
ret void
52+
}
53+
54+
define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) {
55+
; CHECK-LABEL: define void @test_ldu_i64_dead(
56+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
57+
; CHECK-NEXT: ret void
58+
;
59+
%val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
60+
ret void
61+
}
62+
63+
define void @test_ldu_p_dead(ptr addrspace(1) %ptr) {
64+
; CHECK-LABEL: define void @test_ldu_p_dead(
65+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
66+
; CHECK-NEXT: ret void
67+
;
68+
%val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8)
69+
ret void
70+
}
71+
72+
define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) {
73+
; CHECK-LABEL: define void @test_ldu_f32_dead(
74+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
75+
; CHECK-NEXT: ret void
76+
;
77+
%val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
78+
ret void
79+
}
80+
81+
define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) {
82+
; CHECK-LABEL: define void @test_ldu_f64_dead(
83+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
84+
; CHECK-NEXT: ret void
85+
;
86+
%val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
87+
ret void
88+
}
89+
90+
define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) {
91+
; CHECK-LABEL: define void @test_ldu_f16_dead(
92+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
93+
; CHECK-NEXT: ret void
94+
;
95+
%val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
96+
ret void
97+
}
98+
99+
define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) {
100+
; CHECK-LABEL: define void @test_ldu_v2f16_dead(
101+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
102+
; CHECK-NEXT: ret void
103+
;
104+
%val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
105+
ret void
106+
}
107+
108+
define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) {
109+
; CHECK-LABEL: define void @test_ldg_i8_dead(
110+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
111+
; CHECK-NEXT: ret void
112+
;
113+
%val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4)
114+
ret void
115+
}
116+
117+
define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) {
118+
; CHECK-LABEL: define void @test_ldg_i16_dead(
119+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
120+
; CHECK-NEXT: ret void
121+
;
122+
%val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2)
123+
ret void
124+
}
125+
126+
define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) {
127+
; CHECK-LABEL: define void @test_ldg_i32_dead(
128+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
129+
; CHECK-NEXT: ret void
130+
;
131+
%val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4)
132+
ret void
133+
}
134+
135+
define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) {
136+
; CHECK-LABEL: define void @test_ldg_i64_dead(
137+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
138+
; CHECK-NEXT: ret void
139+
;
140+
%val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8)
141+
ret void
142+
}
143+
144+
define void @test_ldg_p_dead(ptr addrspace(1) %ptr) {
145+
; CHECK-LABEL: define void @test_ldg_p_dead(
146+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
147+
; CHECK-NEXT: ret void
148+
;
149+
%val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8)
150+
ret void
151+
}
152+
153+
define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) {
154+
; CHECK-LABEL: define void @test_ldg_f32_dead(
155+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
156+
; CHECK-NEXT: ret void
157+
;
158+
%val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4)
159+
ret void
160+
}
161+
162+
define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) {
163+
; CHECK-LABEL: define void @test_ldg_f64_dead(
164+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
165+
; CHECK-NEXT: ret void
166+
;
167+
%val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8)
168+
ret void
169+
}
170+
171+
define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) {
172+
; CHECK-LABEL: define void @test_ldg_f16_dead(
173+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
174+
; CHECK-NEXT: ret void
175+
;
176+
%val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2)
177+
ret void
178+
}
179+
180+
define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) {
181+
; CHECK-LABEL: define void @test_ldg_v2f16_dead(
182+
; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) {
183+
; CHECK-NEXT: ret void
184+
;
185+
%val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)
186+
ret void
187+
}

0 commit comments

Comments
 (0)