Skip to content

Commit 6b25f44

Browse files
authored
[AMDGPU] Detect trivially uniform arguments in InstCombine (#129897)
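Update one test (permlane64_uniform) to use an SGPR argument as the simplest way of getting a uniform value. Two other tests (readfirstlane_constant and readlane_constant) switch from amdgpu_kernel to amdgpu_cs so that their non-inreg arguments are not treated as trivially uniform and their existing CHECK lines remain valid.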
1 parent 592e596 commit 6b25f44

File tree

3 files changed

+170
-7
lines changed

3 files changed

+170
-7
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,8 @@ static bool isTriviallyUniform(const Use &U) {
462462
Value *V = U.get();
463463
if (isa<Constant>(V))
464464
return true;
465+
if (const auto *A = dyn_cast<Argument>(V))
466+
return AMDGPU::isArgPassedInSGPR(A);
465467
if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
466468
if (!AMDGPU::isIntrinsicAlwaysUniform(II->getIntrinsicID()))
467469
return false;

llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2743,7 +2743,7 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
27432743

27442744
@gv = constant i32 0
27452745

2746-
define amdgpu_kernel void @readfirstlane_constant(i32 %arg, ptr %ptr) {
2746+
define amdgpu_cs void @readfirstlane_constant(i32 %arg, ptr %ptr) {
27472747
; CHECK-LABEL: @readfirstlane_constant(
27482748
; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[ARG:%.*]])
27492749
; CHECK-NEXT: store volatile i32 [[VAR]], ptr [[PTR:%.*]], align 4
@@ -2829,7 +2829,7 @@ bb1:
28292829

28302830
declare i32 @llvm.amdgcn.readlane(i32, i32)
28312831

2832-
define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane, ptr %ptr) {
2832+
define amdgpu_cs void @readlane_constant(i32 %arg, i32 %lane, ptr %ptr) {
28332833
; CHECK-LABEL: @readlane_constant(
28342834
; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG:%.*]], i32 7)
28352835
; CHECK-NEXT: store volatile i32 [[VAR]], ptr [[PTR:%.*]], align 4
@@ -3041,14 +3041,12 @@ define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(ptr addrspace(1)
30413041
; llvm.amdgcn.permlane64
30423042
; --------------------------------------------------------------------
30433043

3044-
define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src0) {
3044+
define amdgpu_kernel void @permlane64_uniform(ptr addrspace(1) %out, i32 %src) {
30453045
; CHECK-LABEL: @permlane64_uniform(
3046-
; CHECK-NEXT: [[SRC1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[SRC0:%.*]])
3047-
; CHECK-NEXT: store i32 [[SRC1]], ptr addrspace(1) [[OUT:%.*]], align 4
3046+
; CHECK-NEXT: store i32 [[SRC1:%.*]], ptr addrspace(1) [[OUT:%.*]], align 4
30483047
; CHECK-NEXT: ret void
30493048
;
3050-
%src1 = call i32 @llvm.amdgcn.readfirstlane(i32 %src0)
3051-
%res = call i32 @llvm.amdgcn.permlane64(i32 %src1)
3049+
%res = call i32 @llvm.amdgcn.permlane64(i32 %src)
30523050
store i32 %res, ptr addrspace(1) %out
30533051
ret void
30543052
}
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s
3+
4+
; Use readfirstlane to demonstrate when InstCombine deems an input to
5+
; be trivially uniform.
6+
7+
; Constants are trivially uniform.
8+
define i32 @test_constant() {
9+
; CHECK-LABEL: define i32 @test_constant(
10+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
11+
; CHECK-NEXT: ret i32 7
12+
;
13+
%r = call i32 @llvm.amdgcn.readfirstlane(i32 7)
14+
ret i32 %r
15+
}
16+
17+
; The result of an AlwaysUniform intrinsic is trivially uniform.
18+
define i32 @test_intrinsic(i32 %x) {
19+
; CHECK-LABEL: define i32 @test_intrinsic(
20+
; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
21+
; CHECK-NEXT: [[Y:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
22+
; CHECK-NEXT: ret i32 [[Y]]
23+
;
24+
%y = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
25+
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %y)
26+
ret i32 %r
27+
}
28+
29+
; In compute kernels, all arguments are trivially uniform.
30+
31+
define amdgpu_kernel void @test_compute_i32(ptr %out, i32 %x) {
32+
; CHECK-LABEL: define amdgpu_kernel void @test_compute_i32(
33+
; CHECK-SAME: ptr [[OUT:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
34+
; CHECK-NEXT: store i32 [[X]], ptr [[OUT]], align 4
35+
; CHECK-NEXT: ret void
36+
;
37+
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
38+
store i32 %r, ptr %out
39+
ret void
40+
}
41+
42+
define amdgpu_kernel void @test_compute_i1(ptr %out, i1 %x) {
43+
; CHECK-LABEL: define amdgpu_kernel void @test_compute_i1(
44+
; CHECK-SAME: ptr [[OUT:%.*]], i1 [[X:%.*]]) #[[ATTR0]] {
45+
; CHECK-NEXT: store i1 [[X]], ptr [[OUT]], align 1
46+
; CHECK-NEXT: ret void
47+
;
48+
%r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
49+
store i1 %r, ptr %out
50+
ret void
51+
}
52+
53+
define amdgpu_kernel void @test_compute_v32i1(ptr %out, <32 x i1> %x) {
54+
; CHECK-LABEL: define amdgpu_kernel void @test_compute_v32i1(
55+
; CHECK-SAME: ptr [[OUT:%.*]], <32 x i1> [[X:%.*]]) #[[ATTR0]] {
56+
; CHECK-NEXT: store <32 x i1> [[X]], ptr [[OUT]], align 4
57+
; CHECK-NEXT: ret void
58+
;
59+
%r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
60+
store <32 x i1> %r, ptr %out
61+
ret void
62+
}
63+
64+
; In graphics shaders, inreg arguments are trivially uniform.
65+
66+
define amdgpu_ps i32 @test_graphics_i32(i32 inreg %x) {
67+
; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32(
68+
; CHECK-SAME: i32 inreg [[X:%.*]]) #[[ATTR0]] {
69+
; CHECK-NEXT: ret i32 [[X]]
70+
;
71+
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
72+
ret i32 %r
73+
}
74+
75+
define amdgpu_ps i1 @test_graphics_i1(i1 inreg %x) {
76+
; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1(
77+
; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] {
78+
; CHECK-NEXT: ret i1 [[X]]
79+
;
80+
%r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
81+
ret i1 %r
82+
}
83+
84+
define amdgpu_ps <32 x i1> @test_graphics_v32i1(<32 x i1> inreg %x) {
85+
; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1(
86+
; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] {
87+
; CHECK-NEXT: ret <32 x i1> [[X]]
88+
;
89+
%r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
90+
ret <32 x i1> %r
91+
}
92+
93+
; In graphics shaders, non-inreg arguments are not trivially uniform.
94+
95+
define amdgpu_ps i32 @test_graphics_i32_negative(i32 %x) {
96+
; CHECK-LABEL: define amdgpu_ps i32 @test_graphics_i32_negative(
97+
; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
98+
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[X]])
99+
; CHECK-NEXT: ret i32 [[R]]
100+
;
101+
%r = call i32 @llvm.amdgcn.readfirstlane(i32 %x)
102+
ret i32 %r
103+
}
104+
105+
define amdgpu_ps i1 @test_graphics_i1_negative(i1 %x) {
106+
; CHECK-LABEL: define amdgpu_ps i1 @test_graphics_i1_negative(
107+
; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] {
108+
; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]])
109+
; CHECK-NEXT: ret i1 [[R]]
110+
;
111+
%r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
112+
ret i1 %r
113+
}
114+
115+
define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(<32 x i1> %x) {
116+
; CHECK-LABEL: define amdgpu_ps <32 x i1> @test_graphics_v32i1_negative(
117+
; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] {
118+
; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]])
119+
; CHECK-NEXT: ret <32 x i1> [[R]]
120+
;
121+
%r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
122+
ret <32 x i1> %r
123+
}
124+
125+
; Test i1 and <32 x i1> arguments in non-entry (amdgpu_gfx) functions: inreg
; arguments are trivially uniform, non-inreg arguments are not.
126+
127+
define amdgpu_gfx i1 @test_callable_i1(i1 inreg %x) {
128+
; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1(
129+
; CHECK-SAME: i1 inreg [[X:%.*]]) #[[ATTR0]] {
130+
; CHECK-NEXT: ret i1 [[X]]
131+
;
132+
%r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
133+
ret i1 %r
134+
}
135+
136+
define amdgpu_gfx <32 x i1> @test_callable_v32i1(<32 x i1> inreg %x) {
137+
; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1(
138+
; CHECK-SAME: <32 x i1> inreg [[X:%.*]]) #[[ATTR0]] {
139+
; CHECK-NEXT: ret <32 x i1> [[X]]
140+
;
141+
%r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
142+
ret <32 x i1> %r
143+
}
144+
145+
define amdgpu_gfx i1 @test_callable_i1_negative(i1 %x) {
146+
; CHECK-LABEL: define amdgpu_gfx i1 @test_callable_i1_negative(
147+
; CHECK-SAME: i1 [[X:%.*]]) #[[ATTR0]] {
148+
; CHECK-NEXT: [[R:%.*]] = call i1 @llvm.amdgcn.readfirstlane.i1(i1 [[X]])
149+
; CHECK-NEXT: ret i1 [[R]]
150+
;
151+
%r = call i1 @llvm.amdgcn.readfirstlane(i1 %x)
152+
ret i1 %r
153+
}
154+
155+
define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative(<32 x i1> %x) {
156+
; CHECK-LABEL: define amdgpu_gfx <32 x i1> @test_callable_v32i1_negative(
157+
; CHECK-SAME: <32 x i1> [[X:%.*]]) #[[ATTR0]] {
158+
; CHECK-NEXT: [[R:%.*]] = call <32 x i1> @llvm.amdgcn.readfirstlane.v32i1(<32 x i1> [[X]])
159+
; CHECK-NEXT: ret <32 x i1> [[R]]
160+
;
161+
%r = call <32 x i1> @llvm.amdgcn.readfirstlane(<32 x i1> %x)
162+
ret <32 x i1> %r
163+
}

0 commit comments

Comments
 (0)