Skip to content

Commit 48ec59c

Browse files
authored
[llvm][AMDGPU] Fold llvm.amdgcn.wavefrontsize early (#114481)
Fold `llvm.amdgcn.wavefrontsize` early, during InstCombine, so that its concrete value is used throughout subsequent optimisation passes.
1 parent 15fadeb commit 48ec59c

File tree

4 files changed

+124
-30
lines changed

4 files changed

+124
-30
lines changed

clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// REQUIRES: amdgpu-registered-target
22
// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu tahiti -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK-AMDGCN,CHECK %s
3-
// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefix=CHECK %s
3+
// RUN: %clang_cc1 -cl-std=CL2.0 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck -enable-var-scope --check-prefixes=CHECK,CHECK-SPIRV %s
44

55

66
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -866,7 +866,8 @@ void test_atomic_inc_dec(__attribute__((address_space(3))) uint *lptr, __attribu
866866
// CHECK-LABEL test_wavefrontsize(
867867
unsigned test_wavefrontsize() {
868868

869-
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize()
869+
// CHECK-AMDGCN: ret i32 {{[0-9]+}}
870+
// CHECK-SPIRV: {{.*}}call{{.*}} i32 @llvm.amdgcn.wavefrontsize()
870871
return __builtin_amdgcn_wavefrontsize();
871872
}
872873

llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,6 +1024,12 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
10241024
}
10251025
break;
10261026
}
1027+
case Intrinsic::amdgcn_wavefrontsize: {
1028+
if (ST->isWaveSizeKnown())
1029+
return IC.replaceInstUsesWith(
1030+
II, ConstantInt::get(II.getType(), ST->getWavefrontSize()));
1031+
break;
1032+
}
10271033
case Intrinsic::amdgcn_wqm_vote: {
10281034
// wqm_vote is identity when the argument is constant.
10291035
if (!isa<Constant>(II.getArgOperand(0)))

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,48 +4,28 @@
44
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
55
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
66

7-
; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s
8-
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s
9-
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
10-
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
11-
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
12-
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s
13-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
14-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
15-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
16-
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
17-
187
; GCN-LABEL: {{^}}fold_wavefrontsize:
19-
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
208

219
; W32: v_mov_b32_e32 [[V:v[0-9]+]], 32
2210
; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64
2311
; GCN: store_{{dword|b32}} v{{.+}}, [[V]]
2412

25-
; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
26-
; OPT: store i32 %tmp, ptr addrspace(1) %arg, align 4
27-
; OPT-NEXT: ret void
2813

2914
define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) {
15+
3016
bb:
3117
%tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
3218
store i32 %tmp, ptr addrspace(1) %arg, align 4
3319
ret void
3420
}
3521

3622
; GCN-LABEL: {{^}}fold_and_optimize_wavefrontsize:
37-
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(
3823

3924
; W32: v_mov_b32_e32 [[V:v[0-9]+]], 1{{$}}
4025
; W64: v_mov_b32_e32 [[V:v[0-9]+]], 2{{$}}
4126
; GCN-NOT: cndmask
4227
; GCN: store_{{dword|b32}} v{{.+}}, [[V]]
4328

44-
; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
45-
; OPT: %tmp1 = icmp ugt i32 %tmp, 32
46-
; OPT: %tmp2 = select i1 %tmp1, i32 2, i32 1
47-
; OPT: store i32 %tmp2, ptr addrspace(1) %arg
48-
; OPT-NEXT: ret void
4929

5030
define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) {
5131
bb:
@@ -57,13 +37,6 @@ bb:
5737
}
5838

5939
; GCN-LABEL: {{^}}fold_and_optimize_if_wavefrontsize:
60-
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(
61-
62-
; OPT: bb:
63-
; OPT: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
64-
; OPT: %tmp1 = icmp ugt i32 %tmp, 32
65-
; OPT: bb3:
66-
; OPT-NEXT: ret void
6740

6841
define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) {
6942
bb:
Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-- -passes=instcombine -S < %s | FileCheck -check-prefix=OPT %s
3+
; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s
4+
; RUN: opt -mtriple=amdgcn-- -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s
5+
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s
6+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s
7+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s
8+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize32 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W32 %s
9+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize64 -passes=instcombine -S < %s | FileCheck -check-prefix=OPT-W64 %s
10+
11+
define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) {
12+
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
13+
; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) {
14+
; OPT-NEXT: [[BB:.*:]]
15+
; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1:[0-9]+]]
16+
; OPT-NEXT: store i32 [[TMP]], ptr addrspace(1) [[ARG]], align 4
17+
; OPT-NEXT: ret void
18+
;
19+
; OPT-W32-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
20+
; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
21+
; OPT-W32-NEXT: [[BB:.*:]]
22+
; OPT-W32-NEXT: store i32 32, ptr addrspace(1) [[ARG]], align 4
23+
; OPT-W32-NEXT: ret void
24+
;
25+
; OPT-W64-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
26+
; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
27+
; OPT-W64-NEXT: [[BB:.*:]]
28+
; OPT-W64-NEXT: store i32 64, ptr addrspace(1) [[ARG]], align 4
29+
; OPT-W64-NEXT: ret void
30+
;
31+
bb:
32+
%tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
33+
store i32 %tmp, ptr addrspace(1) %arg, align 4
34+
ret void
35+
}
36+
37+
define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) {
38+
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(
39+
; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) {
40+
; OPT-NEXT: [[BB:.*:]]
41+
; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]]
42+
; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32
43+
; OPT-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 1
44+
; OPT-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[ARG]], align 4
45+
; OPT-NEXT: ret void
46+
;
47+
; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(
48+
; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] {
49+
; OPT-W32-NEXT: [[BB:.*:]]
50+
; OPT-W32-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4
51+
; OPT-W32-NEXT: ret void
52+
;
53+
; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_wavefrontsize(
54+
; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] {
55+
; OPT-W64-NEXT: [[BB:.*:]]
56+
; OPT-W64-NEXT: store i32 2, ptr addrspace(1) [[ARG]], align 4
57+
; OPT-W64-NEXT: ret void
58+
;
59+
bb:
60+
%tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
61+
%tmp1 = icmp ugt i32 %tmp, 32
62+
%tmp2 = select i1 %tmp1, i32 2, i32 1
63+
store i32 %tmp2, ptr addrspace(1) %arg
64+
ret void
65+
}
66+
67+
define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) {
68+
; OPT-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(
69+
; OPT-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) {
70+
; OPT-NEXT: [[BB:.*:]]
71+
; OPT-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.wavefrontsize() #[[ATTR1]]
72+
; OPT-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[TMP]], 32
73+
; OPT-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
74+
; OPT: [[BB2]]:
75+
; OPT-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4
76+
; OPT-NEXT: br label %[[BB3]]
77+
; OPT: [[BB3]]:
78+
; OPT-NEXT: ret void
79+
;
80+
; OPT-W32-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(
81+
; OPT-W32-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] {
82+
; OPT-W32-NEXT: [[BB:.*:]]
83+
; OPT-W32-NEXT: br i1 false, label %[[BB2:.*]], label %[[BB3:.*]]
84+
; OPT-W32: [[BB2]]:
85+
; OPT-W32-NEXT: br label %[[BB3]]
86+
; OPT-W32: [[BB3]]:
87+
; OPT-W32-NEXT: ret void
88+
;
89+
; OPT-W64-LABEL: define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(
90+
; OPT-W64-SAME: ptr addrspace(1) nocapture [[ARG:%.*]]) #[[ATTR0]] {
91+
; OPT-W64-NEXT: [[BB:.*:]]
92+
; OPT-W64-NEXT: br i1 true, label %[[BB2:.*]], label %[[BB3:.*]]
93+
; OPT-W64: [[BB2]]:
94+
; OPT-W64-NEXT: store i32 1, ptr addrspace(1) [[ARG]], align 4
95+
; OPT-W64-NEXT: br label %[[BB3]]
96+
; OPT-W64: [[BB3]]:
97+
; OPT-W64-NEXT: ret void
98+
;
99+
bb:
100+
%tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
101+
%tmp1 = icmp ugt i32 %tmp, 32
102+
br i1 %tmp1, label %bb2, label %bb3
103+
104+
bb2: ; preds = %bb
105+
store i32 1, ptr addrspace(1) %arg, align 4
106+
br label %bb3
107+
108+
bb3: ; preds = %bb2, %bb
109+
ret void
110+
}
111+
112+
declare i32 @llvm.amdgcn.wavefrontsize() #0
113+
114+
attributes #0 = { nounwind readnone speculatable }

0 commit comments

Comments
 (0)