Skip to content

Commit d358808

Browse files
committed
scratch
1 parent ad0c7da commit d358808

File tree

8 files changed

+92
-62
lines changed

8 files changed

+92
-62
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/IR/Value.h"
2121
#include "llvm/Support/Casting.h"
2222
#include "llvm/Support/ErrorHandling.h"
23+
#include "llvm/Support/NVPTXAddrSpace.h"
2324
#include "llvm/Transforms/InstCombine/InstCombiner.h"
2425
#include <optional>
2526
using namespace llvm;
@@ -564,6 +565,13 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
564565
return nullptr;
565566
}
566567

568+
unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
569+
if (isa<AllocaInst>(V))
570+
return ADDRESS_SPACE_LOCAL;
571+
572+
return -1;
573+
}
574+
567575
void NVPTXTTIImpl::collectKernelLaunchBounds(
568576
const Function &F,
569577
SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
129129

130130
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
131131
Value *NewV) const;
132+
unsigned getAssumedAddrSpace(const Value *V) const;
132133

133134
void collectKernelLaunchBounds(
134135
const Function &F,

llvm/test/CodeGen/NVPTX/local-stack-frame.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66
; Ensure we access the local stack properly
77

88
; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
9-
; PTX32: cvta.local.u32 %SP, %SPL;
109
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
11-
; PTX32: st.volatile.u32 [%SP], %r{{[0-9]+}};
10+
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
11+
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
1212
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
13-
; PTX64: cvta.local.u64 %SP, %SPL;
1413
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
15-
; PTX64: st.volatile.u32 [%SP], %r{{[0-9]+}};
14+
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
15+
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
1616
define void @foo(i32 %a) {
1717
%local = alloca i32, align 4
1818
store volatile i32 %a, ptr %local

llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,31 +29,32 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
2929
; PTX-NEXT: .reg .pred %p<2>;
3030
; PTX-NEXT: .reg .b16 %rs<3>;
3131
; PTX-NEXT: .reg .b32 %r<11>;
32-
; PTX-NEXT: .reg .b64 %rd<9>;
32+
; PTX-NEXT: .reg .b64 %rd<10>;
3333
; PTX-EMPTY:
3434
; PTX-NEXT: // %bb.0: // %entry
3535
; PTX-NEXT: mov.u64 %SPL, __local_depot0;
3636
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
3737
; PTX-NEXT: ld.param.u8 %rs1, [non_kernel_function_param_1];
3838
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
3939
; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1;
40-
; PTX-NEXT: ld.param.s32 %rd1, [non_kernel_function_param_2];
41-
; PTX-NEXT: ld.param.u64 %rd2, [non_kernel_function_param_0+8];
42-
; PTX-NEXT: st.u64 [%SP+8], %rd2;
43-
; PTX-NEXT: ld.param.u64 %rd3, [non_kernel_function_param_0];
44-
; PTX-NEXT: st.u64 [%SP], %rd3;
45-
; PTX-NEXT: mov.u64 %rd4, gi;
46-
; PTX-NEXT: cvta.global.u64 %rd5, %rd4;
47-
; PTX-NEXT: add.u64 %rd6, %SP, 0;
48-
; PTX-NEXT: selp.b64 %rd7, %rd6, %rd5, %p1;
49-
; PTX-NEXT: add.s64 %rd8, %rd7, %rd1;
50-
; PTX-NEXT: ld.u8 %r1, [%rd8];
51-
; PTX-NEXT: ld.u8 %r2, [%rd8+1];
40+
; PTX-NEXT: add.u64 %rd1, %SP, 0;
41+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
42+
; PTX-NEXT: ld.param.s32 %rd3, [non_kernel_function_param_2];
43+
; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8];
44+
; PTX-NEXT: st.local.u64 [%rd2+8], %rd4;
45+
; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0];
46+
; PTX-NEXT: st.local.u64 [%rd2], %rd5;
47+
; PTX-NEXT: mov.u64 %rd6, gi;
48+
; PTX-NEXT: cvta.global.u64 %rd7, %rd6;
49+
; PTX-NEXT: selp.b64 %rd8, %rd1, %rd7, %p1;
50+
; PTX-NEXT: add.s64 %rd9, %rd8, %rd3;
51+
; PTX-NEXT: ld.u8 %r1, [%rd9];
52+
; PTX-NEXT: ld.u8 %r2, [%rd9+1];
5253
; PTX-NEXT: shl.b32 %r3, %r2, 8;
5354
; PTX-NEXT: or.b32 %r4, %r3, %r1;
54-
; PTX-NEXT: ld.u8 %r5, [%rd8+2];
55+
; PTX-NEXT: ld.u8 %r5, [%rd9+2];
5556
; PTX-NEXT: shl.b32 %r6, %r5, 16;
56-
; PTX-NEXT: ld.u8 %r7, [%rd8+3];
57+
; PTX-NEXT: ld.u8 %r7, [%rd9+3];
5758
; PTX-NEXT: shl.b32 %r8, %r7, 24;
5859
; PTX-NEXT: or.b32 %r9, %r8, %r6;
5960
; PTX-NEXT: or.b32 %r10, %r9, %r4;

llvm/test/CodeGen/NVPTX/lower-args.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,24 +40,25 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
4040
; PTX-NEXT: .local .align 8 .b8 __local_depot1[8];
4141
; PTX-NEXT: .reg .b64 %SP;
4242
; PTX-NEXT: .reg .b64 %SPL;
43-
; PTX-NEXT: .reg .b64 %rd<5>;
43+
; PTX-NEXT: .reg .b64 %rd<6>;
4444
; PTX-EMPTY:
4545
; PTX-NEXT: // %bb.0:
4646
; PTX-NEXT: mov.u64 %SPL, __local_depot1;
4747
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
48-
; PTX-NEXT: ld.param.u64 %rd1, [load_padding_param_0];
49-
; PTX-NEXT: st.u64 [%SP], %rd1;
50-
; PTX-NEXT: add.u64 %rd2, %SP, 0;
48+
; PTX-NEXT: add.u64 %rd1, %SP, 0;
49+
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
50+
; PTX-NEXT: ld.param.u64 %rd3, [load_padding_param_0];
51+
; PTX-NEXT: st.local.u64 [%rd2], %rd3;
5152
; PTX-NEXT: { // callseq 1, 0
5253
; PTX-NEXT: .param .b64 param0;
53-
; PTX-NEXT: st.param.b64 [param0], %rd2;
54+
; PTX-NEXT: st.param.b64 [param0], %rd1;
5455
; PTX-NEXT: .param .b64 retval0;
5556
; PTX-NEXT: call.uni (retval0),
5657
; PTX-NEXT: escape,
5758
; PTX-NEXT: (
5859
; PTX-NEXT: param0
5960
; PTX-NEXT: );
60-
; PTX-NEXT: ld.param.b64 %rd3, [retval0];
61+
; PTX-NEXT: ld.param.b64 %rd4, [retval0];
6162
; PTX-NEXT: } // callseq 1
6263
; PTX-NEXT: ret;
6364
%tmp = call ptr @escape(ptr nonnull align 16 %arg)

llvm/test/CodeGen/NVPTX/variadics-backend.ll

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -148,35 +148,34 @@ entry:
148148
define dso_local i32 @variadics2(i32 noundef %first, ...) {
149149
; CHECK-PTX-LABEL: variadics2(
150150
; CHECK-PTX: {
151-
; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
151+
; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3];
152152
; CHECK-PTX-NEXT: .reg .b64 %SP;
153153
; CHECK-PTX-NEXT: .reg .b64 %SPL;
154-
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
154+
; CHECK-PTX-NEXT: .reg .b16 %rs<4>;
155155
; CHECK-PTX-NEXT: .reg .b32 %r<7>;
156-
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
156+
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
157157
; CHECK-PTX-EMPTY:
158158
; CHECK-PTX-NEXT: // %bb.0: // %entry
159159
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
160-
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
161160
; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
162161
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
163-
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
164-
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
165-
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3];
166-
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd3+4];
167-
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd3+7];
168-
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1;
169-
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd3+5];
170-
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd3+6];
171-
; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
172-
; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
173-
; CHECK-PTX-NEXT: st.u16 [%SP], %rs5;
174-
; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3+8];
162+
; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 0;
163+
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 7;
164+
; CHECK-PTX-NEXT: and.b64 %rd5, %rd4, -8;
165+
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd5];
166+
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd5+4];
167+
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd5+7];
168+
; CHECK-PTX-NEXT: st.local.u8 [%rd3+2], %rs1;
169+
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5+6];
170+
; CHECK-PTX-NEXT: st.local.u8 [%rd3+1], %rs2;
171+
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd5+5];
172+
; CHECK-PTX-NEXT: st.local.u8 [%rd3], %rs3;
173+
; CHECK-PTX-NEXT: ld.u64 %rd6, [%rd5+8];
175174
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
176175
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
177-
; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r5;
178-
; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4;
179-
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6;
176+
; CHECK-PTX-NEXT: cvt.u64.u32 %rd7, %r5;
177+
; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd6;
178+
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd8;
180179
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
181180
; CHECK-PTX-NEXT: ret;
182181
entry:
@@ -213,7 +212,7 @@ define dso_local i32 @bar() {
213212
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
214213
; CHECK-PTX-NEXT: .reg .b64 %SP;
215214
; CHECK-PTX-NEXT: .reg .b64 %SPL;
216-
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
215+
; CHECK-PTX-NEXT: .reg .b16 %rs<8>;
217216
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
218217
; CHECK-PTX-NEXT: .reg .b64 %rd<4>;
219218
; CHECK-PTX-EMPTY:
@@ -228,9 +227,7 @@ define dso_local i32 @bar() {
228227
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
229228
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd1+6];
230229
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
231-
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
232-
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
233-
; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
230+
; CHECK-PTX-NEXT: st.local.u8 [%rd2], %rs6;
234231
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
235232
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
236233
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=infer-address-spaces %s | FileCheck %s
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
7+
; Loads a float through the pointer returned by an alloca. The CHECK lines
; below expect the infer-address-spaces pass (see the RUN line) to cast the
; alloca to addrspace(5) and to perform the load through that cast pointer.
define float @load_alloca() {
8+
; CHECK-LABEL: define float @load_alloca() {
9+
; CHECK-NEXT: [[ADDR:%.*]] = alloca float, align 4
10+
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
11+
; CHECK-NEXT: [[VAL:%.*]] = load float, ptr addrspace(5) [[TMP1]], align 4
12+
; CHECK-NEXT: ret float [[VAL]]
13+
;
14+
%addr = alloca float
15+
%val = load float, ptr %addr
16+
ret float %val
17+
}

llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -9,38 +9,43 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
99
; CHECK-NEXT: .local .align 8 .b8 __local_depot0[32];
1010
; CHECK-NEXT: .reg .b32 %SP;
1111
; CHECK-NEXT: .reg .b32 %SPL;
12-
; CHECK-NEXT: .reg .b32 %r<2>;
13-
; CHECK-NEXT: .reg .b64 %rd<13>;
12+
; CHECK-NEXT: .reg .b32 %r<4>;
13+
; CHECK-NEXT: .reg .b64 %rd<17>;
1414
; CHECK-EMPTY:
1515
; CHECK-NEXT: // %bb.0:
1616
; CHECK-NEXT: mov.u32 %SPL, __local_depot0;
1717
; CHECK-NEXT: cvta.local.u32 %SP, %SPL;
1818
; CHECK-NEXT: ld.param.u32 %r1, [caller_St8x4_param_1];
19+
; CHECK-NEXT: add.u32 %r3, %SPL, 0;
1920
; CHECK-NEXT: ld.param.u64 %rd1, [caller_St8x4_param_0+24];
20-
; CHECK-NEXT: st.u64 [%SP+24], %rd1;
21+
; CHECK-NEXT: st.local.u64 [%r3+24], %rd1;
2122
; CHECK-NEXT: ld.param.u64 %rd2, [caller_St8x4_param_0+16];
22-
; CHECK-NEXT: st.u64 [%SP+16], %rd2;
23+
; CHECK-NEXT: st.local.u64 [%r3+16], %rd2;
2324
; CHECK-NEXT: ld.param.u64 %rd3, [caller_St8x4_param_0+8];
24-
; CHECK-NEXT: st.u64 [%SP+8], %rd3;
25+
; CHECK-NEXT: st.local.u64 [%r3+8], %rd3;
2526
; CHECK-NEXT: ld.param.u64 %rd4, [caller_St8x4_param_0];
26-
; CHECK-NEXT: st.u64 [%SP], %rd4;
27+
; CHECK-NEXT: st.local.u64 [%r3], %rd4;
28+
; CHECK-NEXT: ld.u64 %rd5, [%SP+8];
29+
; CHECK-NEXT: ld.u64 %rd6, [%SP];
30+
; CHECK-NEXT: ld.u64 %rd7, [%SP+24];
31+
; CHECK-NEXT: ld.u64 %rd8, [%SP+16];
2732
; CHECK-NEXT: { // callseq 0, 0
2833
; CHECK-NEXT: .param .align 16 .b8 param0[32];
29-
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd4, %rd3};
30-
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd2, %rd1};
34+
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd6, %rd5};
35+
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd8, %rd7};
3136
; CHECK-NEXT: .param .align 16 .b8 retval0[32];
3237
; CHECK-NEXT: call.uni (retval0),
3338
; CHECK-NEXT: callee_St8x4,
3439
; CHECK-NEXT: (
3540
; CHECK-NEXT: param0
3641
; CHECK-NEXT: );
37-
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0];
38-
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];
42+
; CHECK-NEXT: ld.param.v2.b64 {%rd9, %rd10}, [retval0];
43+
; CHECK-NEXT: ld.param.v2.b64 {%rd11, %rd12}, [retval0+16];
3944
; CHECK-NEXT: } // callseq 0
40-
; CHECK-NEXT: st.u64 [%r1], %rd5;
41-
; CHECK-NEXT: st.u64 [%r1+8], %rd6;
42-
; CHECK-NEXT: st.u64 [%r1+16], %rd7;
43-
; CHECK-NEXT: st.u64 [%r1+24], %rd8;
45+
; CHECK-NEXT: st.u64 [%r1], %rd9;
46+
; CHECK-NEXT: st.u64 [%r1+8], %rd10;
47+
; CHECK-NEXT: st.u64 [%r1+16], %rd11;
48+
; CHECK-NEXT: st.u64 [%r1+24], %rd12;
4449
; CHECK-NEXT: ret;
4550
%call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2
4651
%.fca.0.extract = extractvalue [4 x i64] %call, 0

0 commit comments

Comments
 (0)