1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
1
2
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
2
3
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
3
4
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
@@ -9,62 +10,142 @@ target triple = "nvptx-unknown-cuda"
9
10
declare { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 , i32 )
10
11
declare i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ))
11
12
12
- ; SM20-LABEL: .entry foo
13
- ; SM30-LABEL: .entry foo
14
13
define void @foo (i64 %img , ptr %red , i32 %idx ) {
15
- ; SM20: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
16
- ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
17
- ; SM30: ld.param.u64 %rd[[TEXREG:[0-9]+]], [foo_param_0];
18
- ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}]
14
+ ; SM20-LABEL: foo(
15
+ ; SM20: {
16
+ ; SM20-NEXT: .reg .b32 %r<2>;
17
+ ; SM20-NEXT: .reg .f32 %f<5>;
18
+ ; SM20-NEXT: .reg .b64 %rd<4>;
19
+ ; SM20-EMPTY:
20
+ ; SM20-NEXT: // %bb.0:
21
+ ; SM20-NEXT: ld.param.u64 %rd1, [foo_param_0];
22
+ ; SM20-NEXT: ld.param.u64 %rd2, [foo_param_1];
23
+ ; SM20-NEXT: cvta.to.global.u64 %rd3, %rd2;
24
+ ; SM20-NEXT: ld.param.u32 %r1, [foo_param_2];
25
+ ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
26
+ ; SM20-NEXT: st.global.f32 [%rd3], %f1;
27
+ ; SM20-NEXT: ret;
28
+ ;
29
+ ; SM30-LABEL: foo(
30
+ ; SM30: {
31
+ ; SM30-NEXT: .reg .b32 %r<2>;
32
+ ; SM30-NEXT: .reg .f32 %f<5>;
33
+ ; SM30-NEXT: .reg .b64 %rd<4>;
34
+ ; SM30-EMPTY:
35
+ ; SM30-NEXT: // %bb.0:
36
+ ; SM30-NEXT: ld.param.u64 %rd1, [foo_param_0];
37
+ ; SM30-NEXT: ld.param.u64 %rd2, [foo_param_1];
38
+ ; SM30-NEXT: cvta.to.global.u64 %rd3, %rd2;
39
+ ; SM30-NEXT: ld.param.u32 %r1, [foo_param_2];
40
+ ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
41
+ ; SM30-NEXT: st.global.f32 [%rd3], %f1;
42
+ ; SM30-NEXT: ret;
19
43
%val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %img , i32 %idx )
20
44
%ret = extractvalue { float , float , float , float } %val , 0
21
- ; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
22
- ; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
23
45
store float %ret , ptr %red
24
46
ret void
25
47
}
26
48
27
49
28
50
@tex0 = internal addrspace (1 ) global i64 0 , align 8
29
51
30
- ; SM20-LABEL: .entry bar
31
- ; SM30-LABEL: .entry bar
32
52
define void @bar (ptr %red , i32 %idx ) {
33
- ; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
53
+ ; SM20-LABEL: bar(
54
+ ; SM20: {
55
+ ; SM20-NEXT: .reg .b32 %r<2>;
56
+ ; SM20-NEXT: .reg .f32 %f<5>;
57
+ ; SM20-NEXT: .reg .b64 %rd<4>;
58
+ ; SM20-EMPTY:
59
+ ; SM20-NEXT: // %bb.0:
60
+ ; SM20-NEXT: ld.param.u64 %rd1, [bar_param_0];
61
+ ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1;
62
+ ; SM20-NEXT: ld.param.u32 %r1, [bar_param_1];
63
+ ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
64
+ ; SM20-NEXT: st.global.f32 [%rd2], %f1;
65
+ ; SM20-NEXT: ret;
66
+ ;
67
+ ; SM30-LABEL: bar(
68
+ ; SM30: {
69
+ ; SM30-NEXT: .reg .b32 %r<2>;
70
+ ; SM30-NEXT: .reg .f32 %f<5>;
71
+ ; SM30-NEXT: .reg .b64 %rd<4>;
72
+ ; SM30-EMPTY:
73
+ ; SM30-NEXT: // %bb.0:
74
+ ; SM30-NEXT: ld.param.u64 %rd1, [bar_param_0];
75
+ ; SM30-NEXT: cvta.to.global.u64 %rd2, %rd1;
76
+ ; SM30-NEXT: ld.param.u32 %r1, [bar_param_1];
77
+ ; SM30-NEXT: mov.u64 %rd3, tex0;
78
+ ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
79
+ ; SM30-NEXT: st.global.f32 [%rd2], %f1;
80
+ ; SM30-NEXT: ret;
34
81
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ) @tex0 )
35
- ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
36
- ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
37
82
%val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %texHandle , i32 %idx )
38
83
%ret = extractvalue { float , float , float , float } %val , 0
39
- ; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
40
- ; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RED]]
41
84
store float %ret , ptr %red
42
85
ret void
43
86
}
44
87
45
88
declare float @texfunc (i64 )
46
89
47
- ; SM20-LABEL: .entry baz
48
- ; SM30-LABEL: .entry baz
49
90
define void @baz (ptr %red , i32 %idx ) {
50
- ; SM30: mov.u64 %rd[[TEXHANDLE:[0-9]+]], tex0
91
+ ; SM20-LABEL: baz(
92
+ ; SM20: {
93
+ ; SM20-NEXT: .reg .b32 %r<2>;
94
+ ; SM20-NEXT: .reg .f32 %f<8>;
95
+ ; SM20-NEXT: .reg .b64 %rd<4>;
96
+ ; SM20-EMPTY:
97
+ ; SM20-NEXT: // %bb.0:
98
+ ; SM20-NEXT: ld.param.u64 %rd1, [baz_param_0];
99
+ ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1;
100
+ ; SM20-NEXT: ld.param.u32 %r1, [baz_param_1];
101
+ ; SM20-NEXT: mov.u64 %rd3, tex0;
102
+ ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
103
+ ; SM20-NEXT: { // callseq 0, 0
104
+ ; SM20-NEXT: .param .b64 param0;
105
+ ; SM20-NEXT: st.param.b64 [param0], %rd3;
106
+ ; SM20-NEXT: .param .b32 retval0;
107
+ ; SM20-NEXT: call.uni (retval0),
108
+ ; SM20-NEXT: texfunc,
109
+ ; SM20-NEXT: (
110
+ ; SM20-NEXT: param0
111
+ ; SM20-NEXT: );
112
+ ; SM20-NEXT: ld.param.f32 %f5, [retval0];
113
+ ; SM20-NEXT: } // callseq 0
114
+ ; SM20-NEXT: add.rn.f32 %f7, %f1, %f5;
115
+ ; SM20-NEXT: st.global.f32 [%rd2], %f7;
116
+ ; SM20-NEXT: ret;
117
+ ;
118
+ ; SM30-LABEL: baz(
119
+ ; SM30: {
120
+ ; SM30-NEXT: .reg .b32 %r<2>;
121
+ ; SM30-NEXT: .reg .f32 %f<8>;
122
+ ; SM30-NEXT: .reg .b64 %rd<4>;
123
+ ; SM30-EMPTY:
124
+ ; SM30-NEXT: // %bb.0:
125
+ ; SM30-NEXT: ld.param.u64 %rd1, [baz_param_0];
126
+ ; SM30-NEXT: cvta.to.global.u64 %rd2, %rd1;
127
+ ; SM30-NEXT: ld.param.u32 %r1, [baz_param_1];
128
+ ; SM30-NEXT: mov.u64 %rd3, tex0;
129
+ ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd3, {%r1}];
130
+ ; SM30-NEXT: { // callseq 0, 0
131
+ ; SM30-NEXT: .param .b64 param0;
132
+ ; SM30-NEXT: st.param.b64 [param0], %rd3;
133
+ ; SM30-NEXT: .param .b32 retval0;
134
+ ; SM30-NEXT: call.uni (retval0),
135
+ ; SM30-NEXT: texfunc,
136
+ ; SM30-NEXT: (
137
+ ; SM30-NEXT: param0
138
+ ; SM30-NEXT: );
139
+ ; SM30-NEXT: ld.param.f32 %f5, [retval0];
140
+ ; SM30-NEXT: } // callseq 0
141
+ ; SM30-NEXT: add.rn.f32 %f7, %f1, %f5;
142
+ ; SM30-NEXT: st.global.f32 [%rd2], %f7;
143
+ ; SM30-NEXT: ret;
51
144
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ) @tex0 )
52
- ; SM20: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [tex0, {%r{{[0-9]+}}}]
53
- ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}]
54
145
%val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %texHandle , i32 %idx )
55
146
%ret = extractvalue { float , float , float , float } %val , 0
56
- ; SM20: call.uni ([[RETVAL:.*]]),
57
- ; SM30: call.uni ([[RETVAL:.*]]),
58
- ; SM20: texfunc,
59
- ; SM30: texfunc,
60
147
%texcall = tail call float @texfunc (i64 %texHandle )
61
- ; SM20: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
62
- ; SM30: ld.param.f32 %f[[TEXCALL:[0-9]+]], [[[RETVAL]]]
63
- ; SM20: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
64
- ; SM30: add.rn.f32 %f[[RET2:[0-9]+]], %f[[RED]], %f[[TEXCALL]]
65
148
%ret2 = fadd float %ret , %texcall
66
- ; SM20: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]]
67
- ; SM30: st.global.f32 [%rd{{[0-9]+}}], %f[[RET2]]
68
149
store float %ret2 , ptr %red
69
150
ret void
70
151
}
0 commit comments