1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2
- ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s --check-prefix=SM20
3
- ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s --check-prefix=SM30
2
+ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
3
+ ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | FileCheck %s
4
4
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -verify-machineinstrs | %ptxas-verify %}
5
5
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_30 -verify-machineinstrs | %ptxas-verify %}
6
6
@@ -11,35 +11,20 @@ declare { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64,
11
11
declare i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ))
12
12
13
13
define void @foo (i64 %img , ptr %red , i32 %idx ) {
14
- ; SM20-LABEL: foo(
15
- ; SM20: {
16
- ; SM20-NEXT: .reg .b32 %r<2>;
17
- ; SM20-NEXT: .reg .f32 %f<5>;
18
- ; SM20-NEXT: .reg .b64 %rd<4>;
19
- ; SM20-EMPTY:
20
- ; SM20-NEXT: // %bb.0:
21
- ; SM20-NEXT: ld.param.u64 %rd1, [foo_param_0];
22
- ; SM20-NEXT: ld.param.u64 %rd2, [foo_param_1];
23
- ; SM20-NEXT: cvta.to.global.u64 %rd3, %rd2;
24
- ; SM20-NEXT: ld.param.u32 %r1, [foo_param_2];
25
- ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
26
- ; SM20-NEXT: st.global.f32 [%rd3], %f1;
27
- ; SM20-NEXT: ret;
28
- ;
29
- ; SM30-LABEL: foo(
30
- ; SM30: {
31
- ; SM30-NEXT: .reg .b32 %r<2>;
32
- ; SM30-NEXT: .reg .f32 %f<5>;
33
- ; SM30-NEXT: .reg .b64 %rd<4>;
34
- ; SM30-EMPTY:
35
- ; SM30-NEXT: // %bb.0:
36
- ; SM30-NEXT: ld.param.u64 %rd1, [foo_param_0];
37
- ; SM30-NEXT: ld.param.u64 %rd2, [foo_param_1];
38
- ; SM30-NEXT: cvta.to.global.u64 %rd3, %rd2;
39
- ; SM30-NEXT: ld.param.u32 %r1, [foo_param_2];
40
- ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
41
- ; SM30-NEXT: st.global.f32 [%rd3], %f1;
42
- ; SM30-NEXT: ret;
14
+ ; CHECK-LABEL: foo(
15
+ ; CHECK: {
16
+ ; CHECK-NEXT: .reg .b32 %r<2>;
17
+ ; CHECK-NEXT: .reg .f32 %f<5>;
18
+ ; CHECK-NEXT: .reg .b64 %rd<4>;
19
+ ; CHECK-EMPTY:
20
+ ; CHECK-NEXT: // %bb.0:
21
+ ; CHECK-NEXT: ld.param.u64 %rd1, [foo_param_0];
22
+ ; CHECK-NEXT: ld.param.u64 %rd2, [foo_param_1];
23
+ ; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
24
+ ; CHECK-NEXT: ld.param.u32 %r1, [foo_param_2];
25
+ ; CHECK-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [%rd1, {%r1}];
26
+ ; CHECK-NEXT: st.global.f32 [%rd3], %f1;
27
+ ; CHECK-NEXT: ret;
43
28
%val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %img , i32 %idx )
44
29
%ret = extractvalue { float , float , float , float } %val , 0
45
30
store float %ret , ptr %red
@@ -50,33 +35,19 @@ define void @foo(i64 %img, ptr %red, i32 %idx) {
50
35
@tex0 = internal addrspace (1 ) global i64 0 , align 8
51
36
52
37
define void @bar (ptr %red , i32 %idx ) {
53
- ; SM20-LABEL: bar(
54
- ; SM20: {
55
- ; SM20-NEXT: .reg .b32 %r<2>;
56
- ; SM20-NEXT: .reg .f32 %f<5>;
57
- ; SM20-NEXT: .reg .b64 %rd<4>;
58
- ; SM20-EMPTY:
59
- ; SM20-NEXT: // %bb.0:
60
- ; SM20-NEXT: ld.param.u64 %rd1, [bar_param_0];
61
- ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1;
62
- ; SM20-NEXT: ld.param.u32 %r1, [bar_param_1];
63
- ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
64
- ; SM20-NEXT: st.global.f32 [%rd2], %f1;
65
- ; SM20-NEXT: ret;
66
- ;
67
- ; SM30-LABEL: bar(
68
- ; SM30: {
69
- ; SM30-NEXT: .reg .b32 %r<2>;
70
- ; SM30-NEXT: .reg .f32 %f<5>;
71
- ; SM30-NEXT: .reg .b64 %rd<4>;
72
- ; SM30-EMPTY:
73
- ; SM30-NEXT: // %bb.0:
74
- ; SM30-NEXT: ld.param.u64 %rd1, [bar_param_0];
75
- ; SM30-NEXT: cvta.to.global.u64 %rd2, %rd1;
76
- ; SM30-NEXT: ld.param.u32 %r1, [bar_param_1];
77
- ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
78
- ; SM30-NEXT: st.global.f32 [%rd2], %f1;
79
- ; SM30-NEXT: ret;
38
+ ; CHECK-LABEL: bar(
39
+ ; CHECK: {
40
+ ; CHECK-NEXT: .reg .b32 %r<2>;
41
+ ; CHECK-NEXT: .reg .f32 %f<5>;
42
+ ; CHECK-NEXT: .reg .b64 %rd<4>;
43
+ ; CHECK-EMPTY:
44
+ ; CHECK-NEXT: // %bb.0:
45
+ ; CHECK-NEXT: ld.param.u64 %rd1, [bar_param_0];
46
+ ; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
47
+ ; CHECK-NEXT: ld.param.u32 %r1, [bar_param_1];
48
+ ; CHECK-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
49
+ ; CHECK-NEXT: st.global.f32 [%rd2], %f1;
50
+ ; CHECK-NEXT: ret;
80
51
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ) @tex0 )
81
52
%val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %texHandle , i32 %idx )
82
53
%ret = extractvalue { float , float , float , float } %val , 0
@@ -87,59 +58,32 @@ define void @bar(ptr %red, i32 %idx) {
87
58
declare float @texfunc (i64 )
88
59
89
60
define void @baz (ptr %red , i32 %idx ) {
90
- ; SM20-LABEL: baz(
91
- ; SM20: {
92
- ; SM20-NEXT: .reg .b32 %r<2>;
93
- ; SM20-NEXT: .reg .f32 %f<8>;
94
- ; SM20-NEXT: .reg .b64 %rd<4>;
95
- ; SM20-EMPTY:
96
- ; SM20-NEXT: // %bb.0:
97
- ; SM20-NEXT: ld.param.u64 %rd1, [baz_param_0];
98
- ; SM20-NEXT: cvta.to.global.u64 %rd2, %rd1;
99
- ; SM20-NEXT: ld.param.u32 %r1, [baz_param_1];
100
- ; SM20-NEXT: mov.u64 %rd3, tex0;
101
- ; SM20-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
102
- ; SM20-NEXT: { // callseq 0, 0
103
- ; SM20-NEXT: .param .b64 param0;
104
- ; SM20-NEXT: st.param.b64 [param0], %rd3;
105
- ; SM20-NEXT: .param .b32 retval0;
106
- ; SM20-NEXT: call.uni (retval0),
107
- ; SM20-NEXT: texfunc,
108
- ; SM20-NEXT: (
109
- ; SM20-NEXT: param0
110
- ; SM20-NEXT: );
111
- ; SM20-NEXT: ld.param.f32 %f5, [retval0];
112
- ; SM20-NEXT: } // callseq 0
113
- ; SM20-NEXT: add.rn.f32 %f7, %f1, %f5;
114
- ; SM20-NEXT: st.global.f32 [%rd2], %f7;
115
- ; SM20-NEXT: ret;
116
- ;
117
- ; SM30-LABEL: baz(
118
- ; SM30: {
119
- ; SM30-NEXT: .reg .b32 %r<2>;
120
- ; SM30-NEXT: .reg .f32 %f<8>;
121
- ; SM30-NEXT: .reg .b64 %rd<4>;
122
- ; SM30-EMPTY:
123
- ; SM30-NEXT: // %bb.0:
124
- ; SM30-NEXT: ld.param.u64 %rd1, [baz_param_0];
125
- ; SM30-NEXT: cvta.to.global.u64 %rd2, %rd1;
126
- ; SM30-NEXT: ld.param.u32 %r1, [baz_param_1];
127
- ; SM30-NEXT: mov.u64 %rd3, tex0;
128
- ; SM30-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
129
- ; SM30-NEXT: { // callseq 0, 0
130
- ; SM30-NEXT: .param .b64 param0;
131
- ; SM30-NEXT: st.param.b64 [param0], %rd3;
132
- ; SM30-NEXT: .param .b32 retval0;
133
- ; SM30-NEXT: call.uni (retval0),
134
- ; SM30-NEXT: texfunc,
135
- ; SM30-NEXT: (
136
- ; SM30-NEXT: param0
137
- ; SM30-NEXT: );
138
- ; SM30-NEXT: ld.param.f32 %f5, [retval0];
139
- ; SM30-NEXT: } // callseq 0
140
- ; SM30-NEXT: add.rn.f32 %f7, %f1, %f5;
141
- ; SM30-NEXT: st.global.f32 [%rd2], %f7;
142
- ; SM30-NEXT: ret;
61
+ ; CHECK-LABEL: baz(
62
+ ; CHECK: {
63
+ ; CHECK-NEXT: .reg .b32 %r<2>;
64
+ ; CHECK-NEXT: .reg .f32 %f<8>;
65
+ ; CHECK-NEXT: .reg .b64 %rd<4>;
66
+ ; CHECK-EMPTY:
67
+ ; CHECK-NEXT: // %bb.0:
68
+ ; CHECK-NEXT: ld.param.u64 %rd1, [baz_param_0];
69
+ ; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
70
+ ; CHECK-NEXT: ld.param.u32 %r1, [baz_param_1];
71
+ ; CHECK-NEXT: mov.u64 %rd3, tex0;
72
+ ; CHECK-NEXT: tex.1d.v4.f32.s32 {%f1, %f2, %f3, %f4}, [tex0, {%r1}];
73
+ ; CHECK-NEXT: { // callseq 0, 0
74
+ ; CHECK-NEXT: .param .b64 param0;
75
+ ; CHECK-NEXT: st.param.b64 [param0], %rd3;
76
+ ; CHECK-NEXT: .param .b32 retval0;
77
+ ; CHECK-NEXT: call.uni (retval0),
78
+ ; CHECK-NEXT: texfunc,
79
+ ; CHECK-NEXT: (
80
+ ; CHECK-NEXT: param0
81
+ ; CHECK-NEXT: );
82
+ ; CHECK-NEXT: ld.param.f32 %f5, [retval0];
83
+ ; CHECK-NEXT: } // callseq 0
84
+ ; CHECK-NEXT: add.rn.f32 %f7, %f1, %f5;
85
+ ; CHECK-NEXT: st.global.f32 [%rd2], %f7;
86
+ ; CHECK-NEXT: ret;
143
87
%texHandle = tail call i64 @llvm.nvvm.texsurf.handle.internal.p1 (ptr addrspace (1 ) @tex0 )
144
88
%val = tail call { float , float , float , float } @llvm.nvvm.tex.unified.1d.v4f32.s32 (i64 %texHandle , i32 %idx )
145
89
%ret = extractvalue { float , float , float , float } %val , 0
0 commit comments