; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
- ; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
- ; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
+ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
+ ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}

target triple = "nvptx64-nvidia-cuda"

@@ -9,47 +9,41 @@ target triple = "nvptx64-nvidia-cuda"
define void @test_b128_input_from_const() {
; CHECK-LABEL: test_b128_input_from_const(
; CHECK: {
- ; CHECK-NEXT: .reg .b32 %r<3>;
- ; CHECK-NEXT: .reg .b64 %rd<4>;
+ ; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-NEXT: .reg .b128 %rq<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.u64 %rd2, 0;
; CHECK-NEXT: mov.u64 %rd3, 42;
; CHECK-NEXT: mov.b128 %rq1, {%rd3, %rd2};
- ; CHECK-NEXT: mov.u32 %r1, value;
- ; CHECK-NEXT: cvta.global.u32 %r2, %r1;
- ; CHECK-NEXT: cvt.u64.u32 %rd1, %r2;
+ ; CHECK-NEXT: mov.u64 %rd4, value;
+ ; CHECK-NEXT: cvta.global.u64 %rd1, %rd4;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
; CHECK-NEXT: // end inline asm
; CHECK-NEXT: ret;
-
  tail call void asm sideeffect "{ st.b128 [$0], $1; }", "l,q"(ptr nonnull addrspacecast (ptr addrspace(1) @value to ptr), i128 42)
  ret void
}

define void @test_b128_input_from_load(ptr nocapture readonly %data) {
; CHECK-LABEL: test_b128_input_from_load(
; CHECK: {
- ; CHECK-NEXT: .reg .b32 %r<5>;
- ; CHECK-NEXT: .reg .b64 %rd<4>;
+ ; CHECK-NEXT: .reg .b64 %rd<7>;
; CHECK-NEXT: .reg .b128 %rq<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
- ; CHECK-NEXT: ld.param.u32 %r1, [test_b128_input_from_load_param_0];
- ; CHECK-NEXT: cvta.to.global.u32 %r2, %r1;
- ; CHECK-NEXT: ld.global.u64 %rd2, [%r2+8];
- ; CHECK-NEXT: ld.global.u64 %rd3, [%r2];
- ; CHECK-NEXT: mov.b128 %rq1, {%rd3, %rd2};
- ; CHECK-NEXT: mov.u32 %r3, value;
- ; CHECK-NEXT: cvta.global.u32 %r4, %r3;
- ; CHECK-NEXT: cvt.u64.u32 %rd1, %r4;
+ ; CHECK-NEXT: ld.param.u64 %rd2, [test_b128_input_from_load_param_0];
+ ; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
+ ; CHECK-NEXT: ld.global.u64 %rd4, [%rd3+8];
+ ; CHECK-NEXT: ld.global.u64 %rd5, [%rd3];
+ ; CHECK-NEXT: mov.b128 %rq1, {%rd5, %rd4};
+ ; CHECK-NEXT: mov.u64 %rd6, value;
+ ; CHECK-NEXT: cvta.global.u64 %rd1, %rd6;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
; CHECK-NEXT: // end inline asm
; CHECK-NEXT: ret;
-
  %1 = addrspacecast ptr %data to ptr addrspace(1)
  %2 = load <2 x i64>, ptr addrspace(1) %1, align 16
  %3 = bitcast <2 x i64> %2 to i128
@@ -62,26 +56,23 @@ define void @test_b128_input_from_select(ptr nocapture readonly %flag) {
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<2>;
- ; CHECK-NEXT: .reg .b32 %r<5>;
- ; CHECK-NEXT: .reg .b64 %rd<4>;
+ ; CHECK-NEXT: .reg .b64 %rd<7>;
; CHECK-NEXT: .reg .b128 %rq<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
- ; CHECK-NEXT: ld.param.u32 %r1, [test_b128_input_from_select_param_0];
- ; CHECK-NEXT: cvta.to.global.u32 %r2, %r1;
- ; CHECK-NEXT: ld.global.u8 %rs1, [%r2];
+ ; CHECK-NEXT: ld.param.u64 %rd2, [test_b128_input_from_select_param_0];
+ ; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
+ ; CHECK-NEXT: ld.global.u8 %rs1, [%rd3];
; CHECK-NEXT: setp.eq.s16 %p1, %rs1, 0;
- ; CHECK-NEXT: selp.b64 %rd2, 24, 42, %p1;
- ; CHECK-NEXT: mov.u64 %rd3, 0;
- ; CHECK-NEXT: mov.b128 %rq1, {%rd2, %rd3};
- ; CHECK-NEXT: mov.u32 %r3, value;
- ; CHECK-NEXT: cvta.global.u32 %r4, %r3;
- ; CHECK-NEXT: cvt.u64.u32 %rd1, %r4;
+ ; CHECK-NEXT: selp.b64 %rd4, 24, 42, %p1;
+ ; CHECK-NEXT: mov.u64 %rd5, 0;
+ ; CHECK-NEXT: mov.b128 %rq1, {%rd4, %rd5};
+ ; CHECK-NEXT: mov.u64 %rd6, value;
+ ; CHECK-NEXT: cvta.global.u64 %rd1, %rd6;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
; CHECK-NEXT: // end inline asm
; CHECK-NEXT: ret;
-
  %1 = addrspacecast ptr %flag to ptr addrspace(1)
  %2 = load i8, ptr addrspace(1) %1, align 1
  %3 = icmp eq i8 %2, 0
@@ -106,7 +97,6 @@ define void @test_store_b128_output() {
; CHECK-NEXT: st.global.u64 [value+8], %rd4;
; CHECK-NEXT: st.global.u64 [value], %rd3;
; CHECK-NEXT: ret;
-
  %1 = tail call i128 asm "{ mov.b128 $0, 41; }", "=q"()
  %add = add nsw i128 %1, 1
  %2 = bitcast i128 %add to <2 x i64>
@@ -117,26 +107,24 @@ define void @test_store_b128_output() {
define void @test_use_of_b128_output(ptr nocapture readonly %data) {
; CHECK-LABEL: test_use_of_b128_output(
; CHECK: {
- ; CHECK-NEXT: .reg .b32 %r<3>;
- ; CHECK-NEXT: .reg .b64 %rd<7>;
+ ; CHECK-NEXT: .reg .b64 %rd<9>;
; CHECK-NEXT: .reg .b128 %rq<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
- ; CHECK-NEXT: ld.param.u32 %r1, [test_use_of_b128_output_param_0];
- ; CHECK-NEXT: cvta.to.global.u32 %r2, %r1;
- ; CHECK-NEXT: ld.global.u64 %rd1, [%r2+8];
- ; CHECK-NEXT: ld.global.u64 %rd2, [%r2];
- ; CHECK-NEXT: mov.b128 %rq2, {%rd2, %rd1};
+ ; CHECK-NEXT: ld.param.u64 %rd1, [test_use_of_b128_output_param_0];
+ ; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
+ ; CHECK-NEXT: ld.global.u64 %rd3, [%rd2+8];
+ ; CHECK-NEXT: ld.global.u64 %rd4, [%rd2];
+ ; CHECK-NEXT: mov.b128 %rq2, {%rd4, %rd3};
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: { mov.b128 %rq1, %rq2; }
; CHECK-NEXT: // end inline asm
- ; CHECK-NEXT: mov.b128 {%rd3, %rd4}, %rq1;
- ; CHECK-NEXT: add.cc.s64 %rd5, %rd3, 1;
- ; CHECK-NEXT: addc.cc.s64 %rd6, %rd4, 0;
- ; CHECK-NEXT: st.global.u64 [value], %rd5;
- ; CHECK-NEXT: st.global.u64 [value+8], %rd6;
+ ; CHECK-NEXT: mov.b128 {%rd5, %rd6}, %rq1;
+ ; CHECK-NEXT: add.cc.s64 %rd7, %rd5, 1;
+ ; CHECK-NEXT: addc.cc.s64 %rd8, %rd6, 0;
+ ; CHECK-NEXT: st.global.u64 [value], %rd7;
+ ; CHECK-NEXT: st.global.u64 [value+8], %rd8;
; CHECK-NEXT: ret;
-
  %1 = addrspacecast ptr %data to ptr addrspace(1)
  %2 = load <2 x i64>, ptr addrspace(1) %1, align 16
  %3 = bitcast <2 x i64> %2 to i128