Skip to content

Commit dd3aa5e

Browse files
committed
[NVPTX] Update 32-bit NVPTX tests to use 64-bit
Summary: These tests used the now-removed 32-bit `nvptx` target. Because they also run `ptxas-verify`, they would fail now that the target has been removed. Update them to use the 64-bit `nvptx64` target.
1 parent 694b132 commit dd3aa5e

File tree

3 files changed

+64
-89
lines changed

3 files changed

+64
-89
lines changed

llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll

Lines changed: 33 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
2-
; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
3-
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
44

55
target triple = "nvptx64-nvidia-cuda"
66

@@ -9,47 +9,41 @@ target triple = "nvptx64-nvidia-cuda"
99
define void @test_b128_input_from_const() {
1010
; CHECK-LABEL: test_b128_input_from_const(
1111
; CHECK: {
12-
; CHECK-NEXT: .reg .b32 %r<3>;
13-
; CHECK-NEXT: .reg .b64 %rd<4>;
12+
; CHECK-NEXT: .reg .b64 %rd<5>;
1413
; CHECK-NEXT: .reg .b128 %rq<2>;
1514
; CHECK-EMPTY:
1615
; CHECK-NEXT: // %bb.0:
1716
; CHECK-NEXT: mov.u64 %rd2, 0;
1817
; CHECK-NEXT: mov.u64 %rd3, 42;
1918
; CHECK-NEXT: mov.b128 %rq1, {%rd3, %rd2};
20-
; CHECK-NEXT: mov.u32 %r1, value;
21-
; CHECK-NEXT: cvta.global.u32 %r2, %r1;
22-
; CHECK-NEXT: cvt.u64.u32 %rd1, %r2;
19+
; CHECK-NEXT: mov.u64 %rd4, value;
20+
; CHECK-NEXT: cvta.global.u64 %rd1, %rd4;
2321
; CHECK-NEXT: // begin inline asm
2422
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
2523
; CHECK-NEXT: // end inline asm
2624
; CHECK-NEXT: ret;
27-
2825
tail call void asm sideeffect "{ st.b128 [$0], $1; }", "l,q"(ptr nonnull addrspacecast (ptr addrspace(1) @value to ptr), i128 42)
2926
ret void
3027
}
3128

3229
define void @test_b128_input_from_load(ptr nocapture readonly %data) {
3330
; CHECK-LABEL: test_b128_input_from_load(
3431
; CHECK: {
35-
; CHECK-NEXT: .reg .b32 %r<5>;
36-
; CHECK-NEXT: .reg .b64 %rd<4>;
32+
; CHECK-NEXT: .reg .b64 %rd<7>;
3733
; CHECK-NEXT: .reg .b128 %rq<2>;
3834
; CHECK-EMPTY:
3935
; CHECK-NEXT: // %bb.0:
40-
; CHECK-NEXT: ld.param.u32 %r1, [test_b128_input_from_load_param_0];
41-
; CHECK-NEXT: cvta.to.global.u32 %r2, %r1;
42-
; CHECK-NEXT: ld.global.u64 %rd2, [%r2+8];
43-
; CHECK-NEXT: ld.global.u64 %rd3, [%r2];
44-
; CHECK-NEXT: mov.b128 %rq1, {%rd3, %rd2};
45-
; CHECK-NEXT: mov.u32 %r3, value;
46-
; CHECK-NEXT: cvta.global.u32 %r4, %r3;
47-
; CHECK-NEXT: cvt.u64.u32 %rd1, %r4;
36+
; CHECK-NEXT: ld.param.u64 %rd2, [test_b128_input_from_load_param_0];
37+
; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
38+
; CHECK-NEXT: ld.global.u64 %rd4, [%rd3+8];
39+
; CHECK-NEXT: ld.global.u64 %rd5, [%rd3];
40+
; CHECK-NEXT: mov.b128 %rq1, {%rd5, %rd4};
41+
; CHECK-NEXT: mov.u64 %rd6, value;
42+
; CHECK-NEXT: cvta.global.u64 %rd1, %rd6;
4843
; CHECK-NEXT: // begin inline asm
4944
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
5045
; CHECK-NEXT: // end inline asm
5146
; CHECK-NEXT: ret;
52-
5347
%1 = addrspacecast ptr %data to ptr addrspace(1)
5448
%2 = load <2 x i64>, ptr addrspace(1) %1, align 16
5549
%3 = bitcast <2 x i64> %2 to i128
@@ -62,26 +56,23 @@ define void @test_b128_input_from_select(ptr nocapture readonly %flag) {
6256
; CHECK: {
6357
; CHECK-NEXT: .reg .pred %p<2>;
6458
; CHECK-NEXT: .reg .b16 %rs<2>;
65-
; CHECK-NEXT: .reg .b32 %r<5>;
66-
; CHECK-NEXT: .reg .b64 %rd<4>;
59+
; CHECK-NEXT: .reg .b64 %rd<7>;
6760
; CHECK-NEXT: .reg .b128 %rq<2>;
6861
; CHECK-EMPTY:
6962
; CHECK-NEXT: // %bb.0:
70-
; CHECK-NEXT: ld.param.u32 %r1, [test_b128_input_from_select_param_0];
71-
; CHECK-NEXT: cvta.to.global.u32 %r2, %r1;
72-
; CHECK-NEXT: ld.global.u8 %rs1, [%r2];
63+
; CHECK-NEXT: ld.param.u64 %rd2, [test_b128_input_from_select_param_0];
64+
; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
65+
; CHECK-NEXT: ld.global.u8 %rs1, [%rd3];
7366
; CHECK-NEXT: setp.eq.s16 %p1, %rs1, 0;
74-
; CHECK-NEXT: selp.b64 %rd2, 24, 42, %p1;
75-
; CHECK-NEXT: mov.u64 %rd3, 0;
76-
; CHECK-NEXT: mov.b128 %rq1, {%rd2, %rd3};
77-
; CHECK-NEXT: mov.u32 %r3, value;
78-
; CHECK-NEXT: cvta.global.u32 %r4, %r3;
79-
; CHECK-NEXT: cvt.u64.u32 %rd1, %r4;
67+
; CHECK-NEXT: selp.b64 %rd4, 24, 42, %p1;
68+
; CHECK-NEXT: mov.u64 %rd5, 0;
69+
; CHECK-NEXT: mov.b128 %rq1, {%rd4, %rd5};
70+
; CHECK-NEXT: mov.u64 %rd6, value;
71+
; CHECK-NEXT: cvta.global.u64 %rd1, %rd6;
8072
; CHECK-NEXT: // begin inline asm
8173
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
8274
; CHECK-NEXT: // end inline asm
8375
; CHECK-NEXT: ret;
84-
8576
%1 = addrspacecast ptr %flag to ptr addrspace(1)
8677
%2 = load i8, ptr addrspace(1) %1, align 1
8778
%3 = icmp eq i8 %2, 0
@@ -106,7 +97,6 @@ define void @test_store_b128_output() {
10697
; CHECK-NEXT: st.global.u64 [value+8], %rd4;
10798
; CHECK-NEXT: st.global.u64 [value], %rd3;
10899
; CHECK-NEXT: ret;
109-
110100
%1 = tail call i128 asm "{ mov.b128 $0, 41; }", "=q"()
111101
%add = add nsw i128 %1, 1
112102
%2 = bitcast i128 %add to <2 x i64>
@@ -117,26 +107,24 @@ define void @test_store_b128_output() {
117107
define void @test_use_of_b128_output(ptr nocapture readonly %data) {
118108
; CHECK-LABEL: test_use_of_b128_output(
119109
; CHECK: {
120-
; CHECK-NEXT: .reg .b32 %r<3>;
121-
; CHECK-NEXT: .reg .b64 %rd<7>;
110+
; CHECK-NEXT: .reg .b64 %rd<9>;
122111
; CHECK-NEXT: .reg .b128 %rq<3>;
123112
; CHECK-EMPTY:
124113
; CHECK-NEXT: // %bb.0:
125-
; CHECK-NEXT: ld.param.u32 %r1, [test_use_of_b128_output_param_0];
126-
; CHECK-NEXT: cvta.to.global.u32 %r2, %r1;
127-
; CHECK-NEXT: ld.global.u64 %rd1, [%r2+8];
128-
; CHECK-NEXT: ld.global.u64 %rd2, [%r2];
129-
; CHECK-NEXT: mov.b128 %rq2, {%rd2, %rd1};
114+
; CHECK-NEXT: ld.param.u64 %rd1, [test_use_of_b128_output_param_0];
115+
; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1;
116+
; CHECK-NEXT: ld.global.u64 %rd3, [%rd2+8];
117+
; CHECK-NEXT: ld.global.u64 %rd4, [%rd2];
118+
; CHECK-NEXT: mov.b128 %rq2, {%rd4, %rd3};
130119
; CHECK-NEXT: // begin inline asm
131120
; CHECK-NEXT: { mov.b128 %rq1, %rq2; }
132121
; CHECK-NEXT: // end inline asm
133-
; CHECK-NEXT: mov.b128 {%rd3, %rd4}, %rq1;
134-
; CHECK-NEXT: add.cc.s64 %rd5, %rd3, 1;
135-
; CHECK-NEXT: addc.cc.s64 %rd6, %rd4, 0;
136-
; CHECK-NEXT: st.global.u64 [value], %rd5;
137-
; CHECK-NEXT: st.global.u64 [value+8], %rd6;
122+
; CHECK-NEXT: mov.b128 {%rd5, %rd6}, %rq1;
123+
; CHECK-NEXT: add.cc.s64 %rd7, %rd5, 1;
124+
; CHECK-NEXT: addc.cc.s64 %rd8, %rd6, 0;
125+
; CHECK-NEXT: st.global.u64 [value], %rd7;
126+
; CHECK-NEXT: st.global.u64 [value+8], %rd8;
138127
; CHECK-NEXT: ret;
139-
140128
%1 = addrspacecast ptr %data to ptr addrspace(1)
141129
%2 = load <2 x i64>, ptr addrspace(1) %1, align 16
142130
%3 = bitcast <2 x i64> %2 to i128

llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll

Lines changed: 29 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
2-
; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
3-
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
44

55
target triple = "nvptx64-nvidia-cuda"
66

@@ -17,20 +17,16 @@ target triple = "nvptx64-nvidia-cuda"
1717
define void @test_corner_values() {
1818
; CHECK-LABEL: test_corner_values(
1919
; CHECK: {
20-
; CHECK-NEXT: .reg .b32 %r<20>;
21-
; CHECK-NEXT: .reg .b64 %rd<17>;
20+
; CHECK-NEXT: .reg .b64 %rd<24>;
2221
; CHECK-NEXT: .reg .b128 %rq<5>;
2322
; CHECK-EMPTY:
2423
; CHECK-NEXT: // %bb.0:
25-
; CHECK-NEXT: ld.global.u32 %r1, [v64];
26-
; CHECK-NEXT: add.s32 %r2, %r1, 8;
24+
; CHECK-NEXT: ld.global.u64 %rd1, [v64];
25+
; CHECK-NEXT: add.s64 %rd2, %rd1, 8;
2726
; CHECK-NEXT: mov.u64 %rd13, -1;
2827
; CHECK-NEXT: mov.b128 %rq1, {%rd13, %rd13};
29-
; CHECK-NEXT: cvt.u64.u32 %rd1, %r1;
30-
; CHECK-NEXT: cvt.u64.u32 %rd2, %r2;
31-
; CHECK-NEXT: mov.u32 %r3, v_u128_max;
32-
; CHECK-NEXT: cvta.global.u32 %r4, %r3;
33-
; CHECK-NEXT: cvt.u64.u32 %rd3, %r4;
28+
; CHECK-NEXT: mov.u64 %rd14, v_u128_max;
29+
; CHECK-NEXT: cvta.global.u64 %rd3, %rd14;
3430
; CHECK-NEXT: // begin inline asm
3531
; CHECK-NEXT: {
3632
; CHECK-NEXT: .reg .b64 hi;
@@ -41,16 +37,13 @@ define void @test_corner_values() {
4137
; CHECK-NEXT: st.b128 [%rd3], %rq1;
4238
; CHECK-NEXT: }
4339
; CHECK-NEXT: // end inline asm
44-
; CHECK-NEXT: ld.global.u32 %r5, [v64];
45-
; CHECK-NEXT: add.s32 %r6, %r5, 16;
46-
; CHECK-NEXT: add.s32 %r7, %r5, 24;
47-
; CHECK-NEXT: mov.u64 %rd14, 9223372036854775807;
48-
; CHECK-NEXT: mov.b128 %rq2, {%rd13, %rd14};
49-
; CHECK-NEXT: mov.u32 %r8, v_i128_max;
50-
; CHECK-NEXT: cvta.global.u32 %r9, %r8;
51-
; CHECK-NEXT: cvt.u64.u32 %rd6, %r9;
52-
; CHECK-NEXT: cvt.u64.u32 %rd4, %r6;
53-
; CHECK-NEXT: cvt.u64.u32 %rd5, %r7;
40+
; CHECK-NEXT: ld.global.u64 %rd15, [v64];
41+
; CHECK-NEXT: add.s64 %rd4, %rd15, 16;
42+
; CHECK-NEXT: add.s64 %rd5, %rd15, 24;
43+
; CHECK-NEXT: mov.u64 %rd16, 9223372036854775807;
44+
; CHECK-NEXT: mov.b128 %rq2, {%rd13, %rd16};
45+
; CHECK-NEXT: mov.u64 %rd17, v_i128_max;
46+
; CHECK-NEXT: cvta.global.u64 %rd6, %rd17;
5447
; CHECK-NEXT: // begin inline asm
5548
; CHECK-NEXT: {
5649
; CHECK-NEXT: .reg .b64 hi;
@@ -61,17 +54,14 @@ define void @test_corner_values() {
6154
; CHECK-NEXT: st.b128 [%rd6], %rq2;
6255
; CHECK-NEXT: }
6356
; CHECK-NEXT: // end inline asm
64-
; CHECK-NEXT: ld.global.u32 %r10, [v64];
65-
; CHECK-NEXT: add.s32 %r11, %r10, 32;
66-
; CHECK-NEXT: add.s32 %r12, %r10, 40;
67-
; CHECK-NEXT: mov.u64 %rd15, -9223372036854775808;
68-
; CHECK-NEXT: mov.u64 %rd16, 0;
69-
; CHECK-NEXT: mov.b128 %rq3, {%rd16, %rd15};
70-
; CHECK-NEXT: mov.u32 %r13, v_i128_min;
71-
; CHECK-NEXT: cvta.global.u32 %r14, %r13;
72-
; CHECK-NEXT: cvt.u64.u32 %rd9, %r14;
73-
; CHECK-NEXT: cvt.u64.u32 %rd7, %r11;
74-
; CHECK-NEXT: cvt.u64.u32 %rd8, %r12;
57+
; CHECK-NEXT: ld.global.u64 %rd18, [v64];
58+
; CHECK-NEXT: add.s64 %rd7, %rd18, 32;
59+
; CHECK-NEXT: add.s64 %rd8, %rd18, 40;
60+
; CHECK-NEXT: mov.u64 %rd19, -9223372036854775808;
61+
; CHECK-NEXT: mov.u64 %rd20, 0;
62+
; CHECK-NEXT: mov.b128 %rq3, {%rd20, %rd19};
63+
; CHECK-NEXT: mov.u64 %rd21, v_i128_min;
64+
; CHECK-NEXT: cvta.global.u64 %rd9, %rd21;
7565
; CHECK-NEXT: // begin inline asm
7666
; CHECK-NEXT: {
7767
; CHECK-NEXT: .reg .b64 hi;
@@ -82,15 +72,12 @@ define void @test_corner_values() {
8272
; CHECK-NEXT: st.b128 [%rd9], %rq3;
8373
; CHECK-NEXT: }
8474
; CHECK-NEXT: // end inline asm
85-
; CHECK-NEXT: ld.global.u32 %r15, [v64];
86-
; CHECK-NEXT: add.s32 %r16, %r15, 48;
87-
; CHECK-NEXT: add.s32 %r17, %r15, 56;
88-
; CHECK-NEXT: mov.b128 %rq4, {%rd16, %rd16};
89-
; CHECK-NEXT: mov.u32 %r18, v_u128_zero;
90-
; CHECK-NEXT: cvta.global.u32 %r19, %r18;
91-
; CHECK-NEXT: cvt.u64.u32 %rd12, %r19;
92-
; CHECK-NEXT: cvt.u64.u32 %rd10, %r16;
93-
; CHECK-NEXT: cvt.u64.u32 %rd11, %r17;
75+
; CHECK-NEXT: ld.global.u64 %rd22, [v64];
76+
; CHECK-NEXT: add.s64 %rd10, %rd22, 48;
77+
; CHECK-NEXT: add.s64 %rd11, %rd22, 56;
78+
; CHECK-NEXT: mov.b128 %rq4, {%rd20, %rd20};
79+
; CHECK-NEXT: mov.u64 %rd23, v_u128_zero;
80+
; CHECK-NEXT: cvta.global.u64 %rd12, %rd23;
9481
; CHECK-NEXT: // begin inline asm
9582
; CHECK-NEXT: {
9683
; CHECK-NEXT: .reg .b64 hi;

llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
2-
; RUN: llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
3-
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
3+
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}
44

55
target triple = "nvptx64-nvidia-cuda"
66

0 commit comments

Comments
 (0)