5
5
; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 | %ptxas-verify %}
6
6
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
7
7
8
+ %struct.S16 = type { i16 , i16 } ; result of the i32_to_2xi16* tests: field 0 = low part, field 1 = high part
9
+ %struct.S32 = type { i32 , i32 } ; result of the i64_to_2xi32* tests: field 0 = low part, field 1 = high part
10
+
8
11
; CHECK-LABEL: abs_i16(
9
12
define i16 @abs_i16 (i16 %a ) {
10
13
; CHECK: abs.s16
@@ -31,3 +34,91 @@ define i64 @abs_i64(i64 %a) {
31
34
%abs = select i1 %abs.cond , i64 %a , i64 %neg
32
35
ret i64 %abs
33
36
}
37
+
38
+ ; CHECK-LABEL: i32_to_2xi16(
39
+ define %struct.S16 @i32_to_2xi16(i32 noundef %in) { ; split %in into its {low, high} i16 halves
40
+ %low = trunc i32 %in to i16 ; bits 0..15
41
+ %high32 = lshr i32 %in, 16
42
+ %high = trunc i32 %high32 to i16 ; bits 16..31; expected to be extracted with a mov.b32 unpack
43
+ ; CHECK: ld.param.u32 %[[R32:r[0-9]+]], [i32_to_2xi16_param_0];
44
+ ; CHECK-DAG: cvt.u16.u32 %rs{{[0-9]+}}, %[[R32]];
45
+ ; CHECK-DAG: mov.b32 {tmp, %rs{{[0-9]+}}}, %[[R32]];
46
+ %s1 = insertvalue %struct.S16 poison, i16 %low, 0
47
+ %s = insertvalue %struct.S16 %s1, i16 %high, 1
48
+ ret %struct.S16 %s
49
+ }
50
+
51
+ ; CHECK-LABEL: i32_to_2xi16_lh(
52
+ ; Same as i32_to_2xi16, but the high part is computed before the low part.
53
+ define %struct.S16 @i32_to_2xi16_lh(i32 noundef %in) {
54
+ %high32 = lshr i32 %in, 16
55
+ %high = trunc i32 %high32 to i16 ; bits 16..31; expected to be extracted with a mov.b32 unpack
56
+ %low = trunc i32 %in to i16 ; bits 0..15
57
+ ; CHECK: ld.param.u32 %[[R32:r[0-9]+]], [i32_to_2xi16_lh_param_0];
58
+ ; CHECK-DAG: cvt.u16.u32 %rs{{[0-9]+}}, %[[R32]];
59
+ ; CHECK-DAG: mov.b32 {tmp, %rs{{[0-9]+}}}, %[[R32]];
60
+ %s1 = insertvalue %struct.S16 poison, i16 %low, 0
61
+ %s = insertvalue %struct.S16 %s1, i16 %high, 1
62
+ ret %struct.S16 %s
63
+ }
64
+
65
+
66
+ ; CHECK-LABEL: i32_to_2xi16_not(
67
+ define %struct.S16 @i32_to_2xi16_not (i32 noundef %in ) {
68
+ %low = trunc i32 %in to i16
69
+ ; Negative test: a shift by any value other than 16 blocks the conversion to mov.
70
+ %high32 = lshr i32 %in , 15
71
+ %high = trunc i32 %high32 to i16 ; bits 15..30 of %in, not its upper half
72
+ ; CHECK: cvt.u16.u32
73
+ ; CHECK: shr.u32
74
+ ; CHECK: cvt.u16.u32
75
+ %s1 = insertvalue %struct.S16 poison, i16 %low , 0
76
+ %s = insertvalue %struct.S16 %s1 , i16 %high , 1
77
+ ret %struct.S16 %s
78
+ }
79
+
80
+ ; CHECK-LABEL: i64_to_2xi32(
81
+ define %struct.S32 @i64_to_2xi32(i64 noundef %in) { ; split %in into its {low, high} i32 halves
82
+ %low = trunc i64 %in to i32 ; bits 0..31
83
+ %high64 = lshr i64 %in, 32
84
+ %high = trunc i64 %high64 to i32 ; bits 32..63; expected to be extracted with a mov.b64 unpack
85
+ ; CHECK: ld.param.u64 %[[R64:rd[0-9]+]], [i64_to_2xi32_param_0];
86
+ ; CHECK-DAG: cvt.u32.u64 %r{{[0-9]+}}, %[[R64]];
87
+ ; CHECK-DAG: mov.b64 {tmp, %r{{[0-9]+}}}, %[[R64]];
88
+ %s1 = insertvalue %struct.S32 poison, i32 %low, 0
89
+ %s = insertvalue %struct.S32 %s1, i32 %high, 1
90
+ ret %struct.S32 %s
91
+ }
92
+
93
+ ; CHECK-LABEL: i64_to_2xi32_not(
94
+ define %struct.S32 @i64_to_2xi32_not (i64 noundef %in ) {
95
+ %low = trunc i64 %in to i32
96
+ ; Negative test: a shift by any value other than 32 blocks the conversion to mov.
97
+ %high64 = lshr i64 %in , 31
98
+ %high = trunc i64 %high64 to i32 ; bits 31..62 of %in, not its upper half
99
+ ; CHECK: cvt.u32.u64
100
+ ; CHECK: shr.u64
101
+ ; CHECK: cvt.u32.u64
102
+ %s1 = insertvalue %struct.S32 poison, i32 %low , 0
103
+ %s = insertvalue %struct.S32 %s1 , i32 %high , 1
104
+ ret %struct.S32 %s
105
+ }
106
+
107
+ ; CHECK-LABEL: i32_to_2xi16_shr(
108
+ ; Make sure we do not get confused when the input of the idiom is itself an [al]shr.
109
+ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i) {
110
+ call void @escape_int(i32 %i) ; Force %i to be loaded completely.
111
+ %i1 = ashr i32 %i, 16
112
+ %l = trunc i32 %i1 to i16 ; low half of %i1, i.e. bits 16..31 of %i
113
+ %h32 = ashr i32 %i1, 16
114
+ %h = trunc i32 %h32 to i16 ; bits 16..31 of the shifted value %i1
115
+ ; CHECK: ld.param.u32 %[[R32:r[0-9]+]], [i32_to_2xi16_shr_param_0];
116
+ ; CHECK: shr.s32 %[[R32H:r[0-9]+]], %[[R32]], 16;
117
+ ; CHECK-DAG: mov.b32 {tmp, %rs{{[0-9]+}}}, %[[R32]];
118
+ ; CHECK-DAG: mov.b32 {tmp, %rs{{[0-9]+}}}, %[[R32H]];
119
+ %s0 = insertvalue %struct.S16 poison, i16 %l, 0
120
+ %s1 = insertvalue %struct.S16 %s0, i16 %h, 1
121
+ ret %struct.S16 %s1
122
+ }
123
+ declare dso_local void @escape_int (i32 noundef) ; no body in this file; i32_to_2xi16_shr passes its argument here to force a full i32 load
124
+
0 commit comments