|
1 |
| -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefixes=CHECK,CHECK-O3 |
| 3 | +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | FileCheck %s --check-prefixes=CHECK,CHECK-O0 |
2 | 4 | ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
|
| 5 | +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O0 | %ptxas-verify %} |
3 | 6 |
|
| 7 | +target triple = "nvptx64-nvidia-cuda" |
4 | 8 |
|
5 |
| -; CHECK: bfe0 |
; bfe0: (ashr %a, 4) masked with 15, i.e. the 4-bit field starting at bit 4.
; The checks expect one bfe.u32 with operands 4, 4 (start bit, field length
; per the PTX ISA) and no separate shr/and instructions.
6 | 9 | define i32 @bfe0(i32 %a) {
|
7 |
| -; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 4, 4 |
8 |
| -; CHECK-NOT: shr |
9 |
| -; CHECK-NOT: and |
| 10 | +; CHECK-LABEL: bfe0( |
| 11 | +; CHECK: { |
| 12 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 13 | +; CHECK-EMPTY: |
| 14 | +; CHECK-NEXT: // %bb.0: |
| 15 | +; CHECK-NEXT: ld.param.u32 %r1, [bfe0_param_0]; |
| 16 | +; CHECK-NEXT: bfe.u32 %r2, %r1, 4, 4; |
| 17 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 18 | +; CHECK-NEXT: ret; |
10 | 19 | %val0 = ashr i32 %a, 4
|
11 | 20 | %val1 = and i32 %val0, 15
|
12 | 21 | ret i32 %val1
|
13 | 22 | }
|
14 | 23 |
|
15 |
| -; CHECK: bfe1 |
; bfe1: (ashr %a, 3) masked with 7, i.e. the 3-bit field starting at bit 3.
; The checks expect one bfe.u32 with operands 3, 3 in place of a shr/and pair.
16 | 24 | define i32 @bfe1(i32 %a) {
|
17 |
| -; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 3, 3 |
18 |
| -; CHECK-NOT: shr |
19 |
| -; CHECK-NOT: and |
| 25 | +; CHECK-LABEL: bfe1( |
| 26 | +; CHECK: { |
| 27 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 28 | +; CHECK-EMPTY: |
| 29 | +; CHECK-NEXT: // %bb.0: |
| 30 | +; CHECK-NEXT: ld.param.u32 %r1, [bfe1_param_0]; |
| 31 | +; CHECK-NEXT: bfe.u32 %r2, %r1, 3, 3; |
| 32 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 33 | +; CHECK-NEXT: ret; |
20 | 34 | %val0 = ashr i32 %a, 3
|
21 | 35 | %val1 = and i32 %val0, 7
|
22 | 36 | ret i32 %val1
|
23 | 37 | }
|
24 | 38 |
|
25 |
| -; CHECK: bfe2 |
; bfe2: (ashr %a, 5) masked with 7, i.e. the 3-bit field starting at bit 5.
; Unlike bfe0/bfe1 the shift amount and field width differ; the checks expect
; one bfe.u32 with operands 5, 3.
26 | 39 | define i32 @bfe2(i32 %a) {
|
27 |
| -; CHECK: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 5, 3 |
28 |
| -; CHECK-NOT: shr |
29 |
| -; CHECK-NOT: and |
| 40 | +; CHECK-LABEL: bfe2( |
| 41 | +; CHECK: { |
| 42 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 43 | +; CHECK-EMPTY: |
| 44 | +; CHECK-NEXT: // %bb.0: |
| 45 | +; CHECK-NEXT: ld.param.u32 %r1, [bfe2_param_0]; |
| 46 | +; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 3; |
| 47 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 48 | +; CHECK-NEXT: ret; |
30 | 49 | %val0 = ashr i32 %a, 5
|
31 | 50 | %val1 = and i32 %val0, 7
|
32 | 51 | ret i32 %val1
|
33 | 52 | }
|
34 | 53 |
|
35 |
| -; CHECK-LABEL: no_bfe_on_32bit_overflow |
; Negative test: (ashr %a, 31) masked with 15. A 4-bit field starting at
; bit 31 would extend past the 32-bit width, so the checks expect the plain
; shr.s32 / and.b32 pair rather than a bfe.u32 with operands 31, 4.
36 | 54 | define i32 @no_bfe_on_32bit_overflow(i32 %a) {
|
37 |
| -; CHECK-NOT: bfe.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, 31, 4 |
| 55 | +; CHECK-LABEL: no_bfe_on_32bit_overflow( |
| 56 | +; CHECK: { |
| 57 | +; CHECK-NEXT: .reg .b32 %r<4>; |
| 58 | +; CHECK-EMPTY: |
| 59 | +; CHECK-NEXT: // %bb.0: |
| 60 | +; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_param_0]; |
| 61 | +; CHECK-NEXT: shr.s32 %r2, %r1, 31; |
| 62 | +; CHECK-NEXT: and.b32 %r3, %r2, 15; |
| 63 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| 64 | +; CHECK-NEXT: ret; |
38 | 65 | %val0 = ashr i32 %a, 31
|
39 | 66 | %val1 = and i32 %val0, 15
|
40 | 67 | ret i32 %val1
|
41 | 68 | }
|
42 | 69 |
|
43 |
| -; CHECK-LABEL: no_bfe_on_32bit_overflow_shr_and_pair |
; Same IR as @no_bfe_on_32bit_overflow: (ashr %a, 31) masked with 15.
; The removed hand-written checks matched the shr.s32 / and.b32 pair
; positively; the autogenerated checks pin the same two instructions.
44 | 70 | define i32 @no_bfe_on_32bit_overflow_shr_and_pair(i32 %a) {
|
45 |
| -; CHECK: shr.s32 %r{{[0-9]+}}, %r{{[0-9]+}}, 31 |
46 |
| -; CHECK: and.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 15 |
| 71 | +; CHECK-LABEL: no_bfe_on_32bit_overflow_shr_and_pair( |
| 72 | +; CHECK: { |
| 73 | +; CHECK-NEXT: .reg .b32 %r<4>; |
| 74 | +; CHECK-EMPTY: |
| 75 | +; CHECK-NEXT: // %bb.0: |
| 76 | +; CHECK-NEXT: ld.param.u32 %r1, [no_bfe_on_32bit_overflow_shr_and_pair_param_0]; |
| 77 | +; CHECK-NEXT: shr.s32 %r2, %r1, 31; |
| 78 | +; CHECK-NEXT: and.b32 %r3, %r2, 15; |
| 79 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| 80 | +; CHECK-NEXT: ret; |
47 | 81 | %val0 = ashr i32 %a, 31
|
48 | 82 | %val1 = and i32 %val0, 15
|
49 | 83 | ret i32 %val1
|
50 | 84 | }
|
51 | 85 |
|
52 |
| -; CHECK-LABEL: no_bfe_on_64bit_overflow |
; Negative test, 64-bit: (ashr %a, 63) masked with 7. A 3-bit field starting
; at bit 63 would extend past the 64-bit width, so the checks expect the
; plain shr.s64 / and.b64 pair rather than a bfe.u64 with operands 63, 3.
53 | 86 | define i64 @no_bfe_on_64bit_overflow(i64 %a) {
|
54 |
| -; CHECK-NOT: bfe.u64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, 63, 3 |
| 87 | +; CHECK-LABEL: no_bfe_on_64bit_overflow( |
| 88 | +; CHECK: { |
| 89 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 90 | +; CHECK-EMPTY: |
| 91 | +; CHECK-NEXT: // %bb.0: |
| 92 | +; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_param_0]; |
| 93 | +; CHECK-NEXT: shr.s64 %rd2, %rd1, 63; |
| 94 | +; CHECK-NEXT: and.b64 %rd3, %rd2, 7; |
| 95 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| 96 | +; CHECK-NEXT: ret; |
55 | 97 | %val0 = ashr i64 %a, 63
|
56 | 98 | %val1 = and i64 %val0, 7
|
57 | 99 | ret i64 %val1
|
58 | 100 | }
|
59 | 101 |
|
60 |
| -; CHECK-LABEL: no_bfe_on_64bit_overflow_shr_and_pair |
; Same IR as @no_bfe_on_64bit_overflow: (ashr %a, 63) masked with 7.
; The removed hand-written checks matched the shr.s64 / and.b64 pair
; positively; the autogenerated checks pin the same two instructions.
61 | 102 | define i64 @no_bfe_on_64bit_overflow_shr_and_pair(i64 %a) {
|
62 |
| -; CHECK: shr.s64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, 63 |
63 |
| -; CHECK: and.b64 %rd{{[0-9]+}}, %rd{{[0-9]+}}, 7 |
| 103 | +; CHECK-LABEL: no_bfe_on_64bit_overflow_shr_and_pair( |
| 104 | +; CHECK: { |
| 105 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 106 | +; CHECK-EMPTY: |
| 107 | +; CHECK-NEXT: // %bb.0: |
| 108 | +; CHECK-NEXT: ld.param.u64 %rd1, [no_bfe_on_64bit_overflow_shr_and_pair_param_0]; |
| 109 | +; CHECK-NEXT: shr.s64 %rd2, %rd1, 63; |
| 110 | +; CHECK-NEXT: and.b64 %rd3, %rd2, 7; |
| 111 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| 112 | +; CHECK-NEXT: ret; |
64 | 113 | %val0 = ashr i64 %a, 63
|
65 | 114 | %val1 = and i64 %val0, 7
|
66 | 115 | ret i64 %val1
|
67 | 116 | }
|
| 117 | + |
; Mask-then-shift form: %x is ANDed with -65536 (clears the low 16 bits) and
; then arithmetically shifted right by 20, leaving the sign-extended top
; 12 bits. The two RUN configurations expect different code:
;   CHECK-O3 (run without -O0): a 16-bit load from param offset +2 followed
;     by bfe.s32 with operands 4, 12.
;   CHECK-O0 (run with -O0): a full 32-bit load followed by bfe.s32 with
;     operands 20, 12.
| 118 | +define i32 @bfe_ashr_signed_32(i32 %x) { |
| 119 | +; CHECK-O3-LABEL: bfe_ashr_signed_32( |
| 120 | +; CHECK-O3: { |
| 121 | +; CHECK-O3-NEXT: .reg .b32 %r<3>; |
| 122 | +; CHECK-O3-EMPTY: |
| 123 | +; CHECK-O3-NEXT: // %bb.0: |
| 124 | +; CHECK-O3-NEXT: ld.param.u16 %r1, [bfe_ashr_signed_32_param_0+2]; |
| 125 | +; CHECK-O3-NEXT: bfe.s32 %r2, %r1, 4, 12; |
| 126 | +; CHECK-O3-NEXT: st.param.b32 [func_retval0], %r2; |
| 127 | +; CHECK-O3-NEXT: ret; |
| 128 | +; |
| 129 | +; CHECK-O0-LABEL: bfe_ashr_signed_32( |
| 130 | +; CHECK-O0: { |
| 131 | +; CHECK-O0-NEXT: .reg .b32 %r<3>; |
| 132 | +; CHECK-O0-EMPTY: |
| 133 | +; CHECK-O0-NEXT: // %bb.0: |
| 134 | +; CHECK-O0-NEXT: ld.param.u32 %r1, [bfe_ashr_signed_32_param_0]; |
| 135 | +; CHECK-O0-NEXT: bfe.s32 %r2, %r1, 20, 12; |
| 136 | +; CHECK-O0-NEXT: st.param.b32 [func_retval0], %r2; |
| 137 | +; CHECK-O0-NEXT: ret; |
| 138 | + %and = and i32 %x, -65536 |
| 139 | + %shr = ashr exact i32 %and, 20 |
| 140 | + ret i32 %shr |
| 141 | +} |
| 142 | + |
; Mask-then-shift form with a non-negative mask: %x is ANDed with 2047
; (bits 0..10) and then shifted right by 5, leaving bits 5..10. Both RUN
; configurations share the CHECK prefix and expect bfe.u32 with operands 5, 6.
| 143 | +define i32 @bfe_ashr_unsigned_32(i32 %x) { |
| 144 | +; CHECK-LABEL: bfe_ashr_unsigned_32( |
| 145 | +; CHECK: { |
| 146 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 147 | +; CHECK-EMPTY: |
| 148 | +; CHECK-NEXT: // %bb.0: |
| 149 | +; CHECK-NEXT: ld.param.u32 %r1, [bfe_ashr_unsigned_32_param_0]; |
| 150 | +; CHECK-NEXT: bfe.u32 %r2, %r1, 5, 6; |
| 151 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 152 | +; CHECK-NEXT: ret; |
| 153 | + %and = and i32 %x, 2047 |
| 154 | + %shr = ashr exact i32 %and, 5 |
| 155 | + ret i32 %shr |
| 156 | +} |
| 157 | + |
; 64-bit mask-then-shift form: %x is ANDed with -65536 (clears the low
; 16 bits) and then arithmetically shifted right by 16, leaving the
; sign-extended upper 48 bits. The checks expect bfe.s64 with operands 16, 48
; under both RUN configurations.
| 158 | +define i64 @bfe_ashr_signed_64(i64 %x) { |
| 159 | +; CHECK-LABEL: bfe_ashr_signed_64( |
| 160 | +; CHECK: { |
| 161 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 162 | +; CHECK-EMPTY: |
| 163 | +; CHECK-NEXT: // %bb.0: |
| 164 | +; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_signed_64_param_0]; |
| 165 | +; CHECK-NEXT: bfe.s64 %rd2, %rd1, 16, 48; |
| 166 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| 167 | +; CHECK-NEXT: ret; |
| 168 | + %and = and i64 %x, -65536 |
| 169 | + %shr = ashr exact i64 %and, 16 |
| 170 | + ret i64 %shr |
| 171 | +} |
| 172 | + |
; 64-bit mask-then-shift form with a non-negative mask: %x is ANDed with
; 2047 (bits 0..10) and then shifted right by 5, leaving bits 5..10. The
; checks expect bfe.u64 with operands 5, 6 under both RUN configurations.
| 173 | +define i64 @bfe_ashr_unsigned_64(i64 %x) { |
| 174 | +; CHECK-LABEL: bfe_ashr_unsigned_64( |
| 175 | +; CHECK: { |
| 176 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 177 | +; CHECK-EMPTY: |
| 178 | +; CHECK-NEXT: // %bb.0: |
| 179 | +; CHECK-NEXT: ld.param.u64 %rd1, [bfe_ashr_unsigned_64_param_0]; |
| 180 | +; CHECK-NEXT: bfe.u64 %rd2, %rd1, 5, 6; |
| 181 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; |
| 182 | +; CHECK-NEXT: ret; |
| 183 | + %and = and i64 %x, 2047 |
| 184 | + %shr = ashr exact i64 %and, 5 |
| 185 | + ret i64 %shr |
| 186 | +} |
| 187 | + |
; i128 argument truncated to i32, ANDed with -32768 (clears the low 15 bits),
; then arithmetically shifted right by 15. The checks expect the parameter to
; be loaded as two 64-bit values, the first (%rd1) converted to 32 bits, and
; a bfe.s32 with operands 15, 17 applied to the result.
| 188 | +define i32 @bfe3(i128 %a) { |
| 189 | +; CHECK-LABEL: bfe3( |
| 190 | +; CHECK: { |
| 191 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 192 | +; CHECK-NEXT: .reg .b64 %rd<3>; |
| 193 | +; CHECK-EMPTY: |
| 194 | +; CHECK-NEXT: // %bb.0: |
| 195 | +; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe3_param_0]; |
| 196 | +; CHECK-NEXT: cvt.u32.u64 %r1, %rd1; |
| 197 | +; CHECK-NEXT: bfe.s32 %r2, %r1, 15, 17; |
| 198 | +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| 199 | +; CHECK-NEXT: ret; |
| 200 | + %trunc = trunc i128 %a to i32 |
| 201 | + %and = and i32 %trunc, -32768 |
| 202 | + %shr = ashr exact i32 %and, 15 |
| 203 | + ret i32 %shr |
| 204 | +} |
| 205 | + |
; i128 argument truncated to i64, ANDed with -131072 (clears the low 17 bits),
; then arithmetically shifted right by 17. The checks expect the parameter to
; be loaded as two 64-bit values and a bfe.s64 with operands 17, 47 applied
; directly to the first one (%rd1), with no separate conversion instruction.
| 206 | +define i64 @bfe4(i128 %a) { |
| 207 | +; CHECK-LABEL: bfe4( |
| 208 | +; CHECK: { |
| 209 | +; CHECK-NEXT: .reg .b64 %rd<4>; |
| 210 | +; CHECK-EMPTY: |
| 211 | +; CHECK-NEXT: // %bb.0: |
| 212 | +; CHECK-NEXT: ld.param.v2.u64 {%rd1, %rd2}, [bfe4_param_0]; |
| 213 | +; CHECK-NEXT: bfe.s64 %rd3, %rd1, 17, 47; |
| 214 | +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; |
| 215 | +; CHECK-NEXT: ret; |
| 216 | + %trunc = trunc i128 %a to i64 |
| 217 | + %and = and i64 %trunc, -131072 |
| 218 | + %shr = ashr exact i64 %and, 17 |
| 219 | + ret i64 %shr |
| 220 | +} |
| 221 | + |
0 commit comments