|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
1 | 2 | ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
|
2 | 3 | ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
|
3 | 4 |
|
|
7 | 8 | target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
8 | 9 | target triple = "nvptx64-unknown-unknown"
|
9 | 10 |
|
10 |
| -; CHECK-LABEL: ex_zext |
; Scalar zero-extension case: loads one i8 through %data (declared
; noalias readonly), zero-extends it to i32 and stores the result to %res.
; The expected PTX below requires the load to be emitted as ld.global.nc.b8
; and the extension as cvt.u32.u8.
11 | 11 | define ptx_kernel void @ex_zext(ptr noalias readonly %data, ptr %res) {
|
| 12 | +; CHECK-LABEL: ex_zext( |
| 13 | +; CHECK: { |
| 14 | +; CHECK-NEXT: .reg .b16 %rs<2>; |
| 15 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 16 | +; CHECK-NEXT: .reg .b64 %rd<5>; |
| 17 | +; CHECK-EMPTY: |
| 18 | +; CHECK-NEXT: // %bb.0: // %entry |
| 19 | +; CHECK-NEXT: ld.param.b64 %rd1, [ex_zext_param_0]; |
| 20 | +; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| 21 | +; CHECK-NEXT: ld.param.b64 %rd3, [ex_zext_param_1]; |
| 22 | +; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| 23 | +; CHECK-NEXT: ld.global.nc.b8 %rs1, [%rd2]; |
| 24 | +; CHECK-NEXT: cvt.u32.u8 %r1, %rs1; |
| 25 | +; CHECK-NEXT: st.global.b32 [%rd4], %r1; |
| 26 | +; CHECK-NEXT: ret; |
12 | 27 | entry:
|
13 |
| -; CHECK: ld.global.nc.b8 |
14 | 28 | %val = load i8, ptr %data
|
15 |
| -; CHECK: cvt.u32.u8 |
16 | 29 | %valext = zext i8 %val to i32
|
17 | 30 | store i32 %valext, ptr %res
|
18 | 31 | ret void
|
19 | 32 | }
|
20 | 33 |
|
21 |
| -; CHECK-LABEL: ex_sext |
; Scalar sign-extension case: loads one i8 through %data (declared
; noalias readonly), sign-extends it to i32 and stores the result to %res.
; The expected PTX below requires the load to be emitted as ld.global.nc.b8
; and the extension as cvt.s32.s8.
22 | 34 | define ptx_kernel void @ex_sext(ptr noalias readonly %data, ptr %res) {
|
| 35 | +; CHECK-LABEL: ex_sext( |
| 36 | +; CHECK: { |
| 37 | +; CHECK-NEXT: .reg .b16 %rs<2>; |
| 38 | +; CHECK-NEXT: .reg .b32 %r<2>; |
| 39 | +; CHECK-NEXT: .reg .b64 %rd<5>; |
| 40 | +; CHECK-EMPTY: |
| 41 | +; CHECK-NEXT: // %bb.0: // %entry |
| 42 | +; CHECK-NEXT: ld.param.b64 %rd1, [ex_sext_param_0]; |
| 43 | +; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| 44 | +; CHECK-NEXT: ld.param.b64 %rd3, [ex_sext_param_1]; |
| 45 | +; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| 46 | +; CHECK-NEXT: ld.global.nc.b8 %rs1, [%rd2]; |
| 47 | +; CHECK-NEXT: cvt.s32.s8 %r1, %rs1; |
| 48 | +; CHECK-NEXT: st.global.b32 [%rd4], %r1; |
| 49 | +; CHECK-NEXT: ret; |
23 | 50 | entry:
|
24 |
| -; CHECK: ld.global.nc.b8 |
25 | 51 | %val = load i8, ptr %data
|
26 |
| -; CHECK: cvt.s32.s8 |
27 | 52 | %valext = sext i8 %val to i32
|
28 | 53 | store i32 %valext, ptr %res
|
29 | 54 | ret void
|
30 | 55 | }
|
31 | 56 |
|
32 |
| -; CHECK-LABEL: ex_zext_v2 |
; Vector zero-extension case: loads a <2 x i8> through %data (declared
; noalias readonly), zero-extends it to <2 x i32> and stores the result
; to %res. The expected PTX below requires a single ld.global.nc.v2.b8 load,
; one cvt.u32.u16 per element, and a single st.global.v2.b32 store.
33 | 57 | define ptx_kernel void @ex_zext_v2(ptr noalias readonly %data, ptr %res) {
|
| 58 | +; CHECK-LABEL: ex_zext_v2( |
| 59 | +; CHECK: { |
| 60 | +; CHECK-NEXT: .reg .b16 %rs<3>; |
| 61 | +; CHECK-NEXT: .reg .b32 %r<3>; |
| 62 | +; CHECK-NEXT: .reg .b64 %rd<5>; |
| 63 | +; CHECK-EMPTY: |
| 64 | +; CHECK-NEXT: // %bb.0: // %entry |
| 65 | +; CHECK-NEXT: ld.param.b64 %rd1, [ex_zext_v2_param_0]; |
| 66 | +; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| 67 | +; CHECK-NEXT: ld.param.b64 %rd3, [ex_zext_v2_param_1]; |
| 68 | +; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| 69 | +; CHECK-NEXT: ld.global.nc.v2.b8 {%rs1, %rs2}, [%rd2]; |
| 70 | +; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; |
| 71 | +; CHECK-NEXT: cvt.u32.u16 %r2, %rs1; |
| 72 | +; CHECK-NEXT: st.global.v2.b32 [%rd4], {%r2, %r1}; |
| 73 | +; CHECK-NEXT: ret; |
34 | 74 | entry:
|
35 |
| -; CHECK: ld.global.nc.v2.b8 |
36 | 75 | %val = load <2 x i8>, ptr %data
|
37 |
| -; CHECK: cvt.u32.u16 |
38 | 76 | %valext = zext <2 x i8> %val to <2 x i32>
|
39 | 77 | store <2 x i32> %valext, ptr %res
|
40 | 78 | ret void
|
41 | 79 | }
|
42 | 80 |
|
43 |
| -; CHECK-LABEL: ex_sext_v2 |
44 | 81 | define ptx_kernel void @ex_sext_v2(ptr noalias readonly %data, ptr %res) {
|
| 82 | +; CHECK-LABEL: ex_sext_v2( |
| 83 | +; CHECK: { |
| 84 | +; CHECK-NEXT: .reg .b16 %rs<3>; |
| 85 | +; CHECK-NEXT: .reg .b32 %r<5>; |
| 86 | +; CHECK-NEXT: .reg .b64 %rd<5>; |
| 87 | +; CHECK-EMPTY: |
| 88 | +; CHECK-NEXT: // %bb.0: // %entry |
| 89 | +; CHECK-NEXT: ld.param.b64 %rd1, [ex_sext_v2_param_0]; |
| 90 | +; CHECK-NEXT: cvta.to.global.u64 %rd2, %rd1; |
| 91 | +; CHECK-NEXT: ld.param.b64 %rd3, [ex_sext_v2_param_1]; |
| 92 | +; CHECK-NEXT: cvta.to.global.u64 %rd4, %rd3; |
| 93 | +; CHECK-NEXT: ld.global.nc.v2.b8 {%rs1, %rs2}, [%rd2]; |
| 94 | +; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; |
| 95 | +; CHECK-NEXT: cvt.s32.s8 %r2, %r1; |
| 96 | +; CHECK-NEXT: cvt.u32.u16 %r3, %rs1; |
| 97 | +; CHECK-NEXT: cvt.s32.s8 %r4, %r3; |
| 98 | +; CHECK-NEXT: st.global.v2.b32 [%rd4], {%r4, %r2}; |
| 99 | +; CHECK-NEXT: ret; |
45 | 100 | entry:
|
46 |
| -; CHECK: ld.global.nc.v2.b8 |
47 | 101 | %val = load <2 x i8>, ptr %data
|
48 |
| -; CHECK: cvt.s32.s8 |
49 | 102 | %valext = sext <2 x i8> %val to <2 x i32>
|
50 | 103 | store <2 x i32> %valext, ptr %res
|
51 | 104 | ret void
|
|
0 commit comments