|
1 |
| -; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=UNPACKED %s |
2 |
| -; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s |
3 |
| -; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=PACKED %s |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=UNPACKED %s |
| 3 | +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PACKED %s |
| 4 | +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PACKED %s |
4 | 5 |
|
5 |
| -; GCN-LABEL: {{^}}buffer_load_format_d16_x: |
6 |
| -; GCN: buffer_load_format_d16_x v{{[0-9]+}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
7 | 6 | define amdgpu_ps half @buffer_load_format_d16_x(ptr addrspace(8) inreg %rsrc) {
|
| 7 | +; UNPACKED-LABEL: buffer_load_format_d16_x: |
| 8 | +; UNPACKED: ; %bb.0: ; %main_body |
| 9 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] |
| 10 | +; UNPACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x20,0xe0,0x00,0x00,0x00,0x80] |
| 11 | +; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 12 | +; UNPACKED-NEXT: ; return to shader part epilog |
| 13 | +; |
| 14 | +; PACKED-LABEL: buffer_load_format_d16_x: |
| 15 | +; PACKED: ; %bb.0: ; %main_body |
| 16 | +; PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 17 | +; PACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen |
| 18 | +; PACKED-NEXT: s_waitcnt vmcnt(0) |
| 19 | +; PACKED-NEXT: ; return to shader part epilog |
8 | 20 | main_body:
|
9 | 21 | %data = call half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
|
10 | 22 | ret half %data
|
11 | 23 | }
|
12 | 24 |
|
13 |
| -; GCN-LABEL: {{^}}buffer_load_format_d16_xy: |
14 |
| -; UNPACKED: buffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
15 |
| -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] |
16 |
| - |
17 |
| -; PACKED: buffer_load_format_d16_xy v[[FULL:[0-9]+]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
18 |
| -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]] |
19 | 25 | define amdgpu_ps half @buffer_load_format_d16_xy(ptr addrspace(8) inreg %rsrc) {
|
| 26 | +; UNPACKED-LABEL: buffer_load_format_d16_xy: |
| 27 | +; UNPACKED: ; %bb.0: ; %main_body |
| 28 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] |
| 29 | +; UNPACKED-NEXT: buffer_load_format_d16_xy v[0:1], v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x24,0xe0,0x00,0x00,0x00,0x80] |
| 30 | +; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 31 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e] |
| 32 | +; UNPACKED-NEXT: ; return to shader part epilog |
| 33 | +; |
| 34 | +; PACKED-LABEL: buffer_load_format_d16_xy: |
| 35 | +; PACKED: ; %bb.0: ; %main_body |
| 36 | +; PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 37 | +; PACKED-NEXT: buffer_load_format_d16_xy v0, v0, s[0:3], 0 idxen |
| 38 | +; PACKED-NEXT: s_waitcnt vmcnt(0) |
| 39 | +; PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 40 | +; PACKED-NEXT: ; return to shader part epilog |
20 | 41 | main_body:
|
21 | 42 | %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
|
22 | 43 | %elt = extractelement <2 x half> %data, i32 1
|
23 | 44 | ret half %elt
|
24 | 45 | }
|
25 | 46 |
|
26 |
| -; GCN-LABEL: {{^}}buffer_load_format_d16_xyz: |
27 |
| -; UNPACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
28 |
| -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] |
29 |
| - |
30 |
| -; PACKED: buffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
31 |
| -; PACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] |
32 | 47 | define amdgpu_ps half @buffer_load_format_d16_xyz(ptr addrspace(8) inreg %rsrc) {
|
| 48 | +; UNPACKED-LABEL: buffer_load_format_d16_xyz: |
| 49 | +; UNPACKED: ; %bb.0: ; %main_body |
| 50 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] |
| 51 | +; UNPACKED-NEXT: buffer_load_format_d16_xyz v[0:2], v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x28,0xe0,0x00,0x00,0x00,0x80] |
| 52 | +; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 53 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e] |
| 54 | +; UNPACKED-NEXT: ; return to shader part epilog |
| 55 | +; |
| 56 | +; PACKED-LABEL: buffer_load_format_d16_xyz: |
| 57 | +; PACKED: ; %bb.0: ; %main_body |
| 58 | +; PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 59 | +; PACKED-NEXT: buffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 idxen |
| 60 | +; PACKED-NEXT: s_waitcnt vmcnt(0) |
| 61 | +; PACKED-NEXT: v_mov_b32_e32 v0, v1 |
| 62 | +; PACKED-NEXT: ; return to shader part epilog |
33 | 63 | main_body:
|
34 | 64 | %data = call <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
|
35 | 65 | %elt = extractelement <3 x half> %data, i32 2
|
36 | 66 | ret half %elt
|
37 | 67 | }
|
38 | 68 |
|
39 |
| -; GCN-LABEL: {{^}}buffer_load_format_d16_xyzw: |
40 |
| -; UNPACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
41 |
| -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] |
42 |
| - |
43 |
| -; PACKED: buffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
44 |
| -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] |
45 | 69 | define amdgpu_ps half @buffer_load_format_d16_xyzw(ptr addrspace(8) inreg %rsrc) {
|
| 70 | +; UNPACKED-LABEL: buffer_load_format_d16_xyzw: |
| 71 | +; UNPACKED: ; %bb.0: ; %main_body |
| 72 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] |
| 73 | +; UNPACKED-NEXT: buffer_load_format_d16_xyzw v[0:3], v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x2c,0xe0,0x00,0x00,0x00,0x80] |
| 74 | +; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 75 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e] |
| 76 | +; UNPACKED-NEXT: ; return to shader part epilog |
| 77 | +; |
| 78 | +; PACKED-LABEL: buffer_load_format_d16_xyzw: |
| 79 | +; PACKED: ; %bb.0: ; %main_body |
| 80 | +; PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 81 | +; PACKED-NEXT: buffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 idxen |
| 82 | +; PACKED-NEXT: s_waitcnt vmcnt(0) |
| 83 | +; PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 84 | +; PACKED-NEXT: ; return to shader part epilog |
46 | 85 | main_body:
|
47 | 86 | %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
|
48 | 87 | %elt = extractelement <4 x half> %data, i32 3
|
49 | 88 | ret half %elt
|
50 | 89 | }
|
51 | 90 |
|
52 |
| -; GCN-LABEL: {{^}}buffer_load_format_i16_x: |
53 |
| -; GCN: buffer_load_format_d16_x v{{[0-9]+}}, {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen |
54 | 91 | define amdgpu_ps half @buffer_load_format_i16_x(ptr addrspace(8) inreg %rsrc) {
|
| 92 | +; UNPACKED-LABEL: buffer_load_format_i16_x: |
| 93 | +; UNPACKED: ; %bb.0: ; %main_body |
| 94 | +; UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] |
| 95 | +; UNPACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen ; encoding: [0x00,0x20,0x20,0xe0,0x00,0x00,0x00,0x80] |
| 96 | +; UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 97 | +; UNPACKED-NEXT: ; return to shader part epilog |
| 98 | +; |
| 99 | +; PACKED-LABEL: buffer_load_format_i16_x: |
| 100 | +; PACKED: ; %bb.0: ; %main_body |
| 101 | +; PACKED-NEXT: v_mov_b32_e32 v0, 0 |
| 102 | +; PACKED-NEXT: buffer_load_format_d16_x v0, v0, s[0:3], 0 idxen |
| 103 | +; PACKED-NEXT: s_waitcnt vmcnt(0) |
| 104 | +; PACKED-NEXT: ; return to shader part epilog |
55 | 105 | main_body:
|
56 | 106 | %data = call i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
|
57 | 107 | %fdata = bitcast i16 %data to half
|
|
0 commit comments