|
1 |
| -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED,PREGFX10,PREGFX10-UNPACKED %s |
2 |
| -; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s |
3 |
| -; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,PREGFX10,PREGFX10-PACKED %s |
4 |
| -; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s |
5 |
| -; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED,GFX10,GFX10-PACKED %s |
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| 2 | +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s |
| 3 | +; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s |
| 4 | +; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s |
| 5 | +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s |
| 6 | +; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s |
6 | 7 |
|
7 |
| -; GCN-LABEL: {{^}}tbuffer_load_d16_x: |
8 |
| -; PREGFX10: tbuffer_load_format_d16_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
9 |
| -; GFX10: tbuffer_load_{{format_d16|d16_format}}_x v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] |
; Reads one half from the buffer described by the inreg %rsrc argument via
; @llvm.amdgcn.raw.tbuffer.load.f16 and returns it unchanged.
; The fourth call operand (i32 22) is presumably the combined format value --
; confirm against the intrinsic definition; the checks below show it printed as
; [BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] for the PREGFX10 prefixes and
; as [BUF_FMT_32_FLOAT] for the GFX10-PACKED and GFX11-PACKED prefixes.
; Every prefix expects one d16 "x" load into v0 followed by s_waitcnt vmcnt(0);
; only the GFX11-PACKED checks spell the mnemonic tbuffer_load_d16_format_x.
10 | 8 | define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
|
| 9 | +; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x: |
| 10 | +; PREGFX10-UNPACKED: ; %bb.0: ; %main_body |
| 11 | +; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb4,0xe8,0x00,0x00,0x00,0x80] |
| 12 | +; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 13 | +; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog |
| 14 | +; |
| 15 | +; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x: |
| 16 | +; PREGFX10-PACKED: ; %bb.0: ; %main_body |
| 17 | +; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
| 18 | +; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 19 | +; PREGFX10-PACKED-NEXT: ; return to shader part epilog |
| 20 | +; |
| 21 | +; GFX10-PACKED-LABEL: tbuffer_load_d16_x: |
| 22 | +; GFX10-PACKED: ; %bb.0: ; %main_body |
| 23 | +; GFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 24 | +; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 25 | +; GFX10-PACKED-NEXT: ; return to shader part epilog |
| 26 | +; |
| 27 | +; GFX11-PACKED-LABEL: tbuffer_load_d16_x: |
| 28 | +; GFX11-PACKED: ; %bb.0: ; %main_body |
| 29 | +; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 30 | +; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 31 | +; GFX11-PACKED-NEXT: ; return to shader part epilog |
11 | 32 | main_body:
|
12 | 33 | %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
|
13 | 34 | ret half %data
|
14 | 35 | }
|
15 | 36 |
|
16 |
| -; GCN-LABEL: {{^}}tbuffer_load_d16_xy: |
17 |
| -; PREGFX10-UNPACKED: tbuffer_load_format_d16_xy v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
18 |
| -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] |
19 |
| - |
20 |
| -; PREGFX10-PACKED: tbuffer_load_format_d16_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
21 |
| -; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xy v[[FULL:[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] |
22 |
| -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[FULL]] |
; Loads a <2 x half> through @llvm.amdgcn.raw.tbuffer.load.v2f16 from the
; buffer described by the inreg %rsrc argument, then returns element 1 only.
; The checks below show two register layouts for the result:
;  - PREGFX10-UNPACKED expects the load to write v[0:1] and element 1 to be
;    copied out of v1 with v_mov_b32.
;  - the PACKED prefixes expect the load to write only v0 and element 1 to be
;    obtained with a 16-bit right shift of v0 (v_lshrrev_b32).
; The GFX11-PACKED checks use the mnemonic tbuffer_load_d16_format_xy; the
; other prefixes use tbuffer_load_format_d16_xy.
23 | 37 | define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
|
| 38 | +; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy: |
| 39 | +; PREGFX10-UNPACKED: ; %bb.0: ; %main_body |
| 40 | +; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb4,0xe8,0x00,0x00,0x00,0x80] |
| 41 | +; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 42 | +; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e] |
| 43 | +; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog |
| 44 | +; |
| 45 | +; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy: |
| 46 | +; PREGFX10-PACKED: ; %bb.0: ; %main_body |
| 47 | +; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
| 48 | +; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 49 | +; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 50 | +; PREGFX10-PACKED-NEXT: ; return to shader part epilog |
| 51 | +; |
| 52 | +; GFX10-PACKED-LABEL: tbuffer_load_d16_xy: |
| 53 | +; GFX10-PACKED: ; %bb.0: ; %main_body |
| 54 | +; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 55 | +; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 56 | +; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 57 | +; GFX10-PACKED-NEXT: ; return to shader part epilog |
| 58 | +; |
| 59 | +; GFX11-PACKED-LABEL: tbuffer_load_d16_xy: |
| 60 | +; GFX11-PACKED: ; %bb.0: ; %main_body |
| 61 | +; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 62 | +; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 63 | +; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0 |
| 64 | +; GFX11-PACKED-NEXT: ; return to shader part epilog |
24 | 65 | main_body:
|
25 | 66 | %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
|
26 | 67 | %elt = extractelement <2 x half> %data, i32 1
|
27 | 68 | ret half %elt
|
28 | 69 | }
|
29 | 70 |
|
30 |
| -; GCN-LABEL: {{^}}tbuffer_load_d16_xyz: |
31 |
| -; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
32 |
| -; GFX10-UNPACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] |
33 |
| -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] |
34 |
| - |
35 |
| -; PREGFX10-PACKED: tbuffer_load_format_d16_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
36 |
| -; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xyz v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] |
37 |
| -; PACKED: v_mov_b{{16|32}}_e32 v{{[0-9]+(\.(l|h))?}}, v[[HI]]{{(\.(l,h))?}} |
; Loads a <3 x half> through @llvm.amdgcn.raw.tbuffer.load.v3f16 from the
; buffer described by the inreg %rsrc argument, then returns element 2 only.
; The checks below show two register layouts for the result:
;  - PREGFX10-UNPACKED expects the load to write v[0:2] and element 2 to be
;    copied out of v2 with v_mov_b32.
;  - the PACKED prefixes expect the load to write v[0:1] and element 2 to be
;    copied out of v1 with v_mov_b32 (no shift is expected for this element).
; The GFX11-PACKED checks use the mnemonic tbuffer_load_d16_format_xyz; the
; other prefixes use tbuffer_load_format_d16_xyz.
38 | 71 | define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
|
| 72 | +; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz: |
| 73 | +; PREGFX10-UNPACKED: ; %bb.0: ; %main_body |
| 74 | +; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyz v[0:2], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb5,0xe8,0x00,0x00,0x00,0x80] |
| 75 | +; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 76 | +; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e] |
| 77 | +; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog |
| 78 | +; |
| 79 | +; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz: |
| 80 | +; PREGFX10-PACKED: ; %bb.0: ; %main_body |
| 81 | +; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
| 82 | +; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 83 | +; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1 |
| 84 | +; PREGFX10-PACKED-NEXT: ; return to shader part epilog |
| 85 | +; |
| 86 | +; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz: |
| 87 | +; GFX10-PACKED: ; %bb.0: ; %main_body |
| 88 | +; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 89 | +; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 90 | +; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1 |
| 91 | +; GFX10-PACKED-NEXT: ; return to shader part epilog |
| 92 | +; |
| 93 | +; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz: |
| 94 | +; GFX11-PACKED: ; %bb.0: ; %main_body |
| 95 | +; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 96 | +; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 97 | +; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1 |
| 98 | +; GFX11-PACKED-NEXT: ; return to shader part epilog |
39 | 99 | main_body:
|
40 | 100 | %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
|
41 | 101 | %elt = extractelement <3 x half> %data, i32 2
|
42 | 102 | ret half %elt
|
43 | 103 | }
|
44 | 104 |
|
45 |
| -; GCN-LABEL: {{^}}tbuffer_load_d16_xyzw: |
46 |
| -; PREGFX10-UNPACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
47 |
| -; GFX10-UNPACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] |
48 |
| -; UNPACKED: v_mov_b32_e32 v{{[0-9]+}}, v[[HI]] |
49 |
| - |
50 |
| -; PREGFX10-PACKED: tbuffer_load_format_d16_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
51 |
| -; GFX10-PACKED: tbuffer_load_{{format_d16|d16_format}}_xyzw v[{{[0-9]+}}:[[HI:[0-9]+]]], off, s[{{[0-9]+:[0-9]+}}], 0 format:[BUF_FMT_32_FLOAT] |
52 |
| -; PACKED: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v[[HI]] |
53 | 105 | define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
|
| 106 | +; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw: |
| 107 | +; PREGFX10-UNPACKED: ; %bb.0: ; %main_body |
| 108 | +; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb5,0xe8,0x00,0x00,0x00,0x80] |
| 109 | +; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] |
| 110 | +; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e] |
| 111 | +; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog |
| 112 | +; |
| 113 | +; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw: |
| 114 | +; PREGFX10-PACKED: ; %bb.0: ; %main_body |
| 115 | +; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] |
| 116 | +; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 117 | +; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 118 | +; PREGFX10-PACKED-NEXT: ; return to shader part epilog |
| 119 | +; |
| 120 | +; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw: |
| 121 | +; GFX10-PACKED: ; %bb.0: ; %main_body |
| 122 | +; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 123 | +; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 124 | +; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 125 | +; GFX10-PACKED-NEXT: ; return to shader part epilog |
| 126 | +; |
| 127 | +; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw: |
| 128 | +; GFX11-PACKED: ; %bb.0: ; %main_body |
| 129 | +; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] |
| 130 | +; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0) |
| 131 | +; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1 |
| 132 | +; GFX11-PACKED-NEXT: ; return to shader part epilog |
54 | 133 | main_body:
|
55 | 134 | %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
|
56 | 135 | %elt = extractelement <4 x half> %data, i32 3
|
|
0 commit comments