|
2 | 2 | ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=SI %s
|
3 | 3 | ; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=VI %s
|
4 | 4 | ; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11 %s
|
| 5 | +; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s |
5 | 6 |
|
6 | 7 | define amdgpu_kernel void @fadd_f16(
|
7 | 8 | ; SI-LABEL: fadd_f16:
|
@@ -78,6 +79,32 @@ define amdgpu_kernel void @fadd_f16(
|
78 | 79 | ; GFX11-NEXT: s_nop 0
|
79 | 80 | ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
80 | 81 | ; GFX11-NEXT: s_endpgm
|
| 82 | +; |
| 83 | +; GFX11-FAKE16-LABEL: fadd_f16: |
| 84 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 85 | +; GFX11-FAKE16-NEXT: s_clause 0x1 |
| 86 | +; GFX11-FAKE16-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 |
| 87 | +; GFX11-FAKE16-NEXT: s_load_b64 s[0:1], s[0:1], 0x34 |
| 88 | +; GFX11-FAKE16-NEXT: s_mov_b32 s11, 0x31016000 |
| 89 | +; GFX11-FAKE16-NEXT: s_mov_b32 s10, -1 |
| 90 | +; GFX11-FAKE16-NEXT: s_mov_b32 s3, s11 |
| 91 | +; GFX11-FAKE16-NEXT: s_mov_b32 s2, s10 |
| 92 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 93 | +; GFX11-FAKE16-NEXT: s_mov_b32 s8, s4 |
| 94 | +; GFX11-FAKE16-NEXT: s_mov_b32 s9, s5 |
| 95 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s6 |
| 96 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s7 |
| 97 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s10 |
| 98 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, s11 |
| 99 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc |
| 100 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 101 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[0:3], 0 glc dlc |
| 102 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 103 | +; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, v0, v1 |
| 104 | +; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[8:11], 0 |
| 105 | +; GFX11-FAKE16-NEXT: s_nop 0 |
| 106 | +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 107 | +; GFX11-FAKE16-NEXT: s_endpgm |
81 | 108 | ptr addrspace(1) %r,
|
82 | 109 | ptr addrspace(1) %a,
|
83 | 110 | ptr addrspace(1) %b) {
|
@@ -147,6 +174,26 @@ define amdgpu_kernel void @fadd_f16_imm_a(
|
147 | 174 | ; GFX11-NEXT: s_nop 0
|
148 | 175 | ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
149 | 176 | ; GFX11-NEXT: s_endpgm
|
| 177 | +; |
| 178 | +; GFX11-FAKE16-LABEL: fadd_f16_imm_a: |
| 179 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 180 | +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 |
| 181 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 182 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 |
| 183 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 184 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 |
| 185 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 |
| 186 | +; GFX11-FAKE16-NEXT: s_mov_b32 s0, s2 |
| 187 | +; GFX11-FAKE16-NEXT: s_mov_b32 s1, s3 |
| 188 | +; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6 |
| 189 | +; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7 |
| 190 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[0:3], 0 |
| 191 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 192 | +; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, 1.0, v0 |
| 193 | +; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0 |
| 194 | +; GFX11-FAKE16-NEXT: s_nop 0 |
| 195 | +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 196 | +; GFX11-FAKE16-NEXT: s_endpgm |
150 | 197 | ptr addrspace(1) %r,
|
151 | 198 | ptr addrspace(1) %b) {
|
152 | 199 | entry:
|
@@ -214,6 +261,26 @@ define amdgpu_kernel void @fadd_f16_imm_b(
|
214 | 261 | ; GFX11-NEXT: s_nop 0
|
215 | 262 | ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
216 | 263 | ; GFX11-NEXT: s_endpgm
|
| 264 | +; |
| 265 | +; GFX11-FAKE16-LABEL: fadd_f16_imm_b: |
| 266 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 267 | +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 |
| 268 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 269 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 |
| 270 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 271 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 |
| 272 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 |
| 273 | +; GFX11-FAKE16-NEXT: s_mov_b32 s0, s2 |
| 274 | +; GFX11-FAKE16-NEXT: s_mov_b32 s1, s3 |
| 275 | +; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6 |
| 276 | +; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7 |
| 277 | +; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[0:3], 0 |
| 278 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 279 | +; GFX11-FAKE16-NEXT: v_add_f16_e32 v0, 2.0, v0 |
| 280 | +; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0 |
| 281 | +; GFX11-FAKE16-NEXT: s_nop 0 |
| 282 | +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 283 | +; GFX11-FAKE16-NEXT: s_endpgm |
217 | 284 | ptr addrspace(1) %r,
|
218 | 285 | ptr addrspace(1) %a) {
|
219 | 286 | entry:
|
@@ -303,6 +370,27 @@ define amdgpu_kernel void @fadd_v2f16(
|
303 | 370 | ; GFX11-NEXT: s_nop 0
|
304 | 371 | ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
305 | 372 | ; GFX11-NEXT: s_endpgm
|
| 373 | +; |
| 374 | +; GFX11-FAKE16-LABEL: fadd_v2f16: |
| 375 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 376 | +; GFX11-FAKE16-NEXT: s_clause 0x1 |
| 377 | +; GFX11-FAKE16-NEXT: s_load_b128 s[4:7], s[0:1], 0x24 |
| 378 | +; GFX11-FAKE16-NEXT: s_load_b64 s[8:9], s[0:1], 0x34 |
| 379 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 380 | +; GFX11-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 |
| 381 | +; GFX11-FAKE16-NEXT: s_mov_b32 s2, -1 |
| 382 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 383 | +; GFX11-FAKE16-NEXT: s_clause 0x1 |
| 384 | +; GFX11-FAKE16-NEXT: global_load_b32 v1, v0, s[6:7] |
| 385 | +; GFX11-FAKE16-NEXT: global_load_b32 v0, v0, s[8:9] |
| 386 | +; GFX11-FAKE16-NEXT: s_mov_b32 s0, s4 |
| 387 | +; GFX11-FAKE16-NEXT: s_mov_b32 s1, s5 |
| 388 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 389 | +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v1, v0 |
| 390 | +; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 |
| 391 | +; GFX11-FAKE16-NEXT: s_nop 0 |
| 392 | +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 393 | +; GFX11-FAKE16-NEXT: s_endpgm |
306 | 394 | ptr addrspace(1) %r,
|
307 | 395 | ptr addrspace(1) %a,
|
308 | 396 | ptr addrspace(1) %b) {
|
@@ -382,6 +470,23 @@ define amdgpu_kernel void @fadd_v2f16_imm_a(
|
382 | 470 | ; GFX11-NEXT: s_nop 0
|
383 | 471 | ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
384 | 472 | ; GFX11-NEXT: s_endpgm
|
| 473 | +; |
| 474 | +; GFX11-FAKE16-LABEL: fadd_v2f16_imm_a: |
| 475 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 476 | +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 |
| 477 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 478 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 479 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 |
| 480 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 481 | +; GFX11-FAKE16-NEXT: global_load_b32 v0, v0, s[2:3] |
| 482 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 |
| 483 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 |
| 484 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 485 | +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, 0x40003c00, v0 |
| 486 | +; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 |
| 487 | +; GFX11-FAKE16-NEXT: s_nop 0 |
| 488 | +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 489 | +; GFX11-FAKE16-NEXT: s_endpgm |
385 | 490 | ptr addrspace(1) %r,
|
386 | 491 | ptr addrspace(1) %b) {
|
387 | 492 | entry:
|
@@ -458,6 +563,23 @@ define amdgpu_kernel void @fadd_v2f16_imm_b(
|
458 | 563 | ; GFX11-NEXT: s_nop 0
|
459 | 564 | ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
|
460 | 565 | ; GFX11-NEXT: s_endpgm
|
| 566 | +; |
| 567 | +; GFX11-FAKE16-LABEL: fadd_v2f16_imm_b: |
| 568 | +; GFX11-FAKE16: ; %bb.0: ; %entry |
| 569 | +; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 |
| 570 | +; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 571 | +; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 |
| 572 | +; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1 |
| 573 | +; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 574 | +; GFX11-FAKE16-NEXT: global_load_b32 v0, v0, s[2:3] |
| 575 | +; GFX11-FAKE16-NEXT: s_mov_b32 s4, s0 |
| 576 | +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s1 |
| 577 | +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 578 | +; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, 0x3c004000, v0 |
| 579 | +; GFX11-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 |
| 580 | +; GFX11-FAKE16-NEXT: s_nop 0 |
| 581 | +; GFX11-FAKE16-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) |
| 582 | +; GFX11-FAKE16-NEXT: s_endpgm |
461 | 583 | ptr addrspace(1) %r,
|
462 | 584 | ptr addrspace(1) %a) {
|
463 | 585 | entry:
|
|
0 commit comments