|
3 | 3 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s
|
4 | 4 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX10,GFX10_DEFAULT %s
|
5 | 5 | ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX10,FLATSCR_GFX10 %s
|
6 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
7 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
8 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s |
9 |
| -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch,-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode < %s | FileCheck -check-prefixes=GFX11 %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -mattr=-unaligned-access-mode -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GFX11 %s |
10 | 8 |
|
11 | 9 | define <2 x half> @chain_hi_to_lo_private() {
|
12 | 10 | ; GFX900-LABEL: chain_hi_to_lo_private:
|
@@ -158,23 +156,14 @@ define <2 x half> @chain_hi_to_lo_arithmatic(ptr addrspace(5) %base, half %in) {
|
158 | 156 | ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1
|
159 | 157 | ; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31]
|
160 | 158 | ;
|
161 |
| -; GFX11-TRUE16-LABEL: chain_hi_to_lo_arithmatic: |
162 |
| -; GFX11-TRUE16: ; %bb.0: ; %bb |
163 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
164 |
| -; GFX11-TRUE16-NEXT: v_add_f16_e32 v1.l, 1.0, v1.l |
165 |
| -; GFX11-TRUE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
166 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
167 |
| -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v1 |
168 |
| -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
169 |
| -; |
170 |
| -; GFX11-FAKE16-LABEL: chain_hi_to_lo_arithmatic: |
171 |
| -; GFX11-FAKE16: ; %bb.0: ; %bb |
172 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
173 |
| -; GFX11-FAKE16-NEXT: v_add_f16_e32 v1, 1.0, v1 |
174 |
| -; GFX11-FAKE16-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
175 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
176 |
| -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v1 |
177 |
| -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 159 | +; GFX11-LABEL: chain_hi_to_lo_arithmatic: |
| 160 | +; GFX11: ; %bb.0: ; %bb |
| 161 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 162 | +; GFX11-NEXT: v_add_f16_e32 v1, 1.0, v1 |
| 163 | +; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off |
| 164 | +; GFX11-NEXT: s_waitcnt vmcnt(0) |
| 165 | +; GFX11-NEXT: v_mov_b32_e32 v0, v1 |
| 166 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
178 | 167 | bb:
|
179 | 168 | %arith_lo = fadd half %in, 1.0
|
180 | 169 | %load_hi = load half, ptr addrspace(5) %base
|
@@ -372,31 +361,18 @@ define <2 x half> @chain_hi_to_lo_flat() {
|
372 | 361 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
373 | 362 | ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
374 | 363 | ;
|
375 |
| -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat: |
376 |
| -; GFX11-TRUE16: ; %bb.0: ; %bb |
377 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
378 |
| -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2 |
379 |
| -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
380 |
| -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
381 |
| -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 |
382 |
| -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 |
383 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
384 |
| -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
385 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
386 |
| -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
387 |
| -; |
388 |
| -; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat: |
389 |
| -; GFX11-FAKE16: ; %bb.0: ; %bb |
390 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
391 |
| -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2 |
392 |
| -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
393 |
| -; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
394 |
| -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0 |
395 |
| -; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0 |
396 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
397 |
| -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
398 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
399 |
| -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 364 | +; GFX11-LABEL: chain_hi_to_lo_flat: |
| 365 | +; GFX11: ; %bb.0: ; %bb |
| 366 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 367 | +; GFX11-NEXT: v_mov_b32_e32 v0, 2 |
| 368 | +; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
| 369 | +; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
| 370 | +; GFX11-NEXT: v_mov_b32_e32 v1, 0 |
| 371 | +; GFX11-NEXT: v_mov_b32_e32 v2, 0 |
| 372 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 373 | +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[1:2] |
| 374 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 375 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
400 | 376 | bb:
|
401 | 377 | %gep_lo = getelementptr inbounds half, ptr null, i64 1
|
402 | 378 | %load_lo = load half, ptr %gep_lo
|
@@ -427,23 +403,14 @@ define <2 x half> @chain_hi_to_lo_flat_different_bases(ptr %base_lo, ptr %base_h
|
427 | 403 | ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
428 | 404 | ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
429 | 405 | ;
|
430 |
| -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_different_bases: |
431 |
| -; GFX11-TRUE16: ; %bb.0: ; %bb |
432 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
433 |
| -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] |
434 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
435 |
| -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
436 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
437 |
| -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
438 |
| -; |
439 |
| -; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_different_bases: |
440 |
| -; GFX11-FAKE16: ; %bb.0: ; %bb |
441 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
442 |
| -; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] |
443 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
444 |
| -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
445 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
446 |
| -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 406 | +; GFX11-LABEL: chain_hi_to_lo_flat_different_bases: |
| 407 | +; GFX11: ; %bb.0: ; %bb |
| 408 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 409 | +; GFX11-NEXT: flat_load_u16 v0, v[0:1] |
| 410 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 411 | +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[2:3] |
| 412 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 413 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
447 | 414 | bb:
|
448 | 415 | %load_lo = load half, ptr %base_lo
|
449 | 416 | %load_hi = load half, ptr %base_hi
|
@@ -897,31 +864,17 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
|
897 | 864 | ; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
|
898 | 865 | ; GFX10-NEXT: s_setpc_b64 s[30:31]
|
899 | 866 | ;
|
900 |
| -; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat_other_dep: |
901 |
| -; GFX11-TRUE16: ; %bb.0: ; %bb |
902 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
903 |
| -; GFX11-TRUE16-NEXT: flat_load_d16_b16 v2, v[0:1] offset:2 glc dlc |
904 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) |
905 |
| -; GFX11-TRUE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
906 |
| -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1) |
907 |
| -; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l |
908 |
| -; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
909 |
| -; GFX11-TRUE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
910 |
| -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
911 |
| -; GFX11-TRUE16-NEXT: v_bfi_b32 v0, 0xffff, v1, v0 |
912 |
| -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] |
913 |
| -; |
914 |
| -; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat_other_dep: |
915 |
| -; GFX11-FAKE16: ; %bb.0: ; %bb |
916 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
917 |
| -; GFX11-FAKE16-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
918 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) |
919 |
| -; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
920 |
| -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
921 |
| -; GFX11-FAKE16-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
922 |
| -; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
923 |
| -; GFX11-FAKE16-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
924 |
| -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] |
| 867 | +; GFX11-LABEL: chain_hi_to_lo_flat_other_dep: |
| 868 | +; GFX11: ; %bb.0: ; %bb |
| 869 | +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) |
| 870 | +; GFX11-NEXT: flat_load_u16 v2, v[0:1] offset:2 glc dlc |
| 871 | +; GFX11-NEXT: s_waitcnt vmcnt(0) |
| 872 | +; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc |
| 873 | +; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
| 874 | +; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0] |
| 875 | +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 876 | +; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0 |
| 877 | +; GFX11-NEXT: s_setpc_b64 s[30:31] |
925 | 878 | bb:
|
926 | 879 | %gep_lo = getelementptr inbounds i16, ptr addrspace(0) %ptr, i64 1
|
927 | 880 | %load_lo = load volatile i16, ptr addrspace(0) %gep_lo
|
|
0 commit comments