|
7 | 7 | ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX9,GFX9-GISEL %s
|
8 | 8 | ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-SDAG %s
|
9 | 9 | ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL %s
|
10 |
| -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s |
11 |
| -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL %s |
| 10 | +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s |
| 11 | +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s |
| 12 | +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s |
| 13 | +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s |
12 | 14 |
|
13 | 15 | ; Test that add/sub with a constant is swapped to sub/add with negated
|
14 | 16 | ; constant to minimize code size.
|
@@ -1331,31 +1333,57 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
|
1331 | 1333 | ; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
|
1332 | 1334 | ; GFX10-GISEL-NEXT: s_endpgm
|
1333 | 1335 | ;
|
1334 |
| -; GFX11-SDAG-LABEL: v_test_i16_x_sub_64: |
1335 |
| -; GFX11-SDAG: ; %bb.0: |
1336 |
| -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1337 |
| -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1338 |
| -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1339 |
| -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1340 |
| -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
1341 |
| -; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3] |
1342 |
| -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1343 |
| -; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 |
1344 |
| -; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] |
1345 |
| -; GFX11-SDAG-NEXT: s_endpgm |
1346 |
| -; |
1347 |
| -; GFX11-GISEL-LABEL: v_test_i16_x_sub_64: |
1348 |
| -; GFX11-GISEL: ; %bb.0: |
1349 |
| -; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1350 |
| -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1351 |
| -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1352 |
| -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1353 |
| -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
1354 |
| -; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] |
1355 |
| -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1356 |
| -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
1357 |
| -; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] |
1358 |
| -; GFX11-GISEL-NEXT: s_endpgm |
| 1336 | +; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64: |
| 1337 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 1338 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1339 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1340 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1341 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1342 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1343 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] |
| 1344 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1345 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 |
| 1346 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] |
| 1347 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 1348 | +; |
| 1349 | +; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64: |
| 1350 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 1351 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1352 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1353 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1354 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1355 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1356 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 1357 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1358 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 |
| 1359 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 1360 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 1361 | +; |
| 1362 | +; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64: |
| 1363 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 1364 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1365 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1366 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1367 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1368 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1369 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 1370 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1371 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, v1.l, 0xffc0 |
| 1372 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 1373 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
| 1374 | +; |
| 1375 | +; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64: |
| 1376 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 1377 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1378 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1379 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1380 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1381 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1382 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] |
| 1383 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1384 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
| 1385 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] |
| 1386 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
1359 | 1387 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
1360 | 1388 | %tid.ext = sext i32 %tid to i64
|
1361 | 1389 | %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
|
@@ -1491,37 +1519,69 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
|
1491 | 1519 | ; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
|
1492 | 1520 | ; GFX10-GISEL-NEXT: s_endpgm
|
1493 | 1521 | ;
|
1494 |
| -; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
1495 |
| -; GFX11-SDAG: ; %bb.0: |
1496 |
| -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1497 |
| -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1498 |
| -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1499 |
| -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
1500 |
| -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1501 |
| -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
1502 |
| -; GFX11-SDAG-NEXT: global_load_u16 v1, v1, s[2:3] |
1503 |
| -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1504 |
| -; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 |
1505 |
| -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1506 |
| -; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
1507 |
| -; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] |
1508 |
| -; GFX11-SDAG-NEXT: s_endpgm |
1509 |
| -; |
1510 |
| -; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
1511 |
| -; GFX11-GISEL: ; %bb.0: |
1512 |
| -; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1513 |
| -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1514 |
| -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1515 |
| -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
1516 |
| -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
1517 |
| -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
1518 |
| -; GFX11-GISEL-NEXT: global_load_u16 v1, v1, s[2:3] |
1519 |
| -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1520 |
| -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
1521 |
| -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1522 |
| -; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
1523 |
| -; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1] |
1524 |
| -; GFX11-GISEL-NEXT: s_endpgm |
| 1522 | +; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1523 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 1524 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1525 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 |
| 1526 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1527 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 |
| 1528 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| 1529 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1530 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3] |
| 1531 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1532 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 |
| 1533 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1534 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 1535 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 1536 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 1537 | +; |
| 1538 | +; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1539 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 1540 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1541 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1542 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1543 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1544 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1545 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1546 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3] |
| 1547 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1548 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 |
| 1549 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1550 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
| 1551 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] |
| 1552 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 1553 | +; |
| 1554 | +; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1555 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 1556 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1557 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0 |
| 1558 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1559 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1 |
| 1560 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1 |
| 1561 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1562 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3] |
| 1563 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1564 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 0xffc0 |
| 1565 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1566 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 |
| 1567 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] |
| 1568 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
| 1569 | +; |
| 1570 | +; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32: |
| 1571 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 1572 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1573 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1574 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1575 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1576 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0 |
| 1577 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1578 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3] |
| 1579 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1580 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
| 1581 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1582 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 |
| 1583 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1] |
| 1584 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
1525 | 1585 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
1526 | 1586 | %tid.ext = sext i32 %tid to i64
|
1527 | 1587 | %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
|
@@ -1694,43 +1754,86 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
|
1694 | 1754 | ; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
|
1695 | 1755 | ; GFX10-GISEL-NEXT: s_endpgm
|
1696 | 1756 | ;
|
1697 |
| -; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_multi_use: |
1698 |
| -; GFX11-SDAG: ; %bb.0: |
1699 |
| -; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1700 |
| -; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1701 |
| -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1702 |
| -; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1703 |
| -; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
1704 |
| -; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
1705 |
| -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1706 |
| -; GFX11-SDAG-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
1707 |
| -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) |
1708 |
| -; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64 |
1709 |
| -; GFX11-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64 |
1710 |
| -; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
1711 |
| -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 |
1712 |
| -; GFX11-SDAG-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
1713 |
| -; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0 |
1714 |
| -; GFX11-SDAG-NEXT: s_endpgm |
1715 |
| -; |
1716 |
| -; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_multi_use: |
1717 |
| -; GFX11-GISEL: ; %bb.0: |
1718 |
| -; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
1719 |
| -; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
1720 |
| -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) |
1721 |
| -; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
1722 |
| -; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
1723 |
| -; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
1724 |
| -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1725 |
| -; GFX11-GISEL-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
1726 |
| -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) |
1727 |
| -; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
1728 |
| -; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v2, 0xffc0 |
1729 |
| -; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
1730 |
| -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 |
1731 |
| -; GFX11-GISEL-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
1732 |
| -; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0 |
1733 |
| -; GFX11-GISEL-NEXT: s_endpgm |
| 1757 | +; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1758 | +; GFX11-SDAG-TRUE16: ; %bb.0: |
| 1759 | +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1760 | +; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1761 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1762 | +; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0 |
| 1763 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1764 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc |
| 1765 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1766 | +; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc |
| 1767 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1768 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64 |
| 1769 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l |
| 1770 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) |
| 1771 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l |
| 1772 | +; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.h, v0.h, 64 |
| 1773 | +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1774 | +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h |
| 1775 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v2, s[0:1] dlc |
| 1776 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1777 | +; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc |
| 1778 | +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1779 | +; GFX11-SDAG-TRUE16-NEXT: s_endpgm |
| 1780 | +; |
| 1781 | +; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1782 | +; GFX11-SDAG-FAKE16: ; %bb.0: |
| 1783 | +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1784 | +; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1785 | +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1786 | +; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1787 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1788 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 1789 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1790 | +; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
| 1791 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1792 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64 |
| 1793 | +; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v2, v2, 64 |
| 1794 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
| 1795 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1796 | +; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
| 1797 | +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1798 | +; GFX11-SDAG-FAKE16-NEXT: s_endpgm |
| 1799 | +; |
| 1800 | +; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1801 | +; GFX11-GISEL-TRUE16: ; %bb.0: |
| 1802 | +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1803 | +; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1804 | +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1805 | +; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1806 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1807 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 1808 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1809 | +; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
| 1810 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) |
| 1811 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, v1.l, 0xffc0 |
| 1812 | +; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v2.l, v2.l, 0xffc0 |
| 1813 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
| 1814 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1815 | +; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
| 1816 | +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1817 | +; GFX11-GISEL-TRUE16-NEXT: s_endpgm |
| 1818 | +; |
| 1819 | +; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use: |
| 1820 | +; GFX11-GISEL-FAKE16: ; %bb.0: |
| 1821 | +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 |
| 1822 | +; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0 |
| 1823 | +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) |
| 1824 | +; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0 |
| 1825 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) |
| 1826 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc |
| 1827 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1828 | +; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc |
| 1829 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) |
| 1830 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0 |
| 1831 | +; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v2, 0xffc0 |
| 1832 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc |
| 1833 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1834 | +; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc |
| 1835 | +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 |
| 1836 | +; GFX11-GISEL-FAKE16-NEXT: s_endpgm |
1734 | 1837 | %tid = call i32 @llvm.amdgcn.workitem.id.x()
|
1735 | 1838 | %tid.ext = sext i32 %tid to i64
|
1736 | 1839 | %gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
|
|
0 commit comments