Skip to content

Commit f1b1c7f

Browse files
authored
[AMDGPU][True16][CodeGen] Undo sub(x,c) to add in true16 flow (#118854)
Undo the sub x, c -> add x, -c canonicalization in the true16 flow. This duplicates the pattern from fake16 and implements the same pattern in the true16 format.
1 parent 4f614a8 commit f1b1c7f

File tree

2 files changed

+203
-96
lines changed

2 files changed

+203
-96
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1274,7 +1274,11 @@ let True16Predicate = NotHasTrue16BitInsts, SubtargetPredicate = isGFX10Plus in
12741274
let True16Predicate = UseRealTrue16Insts in {
12751275
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_t16_e64>;
12761276
def : OpSelBinOpClampPat<usubsat, V_SUB_NC_U16_t16_e64>;
1277-
} // End OtherPredicates = [UseRealTrue16Insts]
1277+
def : GCNPat<
1278+
(add i16:$src0, (i16 NegSubInlineIntConst16:$src1)),
1279+
(V_SUB_NC_U16_t16_e64 0, VSrc_b16:$src0, 0, NegSubInlineIntConst16:$src1, 0, 0)
1280+
>;
1281+
} // End True16Predicate = UseRealTrue16Insts
12781282

12791283
let True16Predicate = UseFakeTrue16Insts in {
12801284
def : OpSelBinOpClampPat<uaddsat, V_ADD_NC_U16_fake16_e64>;

llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll

Lines changed: 198 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GFX9,GFX9-GISEL %s
88
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-SDAG %s
99
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10,GFX10-GISEL %s
10-
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s
11-
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL %s
10+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-TRUE16 %s
11+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG,GFX11-SDAG-FAKE16 %s
12+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-TRUE16 %s
13+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL,GFX11-GISEL-FAKE16 %s
1214

1315
; Test that add/sub with a constant is swapped to sub/add with negated
1416
; constant to minimize code size.
@@ -1331,31 +1333,57 @@ define amdgpu_kernel void @v_test_i16_x_sub_64(ptr addrspace(1) %out, ptr addrsp
13311333
; GFX10-GISEL-NEXT: global_store_short v0, v1, s[0:1]
13321334
; GFX10-GISEL-NEXT: s_endpgm
13331335
;
1334-
; GFX11-SDAG-LABEL: v_test_i16_x_sub_64:
1335-
; GFX11-SDAG: ; %bb.0:
1336-
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1337-
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1338-
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1339-
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1340-
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1341-
; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3]
1342-
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1343-
; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
1344-
; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1]
1345-
; GFX11-SDAG-NEXT: s_endpgm
1346-
;
1347-
; GFX11-GISEL-LABEL: v_test_i16_x_sub_64:
1348-
; GFX11-GISEL: ; %bb.0:
1349-
; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1350-
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1351-
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1352-
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1353-
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1354-
; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3]
1355-
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1356-
; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
1357-
; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1]
1358-
; GFX11-GISEL-NEXT: s_endpgm
1336+
; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64:
1337+
; GFX11-SDAG-TRUE16: ; %bb.0:
1338+
; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1339+
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1340+
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1341+
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1342+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1343+
; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3]
1344+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
1345+
; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64
1346+
; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
1347+
; GFX11-SDAG-TRUE16-NEXT: s_endpgm
1348+
;
1349+
; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64:
1350+
; GFX11-SDAG-FAKE16: ; %bb.0:
1351+
; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1352+
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1353+
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1354+
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1355+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1356+
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3]
1357+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
1358+
; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64
1359+
; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
1360+
; GFX11-SDAG-FAKE16-NEXT: s_endpgm
1361+
;
1362+
; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64:
1363+
; GFX11-GISEL-TRUE16: ; %bb.0:
1364+
; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1365+
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1366+
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1367+
; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1368+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1369+
; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3]
1370+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
1371+
; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, v1.l, 0xffc0
1372+
; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1]
1373+
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
1374+
;
1375+
; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64:
1376+
; GFX11-GISEL-FAKE16: ; %bb.0:
1377+
; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1378+
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1379+
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1380+
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1381+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1382+
; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3]
1383+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
1384+
; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0
1385+
; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
1386+
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
13591387
%tid = call i32 @llvm.amdgcn.workitem.id.x()
13601388
%tid.ext = sext i32 %tid to i64
13611389
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1491,37 +1519,69 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_zext_to_i32(ptr addrspace(1) %out
14911519
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
14921520
; GFX10-GISEL-NEXT: s_endpgm
14931521
;
1494-
; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_zext_to_i32:
1495-
; GFX11-SDAG: ; %bb.0:
1496-
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1497-
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1498-
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1499-
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1500-
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1501-
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1502-
; GFX11-SDAG-NEXT: global_load_u16 v1, v1, s[2:3]
1503-
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1504-
; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
1505-
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1506-
; GFX11-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1507-
; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
1508-
; GFX11-SDAG-NEXT: s_endpgm
1509-
;
1510-
; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_zext_to_i32:
1511-
; GFX11-GISEL: ; %bb.0:
1512-
; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1513-
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1514-
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1515-
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1516-
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1517-
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1518-
; GFX11-GISEL-NEXT: global_load_u16 v1, v1, s[2:3]
1519-
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1520-
; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
1521-
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1522-
; GFX11-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1523-
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
1524-
; GFX11-GISEL-NEXT: s_endpgm
1522+
; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32:
1523+
; GFX11-SDAG-TRUE16: ; %bb.0:
1524+
; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1525+
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0
1526+
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1527+
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1
1528+
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1
1529+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1530+
; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3]
1531+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
1532+
; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64
1533+
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1534+
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
1535+
; GFX11-SDAG-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
1536+
; GFX11-SDAG-TRUE16-NEXT: s_endpgm
1537+
;
1538+
; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32:
1539+
; GFX11-SDAG-FAKE16: ; %bb.0:
1540+
; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1541+
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1542+
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1543+
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1544+
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1545+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1546+
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3]
1547+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
1548+
; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64
1549+
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1550+
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
1551+
; GFX11-SDAG-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1]
1552+
; GFX11-SDAG-FAKE16-NEXT: s_endpgm
1553+
;
1554+
; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_zext_to_i32:
1555+
; GFX11-GISEL-TRUE16: ; %bb.0:
1556+
; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1557+
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0
1558+
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1559+
; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v1
1560+
; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 2, v1
1561+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1562+
; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v0, v0, s[2:3]
1563+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
1564+
; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, 0xffc0
1565+
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1566+
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
1567+
; GFX11-GISEL-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
1568+
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
1569+
;
1570+
; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_zext_to_i32:
1571+
; GFX11-GISEL-FAKE16: ; %bb.0:
1572+
; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1573+
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1574+
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1575+
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1576+
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1577+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1578+
; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v1, s[2:3]
1579+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
1580+
; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0
1581+
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1582+
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
1583+
; GFX11-GISEL-FAKE16-NEXT: global_store_b32 v0, v1, s[0:1]
1584+
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
15251585
%tid = call i32 @llvm.amdgcn.workitem.id.x()
15261586
%tid.ext = sext i32 %tid to i64
15271587
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext
@@ -1694,43 +1754,86 @@ define amdgpu_kernel void @v_test_i16_x_sub_64_multi_use(ptr addrspace(1) %out,
16941754
; GFX10-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
16951755
; GFX10-GISEL-NEXT: s_endpgm
16961756
;
1697-
; GFX11-SDAG-LABEL: v_test_i16_x_sub_64_multi_use:
1698-
; GFX11-SDAG: ; %bb.0:
1699-
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1700-
; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1701-
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1702-
; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1703-
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1704-
; GFX11-SDAG-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
1705-
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1706-
; GFX11-SDAG-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
1707-
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1708-
; GFX11-SDAG-NEXT: v_sub_nc_u16 v1, v1, 64
1709-
; GFX11-SDAG-NEXT: v_sub_nc_u16 v2, v2, 64
1710-
; GFX11-SDAG-NEXT: global_store_b16 v0, v1, s[0:1] dlc
1711-
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
1712-
; GFX11-SDAG-NEXT: global_store_b16 v0, v2, s[0:1] dlc
1713-
; GFX11-SDAG-NEXT: s_waitcnt_vscnt null, 0x0
1714-
; GFX11-SDAG-NEXT: s_endpgm
1715-
;
1716-
; GFX11-GISEL-LABEL: v_test_i16_x_sub_64_multi_use:
1717-
; GFX11-GISEL: ; %bb.0:
1718-
; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1719-
; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1720-
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
1721-
; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1722-
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1723-
; GFX11-GISEL-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
1724-
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1725-
; GFX11-GISEL-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
1726-
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1727-
; GFX11-GISEL-NEXT: v_add_nc_u16 v1, v1, 0xffc0
1728-
; GFX11-GISEL-NEXT: v_add_nc_u16 v2, v2, 0xffc0
1729-
; GFX11-GISEL-NEXT: global_store_b16 v0, v1, s[0:1] dlc
1730-
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
1731-
; GFX11-GISEL-NEXT: global_store_b16 v0, v2, s[0:1] dlc
1732-
; GFX11-GISEL-NEXT: s_waitcnt_vscnt null, 0x0
1733-
; GFX11-GISEL-NEXT: s_endpgm
1757+
; GFX11-SDAG-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use:
1758+
; GFX11-SDAG-TRUE16: ; %bb.0:
1759+
; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1760+
; GFX11-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1761+
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1762+
; GFX11-SDAG-TRUE16-NEXT: v_lshlrev_b32_e32 v1, 1, v0
1763+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1764+
; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] glc dlc
1765+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
1766+
; GFX11-SDAG-TRUE16-NEXT: global_load_u16 v2, v1, s[2:3] glc dlc
1767+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
1768+
; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.l, v0.l, 64
1769+
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
1770+
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1771+
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
1772+
; GFX11-SDAG-TRUE16-NEXT: v_sub_nc_u16 v0.h, v0.h, 64
1773+
; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1774+
; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, v0.h
1775+
; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v2, s[0:1] dlc
1776+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1777+
; GFX11-SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] dlc
1778+
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1779+
; GFX11-SDAG-TRUE16-NEXT: s_endpgm
1780+
;
1781+
; GFX11-SDAG-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use:
1782+
; GFX11-SDAG-FAKE16: ; %bb.0:
1783+
; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1784+
; GFX11-SDAG-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1785+
; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1786+
; GFX11-SDAG-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1787+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1788+
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
1789+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
1790+
; GFX11-SDAG-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
1791+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
1792+
; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v1, v1, 64
1793+
; GFX11-SDAG-FAKE16-NEXT: v_sub_nc_u16 v2, v2, 64
1794+
; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc
1795+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1796+
; GFX11-SDAG-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc
1797+
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1798+
; GFX11-SDAG-FAKE16-NEXT: s_endpgm
1799+
;
1800+
; GFX11-GISEL-TRUE16-LABEL: v_test_i16_x_sub_64_multi_use:
1801+
; GFX11-GISEL-TRUE16: ; %bb.0:
1802+
; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1803+
; GFX11-GISEL-TRUE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1804+
; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1805+
; GFX11-GISEL-TRUE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1806+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
1807+
; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
1808+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
1809+
; GFX11-GISEL-TRUE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
1810+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0)
1811+
; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v1.l, v1.l, 0xffc0
1812+
; GFX11-GISEL-TRUE16-NEXT: v_add_nc_u16 v2.l, v2.l, 0xffc0
1813+
; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc
1814+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1815+
; GFX11-GISEL-TRUE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc
1816+
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
1817+
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
1818+
;
1819+
; GFX11-GISEL-FAKE16-LABEL: v_test_i16_x_sub_64_multi_use:
1820+
; GFX11-GISEL-FAKE16: ; %bb.0:
1821+
; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
1822+
; GFX11-GISEL-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1823+
; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
1824+
; GFX11-GISEL-FAKE16-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1825+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
1826+
; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] glc dlc
1827+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
1828+
; GFX11-GISEL-FAKE16-NEXT: global_load_u16 v2, v0, s[2:3] glc dlc
1829+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0)
1830+
; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v1, v1, 0xffc0
1831+
; GFX11-GISEL-FAKE16-NEXT: v_add_nc_u16 v2, v2, 0xffc0
1832+
; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] dlc
1833+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1834+
; GFX11-GISEL-FAKE16-NEXT: global_store_b16 v0, v2, s[0:1] dlc
1835+
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
1836+
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
17341837
%tid = call i32 @llvm.amdgcn.workitem.id.x()
17351838
%tid.ext = sext i32 %tid to i64
17361839
%gep = getelementptr inbounds i16, ptr addrspace(1) %in, i64 %tid.ext

0 commit comments

Comments
 (0)