@@ -1481,6 +1481,221 @@ define <16 x float> @test_smfmac_f32_32x32x32_bf16__sgpr(<8 x bfloat> inreg %arg
1481
1481
ret <16 x float > %result
1482
1482
}
1483
1483
1484
+ ; --------------------------------------------------------------------
1485
+ ; llvm.amdgcn.smfmac.i32.16x16x128.i8
1486
+ ; --------------------------------------------------------------------
1487
+
1488
+ ; Intrinsic exercised by the tests in this section. As called below, the
+ ; operands are: a <4 x i32> and an <8 x i32> source vector, a <4 x i32> vector
+ ; that shares registers with the result in the checked assembly, an i32 that
+ ; is passed in a VGPR, and two i32 immediates that are printed as the cbsz and
+ ; abid modifiers.
+ declare <4 x i32 > @llvm.amdgcn.smfmac.i32.16x16x128.i8 (<4 x i32 >, <8 x i32 >, <4 x i32 >, i32 , i32 , i32 )
1489
+
1490
+ ; Kernel test: each work-item loads a <4 x i32> from %arg at index
+ ; workitem.id.x and passes it as the third vector operand together with the
+ ; kernel arguments %a, %b and %idx. The trailing immediates are 1 and 2, which
+ ; the checks expect as "cbsz:1 abid:2". The result is stored to the base of
+ ; %arg.
+ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr (ptr addrspace (1 ) %arg , <4 x i32 > %a , <8 x i32 > %b , i32 %idx ) #0 {
1491
+ ; SDAG-LABEL: test_smfmac_i32_16x16x128_i8__vgpr:
1492
+ ; SDAG: ; %bb.0: ; %bb
1493
+ ; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
1494
+ ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1495
+ ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
1496
+ ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
1497
+ ; SDAG-NEXT: v_mov_b32_e32 v16, 0
1498
+ ; SDAG-NEXT: s_waitcnt lgkmcnt(0)
1499
+ ; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
1500
+ ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
1501
+ ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
1502
+ ; SDAG-NEXT: v_mov_b32_e32 v12, s8
1503
+ ; SDAG-NEXT: v_mov_b32_e32 v13, s9
1504
+ ; SDAG-NEXT: v_mov_b32_e32 v14, s10
1505
+ ; SDAG-NEXT: v_mov_b32_e32 v15, s11
1506
+ ; SDAG-NEXT: v_mov_b32_e32 v0, s12
1507
+ ; SDAG-NEXT: v_mov_b32_e32 v1, s13
1508
+ ; SDAG-NEXT: v_mov_b32_e32 v2, s14
1509
+ ; SDAG-NEXT: v_mov_b32_e32 v3, s15
1510
+ ; SDAG-NEXT: s_waitcnt lgkmcnt(0)
1511
+ ; SDAG-NEXT: v_mov_b32_e32 v4, s0
1512
+ ; SDAG-NEXT: v_mov_b32_e32 v5, s1
1513
+ ; SDAG-NEXT: v_mov_b32_e32 v6, s2
1514
+ ; SDAG-NEXT: v_mov_b32_e32 v7, s3
1515
+ ; SDAG-NEXT: v_mov_b32_e32 v17, s16
1516
+ ; SDAG-NEXT: s_waitcnt vmcnt(0)
1517
+ ; SDAG-NEXT: s_nop 0
1518
+ ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
1519
+ ; SDAG-NEXT: s_nop 6
1520
+ ; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
1521
+ ; SDAG-NEXT: s_endpgm
1522
+ ;
1523
+ ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__vgpr:
1524
+ ; GISEL: ; %bb.0: ; %bb
1525
+ ; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1526
+ ; GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
1527
+ ; GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0
1528
+ ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
1529
+ ; GISEL-NEXT: global_load_dwordx4 v[8:11], v0, s[0:1]
1530
+ ; GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
1531
+ ; GISEL-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x54
1532
+ ; GISEL-NEXT: s_load_dword s2, s[4:5], 0x64
1533
+ ; GISEL-NEXT: s_waitcnt lgkmcnt(0)
1534
+ ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[10:11]
1535
+ ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[8:9]
1536
+ ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[12:13]
1537
+ ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[14:15]
1538
+ ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17]
1539
+ ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19]
1540
+ ; GISEL-NEXT: v_mov_b32_e32 v16, s2
1541
+ ; GISEL-NEXT: s_waitcnt vmcnt(0)
1542
+ ; GISEL-NEXT: s_nop 0
1543
+ ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v16 cbsz:1 abid:2
1544
+ ; GISEL-NEXT: v_mov_b32_e32 v0, 0
1545
+ ; GISEL-NEXT: s_nop 5
1546
+ ; GISEL-NEXT: global_store_dwordx4 v0, v[8:11], s[0:1]
1547
+ ; GISEL-NEXT: s_endpgm
1548
+ bb:
1549
+ %id = call i32 @llvm.amdgcn.workitem.id.x ()
1550
+ %gep = getelementptr <4 x i32 >, ptr addrspace (1 ) %arg , i32 %id
1551
+ %in.1 = load <4 x i32 >, ptr addrspace (1 ) %gep
1552
+ %mai.1 = tail call <4 x i32 > @llvm.amdgcn.smfmac.i32.16x16x128.i8 (<4 x i32 > %a , <8 x i32 > %b , <4 x i32 > %in.1 , i32 %idx , i32 1 , i32 2 )
1553
+ store <4 x i32 > %mai.1 , ptr addrspace (1 ) %arg
1554
+ ret void
1555
+ }
1556
+
1557
+ ; Callable-function test with both trailing immediates set to 0; the checked
+ ; instruction therefore carries no cbsz/abid modifiers. The SDAG checks route
+ ; the third vector operand and the result through a[0:3], while the GISEL
+ ; checks keep them in v[12:15] and copy the result to v[0:3] afterwards.
+ define <4 x i32 > @test_smfmac_i32_16x16x128_i8 (<4 x i32 > %arg0 , <8 x i32 > %arg1 , <4 x i32 > %arg2 , i32 %arg3 ) {
1558
+ ; SDAG-LABEL: test_smfmac_i32_16x16x128_i8:
1559
+ ; SDAG: ; %bb.0:
1560
+ ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1561
+ ; SDAG-NEXT: v_accvgpr_write_b32 a0, v12
1562
+ ; SDAG-NEXT: v_accvgpr_write_b32 a1, v13
1563
+ ; SDAG-NEXT: v_accvgpr_write_b32 a2, v14
1564
+ ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15
1565
+ ; SDAG-NEXT: s_nop 1
1566
+ ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[0:3], v[4:11], v16
1567
+ ; SDAG-NEXT: s_nop 6
1568
+ ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
1569
+ ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1
1570
+ ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2
1571
+ ; SDAG-NEXT: v_accvgpr_read_b32 v3, a3
1572
+ ; SDAG-NEXT: s_setpc_b64 s[30:31]
1573
+ ;
1574
+ ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8:
1575
+ ; GISEL: ; %bb.0:
1576
+ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577
+ ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[12:15], v[0:3], v[4:11], v16
1578
+ ; GISEL-NEXT: s_nop 6
1579
+ ; GISEL-NEXT: v_mov_b32_e32 v0, v12
1580
+ ; GISEL-NEXT: v_mov_b32_e32 v1, v13
1581
+ ; GISEL-NEXT: v_mov_b32_e32 v2, v14
1582
+ ; GISEL-NEXT: v_mov_b32_e32 v3, v15
1583
+ ; GISEL-NEXT: s_setpc_b64 s[30:31]
1584
+ %result = call <4 x i32 > @llvm.amdgcn.smfmac.i32.16x16x128.i8 (<4 x i32 > %arg0 , <8 x i32 > %arg1 , <4 x i32 > %arg2 , i32 %arg3 , i32 immarg 0 , i32 immarg 0 )
1585
+ ret <4 x i32 > %result
1586
+ }
1587
+
1588
+ ; Same shape as test_smfmac_i32_16x16x128_i8, but with the trailing immediates
+ ; set to 1 and 3; the checks expect them on the instruction as
+ ; "cbsz:1 abid:3".
+ define <4 x i32 > @test_smfmac_i32_16x16x128_i8__flags0 (<4 x i32 > %arg0 , <8 x i32 > %arg1 , <4 x i32 > %arg2 , i32 %arg3 ) {
1589
+ ; SDAG-LABEL: test_smfmac_i32_16x16x128_i8__flags0:
1590
+ ; SDAG: ; %bb.0:
1591
+ ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592
+ ; SDAG-NEXT: v_accvgpr_write_b32 a0, v12
1593
+ ; SDAG-NEXT: v_accvgpr_write_b32 a1, v13
1594
+ ; SDAG-NEXT: v_accvgpr_write_b32 a2, v14
1595
+ ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15
1596
+ ; SDAG-NEXT: s_nop 1
1597
+ ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[0:3], v[4:11], v16 cbsz:1 abid:3
1598
+ ; SDAG-NEXT: s_nop 6
1599
+ ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
1600
+ ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1
1601
+ ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2
1602
+ ; SDAG-NEXT: v_accvgpr_read_b32 v3, a3
1603
+ ; SDAG-NEXT: s_setpc_b64 s[30:31]
1604
+ ;
1605
+ ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__flags0:
1606
+ ; GISEL: ; %bb.0:
1607
+ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608
+ ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[12:15], v[0:3], v[4:11], v16 cbsz:1 abid:3
1609
+ ; GISEL-NEXT: s_nop 6
1610
+ ; GISEL-NEXT: v_mov_b32_e32 v0, v12
1611
+ ; GISEL-NEXT: v_mov_b32_e32 v1, v13
1612
+ ; GISEL-NEXT: v_mov_b32_e32 v2, v14
1613
+ ; GISEL-NEXT: v_mov_b32_e32 v3, v15
1614
+ ; GISEL-NEXT: s_setpc_b64 s[30:31]
1615
+ %result = call <4 x i32 > @llvm.amdgcn.smfmac.i32.16x16x128.i8 (<4 x i32 > %arg0 , <8 x i32 > %arg1 , <4 x i32 > %arg2 , i32 %arg3 , i32 immarg 1 , i32 immarg 3 )
1616
+ ret <4 x i32 > %result
1617
+ }
1618
+
1619
+ ; Counterpart of the __flags0 test with the two immediates swapped (3 and 1);
+ ; the checks expect "cbsz:3 abid:1" on the instruction.
+ define <4 x i32 > @test_smfmac_i32_16x16x128_i8__flags1 (<4 x i32 > %arg0 , <8 x i32 > %arg1 , <4 x i32 > %arg2 , i32 %arg3 ) {
1620
+ ; SDAG-LABEL: test_smfmac_i32_16x16x128_i8__flags1:
1621
+ ; SDAG: ; %bb.0:
1622
+ ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1623
+ ; SDAG-NEXT: v_accvgpr_write_b32 a0, v12
1624
+ ; SDAG-NEXT: v_accvgpr_write_b32 a1, v13
1625
+ ; SDAG-NEXT: v_accvgpr_write_b32 a2, v14
1626
+ ; SDAG-NEXT: v_accvgpr_write_b32 a3, v15
1627
+ ; SDAG-NEXT: s_nop 1
1628
+ ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[0:3], v[4:11], v16 cbsz:3 abid:1
1629
+ ; SDAG-NEXT: s_nop 6
1630
+ ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
1631
+ ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1
1632
+ ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2
1633
+ ; SDAG-NEXT: v_accvgpr_read_b32 v3, a3
1634
+ ; SDAG-NEXT: s_setpc_b64 s[30:31]
1635
+ ;
1636
+ ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__flags1:
1637
+ ; GISEL: ; %bb.0:
1638
+ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1639
+ ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[12:15], v[0:3], v[4:11], v16 cbsz:3 abid:1
1640
+ ; GISEL-NEXT: s_nop 6
1641
+ ; GISEL-NEXT: v_mov_b32_e32 v0, v12
1642
+ ; GISEL-NEXT: v_mov_b32_e32 v1, v13
1643
+ ; GISEL-NEXT: v_mov_b32_e32 v2, v14
1644
+ ; GISEL-NEXT: v_mov_b32_e32 v3, v15
1645
+ ; GISEL-NEXT: s_setpc_b64 s[30:31]
1646
+ %result = call <4 x i32 > @llvm.amdgcn.smfmac.i32.16x16x128.i8 (<4 x i32 > %arg0 , <8 x i32 > %arg1 , <4 x i32 > %arg2 , i32 %arg3 , i32 immarg 3 , i32 immarg 1 )
1647
+ ret <4 x i32 > %result
1648
+ }
1649
+
1650
+ ; Every argument is marked inreg. The checks expect the incoming values to be
+ ; copied out of s0-s3 and s16-s28 into v registers (plus a0-a3 for SDAG)
+ ; before the instruction is issued. Both trailing immediates are 0, so no
+ ; cbsz/abid modifiers are printed.
+ define <4 x i32 > @test_smfmac_i32_16x16x128_i8__sgpr (<4 x i32 > inreg %arg0 , <8 x i32 > inreg %arg1 , <4 x i32 > inreg %arg2 , i32 inreg %arg3 ) {
1651
+ ; SDAG-LABEL: test_smfmac_i32_16x16x128_i8__sgpr:
1652
+ ; SDAG: ; %bb.0:
1653
+ ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1654
+ ; SDAG-NEXT: v_mov_b32_e32 v8, s0
1655
+ ; SDAG-NEXT: v_mov_b32_e32 v9, s1
1656
+ ; SDAG-NEXT: v_mov_b32_e32 v10, s2
1657
+ ; SDAG-NEXT: v_mov_b32_e32 v11, s3
1658
+ ; SDAG-NEXT: v_mov_b32_e32 v0, s16
1659
+ ; SDAG-NEXT: v_mov_b32_e32 v1, s17
1660
+ ; SDAG-NEXT: v_mov_b32_e32 v2, s18
1661
+ ; SDAG-NEXT: v_mov_b32_e32 v3, s19
1662
+ ; SDAG-NEXT: v_mov_b32_e32 v4, s20
1663
+ ; SDAG-NEXT: v_mov_b32_e32 v5, s21
1664
+ ; SDAG-NEXT: v_mov_b32_e32 v6, s22
1665
+ ; SDAG-NEXT: v_mov_b32_e32 v7, s23
1666
+ ; SDAG-NEXT: v_accvgpr_write_b32 a0, s24
1667
+ ; SDAG-NEXT: v_accvgpr_write_b32 a1, s25
1668
+ ; SDAG-NEXT: v_accvgpr_write_b32 a2, s26
1669
+ ; SDAG-NEXT: v_accvgpr_write_b32 a3, s27
1670
+ ; SDAG-NEXT: v_mov_b32_e32 v12, s28
1671
+ ; SDAG-NEXT: s_nop 1
1672
+ ; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 a[0:3], v[8:11], v[0:7], v12
1673
+ ; SDAG-NEXT: s_nop 6
1674
+ ; SDAG-NEXT: v_accvgpr_read_b32 v0, a0
1675
+ ; SDAG-NEXT: v_accvgpr_read_b32 v1, a1
1676
+ ; SDAG-NEXT: v_accvgpr_read_b32 v2, a2
1677
+ ; SDAG-NEXT: v_accvgpr_read_b32 v3, a3
1678
+ ; SDAG-NEXT: s_setpc_b64 s[30:31]
1679
+ ;
1680
+ ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__sgpr:
1681
+ ; GISEL: ; %bb.0:
1682
+ ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1683
+ ; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[2:3]
1684
+ ; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
1685
+ ; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[16:17]
1686
+ ; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
1687
+ ; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[18:19]
1688
+ ; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[20:21]
1689
+ ; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[22:23]
1690
+ ; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
1691
+ ; GISEL-NEXT: v_mov_b32_e32 v16, s28
1692
+ ; GISEL-NEXT: s_nop 1
1693
+ ; GISEL-NEXT: v_smfmac_i32_16x16x128_i8 v[0:3], v[12:15], v[4:11], v16
1694
+ ; GISEL-NEXT: s_setpc_b64 s[30:31]
1695
+ %result = call <4 x i32 > @llvm.amdgcn.smfmac.i32.16x16x128.i8 (<4 x i32 > %arg0 , <8 x i32 > %arg1 , <4 x i32 > %arg2 , i32 %arg3 , i32 immarg 0 , i32 immarg 0 )
1696
+ ret <4 x i32 > %result
1697
+ }
1698
+
1484
1699
; Attribute group #0 is referenced by amdgpu_kernel tests in this file, e.g.
; test_smfmac_i32_16x16x128_i8__vgpr. It sets "amdgpu-flat-work-group-size" to
; "1,256" (a min,max pair according to the AMDGPU usage guide).
attributes #0 = { "amdgpu-flat-work-group-size" ="1,256" }
1485
1700
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1486
1701
; GCN: {{.*}}
0 commit comments