Skip to content

Commit dc928e9

Browse files
committed
[AMDGPU] Refactoring mfma intrinsic definitions. NFC.
Differential Revision: https://reviews.llvm.org/D99366
1 parent e2f34cc commit dc928e9

File tree

1 file changed: 33 additions and 184 deletions.

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 33 additions & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,146 +1882,35 @@ def int_amdgcn_udot8 :
18821882

18831883
def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
18841884

1885-
// llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
1886-
def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
1887-
Intrinsic<[llvm_v32f32_ty],
1888-
[llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
1889-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1885+
// llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
//
// Shared definition for the MFMA intrinsics instantiated with this class.
//   DestTy  - type of the result (vdst) and of the third operand (srcC).
//   SrcABTy - type of the first two operands (srcA and srcB).
// The three trailing operands (cbsz, abid, blgp) are i32 and are marked
// ImmArg at argument indices 3, 4 and 5, i.e. they must be immediates.
//
// The GCC builtin name is derived from the record name (NAME) by replacing
// "int" with "__builtin". NOTE(review): !subst rewrites every occurrence of
// the pattern, so a def name using this class should contain "int" only in
// its leading "int_" prefix - confirm when adding new defs.
1886+
class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
1887+
GCCBuiltin<!subst("int", "__builtin", NAME)>,
1888+
Intrinsic<[DestTy],
1889+
[SrcABTy, SrcABTy, DestTy,
1890+
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
18901891
[IntrConvergent, IntrNoMem, IntrWillReturn,
18911892
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
18921893

1893-
def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">,
1894-
Intrinsic<[llvm_v16f32_ty],
1895-
[llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
1896-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1897-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1898-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1899-
1900-
def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">,
1901-
Intrinsic<[llvm_v4f32_ty],
1902-
[llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
1903-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1904-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1905-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1906-
1907-
def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">,
1908-
Intrinsic<[llvm_v16f32_ty],
1909-
[llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
1910-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1911-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1912-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1913-
1914-
def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">,
1915-
Intrinsic<[llvm_v4f32_ty],
1916-
[llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
1917-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1918-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1919-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1920-
1921-
def int_amdgcn_mfma_f32_32x32x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">,
1922-
Intrinsic<[llvm_v32f32_ty],
1923-
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
1924-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1925-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1926-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1927-
1928-
def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">,
1929-
Intrinsic<[llvm_v16f32_ty],
1930-
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
1931-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1932-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1933-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1934-
1935-
def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">,
1936-
Intrinsic<[llvm_v4f32_ty],
1937-
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
1938-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1939-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1940-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1941-
1942-
def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">,
1943-
Intrinsic<[llvm_v16f32_ty],
1944-
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
1945-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1946-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1947-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1948-
1949-
def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">,
1950-
Intrinsic<[llvm_v4f32_ty],
1951-
[llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
1952-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1953-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1954-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1955-
1956-
def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">,
1957-
Intrinsic<[llvm_v32i32_ty],
1958-
[llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
1959-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1960-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1961-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1962-
1963-
def int_amdgcn_mfma_i32_16x16x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">,
1964-
Intrinsic<[llvm_v16i32_ty],
1965-
[llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
1966-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1967-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1968-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1969-
1970-
def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">,
1971-
Intrinsic<[llvm_v4i32_ty],
1972-
[llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
1973-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1974-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1975-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1976-
1977-
def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">,
1978-
Intrinsic<[llvm_v16i32_ty],
1979-
[llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
1980-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1981-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1982-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1983-
1984-
def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">,
1985-
Intrinsic<[llvm_v4i32_ty],
1986-
[llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
1987-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1988-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1989-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1990-
1991-
def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">,
1992-
Intrinsic<[llvm_v32f32_ty],
1993-
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
1994-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1995-
[IntrConvergent, IntrNoMem, IntrWillReturn,
1996-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1997-
1998-
def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">,
1999-
Intrinsic<[llvm_v16f32_ty],
2000-
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
2001-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2002-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2003-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2004-
2005-
def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">,
2006-
Intrinsic<[llvm_v4f32_ty],
2007-
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
2008-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2009-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2010-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2011-
2012-
def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">,
2013-
Intrinsic<[llvm_v16f32_ty],
2014-
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
2015-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2016-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2017-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2018-
2019-
def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">,
2020-
Intrinsic<[llvm_v4f32_ty],
2021-
[llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
2022-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2023-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2024-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1894+
// MFMA intrinsic instantiations. Template arguments are
// AMDGPUMfmaIntrinsic<DestTy, SrcABTy>: DestTy is the result/srcC type and
// SrcABTy is the srcA/srcB type. The def name also determines the GCC builtin
// name (the class derives it from NAME), so renaming a def renames its builtin.
// The bf16 variants in this group take their A/B operands as llvm_v2i16_ty.
def int_amdgcn_mfma_f32_32x32x1f32 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_float_ty>;
1895+
def int_amdgcn_mfma_f32_16x16x1f32 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
1896+
def int_amdgcn_mfma_f32_4x4x1f32 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_float_ty>;
1897+
def int_amdgcn_mfma_f32_32x32x2f32 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
1898+
def int_amdgcn_mfma_f32_16x16x4f32 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_float_ty>;
1899+
def int_amdgcn_mfma_f32_32x32x4f16 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4f16_ty>;
1900+
def int_amdgcn_mfma_f32_16x16x4f16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
1901+
def int_amdgcn_mfma_f32_4x4x4f16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4f16_ty>;
1902+
def int_amdgcn_mfma_f32_32x32x8f16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
1903+
def int_amdgcn_mfma_f32_16x16x16f16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4f16_ty>;
1904+
def int_amdgcn_mfma_i32_32x32x4i8 : AMDGPUMfmaIntrinsic<llvm_v32i32_ty, llvm_i32_ty>;
1905+
def int_amdgcn_mfma_i32_16x16x4i8 : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
1906+
def int_amdgcn_mfma_i32_4x4x4i8 : AMDGPUMfmaIntrinsic<llvm_v4i32_ty, llvm_i32_ty>;
1907+
def int_amdgcn_mfma_i32_32x32x8i8 : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
1908+
def int_amdgcn_mfma_i32_16x16x16i8 : AMDGPUMfmaIntrinsic<llvm_v4i32_ty, llvm_i32_ty>;
1909+
def int_amdgcn_mfma_f32_32x32x2bf16 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v2i16_ty>;
1910+
def int_amdgcn_mfma_f32_16x16x2bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
1911+
def int_amdgcn_mfma_f32_4x4x2bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v2i16_ty>;
1912+
def int_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
1913+
def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v2i16_ty>;
20251914

20261915
//===----------------------------------------------------------------------===//
20271916
// gfx90a intrinsics
@@ -2033,54 +1922,14 @@ def int_amdgcn_flat_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
20331922
def int_amdgcn_flat_atomic_fmin : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
20341923
def int_amdgcn_flat_atomic_fmax : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
20351924

2036-
def int_amdgcn_mfma_f32_32x32x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16_1k">,
2037-
Intrinsic<[llvm_v32f32_ty],
2038-
[llvm_v4i16_ty, llvm_v4i16_ty, llvm_v32f32_ty,
2039-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2040-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2041-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2042-
2043-
def int_amdgcn_mfma_f32_16x16x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4bf16_1k">,
2044-
Intrinsic<[llvm_v16f32_ty],
2045-
[llvm_v4i16_ty, llvm_v4i16_ty, llvm_v16f32_ty,
2046-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2047-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2048-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1925+
// "_1k" bf16 MFMA variants: f32 vector result/srcC, with srcA/srcB passed as
// llvm_v4i16_ty. Arguments are AMDGPUMfmaIntrinsic<DestTy, SrcABTy>.
def int_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4i16_ty>;
1926+
def int_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
1927+
def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
1928+
def int_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
1929+
def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
20491930

2050-
def int_amdgcn_mfma_f32_4x4x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4bf16_1k">,
2051-
Intrinsic<[llvm_v4f32_ty],
2052-
[llvm_v4i16_ty, llvm_v4i16_ty, llvm_v4f32_ty,
2053-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2054-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2055-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2056-
2057-
def int_amdgcn_mfma_f32_32x32x8bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8bf16_1k">,
2058-
Intrinsic<[llvm_v16f32_ty],
2059-
[llvm_v4i16_ty, llvm_v4i16_ty, llvm_v16f32_ty,
2060-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2061-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2062-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2063-
2064-
def int_amdgcn_mfma_f32_16x16x16bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16bf16_1k">,
2065-
Intrinsic<[llvm_v4f32_ty],
2066-
[llvm_v4i16_ty, llvm_v4i16_ty, llvm_v4f32_ty,
2067-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2068-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2069-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2070-
2071-
def int_amdgcn_mfma_f64_16x16x4f64 : GCCBuiltin<"__builtin_amdgcn_mfma_f64_16x16x4f64">,
2072-
Intrinsic<[llvm_v4f64_ty],
2073-
[llvm_double_ty, llvm_double_ty, llvm_v4f64_ty,
2074-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2075-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2076-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2077-
2078-
def int_amdgcn_mfma_f64_4x4x4f64 : GCCBuiltin<"__builtin_amdgcn_mfma_f64_4x4x4f64">,
2079-
Intrinsic<[llvm_double_ty],
2080-
[llvm_double_ty, llvm_double_ty, llvm_double_ty,
2081-
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2082-
[IntrConvergent, IntrNoMem, IntrWillReturn,
2083-
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1931+
// f64 MFMA variants; srcA/srcB are scalar llvm_double_ty. Note that the
// 4x4x4f64 form uses a scalar llvm_double_ty for the result/srcC as well,
// whereas 16x16x4f64 uses llvm_v4f64_ty.
def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty, llvm_double_ty>;
1932+
def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic<llvm_double_ty, llvm_double_ty>;
20841933

20851934
//===----------------------------------------------------------------------===//
20861935
// Special Intrinsics for backend internal use only. No frontend

0 commit comments

Comments
 (0)