@@ -1882,146 +1882,35 @@ def int_amdgcn_udot8 :
1882
1882
1883
1883
def int_amdgcn_global_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
1884
1884
1885
- // llvm.amdgcn.mfma.f32.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
1886
- def int_amdgcn_mfma_f32_32x32x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x1f32">,
1887
- Intrinsic<[llvm_v32f32_ty],
1888
- [llvm_float_ty, llvm_float_ty, llvm_v32f32_ty,
1889
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1885
+ // llvm.amdgcn.mfma.*.* vdst, srcA, srcB, srcC, cbsz, abid, blgp
1886
+ class AMDGPUMfmaIntrinsic<LLVMType DestTy, LLVMType SrcABTy> :
1887
+ GCCBuiltin<!subst("int", "__builtin", NAME)>,
1888
+ Intrinsic<[DestTy],
1889
+ [SrcABTy, SrcABTy, DestTy,
1890
+ llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1890
1891
[IntrConvergent, IntrNoMem, IntrWillReturn,
1891
1892
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1892
1893
1893
- def int_amdgcn_mfma_f32_16x16x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x1f32">,
1894
- Intrinsic<[llvm_v16f32_ty],
1895
- [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
1896
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1897
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1898
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1899
-
1900
- def int_amdgcn_mfma_f32_4x4x1f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x1f32">,
1901
- Intrinsic<[llvm_v4f32_ty],
1902
- [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
1903
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1904
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1905
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1906
-
1907
- def int_amdgcn_mfma_f32_32x32x2f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2f32">,
1908
- Intrinsic<[llvm_v16f32_ty],
1909
- [llvm_float_ty, llvm_float_ty, llvm_v16f32_ty,
1910
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1911
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1912
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1913
-
1914
- def int_amdgcn_mfma_f32_16x16x4f32 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f32">,
1915
- Intrinsic<[llvm_v4f32_ty],
1916
- [llvm_float_ty, llvm_float_ty, llvm_v4f32_ty,
1917
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1918
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1919
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1920
-
1921
- def int_amdgcn_mfma_f32_32x32x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4f16">,
1922
- Intrinsic<[llvm_v32f32_ty],
1923
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v32f32_ty,
1924
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1925
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1926
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1927
-
1928
- def int_amdgcn_mfma_f32_16x16x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4f16">,
1929
- Intrinsic<[llvm_v16f32_ty],
1930
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
1931
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1932
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1933
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1934
-
1935
- def int_amdgcn_mfma_f32_4x4x4f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4f16">,
1936
- Intrinsic<[llvm_v4f32_ty],
1937
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
1938
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1939
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1940
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1941
-
1942
- def int_amdgcn_mfma_f32_32x32x8f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8f16">,
1943
- Intrinsic<[llvm_v16f32_ty],
1944
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v16f32_ty,
1945
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1946
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1947
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1948
-
1949
- def int_amdgcn_mfma_f32_16x16x16f16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16f16">,
1950
- Intrinsic<[llvm_v4f32_ty],
1951
- [llvm_v4f16_ty, llvm_v4f16_ty, llvm_v4f32_ty,
1952
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1953
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1954
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1955
-
1956
- def int_amdgcn_mfma_i32_32x32x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x4i8">,
1957
- Intrinsic<[llvm_v32i32_ty],
1958
- [llvm_i32_ty, llvm_i32_ty, llvm_v32i32_ty,
1959
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1960
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1961
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1962
-
1963
- def int_amdgcn_mfma_i32_16x16x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x4i8">,
1964
- Intrinsic<[llvm_v16i32_ty],
1965
- [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
1966
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1967
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1968
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1969
-
1970
- def int_amdgcn_mfma_i32_4x4x4i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_4x4x4i8">,
1971
- Intrinsic<[llvm_v4i32_ty],
1972
- [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
1973
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1974
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1975
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1976
-
1977
- def int_amdgcn_mfma_i32_32x32x8i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_32x32x8i8">,
1978
- Intrinsic<[llvm_v16i32_ty],
1979
- [llvm_i32_ty, llvm_i32_ty, llvm_v16i32_ty,
1980
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1981
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1982
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1983
-
1984
- def int_amdgcn_mfma_i32_16x16x16i8 : GCCBuiltin<"__builtin_amdgcn_mfma_i32_16x16x16i8">,
1985
- Intrinsic<[llvm_v4i32_ty],
1986
- [llvm_i32_ty, llvm_i32_ty, llvm_v4i32_ty,
1987
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1988
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1989
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1990
-
1991
- def int_amdgcn_mfma_f32_32x32x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x2bf16">,
1992
- Intrinsic<[llvm_v32f32_ty],
1993
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v32f32_ty,
1994
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
1995
- [IntrConvergent, IntrNoMem, IntrWillReturn,
1996
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1997
-
1998
- def int_amdgcn_mfma_f32_16x16x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x2bf16">,
1999
- Intrinsic<[llvm_v16f32_ty],
2000
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
2001
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2002
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2003
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2004
-
2005
- def int_amdgcn_mfma_f32_4x4x2bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x2bf16">,
2006
- Intrinsic<[llvm_v4f32_ty],
2007
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
2008
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2009
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2010
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2011
-
2012
- def int_amdgcn_mfma_f32_32x32x4bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16">,
2013
- Intrinsic<[llvm_v16f32_ty],
2014
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v16f32_ty,
2015
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2016
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2017
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2018
-
2019
- def int_amdgcn_mfma_f32_16x16x8bf16 : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x8bf16">,
2020
- Intrinsic<[llvm_v4f32_ty],
2021
- [llvm_v2i16_ty, llvm_v2i16_ty, llvm_v4f32_ty,
2022
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2023
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2024
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1894
+ def int_amdgcn_mfma_f32_32x32x1f32 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_float_ty>;
1895
+ def int_amdgcn_mfma_f32_16x16x1f32 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
1896
+ def int_amdgcn_mfma_f32_4x4x1f32 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_float_ty>;
1897
+ def int_amdgcn_mfma_f32_32x32x2f32 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_float_ty>;
1898
+ def int_amdgcn_mfma_f32_16x16x4f32 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_float_ty>;
1899
+ def int_amdgcn_mfma_f32_32x32x4f16 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4f16_ty>;
1900
+ def int_amdgcn_mfma_f32_16x16x4f16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
1901
+ def int_amdgcn_mfma_f32_4x4x4f16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4f16_ty>;
1902
+ def int_amdgcn_mfma_f32_32x32x8f16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4f16_ty>;
1903
+ def int_amdgcn_mfma_f32_16x16x16f16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4f16_ty>;
1904
+ def int_amdgcn_mfma_i32_32x32x4i8 : AMDGPUMfmaIntrinsic<llvm_v32i32_ty, llvm_i32_ty>;
1905
+ def int_amdgcn_mfma_i32_16x16x4i8 : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
1906
+ def int_amdgcn_mfma_i32_4x4x4i8 : AMDGPUMfmaIntrinsic<llvm_v4i32_ty, llvm_i32_ty>;
1907
+ def int_amdgcn_mfma_i32_32x32x8i8 : AMDGPUMfmaIntrinsic<llvm_v16i32_ty, llvm_i32_ty>;
1908
+ def int_amdgcn_mfma_i32_16x16x16i8 : AMDGPUMfmaIntrinsic<llvm_v4i32_ty, llvm_i32_ty>;
1909
+ def int_amdgcn_mfma_f32_32x32x2bf16 : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v2i16_ty>;
1910
+ def int_amdgcn_mfma_f32_16x16x2bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
1911
+ def int_amdgcn_mfma_f32_4x4x2bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v2i16_ty>;
1912
+ def int_amdgcn_mfma_f32_32x32x4bf16 : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v2i16_ty>;
1913
+ def int_amdgcn_mfma_f32_16x16x8bf16 : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v2i16_ty>;
2025
1914
2026
1915
//===----------------------------------------------------------------------===//
2027
1916
// gfx90a intrinsics
@@ -2033,54 +1922,14 @@ def int_amdgcn_flat_atomic_fadd : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
2033
1922
def int_amdgcn_flat_atomic_fmin : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
2034
1923
def int_amdgcn_flat_atomic_fmax : AMDGPUGlobalAtomicRtn<llvm_anyfloat_ty>;
2035
1924
2036
- def int_amdgcn_mfma_f32_32x32x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x4bf16_1k">,
2037
- Intrinsic<[llvm_v32f32_ty],
2038
- [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v32f32_ty,
2039
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2040
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2041
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2042
-
2043
- def int_amdgcn_mfma_f32_16x16x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x4bf16_1k">,
2044
- Intrinsic<[llvm_v16f32_ty],
2045
- [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v16f32_ty,
2046
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2047
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2048
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1925
+ def int_amdgcn_mfma_f32_32x32x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v32f32_ty, llvm_v4i16_ty>;
1926
+ def int_amdgcn_mfma_f32_16x16x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
1927
+ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
1928
+ def int_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
1929
+ def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
2049
1930
2050
- def int_amdgcn_mfma_f32_4x4x4bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_4x4x4bf16_1k">,
2051
- Intrinsic<[llvm_v4f32_ty],
2052
- [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v4f32_ty,
2053
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2054
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2055
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2056
-
2057
- def int_amdgcn_mfma_f32_32x32x8bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_32x32x8bf16_1k">,
2058
- Intrinsic<[llvm_v16f32_ty],
2059
- [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v16f32_ty,
2060
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2061
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2062
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2063
-
2064
- def int_amdgcn_mfma_f32_16x16x16bf16_1k : GCCBuiltin<"__builtin_amdgcn_mfma_f32_16x16x16bf16_1k">,
2065
- Intrinsic<[llvm_v4f32_ty],
2066
- [llvm_v4i16_ty, llvm_v4i16_ty, llvm_v4f32_ty,
2067
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2068
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2069
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2070
-
2071
- def int_amdgcn_mfma_f64_16x16x4f64 : GCCBuiltin<"__builtin_amdgcn_mfma_f64_16x16x4f64">,
2072
- Intrinsic<[llvm_v4f64_ty],
2073
- [llvm_double_ty, llvm_double_ty, llvm_v4f64_ty,
2074
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2075
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2076
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
2077
-
2078
- def int_amdgcn_mfma_f64_4x4x4f64 : GCCBuiltin<"__builtin_amdgcn_mfma_f64_4x4x4f64">,
2079
- Intrinsic<[llvm_double_ty],
2080
- [llvm_double_ty, llvm_double_ty, llvm_double_ty,
2081
- llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
2082
- [IntrConvergent, IntrNoMem, IntrWillReturn,
2083
- ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
1931
+ def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty, llvm_double_ty>;
1932
+ def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic<llvm_double_ty, llvm_double_ty>;
2084
1933
2085
1934
//===----------------------------------------------------------------------===//
2086
1935
// Special Intrinsics for backend internal use only. No frontend
0 commit comments