Skip to content

Commit 8aa77ec

Browse files
arsenm and pravinjagtap
authored and committed
AMDGPU: Add gfx950 subtarget definitions (llvm#116307)
Mostly a stub, but adds some baseline tests and tests for removed instructions. Change-Id: I07ca173275feac4b0ce3050ade43d665ffff19a1
1 parent 76c0214 commit 8aa77ec

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+5159
-4522
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1154,6 +1154,19 @@ Target Specific Changes
11541154
AMDGPU Support
11551155
^^^^^^^^^^^^^^
11561156

1157+
- Initial support for gfx950
1158+
1159+
- Added headers ``gpuintrin.h`` and ``amdgpuintrin.h`` that contain common
1160+
definitions for GPU builtin functions. These headers can be included for OpenMP,
1161+
CUDA, HIP, OpenCL, and C/C++.
1162+
1163+
NVPTX Support
1164+
^^^^^^^^^^^^^^
1165+
1166+
- Added headers ``gpuintrin.h`` and ``nvptxintrin.h`` that contain common
1167+
definitions for GPU builtin functions. These headers can be included for OpenMP,
1168+
CUDA, HIP, OpenCL, and C/C++.
1169+
11571170
X86 Support
11581171
^^^^^^^^^^^
11591172

clang/include/clang/Basic/Cuda.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ enum class OffloadArch {
105105
GFX940,
106106
GFX941,
107107
GFX942,
108+
GFX950,
108109
GFX10_1_GENERIC,
109110
GFX1010,
110111
GFX1011,

clang/lib/Basic/Cuda.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ static const OffloadArchToStringMap arch_names[] = {
123123
GFX(940), // gfx940
124124
GFX(941), // gfx941
125125
GFX(942), // gfx942
126+
GFX(950), // gfx950
126127
{OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
127128
GFX(1010), // gfx1010
128129
GFX(1011), // gfx1011

clang/lib/Basic/Targets/NVPTX.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
209209
case OffloadArch::GFX940:
210210
case OffloadArch::GFX941:
211211
case OffloadArch::GFX942:
212+
case OffloadArch::GFX950:
212213
case OffloadArch::GFX10_1_GENERIC:
213214
case OffloadArch::GFX1010:
214215
case OffloadArch::GFX1011:

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,6 +2529,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
25292529
case OffloadArch::GFX940:
25302530
case OffloadArch::GFX941:
25312531
case OffloadArch::GFX942:
2532+
case OffloadArch::GFX950:
25322533
case OffloadArch::GFX10_1_GENERIC:
25332534
case OffloadArch::GFX1010:
25342535
case OffloadArch::GFX1011:

clang/test/CodeGenOpenCL/amdgpu-features.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
3333
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
3434
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
35+
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
3536
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
3637
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
3738
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -88,6 +89,7 @@
8889
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
8990
// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
9091
// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
92+
// GFX950: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
9193
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9294
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
9395
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"

clang/test/Driver/amdgpu-macros.cl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
111111
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
112112
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9
113+
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx950 -DFAMILY=GFX9
113114
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
114115
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
115116
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10

clang/test/Driver/amdgpu-mcpu.cl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
9696
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
9797
// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s
98+
// RUN: %clang -### -target amdgcn -mcpu=gfx950 %s 2>&1 | FileCheck --check-prefix=GFX950 %s
9899
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
99100
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
100101
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -150,6 +151,7 @@
150151
// GFX940: "-target-cpu" "gfx940"
151152
// GFX941: "-target-cpu" "gfx941"
152153
// GFX942: "-target-cpu" "gfx942"
154+
// GFX950: "-target-cpu" "gfx950"
153155
// GFX1010: "-target-cpu" "gfx1010"
154156
// GFX1011: "-target-cpu" "gfx1011"
155157
// GFX1012: "-target-cpu" "gfx1012"

clang/test/Misc/target-invalid-cpu-note.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,15 @@
2929

3030
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
3131
// NVPTX: error: unknown target CPU 'not-a-cpu'
32-
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx9-4-generic, gfx940, gfx941, gfx942, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx1153, gfx12-generic, gfx1200, gfx1201, amdgcnspirv{{$}}
32+
// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, sm_90a, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx9-generic, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx9-4-generic, gfx940, gfx941, gfx942, gfx950, gfx10-1-generic, gfx1010, gfx1011, gfx1012, gfx1013, gfx10-3-generic, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx11-generic, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx1153, gfx12-generic, gfx1200, gfx1201, amdgcnspirv{{$}}
3333

3434
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
3535
// R600: error: unknown target CPU 'not-a-cpu'
3636
// R600-NEXT: note: valid target CPU values are: r600, rv630, rv635, r630, rs780, rs880, rv610, rv620, rv670, rv710, rv730, rv740, rv770, cedar, palm, cypress, hemlock, juniper, redwood, sumo, sumo2, barts, caicos, aruba, cayman, turks{{$}}
3737

3838
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
3939
// AMDGCN: error: unknown target CPU 'not-a-cpu'
40-
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx1153, gfx1200, gfx1201, gfx9-generic, gfx10-1-generic, gfx10-3-generic, gfx11-generic, gfx12-generic, gfx9-4-generic{{$}}
40+
// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx950, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx1153, gfx1200, gfx1201, gfx9-generic, gfx10-1-generic, gfx10-3-generic, gfx11-generic, gfx12-generic, gfx9-4-generic{{$}}
4141

4242
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
4343
// WEBASM: error: unknown target CPU 'not-a-cpu'

llvm/docs/AMDGPUUsage.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
400400
work-item Add product
401401
IDs names.
402402

403+
``gfx950`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
404+
- tgsplit flat
405+
- xnack scratch .. TODO::
406+
- kernarg preload - Packed
407+
work-item Add product
408+
IDs names.
409+
403410
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
404411
-----------------------------------------------------------------------------------------------------------------------
405412
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -2116,7 +2123,7 @@ The AMDGPU backend uses the following ELF header:
21162123
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
21172124
*reserved* 0x04d Reserved.
21182125
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
2119-
*reserved* 0x04f Reserved.
2126+
``EF_AMDGPU_MACH_AMDGCN_GFX950`` 0x04f ``gfx950``
21202127
*reserved* 0x050 Reserved.
21212128
``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
21222129
``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10-1-generic``

llvm/include/llvm/BinaryFormat/ELF.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -790,7 +790,7 @@ enum : unsigned {
790790
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
791791
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
792792
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
793-
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
793+
EF_AMDGPU_MACH_AMDGCN_GFX950 = 0x04f,
794794
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
795795
EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
796796
EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,

llvm/include/llvm/TargetParser/TargetParser.h

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,19 @@ enum GPUKind : uint32_t {
8686
GK_GFX940 = 68,
8787
GK_GFX941 = 69,
8888
GK_GFX942 = 70,
89-
90-
GK_GFX1010 = 71,
91-
GK_GFX1011 = 72,
92-
GK_GFX1012 = 73,
93-
GK_GFX1013 = 74,
94-
GK_GFX1030 = 75,
95-
GK_GFX1031 = 76,
96-
GK_GFX1032 = 77,
97-
GK_GFX1033 = 78,
98-
GK_GFX1034 = 79,
99-
GK_GFX1035 = 80,
100-
GK_GFX1036 = 81,
89+
GK_GFX950 = 71,
90+
91+
GK_GFX1010 = 72,
92+
GK_GFX1011 = 73,
93+
GK_GFX1012 = 74,
94+
GK_GFX1013 = 75,
95+
GK_GFX1030 = 76,
96+
GK_GFX1031 = 77,
97+
GK_GFX1032 = 78,
98+
GK_GFX1033 = 79,
99+
GK_GFX1034 = 80,
100+
GK_GFX1035 = 81,
101+
GK_GFX1036 = 82,
101102

102103
GK_GFX1100 = 90,
103104
GK_GFX1101 = 91,

llvm/lib/Object/ELFObjectFile.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
548548
return "gfx941";
549549
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
550550
return "gfx942";
551+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
552+
return "gfx950";
551553

552554
// AMDGCN GFX10.
553555
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:

llvm/lib/ObjectYAML/ELFYAML.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
594594
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
595595
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
596596
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
597+
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
597598
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
598599
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
599600
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,12 @@ def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
360360
"Additional instructions for GFX940+"
361361
>;
362362

363+
def FeatureGFX950Insts : SubtargetFeature<"gfx950-insts",
364+
"GFX950Insts",
365+
"true",
366+
"Additional instructions for GFX950+"
367+
>;
368+
363369
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
364370
"GFX10Insts",
365371
"true",
@@ -1460,6 +1466,14 @@ def FeatureISAVersion9_4_Common : FeatureSet<
14601466
FeatureFlatBufferGlobalAtomicFaddF64Inst
14611467
]>;
14621468

1469+
def FeatureISAVersion9_5_Common : FeatureSet<
1470+
!listconcat(FeatureISAVersion9_4_Common.Features,
1471+
[FeatureFP8Insts,
1472+
FeatureFP8ConversionInsts,
1473+
FeatureCvtFP8VOP1Bug,
1474+
FeatureGFX950Insts
1475+
])>;
1476+
14631477
def FeatureISAVersion9_4_0 : FeatureSet<
14641478
!listconcat(FeatureISAVersion9_4_Common.Features,
14651479
[
@@ -1493,6 +1507,8 @@ def FeatureISAVersion9_4_Generic : FeatureSet<
14931507
!listconcat(FeatureISAVersion9_4_Common.Features,
14941508
[FeatureRequiresCOV6])>;
14951509

1510+
def FeatureISAVersion9_5_0 : FeatureSet<FeatureISAVersion9_5_Common.Features>;
1511+
14961512
def FeatureISAVersion10_Common : FeatureSet<
14971513
[FeatureGFX10,
14981514
FeatureLDSBankCount32,

llvm/lib/Target/AMDGPU/GCNProcessors.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
204204
FeatureISAVersion9_4_2.Features
205205
>;
206206

207+
def : ProcessorModel<"gfx950", SIDPGFX940FullSpeedModel,
208+
FeatureISAVersion9_5_0.Features
209+
>;
210+
207211
// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
208212
def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
209213
FeatureISAVersion9_Generic.Features

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
106106
bool GFX9Insts = false;
107107
bool GFX90AInsts = false;
108108
bool GFX940Insts = false;
109+
bool GFX950Insts = false;
109110
bool GFX10Insts = false;
110111
bool GFX11Insts = false;
111112
bool GFX12Insts = false;

llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
9696
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
9797
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
9898
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
99+
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
99100
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
100101
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
101102
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -182,6 +183,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
182183
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
183184
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
184185
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
186+
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
185187
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
186188
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
187189
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;

llvm/lib/TargetParser/TargetParser.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
107107
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
108108
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
109109
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
110+
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
110111
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
111112
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
112113
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
@@ -262,6 +263,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
262263
case GK_GFX940: return {9, 4, 0};
263264
case GK_GFX941: return {9, 4, 1};
264265
case GK_GFX942: return {9, 4, 2};
266+
case GK_GFX950: return {9, 5, 0};
265267
case GK_GFX1010: return {10, 1, 0};
266268
case GK_GFX1011: return {10, 1, 1};
267269
case GK_GFX1012: return {10, 1, 2};
@@ -361,7 +363,8 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
361363
Features["wavefrontsize32"] = true;
362364
Features["wavefrontsize64"] = true;
363365
} else if (T.isAMDGCN()) {
364-
switch (parseArchAMDGCN(GPU)) {
366+
AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
367+
switch (Kind) {
365368
case GK_GFX1201:
366369
case GK_GFX1200:
367370
case GK_GFX12_GENERIC:
@@ -466,12 +469,16 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
466469
Features["s-memtime-inst"] = true;
467470
Features["gws"] = true;
468471
break;
472+
case GK_GFX950:
473+
Features["gfx950-insts"] = true;
474+
[[fallthrough]];
469475
case GK_GFX942:
470476
case GK_GFX941:
471477
case GK_GFX940:
472478
Features["fp8-insts"] = true;
473479
Features["fp8-conversion-insts"] = true;
474-
Features["xf32-insts"] = true;
480+
if (Kind != GK_GFX950)
481+
Features["xf32-insts"] = true;
475482
[[fallthrough]];
476483
case GK_GFX9_4_GENERIC:
477484
Features["gfx940-insts"] = true;

0 commit comments

Comments
 (0)