Skip to content

Commit 1ec1528

Browse files
committed
[NVPTX] Add family-specific architectures support
This change adds support for family-specific architectures. These architectures have an "f" suffix, for example sm_100f. This change does not promote any existing features to the family-specific architectures.
1 parent 76b6bf4 commit 1ec1528

File tree

4 files changed

+98
-22
lines changed

4 files changed

+98
-22
lines changed

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 57 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,61 @@ class FeaturePTX<int version>:
3333
SubtargetFeature<"ptx"# version, "PTXVersion",
3434
"" # version,
3535
"Use PTX version " # version>;
36-
36+
//
37+
// NVPTX Architecture Hierarchy and Ordering:
38+
//
39+
// Family: 2/3/5/6/7/8/9/10/12 (Follows Onion model, older family is compatible with newer family)
40+
// Arch: 2*/3*/5*/6*/7*/8*/9*/10*/12*
41+
//
42+
// Family-specific: F*f : F*f > F* =>
43+
// + The plain base architecture is compatible with the family-specific architecture
44+
// (e.g. sm_100 compatible with >= sm_100*f*)
45+
// + The family-specific architecture is compatible with future family-specific
46+
// architectures within the same family (e.g. sm_100f compatible with >= sm_10X*f*
47+
// but not with sm_12X*f*)
48+
//
49+
// Family and SM Target Definition:
50+
// +----------------+--------------------------------------------------------+
51+
// | Family | Target SM architectures included |
52+
// +----------------+--------------------------------------------------------+
53+
// | sm_10x family | sm_100f, sm_103f, future targets in sm_10x family |
54+
// | sm_101 family | sm_101f (exception) |
55+
// | sm_12x family | sm_120f, sm_121f, future targets in sm_12x family |
56+
// +----------------+--------------------------------------------------------+
57+
//
58+
// Architecture-specific: F*a : F*a > F*f > F* =>
59+
// + The plain base architecture is compatible with the architecture-specific architecture
60+
// (e.g. sm_100 compatible with >= sm_100*a*)
61+
// + The family-specific architecture is compatible with the architecture-specific architecture
62+
// (e.g. sm_100f compatible with >= sm_100*a*)
63+
// + The architecture-specific architecture is incompatible with any other architecture
64+
// (e.g. sm_100a is only compatible with sm_100*a*)
65+
//
66+
// Encoding: Arch * 100 + 'f' * 10 + 'a' * 1 (where 'a' ⇒ 'f'), e.g. sm_100a = 100 * 100 + 10 + 1 = 10011
67+
//
68+
// This encoding allows simple implementation of the partial ordering of the architectures.
69+
// + Compare Family and Arch by dividing FullSMVersion by 1000 and 100 respectively before the comparison.
70+
// + Compare within the family by comparing FullSMVersion, given both belong to the same family.
71+
// + Detect 'a' variants by checking FullSMVersion % 10.
72+
//
3773
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
3874
60, 61, 62, 70, 72, 75, 80, 86, 87,
39-
89, 90, 100, 101, 103, 120, 121] in
40-
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
75+
89, 90, 100, 101, 103, 120, 121] in {
76+
// Base SM version (e.g. FullSMVersion for sm_100 is 10000)
77+
def SM#sm : FeatureSM<""#sm, !mul(sm, 100)>;
4178

42-
// Arch-specific targets. PTX for these is not compatible with any other
43-
// architectures.
44-
def SM90a : FeatureSM<"90a", 901>;
45-
def SM100a: FeatureSM<"100a", 1001>;
46-
def SM101a: FeatureSM<"101a", 1011>;
47-
def SM103a: FeatureSM<"103a", 1031>;
48-
def SM120a: FeatureSM<"120a", 1201>;
49-
def SM121a: FeatureSM<"121a", 1211>;
79+
// Family-specific targets which are compatible within same family
80+
// (e.g. FullSMVersion for sm_100f is 10010)
81+
if !ge(sm, 100) then {
82+
def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 100), 10)>;
83+
}
84+
85+
// Architecture-specific targets which are incompatible across architectures
86+
// (e.g. FullSMVersion for sm_100a is 10011)
87+
if !ge(sm, 90) then {
88+
def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 100), 11)>;
89+
}
90+
}
5091

5192
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
5293
70, 71, 72, 73, 74, 75, 76, 77, 78,
@@ -83,14 +124,19 @@ def : Proc<"sm_90", [SM90, PTX78]>;
83124
def : Proc<"sm_90a", [SM90a, PTX80]>;
84125
def : Proc<"sm_100", [SM100, PTX86]>;
85126
def : Proc<"sm_100a", [SM100a, PTX86]>;
127+
def : Proc<"sm_100f", [SM100f, PTX88]>;
86128
def : Proc<"sm_101", [SM101, PTX86]>;
87129
def : Proc<"sm_101a", [SM101a, PTX86]>;
130+
def : Proc<"sm_101f", [SM101f, PTX88]>;
88131
def : Proc<"sm_103", [SM103, PTX88]>;
89132
def : Proc<"sm_103a", [SM103a, PTX88]>;
133+
def : Proc<"sm_103f", [SM103f, PTX88]>;
90134
def : Proc<"sm_120", [SM120, PTX87]>;
91135
def : Proc<"sm_120a", [SM120a, PTX87]>;
136+
def : Proc<"sm_120f", [SM120f, PTX88]>;
92137
def : Proc<"sm_121", [SM121, PTX88]>;
93138
def : Proc<"sm_121a", [SM121a, PTX88]>;
139+
def : Proc<"sm_121f", [SM121f, PTX88]>;
94140

95141
def NVPTXInstrInfo : InstrInfo {
96142
}

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,10 @@ class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>
166166
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
167167

168168
// Explicit records for arch-accelerated SM versions
169-
def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
170-
def hasSM100a : Predicate<"Subtarget->getFullSmVersion() == 1001">;
171-
def hasSM101a : Predicate<"Subtarget->getFullSmVersion() == 1011">;
172-
def hasSM120a : Predicate<"Subtarget->getFullSmVersion() == 1201">;
169+
def hasSM90a : Predicate<"Subtarget->getSmVersion() == 90 && Subtarget->hasArchAccelFeatures()">;
170+
def hasSM100a : Predicate<"Subtarget->getSmVersion() == 100 && Subtarget->hasArchAccelFeatures()">;
171+
def hasSM101a : Predicate<"Subtarget->getSmVersion() == 101 && Subtarget->hasArchAccelFeatures()">;
172+
def hasSM120a : Predicate<"Subtarget->getSmVersion() == 120 && Subtarget->hasArchAccelFeatures()">;
173173

174174
// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
175175
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
108108
switch (FullSmVersion) {
109109
default:
110110
break;
111-
case 1001: // sm_100a
112-
case 1011: // sm_101a
111+
case 10011: // sm_100a
112+
case 10111: // sm_101a
113113
HasTcgen05 = true;
114114
break;
115115
}
@@ -127,15 +127,25 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
127127
bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
128128
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
129129
unsigned int getFullSmVersion() const { return FullSmVersion; }
130-
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
130+
unsigned int getSmVersion() const { return getFullSmVersion() / 100; }
131131
// GPUs with "a" suffix include architecture-accelerated features that
132132
// are supported on the specified architecture only, hence such targets do not
133133
// follow the onion layer model. hasArchAccelFeatures() allows
134134
// distinguishing such GPU variants from the base GPU architecture.
135-
// - 0 represents base GPU model,
136-
// - non-zero value identifies particular architecture-accelerated variant.
137-
bool hasArchAccelFeatures() const { return getFullSmVersion() % 10; }
138-
135+
// - false represents non-accelerated architecture.
136+
// - true represents architecture-accelerated variant.
137+
bool hasArchAccelFeatures() const {
138+
return getFullSmVersion() % 10 && PTXVersion >= 80;
139+
}
140+
// GPUs with 'f' suffix have architecture-accelerated features which are
141+
// portable across all future architectures under same SM major. For example,
142+
// sm_100f features will work for sm_10X*f*/sm_10X*a* future architectures.
143+
// - false represents non-family-specific architecture.
144+
// - true represents family-specific variant.
145+
bool hasFamilySpecificFeatures() const {
146+
return getFullSmVersion() % 100 == 10 ? PTXVersion >= 88
147+
: hasArchAccelFeatures();
148+
}
139149
// If the user did not provide a target we default to the `sm_30` target.
140150
std::string getTargetName() const {
141151
return TargetName.empty() ? "sm_30" : TargetName;

llvm/test/CodeGen/NVPTX/sm-version.ll

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,19 @@
1818
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_90a | FileCheck %s --check-prefix=SM90a
1919
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100 | FileCheck %s --check-prefix=SM100
2020
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a
21+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_100f | FileCheck %s --check-prefix=SM100f
2122
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101 | FileCheck %s --check-prefix=SM101
2223
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a
24+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_101f | FileCheck %s --check-prefix=SM101f
2325
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103 | FileCheck %s --check-prefix=SM103
2426
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a
27+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_103f | FileCheck %s --check-prefix=SM103f
2528
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120 | FileCheck %s --check-prefix=SM120
2629
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a
30+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_120f | FileCheck %s --check-prefix=SM120f
2731
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121 | FileCheck %s --check-prefix=SM121
2832
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a
33+
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_121f | FileCheck %s --check-prefix=SM121f
2934

3035
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s --check-prefix=SM20
3136
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_21 | FileCheck %s --check-prefix=SM21
@@ -47,14 +52,19 @@
4752
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_90a | FileCheck %s --check-prefix=SM90a
4853
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 | FileCheck %s --check-prefix=SM100
4954
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100a | FileCheck %s --check-prefix=SM100a
55+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100f | FileCheck %s --check-prefix=SM100f
5056
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101 | FileCheck %s --check-prefix=SM101
5157
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101a | FileCheck %s --check-prefix=SM101a
58+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_101f | FileCheck %s --check-prefix=SM101f
5259
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103 | FileCheck %s --check-prefix=SM103
5360
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103a | FileCheck %s --check-prefix=SM103a
61+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_103f | FileCheck %s --check-prefix=SM103f
5462
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120 | FileCheck %s --check-prefix=SM120
5563
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120a | FileCheck %s --check-prefix=SM120a
64+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_120f | FileCheck %s --check-prefix=SM120f
5665
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121 | FileCheck %s --check-prefix=SM121
5766
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121a | FileCheck %s --check-prefix=SM121a
67+
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_121f | FileCheck %s --check-prefix=SM121f
5868

5969
; SM20: .version 3.2
6070
; SM21: .version 3.2
@@ -76,14 +86,19 @@
7686
; SM90a: .version 8.0
7787
; SM100: .version 8.6
7888
; SM100a: .version 8.6
89+
; SM100f: .version 8.8
7990
; SM101: .version 8.6
8091
; SM101a: .version 8.6
92+
; SM101f: .version 8.8
8193
; SM103: .version 8.8
8294
; SM103a: .version 8.8
95+
; SM103f: .version 8.8
8396
; SM120: .version 8.7
8497
; SM120a: .version 8.7
98+
; SM120f: .version 8.8
8599
; SM121: .version 8.8
86100
; SM121a: .version 8.8
101+
; SM121f: .version 8.8
87102

88103
; SM20: .target sm_20
89104
; SM21: .target sm_21
@@ -105,11 +120,16 @@
105120
; SM90a: .target sm_90a
106121
; SM100: .target sm_100
107122
; SM100a: .target sm_100a
123+
; SM100f: .target sm_100f
108124
; SM101: .target sm_101
109125
; SM101a: .target sm_101a
126+
; SM101f: .target sm_101f
110127
; SM103: .target sm_103
111128
; SM103a: .target sm_103a
129+
; SM103f: .target sm_103f
112130
; SM120: .target sm_120
113131
; SM120a: .target sm_120a
132+
; SM120f: .target sm_120f
114133
; SM121: .target sm_121
115134
; SM121a: .target sm_121a
135+
; SM121f: .target sm_121f

0 commit comments

Comments
 (0)