Skip to content

Commit e2837d1

Browse files
committed
Changed Full SM version according to the review comments.
1 parent 203c577 commit e2837d1

File tree

3 files changed

+29
-34
lines changed

3 files changed

+29
-34
lines changed

llvm/lib/Target/NVPTX/NVPTX.td

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -36,19 +36,26 @@ class FeaturePTX<int version>:
3636

3737
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
3838
60, 61, 62, 70, 72, 75, 80, 86, 87,
39-
89, 90] in
40-
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
41-
42-
// Full SM version for sm_90a is 901
43-
def SM90a: FeatureSM<"90a", 901>;
44-
45-
foreach sm = [100, 101, 103, 120, 121] in {
46-
def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
47-
// Arch-specific targets. PTX for these is not compatible with any other
48-
// architectures.
49-
def SM#sm#a: FeatureSM<""#sm#"a", !add(!mul(sm, 10), 1)>;
50-
// Family-specific targets. PTX for these is compatible within the same family.
51-
def SM#sm#f: FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>;
39+
89, 90, 100, 101, 103, 120, 121] in {
40+
// Base SM version (e.g. FullSMVersion for sm_100 is 10000)
41+
def SM#sm : FeatureSM<""#sm, !mul(sm, 100)>;
42+
43+
// Note: Subset of the architecture-specific features, normally
44+
// available in "a" variants that will be compatible with subsequent targets
45+
// in the same family. I.e., they are only ordered within the major architecture,
46+
// but are not comparable with other major architectures
47+
48+
// Family-specific targets which are compatible within the same family
49+
// (e.g. FullSMVersion for sm_100f is 10010)
50+
if !ge(sm, 100) then {
51+
def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 100), 10)>;
52+
}
53+
54+
// Architecture-specific targets which are incompatible across architectures
55+
// (e.g. FullSMVersion for sm_100a is 10011)
56+
if !ge(sm, 90) then {
57+
def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 100), 11)>;
58+
}
5259
}
5360

5461
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -166,10 +166,10 @@ class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>
166166
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
167167

168168
// Explicit records for arch-accelerated SM versions
169-
def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
170-
def hasSM100a : Predicate<"Subtarget->getFullSmVersion() == 1001">;
171-
def hasSM101a : Predicate<"Subtarget->getFullSmVersion() == 1011">;
172-
def hasSM120a : Predicate<"Subtarget->getFullSmVersion() == 1201">;
169+
def hasSM90a : Predicate<"Subtarget->getSmVersion() == 90 && Subtarget->hasArchAccelFeatures()">;
170+
def hasSM100a : Predicate<"Subtarget->getSmVersion() == 100 && Subtarget->hasArchAccelFeatures()">;
171+
def hasSM101a : Predicate<"Subtarget->getSmVersion() == 101 && Subtarget->hasArchAccelFeatures()">;
172+
def hasSM120a : Predicate<"Subtarget->getSmVersion() == 120 && Subtarget->hasArchAccelFeatures()">;
173173

174174
// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
175175
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"

llvm/lib/Target/NVPTX/NVPTXSubtarget.h

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
108108
switch (FullSmVersion) {
109109
default:
110110
break;
111-
case 1001: // sm_100a
112-
case 1011: // sm_101a
111+
case 10011: // sm_100a
112+
case 10111: // sm_101a
113113
HasTcgen05 = true;
114114
break;
115115
}
@@ -127,33 +127,21 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
127127
bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
128128
bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
129129
unsigned int getFullSmVersion() const { return FullSmVersion; }
130-
unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
130+
unsigned int getSmVersion() const { return getFullSmVersion() / 100; }
131131
// GPUs with "a" suffix include architecture-accelerated features that
132132
// are supported on the specified architecture only, hence such targets do not
133133
// follow the onion layer model. hasArchAccelFeatures() allows
134134
// distinguishing such GPU variants from the base GPU architecture.
135135
// - false represents non-accelerated architecture.
136136
// - true represents architecture-accelerated variant.
137-
bool hasArchAccelFeatures() const {
138-
auto FullSMVersionMod = getFullSmVersion() % 10;
139-
assert(FullSMVersionMod < 3 && "Invalid architecture!");
140-
return FullSMVersionMod == 1;
141-
}
137+
// Reports whether the last decimal digit of the full SM version is non-zero.
// In the encoding used by this change only the "a" variants have that digit
// set (e.g. sm_100 is 10000, sm_100f is 10010, sm_100a is 10011).
bool hasArchAccelFeatures() const {
  const unsigned int ArchSpecificDigit = getFullSmVersion() % 10;
  return ArchSpecificDigit != 0;
}
142138
// GPUs with 'f' suffix have architecture-accelerated features which are
143139
// portable across all future architectures under same SM major. For example,
144140
// sm_100f features will work for sm_10X future architectures.
145141
// - false represents non-family-specific architecture.
146142
// - true represents family-specific variant.
147143
bool hasFamilySpecificFeatures() const {
148-
auto FullSMVersionMod = getFullSmVersion() % 10;
149-
assert(FullSMVersionMod < 3 && "Invalid architecture!");
150-
return FullSMVersionMod == 2 && PTXVersion >= 88;
151-
}
152-
// Checks if architecture is accelerated or family-specific.
153-
// - false represents neither arch-accelerated nor family-specific arch.
154-
// - true represents either arch-accelerated or family-specific arch.
155-
bool hasArchAccelOrFamilySpecificFeatures() const {
156-
return hasArchAccelFeatures() || hasFamilySpecificFeatures();
144+
return getFullSmVersion() % 100 != 0 && PTXVersion >= 88;
157145
}
158146
// If the user did not provide a target we default to the `sm_30` target.
159147
std::string getTargetName() const {

0 commit comments

Comments
 (0)