@@ -33,20 +33,61 @@ class FeaturePTX<int version>:
33
33
SubtargetFeature<"ptx"# version, "PTXVersion",
34
34
"" # version,
35
35
"Use PTX version " # version>;
36
-
36
+ //
37
+ // NVPTX Architecture Hierarchy and Ordering:
38
+ //
39
+ // Family: 2/3/5/6/7/8/9/10/12 (follows the onion model: an older family is compatible with a newer family)
40
+ // Arch: 2*/3*/5*/6*/7*/8*/9*/10*/12*
41
+ //
42
+ // Family-specific: F*f : F*f > F* =>
43
+ // + The plain base architecture is compatible with the family-specific architecture
44
+ // (e.g. sm_100 compatible with >= sm_100*f*)
45
+ // + The family-specific architecture is compatible with future family-specific
46
+ // architectures within the same family (e.g. sm_100f compatible with >= sm_10X*f*
47
+ // but not with sm_12X*f*)
48
+ //
49
+ // Family and SM Target Definition:
50
+ // +----------------+--------------------------------------------------------+
51
+ // | Family | Target SM architectures included |
52
+ // +----------------+--------------------------------------------------------+
53
+ // | sm_10x family | sm_100f, sm_103f, future targets in sm_10x family |
54
+ // | sm_101 family | sm_101f (exception) |
55
+ // | sm_12x family | sm_120f, sm_121f, future targets in sm_12x family |
56
+ // +----------------+--------------------------------------------------------+
57
+ //
58
+ // Architecture-specific: F*a : F*a > F*f > F* =>
59
+ // + The plain base architecture is compatible with the architecture-specific architecture
60
+ // (e.g. sm_100 compatible with >= sm_100*a*)
61
+ // + The family-specific architecture is compatible with the architecture-specific architecture
62
+ // (e.g. sm_100f compatible with >= sm_100*a*)
63
+ // + The architecture-specific architecture is incompatible with any other architecture
64
+ // (e.g. sm_100a is only compatible with sm_100*a*)
65
+ //
66
+ // Encoding: Arch * 100 + 'f' * 10 + 'a' * 1 (where 'a' ⇒ 'f')
67
+ //
68
+ // This encoding allows simple implementation of the partial ordering of the architectures.
69
+ // + Compare Family and Arch by dividing FullSMVersion by 1000 and 100 respectively before the comparison.
70
+ // + Compare within the family by comparing FullSMVersion, given both belong to the same family.
71
+ // + Detect 'a' variants by checking FullSMVersion % 10 (1 for 'a' variants, 0 otherwise).
72
+ //
37
73
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
38
74
60, 61, 62, 70, 72, 75, 80, 86, 87,
39
- 89, 90, 100, 101, 103, 120, 121] in
40
- def SM#sm: FeatureSM<""#sm, !mul(sm, 10)>;
75
+ 89, 90, 100, 101, 103, 120, 121] in {
76
+ // Base SM version (e.g. FullSMVersion for sm_100 is 10000)
77
+ def SM#sm : FeatureSM<""#sm, !mul(sm, 100)>;
41
78
42
- // Arch-specific targets. PTX for these is not compatible with any other
43
- // architectures.
44
- def SM90a : FeatureSM<"90a", 901>;
45
- def SM100a: FeatureSM<"100a", 1001>;
46
- def SM101a: FeatureSM<"101a", 1011>;
47
- def SM103a: FeatureSM<"103a", 1031>;
48
- def SM120a: FeatureSM<"120a", 1201>;
49
- def SM121a: FeatureSM<"121a", 1211>;
79
+ // Family-specific targets which are compatible within same family
80
+ // (e.g. FullSMVersion for sm_100f is 10010)
81
+ if !ge(sm, 100) then {
82
+ def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 100), 10)>;
83
+ }
84
+
85
+ // Architecture-specific targets which are incompatible across architectures
86
+ // (e.g. FullSMVersion for sm_100a is 10011)
87
+ if !ge(sm, 90) then {
88
+ def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 100), 11)>;
89
+ }
90
+ }
50
91
51
92
foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
52
93
70, 71, 72, 73, 74, 75, 76, 77, 78,
@@ -83,14 +124,19 @@ def : Proc<"sm_90", [SM90, PTX78]>;
83
124
def : Proc<"sm_90a", [SM90a, PTX80]>;
84
125
def : Proc<"sm_100", [SM100, PTX86]>;
85
126
def : Proc<"sm_100a", [SM100a, PTX86]>;
127
+ def : Proc<"sm_100f", [SM100f, PTX88]>;
86
128
def : Proc<"sm_101", [SM101, PTX86]>;
87
129
def : Proc<"sm_101a", [SM101a, PTX86]>;
130
+ def : Proc<"sm_101f", [SM101f, PTX88]>;
88
131
def : Proc<"sm_103", [SM103, PTX88]>;
89
132
def : Proc<"sm_103a", [SM103a, PTX88]>;
133
+ def : Proc<"sm_103f", [SM103f, PTX88]>;
90
134
def : Proc<"sm_120", [SM120, PTX87]>;
91
135
def : Proc<"sm_120a", [SM120a, PTX87]>;
136
+ def : Proc<"sm_120f", [SM120f, PTX88]>;
92
137
def : Proc<"sm_121", [SM121, PTX88]>;
93
138
def : Proc<"sm_121a", [SM121a, PTX88]>;
139
+ def : Proc<"sm_121f", [SM121f, PTX88]>;
94
140
95
141
def NVPTXInstrInfo : InstrInfo {
96
142
}
0 commit comments