@@ -33,43 +33,48 @@ class FeaturePTX<int version>:
33
33
SubtargetFeature<"ptx"# version, "PTXVersion",
34
34
"" # version,
35
35
"Use PTX version " # version>;
36
- //
37
36
// NVPTX Architecture Hierarchy and Ordering:
38
- //
39
- // Family: 2/3/5/6/7/8/9/10/12 (Follows Onion model, older family is compatible with newer family)
40
- // Arch: 2*/3*/5*/6*/7*/8*/9*/10*/12*
41
37
//
42
- // Family-specific: F*f : F*f > F* =>
43
- // + The plain base architecture is compatible with the family-specific architecture
44
- // (e.g. sm_100 compatible with >= sm_100*f*)
45
- // + The family-specific architecture is compatible with future family-specific
46
- // architectures within the same family (e.g. sm_100f compatible with >= sm_10X*f*
47
- // but not with sm_12X*f*)
38
+ // GPU architectures: sm_2Y/sm_3Y/sm_5Y/sm_6Y/sm_7Y/sm_8Y/sm_9Y/sm_10Y/sm_12Y
39
+ // ('Y' represents version within the architecture)
40
+ // The architectures have names of the form sm_XYz, where 'X' represents the
41
+ // generation number, 'Y' represents the version within the architecture, and
42
+ // 'z' represents the optional feature suffix.
43
+ // If X1Y1 <= X2Y2, then GPU capabilities of sm_X1Y1 are included in sm_X2Y2.
44
+ // For example, take sm_90 (9 represents 'X', 0 represents 'Y', and no feature
45
+ // suffix) and sm_103 architectures (10 represents 'X', 3 represents 'Y', and no
46
+ // feature suffix). Since 90 <= 103, sm_90 is compatible with sm_103.
48
47
//
49
- // Family and SM Target Definition:
50
- // +----------------+--------------------------------------------------------+
51
- // | Family | Target SM architectures included |
52
- // +----------------+--------------------------------------------------------+
53
- // | sm_10x family | sm_100f, sm_103f, future targets in sm_10x family |
54
- // | sm_101 family | sm_101f (exception) |
55
- // | sm_12x family | sm_120f, sm_121f, future targets in sm_12x family |
56
- // +----------------+--------------------------------------------------------+
48
+ // The family-specific architectures have the 'f' feature suffix and follow
49
+ // the following order:
50
+ // sm_X{Y2}f > sm_X{Y1}f iff Y2 > Y1
51
+ // sm_XY{f} > sm_{XY}{}
57
52
//
58
- // Architecture-specific: F*a : F*a > F*f > F* =>
59
- // + The plain base architecture is compatible with the architecture-specific architecture
60
- // (e.g. sm_100 compatible with >= sm_100*a*)
61
- // + The family-specific architecture is compatible with the architecture-specific architecture
62
- // (e.g. sm_100f compatible with >= sm_100*a*)
63
- // + The architecture-specific architecture is incompatible with any other architecture
64
- // (e.g. sm_100a is only compatible with sm_100*a*)
53
+ // For example, take sm_100f (10 represents 'X', 0 represents 'Y', and 'f'
54
+ // represents 'z') and sm_103f (10 represents 'X', 3 represents 'Y', and 'f'
55
+ // represents 'z') architectures. Since Y1 < Y2, sm_100f is compatible with
56
+ // sm_103f. Similarly based on the second rule, sm_90 is compatible with sm_103f.
65
57
//
66
- // Encoding: Arch * 1000 + 'f' * 10 + 'a' * 1 (where 'a' ⇒ 'f')
67
- //
68
- // This encoding allows simple implementation of the partial ordering of the architectures.
69
- // + Compare Family and Arch by dividing FullSMVersion by 1000 and 100 respectively before the comparison.
70
- // + Compare within the family by comparing FullSMVersion, given both belongs to the same family.
71
- // + Detect 'a' variants by checking FullSMVersion % 10.
58
+ // The architecture-specific architectures have the 'a' feature suffix and
59
+ // follow the following order:
60
+ // sm_XY{a} > sm_XY{f} > sm_{XY}{}
61
+ //
62
+ // For example, take sm_103a (10 represents 'X', 3 represents 'Y', and 'a'
63
+ // represents 'z'), sm_103f, and sm_103 architectures. The sm_103 is compatible
64
+ // with sm_103a and sm_103f, and sm_103f is compatible with sm_103a.
72
65
//
66
+ // Encoding := Arch * 100 + 10 (for 'f') + 1 (for 'a'; 'a' implies 'f')
67
+ // Arch := X * 10 + Y
68
+ //
69
+ // For example, sm_103a is encoded as 10311 (103 * 100 + 10 + 1) and sm_103f is
70
+ // encoded as 10310 (103 * 100 + 10).
71
+ //
72
+ // This encoding allows simple partial ordering of the architectures.
73
+ // + Compare Family and Arch by dividing FullSMVersion by 1000 and 100
74
+ // respectively before the comparison.
75
+ // + Compare within the family by comparing FullSMVersion, given both belong to
76
+ // the same family.
77
+ // + Detect 'a' variants by checking FullSMVersion % 10.
73
78
foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
74
79
60, 61, 62, 70, 72, 75, 80, 86, 87,
75
80
89, 90, 100, 101, 103, 120, 121] in {
0 commit comments