Update design to use separate bit instead of digit

rajatbajpai · rajatbajpai · commit 444c46881b25 · 2025-06-18T12:37:12.000+05:30
diff --git a/llvm/docs/NVPTXUsage.rst b/llvm/docs/NVPTXUsage.rst
@@ -185,19 +185,19 @@ For example, take ``sm_103a`` (10 represents ``X``, 3 represents ``Y``, and ``a`
 represents ``z``), ``sm_103f``, and ``sm_103`` architecture variants. The ``sm_103`` is
 compatible with ``sm_103a`` and ``sm_103f``, and ``sm_103f`` is compatible with ``sm_103a``.
 
-Encoding := Arch * 100 + 10 (for 'f') + 1 (for 'a')
+Encoding := Arch * 10 + 2 (for 'f') + 1 (for 'a')
 Arch := X * 10 + Y
 
-For example, ``sm_103a`` is encoded as 10311 (103 * 100 + 10 + 1) and ``sm_103f`` is
-encoded as 10310 (103 * 100 + 10).
+For example, ``sm_103a`` is encoded as 1033 (103 * 10 + 2 + 1) and ``sm_103f`` is
+encoded as 1032 (103 * 10 + 2).
 
 This encoding allows simple partial ordering of the architectures.
 
-* Compare Family and Arch by dividing FullSMVersion by 1000 and 100
+* Compare Family and Arch by dividing FullSMVersion by 100 and 10
   respectively before the comparison.
 * Compare within the family by comparing FullSMVersion, given both belong to
   the same family.
-* Detect ``a`` variants by checking FullSMVersion % 10.
+* Detect ``a`` variants by checking FullSMVersion & 1.
 
 .. _nvptx_intrinsics:
 
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -68,33 +68,33 @@ class FeaturePTX<int version>:
 // represents 'z'), sm_103f, and sm_103 architecture variants. The sm_103 is
 // compatible with sm_103a and sm_103f, and sm_103f is compatible with sm_103a.
 //
-// Encoding := Arch * 100 + 10 (for 'f') + 1 (for 'a')
+// Encoding := Arch * 10 + 2 (for 'f') + 1 (for 'a')
 // Arch := X * 10 + Y
 //
-// For example, sm_103a is encoded as 10311 (103 * 100 + 10 + 1) and sm_103f is
-// encoded as 10310 (103 * 100 + 10).
+// For example, sm_103a is encoded as 1033 (103 * 10 + 2 + 1) and sm_103f is
+// encoded as 1032 (103 * 10 + 2).
 //
 // This encoding allows simple partial ordering of the architectures.
-//  + Compare Family and Arch by dividing FullSMVersion by 1000 and 100
+//  + Compare Family and Arch by dividing FullSMVersion by 100 and 10
 //    respectively before the comparison.
 //  + Compare within the family by comparing FullSMVersion, given both belong to
 //    the same family.
-//  + Detect 'a' variants by checking FullSMVersion % 10.
+//  + Detect 'a' variants by checking FullSMVersion & 1.
 foreach sm = [20, 21, 30, 32, 35, 37, 50, 52, 53,
               60, 61, 62, 70, 72, 75, 80, 86, 87,
               89, 90, 100, 101, 103, 120, 121] in {
-  // Base SM version (e.g. FullSMVersion for sm_100 is 10000)
-  def SM#sm : FeatureSM<""#sm, !mul(sm, 100)>;
+  // Base SM version (e.g. FullSMVersion for sm_100 is 1000)
+  def SM#sm : FeatureSM<""#sm, !mul(sm, 10)>;
 
   // Family-specific targets which are compatible within same family
-  // (e.g. FullSMVersion for sm_100f is 10010)
+  // (e.g. FullSMVersion for sm_100f is 1002)
   if !ge(sm, 100) then
-    def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 100), 10)>;
+    def SM#sm#f : FeatureSM<""#sm#"f", !add(!mul(sm, 10), 2)>;
 
   // Architecture-specific targets which are incompatible across architectures
-  // (e.g. FullSMVersion for sm_100a is 10011)
+  // (e.g. FullSMVersion for sm_100a is 1003)
   if !ge(sm, 90) then
-    def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 100), 11)>;
+    def SM#sm#a : FeatureSM<""#sm#"a", !add(!mul(sm, 10), 3)>;
 }
 
 foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 62, 63, 64, 65,
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -55,7 +55,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                                const std::string &FS,
                                const NVPTXTargetMachine &TM)
     : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
-      FullSmVersion(2000), SmVersion(getSmVersion()),
+      FullSmVersion(200), SmVersion(getSmVersion()),
       TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {
   TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -108,8 +108,8 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
     switch (FullSmVersion) {
     default:
       break;
-    case 10011: // sm_100a
-    case 10111: // sm_101a
+    case 1003: // sm_100a
+    case 1013: // sm_101a
       HasTcgen05 = true;
       break;
     }
@@ -120,9 +120,15 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   // TMA G2S copy with cta_group::1/2 support
   bool hasCpAsyncBulkTensorCTAGroupSupport() const {
     // TODO: Update/tidy-up after the family-conditional support arrives
-    return ((FullSmVersion == 10011 || FullSmVersion == 10111) &&
-            PTXVersion >= 86) ||
-           (FullSmVersion == 10311 && PTXVersion >= 88);
+    switch (FullSmVersion) {
+    case 1003:
+    case 1013:
+      return PTXVersion >= 86;
+    case 1033:
+      return PTXVersion >= 88;
+    default:
+      return false;
+    }
   }
 
   // Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
@@ -135,24 +141,24 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
   bool hasPTXASUnreachableBug() const { return PTXVersion < 83; }
   bool hasCvtaParam() const { return SmVersion >= 70 && PTXVersion >= 77; }
   unsigned int getFullSmVersion() const { return FullSmVersion; }
-  unsigned int getSmVersion() const { return getFullSmVersion() / 100; }
+  unsigned int getSmVersion() const { return getFullSmVersion() / 10; }
   // GPUs with "a" suffix include architecture-accelerated features that
   // are supported on the specified architecture only, hence such targets do not
   // follow the onion layer model. hasArchAccelFeatures() allows
   // distinguishing such GPU variants from the base GPU architecture.
   // - false represents non-accelerated architecture.
   // - true represents architecture-accelerated variant.
   bool hasArchAccelFeatures() const {
-    return getFullSmVersion() % 10 && PTXVersion >= 80;
+    return (getFullSmVersion() & 1) && PTXVersion >= 80;
   }
   // GPUs with 'f' suffix have architecture-accelerated features which are
   // portable across all future architectures under same SM major. For example,
   // sm_100f features will work for sm_10X*f*/sm_10X*a* future architectures.
   // - false represents non-family-specific architecture.
   // - true represents family-specific variant.
   bool hasFamilySpecificFeatures() const {
-    return getFullSmVersion() % 100 == 10 ? PTXVersion >= 88
-                                          : hasArchAccelFeatures();
+    return getFullSmVersion() % 10 == 2 ? PTXVersion >= 88
+                                        : hasArchAccelFeatures();
   }
   // If the user did not provide a target we default to the `sm_30` target.
   std::string getTargetName() const {

Original file line number	Diff line number	Diff line change
`@@ -55,7 +55,7 @@ NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,`
`55`	`55`	`const std::string &FS,`
`56`	`56`	`const NVPTXTargetMachine &TM)`
`57`	`57`	`: NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),`
`58`		`- FullSmVersion(2000), SmVersion(getSmVersion()),`
	`58`	`+ FullSmVersion(200), SmVersion(getSmVersion()),`
`59`	`59`	`TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {`
`60`	`60`	`TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();`
`61`	`61`	`}`