@@ -141,6 +141,9 @@ def llvm_shared_cluster_ptr_ty : LLVMQualPointerType<7>; // (shared_cluster)ptr
141
141
142
142
defvar WARP_SIZE = 32;
143
143
144
+ // Note: the maximum grid size in the x-dimension is the lower value of 65535
145
+ // on sm_20. We conservatively use the larger value here as it is required for
146
+ // sm_30+ and also correct for sm_20.
144
147
defvar MAX_GRID_SIZE_X = 0x7fffffff;
145
148
defvar MAX_GRID_SIZE_Y = 0xffff;
146
149
defvar MAX_GRID_SIZE_Z = 0xffff;
@@ -4768,6 +4771,7 @@ class PTXReadSRegIntrinsic_r32<string name,
4768
4771
4769
4772
multiclass PTXReadSRegIntrinsic_v4i32<string regname,
4770
4773
list<list<IntrinsicProperty>> properties = [[], [], [], []]> {
4774
+ assert !eq(!size(properties), 4), "properties must be a list of 4 lists";
4771
4775
// FIXME: Do we need the 128-bit integer type version?
4772
4776
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
4773
4777
@@ -4781,6 +4785,7 @@ multiclass PTXReadSRegIntrinsic_v4i32<string regname,
4781
4785
// Same, but without automatic clang builtins. It will be used for
4782
4786
// registers that require particular GPU or PTX version.
4783
4787
multiclass PTXReadSRegIntrinsicNB_v4i32<list<list<IntrinsicProperty>> properties = [[], [], [], []]> {
4788
+ assert !eq(!size(properties), 4), "properties must be a list of 4 lists";
4784
4789
defvar suffixes = ["_x", "_y", "_z", "_w"];
4785
4790
foreach i = !range(suffixes) in
4786
4791
def suffixes[i] : PTXReadSRegIntrinsicNB_r32<properties[i]>;
0 commit comments