@@ -2143,15 +2143,12 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2143
2143
2144
2144
// Scalar `ldu.global` instruction class: produces one `regclass` result from
// the address operand `$src`; `TyStr` is spliced into the opcode as the type
// suffix of the printed assembly.
// NOTE(review): this hunk folds the (empty) pattern list onto the asm-string
// line and drops the `Requires<[hasLDU]>` predicate from the class -- confirm
// the predicate is enforced elsewhere before relying on these defs.
class LDU_G<string TyStr, NVPTXRegClass regclass>
2145
2145
: NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
2146
- "ldu.global." # TyStr # " \t$result, [$src];",
2147
- []>, Requires<[hasLDU]>;
2146
+ "ldu.global." # TyStr # " \t$result, [$src];", []>;
2148
2147
2149
// Scalar `ldu.global` definitions. The hunk renames INT_PTX_LDU_GLOBAL_* to
// LDU_GLOBAL_* and removes the f32/f64 variants, which used the same "b32" /
// "b64" suffixes as i32/i64 and differed only in register class.
// The 8-bit variant uses Int16Regs with a "b8" suffix.
// NOTE(review): presumably float loads are now expected to go through the
// integer-register defs -- confirm against the instruction selector.
- def INT_PTX_LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>;
2150
- def INT_PTX_LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>;
2151
- def INT_PTX_LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>;
2152
- def INT_PTX_LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>;
2153
- def INT_PTX_LDU_GLOBAL_f32 : LDU_G<"b32", Float32Regs>;
2154
- def INT_PTX_LDU_GLOBAL_f64 : LDU_G<"b64", Float64Regs>;
2148
+ def LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>;
2149
+ def LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>;
2150
+ def LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>;
2151
+ def LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>;
2155
2152
2156
2153
// vector
2157
2154
@@ -2168,19 +2165,14 @@ class VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass>
2168
2165
"ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2169
2166
2170
2167
2171
// Vector `ldu.global` definitions (v2 via VLDU_G_ELE_V2, v4 via VLDU_G_ELE_V4).
// The hunk renames INT_PTX_LDU_G_<vec>_ELE to LDU_GLOBAL_<vec> and removes the
// f16/f16x2/f32/f64 variants; each removed variant shared its type suffix with
// an integer variant of the same vector width, and the f16/f16x2 ones shared
// the register class as well.
// NOTE(review): any code referring to the old INT_PTX_LDU_G_* names must be
// updated in the same change -- not visible in this hunk.
- def INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"b8", Int16Regs>;
2172
- def INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"b16", Int16Regs>;
2173
- def INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"b32", Int32Regs>;
2174
- def INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"b32", Float32Regs>;
2175
- def INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"b64", Int64Regs>;
2176
- def INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"b64", Float64Regs>;
2168
+ def LDU_GLOBAL_v2i8 : VLDU_G_ELE_V2<"b8", Int16Regs>;
2169
+ def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2<"b16", Int16Regs>;
2170
+ def LDU_GLOBAL_v2i32 : VLDU_G_ELE_V2<"b32", Int32Regs>;
2171
+ def LDU_GLOBAL_v2i64 : VLDU_G_ELE_V2<"b64", Int64Regs>;
2177
2172
2178
- def INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"b8", Int16Regs>;
2179
- def INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2180
- def INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2181
- def INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2182
- def INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2183
- def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"b32", Float32Regs>;
2173
+ def LDU_GLOBAL_v4i8 : VLDU_G_ELE_V4<"b8", Int16Regs>;
2174
+ def LDU_GLOBAL_v4i16 : VLDU_G_ELE_V4<"b16", Int16Regs>;
2175
+ def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4<"b32", Int32Regs>;
2184
2176
2185
2177
2186
2178
//-----------------------------------
@@ -2191,55 +2183,47 @@ def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"b32", Float32Regs>;
2191
2183
// non-coherent texture cache, and therefore the values read must be read-only
2192
2184
// during the lifetime of the kernel.
2193
2185
2194
// Scalar `ld.global.nc` instruction class.
// Old form: the type suffix came from the `TyStr` class parameter and the
// class carried `Requires<[hasLDG]>`.
// New form: `TyStr` is gone; two extra input operands (`LdStCode:$Sign`,
// `i32imm:$fromWidth`) precede the address, and the suffix is printed from
// them as `${Sign:sign}$fromWidth`.
// NOTE(review): the `hasLDG` predicate is dropped here, and every code path
// that builds these instructions must now supply the two extra operands in
// this order -- confirm both against the selection code.
- class LDG_G<string TyStr, NVPTXRegClass regclass>
2195
- : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
2196
- "ld.global.nc." # TyStr # " \t$result, [$src];",
2197
- []>, Requires<[hasLDG]>;
2186
+ class LDG_G<NVPTXRegClass regclass>
2187
+ : NVPTXInst<(outs regclass:$result), (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2188
+ "ld.global.nc.${Sign:sign}$fromWidth \t$result, [$src];", []>;
2198
2189
2199
// Scalar `ld.global.nc` definitions. The hunk renames INT_PTX_LDG_GLOBAL_* to
// LD_GLOBAL_NC_*, removes the f32/f64 variants, and passes only the register
// class to LDG_G (the type suffix is no longer a class argument).
// The i8 and i16 defs therefore have identical arguments (Int16Regs) and
// differ only by name.
- def INT_PTX_LDG_GLOBAL_i8 : LDG_G<"b8", Int16Regs>;
2200
- def INT_PTX_LDG_GLOBAL_i16 : LDG_G<"b16", Int16Regs>;
2201
- def INT_PTX_LDG_GLOBAL_i32 : LDG_G<"b32", Int32Regs>;
2202
- def INT_PTX_LDG_GLOBAL_i64 : LDG_G<"b64", Int64Regs>;
2203
- def INT_PTX_LDG_GLOBAL_f32 : LDG_G<"b32", Float32Regs>;
2204
- def INT_PTX_LDG_GLOBAL_f64 : LDG_G<"b64", Float64Regs>;
2190
+ def LD_GLOBAL_NC_i8 : LDG_G<Int16Regs>;
2191
+ def LD_GLOBAL_NC_i16 : LDG_G<Int16Regs>;
2192
+ def LD_GLOBAL_NC_i32 : LDG_G<Int32Regs>;
2193
+ def LD_GLOBAL_NC_i64 : LDG_G<Int64Regs>;
2205
2194
2206
2195
// vector
2207
2196
2208
2197
// Elementized vector ldg
2209
// Two-element `ld.global.nc.v2` class: two `regclass` results ($dst1, $dst2)
// loaded from `$src`.
// The hunk removes the `TyStr` parameter; the type suffix is now printed from
// the added `LdStCode:$Sign` and `i32imm:$fromWidth` operands, which come
// before the address in the input list.
- class VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> :
2198
+ class VLDG_G_ELE_V2<NVPTXRegClass regclass> :
2210
2199
NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2211
- (ins ADDR:$src),
2212
- "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
2200
+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2201
+ "ld.global.nc.v2.${Sign:sign}$fromWidth \t{{$dst1, $dst2}}, [$src];", []>;
2213
2202
2214
2203
2215
// Four-element `ld.global.nc.v4` class: four `regclass` results
// ($dst1..$dst4) loaded from `$src`.
// The hunk removes the `TyStr` parameter; the type suffix is now printed from
// the added `LdStCode:$Sign` and `i32imm:$fromWidth` operands, which come
// before the address in the input list.
- class VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> :
2204
+ class VLDG_G_ELE_V4<NVPTXRegClass regclass> :
2216
2205
NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2217
- (ins ADDR:$src),
2218
- "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2206
+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2207
+ "ld.global.nc.v4.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2219
2208
2220
// Eight-element `ld.global.nc.v8` class: eight `regclass` results
// ($dst1..$dst8) loaded from `$src`.
// The hunk removes the `TyStr` parameter; the type suffix is now printed from
// the added `LdStCode:$Sign` and `i32imm:$fromWidth` operands, which come
// before the address in the input list.
- class VLDG_G_ELE_V8<string TyStr, NVPTXRegClass regclass> :
2209
+ class VLDG_G_ELE_V8<NVPTXRegClass regclass> :
2221
2210
NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
2222
2211
regclass:$dst5, regclass:$dst6, regclass:$dst7, regclass:$dst8),
2223
- (ins ADDR:$src),
2224
- "ld.global.nc.v8." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>;
2212
+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2213
+ "ld.global.nc.v8.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>;
2225
2214
2226
2215
// Vector `ld.global.nc` definitions (v2/v4/v8 element forms).
// The hunk renames INT_PTX_LDG_G_<vec>_ELE to LD_GLOBAL_NC_<vec>, removes the
// f32/f64 variants, and passes only the register class to the VLDG_G_ELE_*
// classes (the type suffix is no longer a class argument).
// As a result the i8 and i16 defs of each vector width have identical
// arguments (Int16Regs) and differ only by name.
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2227
- def INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"b8", Int16Regs>;
2228
- def INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"b16", Int16Regs>;
2229
- def INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"b32", Int32Regs>;
2230
- def INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"b32", Float32Regs>;
2231
- def INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"b64", Int64Regs>;
2232
- def INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"b64", Float64Regs>;
2233
-
2234
- def INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"b8", Int16Regs>;
2235
- def INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"b16", Int16Regs>;
2236
- def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"b32", Int32Regs>;
2237
- def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"b32", Float32Regs>;
2238
-
2239
- def INT_PTX_LDG_G_v4i64_ELE : VLDG_G_ELE_V4<"b64", Int64Regs>;
2240
- def INT_PTX_LDG_G_v4f64_ELE : VLDG_G_ELE_V4<"b64", Float64Regs>;
2241
- def INT_PTX_LDG_G_v8i32_ELE : VLDG_G_ELE_V8<"b32", Int32Regs>;
2242
- def INT_PTX_LDG_G_v8f32_ELE : VLDG_G_ELE_V8<"b32", Float32Regs>;
2216
+ def LD_GLOBAL_NC_v2i8 : VLDG_G_ELE_V2<Int16Regs>;
2217
+ def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2<Int16Regs>;
2218
+ def LD_GLOBAL_NC_v2i32 : VLDG_G_ELE_V2<Int32Regs>;
2219
+ def LD_GLOBAL_NC_v2i64 : VLDG_G_ELE_V2<Int64Regs>;
2220
+
2221
+ def LD_GLOBAL_NC_v4i8 : VLDG_G_ELE_V4<Int16Regs>;
2222
+ def LD_GLOBAL_NC_v4i16 : VLDG_G_ELE_V4<Int16Regs>;
2223
+ def LD_GLOBAL_NC_v4i32 : VLDG_G_ELE_V4<Int32Regs>;
2224
+
2225
+ def LD_GLOBAL_NC_v4i64 : VLDG_G_ELE_V4<Int64Regs>;
2226
+ def LD_GLOBAL_NC_v8i32 : VLDG_G_ELE_V8<Int32Regs>;
2243
2227
2244
2228
multiclass NG_TO_G<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
2245
2229
if Supports32 then
0 commit comments