@@ -2170,15 +2170,12 @@ defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
2170
2170
2171
2171
class LDU_G<string TyStr, NVPTXRegClass regclass>
2172
2172
: NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
2173
- "ldu.global." # TyStr # " \t$result, [$src];",
2174
- []>, Requires<[hasLDU]>;
2173
+ "ldu.global." # TyStr # " \t$result, [$src];", []>;
2175
2174
2176
- def INT_PTX_LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>;
2177
- def INT_PTX_LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>;
2178
- def INT_PTX_LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>;
2179
- def INT_PTX_LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>;
2180
- def INT_PTX_LDU_GLOBAL_f32 : LDU_G<"b32", Float32Regs>;
2181
- def INT_PTX_LDU_GLOBAL_f64 : LDU_G<"b64", Float64Regs>;
2175
+ def LDU_GLOBAL_i8 : LDU_G<"b8", Int16Regs>;
2176
+ def LDU_GLOBAL_i16 : LDU_G<"b16", Int16Regs>;
2177
+ def LDU_GLOBAL_i32 : LDU_G<"b32", Int32Regs>;
2178
+ def LDU_GLOBAL_i64 : LDU_G<"b64", Int64Regs>;
2182
2179
2183
2180
// vector
2184
2181
@@ -2195,19 +2192,14 @@ class VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass>
2195
2192
"ldu.global.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2196
2193
2197
2194
2198
- def INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<"b8", Int16Regs>;
2199
- def INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<"b16", Int16Regs>;
2200
- def INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"b32", Int32Regs>;
2201
- def INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"b32", Float32Regs>;
2202
- def INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<"b64", Int64Regs>;
2203
- def INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<"b64", Float64Regs>;
2195
+ def LDU_GLOBAL_v2i8 : VLDU_G_ELE_V2<"b8", Int16Regs>;
2196
+ def LDU_GLOBAL_v2i16 : VLDU_G_ELE_V2<"b16", Int16Regs>;
2197
+ def LDU_GLOBAL_v2i32 : VLDU_G_ELE_V2<"b32", Int32Regs>;
2198
+ def LDU_GLOBAL_v2i64 : VLDU_G_ELE_V2<"b64", Int64Regs>;
2204
2199
2205
- def INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<"b8", Int16Regs>;
2206
- def INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2207
- def INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2208
- def INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"b16", Int16Regs>;
2209
- def INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"b32", Int32Regs>;
2210
- def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"b32", Float32Regs>;
2200
+ def LDU_GLOBAL_v4i8 : VLDU_G_ELE_V4<"b8", Int16Regs>;
2201
+ def LDU_GLOBAL_v4i16 : VLDU_G_ELE_V4<"b16", Int16Regs>;
2202
+ def LDU_GLOBAL_v4i32 : VLDU_G_ELE_V4<"b32", Int32Regs>;
2211
2203
2212
2204
2213
2205
//-----------------------------------
@@ -2218,55 +2210,47 @@ def INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"b32", Float32Regs>;
2218
2210
// non-coherent texture cache, and therefore the values read must be read-only
2219
2211
// during the lifetime of the kernel.
2220
2212
2221
- class LDG_G<string TyStr, NVPTXRegClass regclass>
2222
- : NVPTXInst<(outs regclass:$result), (ins ADDR:$src),
2223
- "ld.global.nc." # TyStr # " \t$result, [$src];",
2224
- []>, Requires<[hasLDG]>;
2213
+ class LDG_G<NVPTXRegClass regclass>
2214
+ : NVPTXInst<(outs regclass:$result), (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2215
+ "ld.global.nc.${Sign:sign}$fromWidth \t$result, [$src];", []>;
2225
2216
2226
- def INT_PTX_LDG_GLOBAL_i8 : LDG_G<"b8", Int16Regs>;
2227
- def INT_PTX_LDG_GLOBAL_i16 : LDG_G<"b16", Int16Regs>;
2228
- def INT_PTX_LDG_GLOBAL_i32 : LDG_G<"b32", Int32Regs>;
2229
- def INT_PTX_LDG_GLOBAL_i64 : LDG_G<"b64", Int64Regs>;
2230
- def INT_PTX_LDG_GLOBAL_f32 : LDG_G<"b32", Float32Regs>;
2231
- def INT_PTX_LDG_GLOBAL_f64 : LDG_G<"b64", Float64Regs>;
2217
+ def LD_GLOBAL_NC_i8 : LDG_G<Int16Regs>;
2218
+ def LD_GLOBAL_NC_i16 : LDG_G<Int16Regs>;
2219
+ def LD_GLOBAL_NC_i32 : LDG_G<Int32Regs>;
2220
+ def LD_GLOBAL_NC_i64 : LDG_G<Int64Regs>;
2232
2221
2233
2222
// vector
2234
2223
2235
2224
// Elementized vector ldg
2236
- class VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> :
2225
+ class VLDG_G_ELE_V2<NVPTXRegClass regclass> :
2237
2226
NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
2238
- (ins ADDR:$src),
2239
- "ld.global.nc.v2." # TyStr # " \t{{$dst1, $dst2}}, [$src];", []>;
2227
+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2228
+ "ld.global.nc.v2.${Sign:sign}$fromWidth \t{{$dst1, $dst2}}, [$src];", []>;
2240
2229
2241
2230
2242
- class VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> :
2231
+ class VLDG_G_ELE_V4<NVPTXRegClass regclass> :
2243
2232
NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
2244
- (ins ADDR:$src),
2245
- "ld.global.nc.v4." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2233
+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2234
+ "ld.global.nc.v4.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", []>;
2246
2235
2247
- class VLDG_G_ELE_V8<string TyStr, NVPTXRegClass regclass> :
2236
+ class VLDG_G_ELE_V8<NVPTXRegClass regclass> :
2248
2237
NVPTXInst<(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4,
2249
2238
regclass:$dst5, regclass:$dst6, regclass:$dst7, regclass:$dst8),
2250
- (ins ADDR:$src),
2251
- "ld.global.nc.v8." # TyStr # " \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>;
2239
+ (ins LdStCode:$Sign, i32imm:$fromWidth, ADDR:$src),
2240
+ "ld.global.nc.v8.${Sign:sign}$fromWidth \t{{$dst1, $dst2, $dst3, $dst4, $dst5, $dst6, $dst7, $dst8}}, [$src];", []>;
2252
2241
2253
2242
// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
2254
- def INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<"b8", Int16Regs>;
2255
- def INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<"b16", Int16Regs>;
2256
- def INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"b32", Int32Regs>;
2257
- def INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"b32", Float32Regs>;
2258
- def INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<"b64", Int64Regs>;
2259
- def INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<"b64", Float64Regs>;
2260
-
2261
- def INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<"b8", Int16Regs>;
2262
- def INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<"b16", Int16Regs>;
2263
- def INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"b32", Int32Regs>;
2264
- def INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"b32", Float32Regs>;
2265
-
2266
- def INT_PTX_LDG_G_v4i64_ELE : VLDG_G_ELE_V4<"b64", Int64Regs>;
2267
- def INT_PTX_LDG_G_v4f64_ELE : VLDG_G_ELE_V4<"b64", Float64Regs>;
2268
- def INT_PTX_LDG_G_v8i32_ELE : VLDG_G_ELE_V8<"b32", Int32Regs>;
2269
- def INT_PTX_LDG_G_v8f32_ELE : VLDG_G_ELE_V8<"b32", Float32Regs>;
2243
+ def LD_GLOBAL_NC_v2i8 : VLDG_G_ELE_V2<Int16Regs>;
2244
+ def LD_GLOBAL_NC_v2i16 : VLDG_G_ELE_V2<Int16Regs>;
2245
+ def LD_GLOBAL_NC_v2i32 : VLDG_G_ELE_V2<Int32Regs>;
2246
+ def LD_GLOBAL_NC_v2i64 : VLDG_G_ELE_V2<Int64Regs>;
2247
+
2248
+ def LD_GLOBAL_NC_v4i8 : VLDG_G_ELE_V4<Int16Regs>;
2249
+ def LD_GLOBAL_NC_v4i16 : VLDG_G_ELE_V4<Int16Regs>;
2250
+ def LD_GLOBAL_NC_v4i32 : VLDG_G_ELE_V4<Int32Regs>;
2251
+
2252
+ def LD_GLOBAL_NC_v4i64 : VLDG_G_ELE_V4<Int64Regs>;
2253
+ def LD_GLOBAL_NC_v8i32 : VLDG_G_ELE_V8<Int32Regs>;
2270
2254
2271
2255
multiclass NG_TO_G<string Str, bit Supports32 = 1, list<Predicate> Preds = []> {
2272
2256
if Supports32 then
0 commit comments