@@ -227,6 +227,7 @@ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
227
227
int Size = ty.Size;
228
228
}
229
229
230
+ def I1RT : RegTyInfo<i1, Int1Regs, i1imm, imm>;
230
231
def I16RT : RegTyInfo<i16, Int16Regs, i16imm, imm>;
231
232
def I32RT : RegTyInfo<i32, Int32Regs, i32imm, imm>;
232
233
def I64RT : RegTyInfo<i64, Int64Regs, i64imm, imm>;
@@ -240,26 +241,33 @@ def F16X2RT : RegTyInfo<v2f16, Int32Regs, ?, ?, supports_imm = 0>;
240
241
def BF16X2RT : RegTyInfo<v2bf16, Int32Regs, ?, ?, supports_imm = 0>;
241
242
242
243
244
+ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
245
+ bit commutative, list<Predicate> requires = []> {
246
+ defvar asmstr = op_str # " \t$dst, $a, $b;";
247
+
248
+ def rr :
249
+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
250
+ asmstr,
251
+ [(set t.Ty:$dst, (op_node t.Ty:$a, t.Ty:$b))]>,
252
+ Requires<requires>;
253
+ def ri :
254
+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
255
+ asmstr,
256
+ [(set t.Ty:$dst, (op_node t.Ty:$a, (t.Ty imm:$b)))]>,
257
+ Requires<requires>;
258
+ if !not(commutative) then
259
+ def ir :
260
+ NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
261
+ asmstr,
262
+ [(set t.Ty:$dst, (op_node (t.Ty imm:$a), t.Ty:$b))]>,
263
+ Requires<requires>;
264
+ }
265
+
243
266
// Template for instructions which take three int64, int32, or int16 args.
244
267
// The instructions are named "<op_str><Width>" (e.g. "add.s64").
245
- multiclass I3<string OpcStr, SDNode OpNode, bit commutative> {
246
- foreach t = [I16RT, I32RT, I64RT] in {
247
- defvar asmstr = OpcStr # t.Size # " \t$dst, $a, $b;";
248
-
249
- def t.Ty # rr :
250
- NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
251
- asmstr,
252
- [(set t.Ty:$dst, (OpNode t.Ty:$a, t.Ty:$b))]>;
253
- def t.Ty # ri :
254
- NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
255
- asmstr,
256
- [(set t.Ty:$dst, (OpNode t.RC:$a, imm:$b))]>;
257
- if !not(commutative) then
258
- def t.Ty # ir :
259
- NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
260
- asmstr,
261
- [(set t.Ty:$dst, (OpNode imm:$a, t.RC:$b))]>;
262
- }
268
+ multiclass I3<string op_str, SDPatternOperator op_node, bit commutative> {
269
+ foreach t = [I16RT, I32RT, I64RT] in
270
+ defm t.Ty# : I3Inst<op_str # t.Size, op_node, t, commutative>;
263
271
}
264
272
265
273
class I16x2<string OpcStr, SDNode OpNode> :
@@ -270,26 +278,11 @@ class I16x2<string OpcStr, SDNode OpNode> :
270
278
271
279
// Template for instructions which take 3 int args. The instructions are
272
280
// named "<op_str>.s32" or "<op_str>.s64" (e.g. "addc.cc.s32"); the .s64 forms
// are gated on hasPTX<43>.
273
- multiclass ADD_SUB_INT_CARRY<string OpcStr , SDNode OpNode > {
281
+ multiclass ADD_SUB_INT_CARRY<string op_str , SDNode op_node, bit commutative > {
274
282
let hasSideEffects = 1 in {
275
- def i32rr :
276
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
277
- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
278
- [(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
279
- def i32ri :
280
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
281
- !strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
282
- [(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
283
- def i64rr :
284
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
285
- !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
286
- [(set i64:$dst, (OpNode i64:$a, i64:$b))]>,
287
- Requires<[hasPTX<43>]>;
288
- def i64ri :
289
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
290
- !strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
291
- [(set i64:$dst, (OpNode i64:$a, imm:$b))]>,
292
- Requires<[hasPTX<43>]>;
283
+ defm i32 : I3Inst<op_str # ".s32", op_node, I32RT, commutative>;
284
+ defm i64 : I3Inst<op_str # ".s64", op_node, I64RT, commutative,
285
+ requires = [hasPTX<43>]>;
293
286
}
294
287
}
295
288
@@ -841,31 +834,31 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
841
834
842
835
// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
843
836
// also use these for unsigned arithmetic.
844
- defm ADD : I3<"add.s", add, /* commutative=*/ true>;
845
- defm SUB : I3<"sub.s", sub, /* commutative=*/ false>;
837
+ defm ADD : I3<"add.s", add, commutative = true>;
838
+ defm SUB : I3<"sub.s", sub, commutative = false>;
846
839
847
840
def ADD16x2 : I16x2<"add.s", add>;
848
841
849
842
// int32 and int64 addition and subtraction with carry-out.
850
- defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc>;
851
- defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
843
+ defm ADDCC : ADD_SUB_INT_CARRY<"add.cc", addc, commutative = true >;
844
+ defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc, commutative = false >;
852
845
853
846
// int32 and int64 addition and subtraction with carry-in and carry-out.
854
- defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
855
- defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
847
+ defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde, commutative = true >;
848
+ defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false >;
856
849
857
- defm MULT : I3<"mul.lo.s", mul, /* commutative=*/ true>;
850
+ defm MULT : I3<"mul.lo.s", mul, commutative = true>;
858
851
859
- defm MULTHS : I3<"mul.hi.s", mulhs, /* commutative=*/ true>;
860
- defm MULTHU : I3<"mul.hi.u", mulhu, /* commutative=*/ true>;
852
+ defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
853
+ defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
861
854
862
- defm SDIV : I3<"div.s", sdiv, /* commutative=*/ false>;
863
- defm UDIV : I3<"div.u", udiv, /* commutative=*/ false>;
855
+ defm SDIV : I3<"div.s", sdiv, commutative = false>;
856
+ defm UDIV : I3<"div.u", udiv, commutative = false>;
864
857
865
858
// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
866
859
// will lower it.
867
- defm SREM : I3<"rem.s", srem, /* commutative=*/ false>;
868
- defm UREM : I3<"rem.u", urem, /* commutative=*/ false>;
860
+ defm SREM : I3<"rem.s", srem, commutative = false>;
861
+ defm UREM : I3<"rem.u", urem, commutative = false>;
869
862
870
863
// Integer absolute value. NumBits should be one less than the bit width of RC.
871
864
// This idiom implements the algorithm at
@@ -880,10 +873,10 @@ defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
880
873
defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
881
874
882
875
// Integer min/max.
883
- defm SMAX : I3<"max.s", smax, /* commutative=*/ true>;
884
- defm UMAX : I3<"max.u", umax, /* commutative=*/ true>;
885
- defm SMIN : I3<"min.s", smin, /* commutative=*/ true>;
886
- defm UMIN : I3<"min.u", umin, /* commutative=*/ true>;
876
+ defm SMAX : I3<"max.s", smax, commutative = true>;
877
+ defm UMAX : I3<"max.u", umax, commutative = true>;
878
+ defm SMIN : I3<"min.s", smin, commutative = true>;
879
+ defm UMIN : I3<"min.u", umin, commutative = true>;
887
880
888
881
def SMAX16x2 : I16x2<"max.s", smax>;
889
882
def UMAX16x2 : I16x2<"max.u", umax>;
@@ -1393,38 +1386,10 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
1393
1386
// Template for three-arg bitwise operations. Takes three args and creates .b16,
1394
1387
// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr.
1395
1388
multiclass BITWISE<string OpcStr, SDNode OpNode> {
1396
- def b1rr :
1397
- NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
1398
- !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
1399
- [(set i1:$dst, (OpNode i1:$a, i1:$b))]>;
1400
- def b1ri :
1401
- NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
1402
- !strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
1403
- [(set i1:$dst, (OpNode i1:$a, imm:$b))]>;
1404
- def b16rr :
1405
- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1406
- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
1407
- [(set i16:$dst, (OpNode i16:$a, i16:$b))]>;
1408
- def b16ri :
1409
- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1410
- !strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
1411
- [(set i16:$dst, (OpNode i16:$a, imm:$b))]>;
1412
- def b32rr :
1413
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1414
- !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
1415
- [(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
1416
- def b32ri :
1417
- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1418
- !strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
1419
- [(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
1420
- def b64rr :
1421
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1422
- !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1423
- [(set i64:$dst, (OpNode i64:$a, i64:$b))]>;
1424
- def b64ri :
1425
- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1426
- !strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1427
- [(set i64:$dst, (OpNode i64:$a, imm:$b))]>;
1389
+ defm b1 : I3Inst<OpcStr # ".pred", OpNode, I1RT, commutative = true>;
1390
+ defm b16 : I3Inst<OpcStr # ".b16", OpNode, I16RT, commutative = true>;
1391
+ defm b32 : I3Inst<OpcStr # ".b32", OpNode, I32RT, commutative = true>;
1392
+ defm b64 : I3Inst<OpcStr # ".b64", OpNode, I64RT, commutative = true>;
1428
1393
}
1429
1394
1430
1395
defm OR : BITWISE<"or", or>;
0 commit comments