Skip to content

Commit fe197ef

Browse files
committed
[AMDGPU] Mark relevant rematerializable VOP3 instructions
Differential Revision: https://reviews.llvm.org/D106110
1 parent 9ef7de7 commit fe197ef

File tree

3 files changed: 1196 additions and 12 deletions.

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,7 @@ class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
275275

276276
let isCommutable = 1 in {
277277

278+
let isReMaterializable = 1 in {
278279
let mayRaiseFPException = 0 in {
279280
let SubtargetPredicate = HasMadMacF32Insts in {
280281
defm V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
@@ -308,6 +309,7 @@ defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mul
308309
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
309310
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
310311
} // End SchedRW = [WriteIntMul]
312+
} // End isReMaterializable = 1
311313

312314
let Uses = [MODE, VCC, EXEC] in {
313315
// v_div_fmas_f32:
@@ -328,6 +330,7 @@ defm V_DIV_FMAS_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_fmas_f64", VOP_F64_F64_F6
328330

329331
} // End isCommutable = 1
330332

333+
let isReMaterializable = 1 in {
331334
let mayRaiseFPException = 0 in {
332335
defm V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
333336
defm V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
@@ -370,6 +373,7 @@ let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
370373
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
371374
defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
372375
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
376+
} // End isReMaterializable = 1
373377

374378

375379
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
@@ -381,13 +385,15 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d
381385
defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1>;
382386
} // End mayRaiseFPException = 0
383387

388+
let isReMaterializable = 1 in
384389
defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
385390

386391
let Constraints = "@earlyclobber $vdst" in {
387392
defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64, VOP3_CLAMP>>;
388393
} // End Constraints = "@earlyclobber $vdst"
389394

390395

396+
let isReMaterializable = 1 in {
391397
let SchedRW = [WriteDouble] in {
392398
defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, int_amdgcn_trig_preop>;
393399
} // End SchedRW = [WriteDouble]
@@ -405,12 +411,14 @@ let SchedRW = [Write64Bit] in {
405411
defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
406412
} // End SubtargetPredicate = isGFX8Plus
407413
} // End SchedRW = [Write64Bit]
414+
} // End isReMaterializable = 1
408415

409416
def : GCNPat<
410417
(i32 (getDivergentFrag<sext>.ret i16:$src)),
411418
(i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
412419
>;
413420

421+
let isReMaterializable = 1 in {
414422
let SubtargetPredicate = isGFX6GFX7GFX10 in {
415423
defm V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
416424
} // End SubtargetPredicate = isGFX6GFX7GFX10
@@ -420,6 +428,7 @@ let SubtargetPredicate = isGFX8Plus in {
420428
defm V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUperm>;
421429
} // End SubtargetPredicate = isGFX8Plus
422430
} // End SchedRW = [Write32Bit]
431+
} // End isReMaterializable = 1
423432

424433
let SubtargetPredicate = isGFX7Plus in {
425434

@@ -601,14 +610,14 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
601610
}
602611

603612
let SubtargetPredicate = isGFX9Plus in {
604-
let isCommutable = 1 in {
613+
let isCommutable = 1, isReMaterializable = 1 in {
605614
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
606615
defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
607616
defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
608617
defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
609618
defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
610619
defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
611-
} // End isCommutable = 1
620+
} // End isCommutable = 1, isReMaterializable = 1
612621
// TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this
613622
// to the new src0.
614623
defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>;
@@ -632,11 +641,13 @@ defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32
632641
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
633642
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
634643

635-
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
636644
defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
645+
646+
let isReMaterializable = 1 in {
647+
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
637648
defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
638649
defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
639-
650+
} // End isReMaterializable = 1
640651

641652

642653
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
@@ -715,9 +726,9 @@ class PermlaneDiscardVDstIn<SDPatternOperator permlane,
715726

716727

717728
let SubtargetPredicate = isGFX10Plus in {
718-
let isCommutable = 1 in {
729+
let isCommutable = 1, isReMaterializable = 1 in {
719730
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
720-
} // End isCommutable = 1
731+
} // End isCommutable = 1, isReMaterializable = 1
721732
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
722733

723734
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {

0 commit comments

Comments
 (0)