Skip to content

Commit cde2f33

Browse files
Yashwant Singh
authored and committed
[AMDGPU] Introduce never uniform bit field in tablegen
IsNeverUniform can be set to 1 to mark instructions that are inherently never-uniform (divergent). For now, this bit is enabled only for the writelane instruction (V_WRITELANE_B32); it is to be extended to all instructions that require it.

Reviewed By: arsenm, sameerds, #amdgpu

Differential Revision: https://reviews.llvm.org/D143154
1 parent 1cf344d commit cde2f33

File tree

7 files changed

+24
-11
lines changed

7 files changed

+24
-11
lines changed

llvm/lib/Target/AMDGPU/SIDefines.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ enum : uint64_t {
133133

134134
// Whether tied sources will be read.
135135
TiedSourceNotRead = UINT64_C(1) << 60,
136+
137+
// Is never uniform.
138+
IsNeverUniform = UINT64_C(1) << 61,
136139
};
137140

138141
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.

llvm/lib/Target/AMDGPU/SIInstrFormats.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,9 @@ class InstSI <dag outs, dag ins, string asm = "",
153153
// This bit indicates that tied source will not be read.
154154
field bit TiedSourceNotRead = 0;
155155

156+
// This bit indicates that the instruction is never-uniform/divergent
157+
field bit IsNeverUniform = 0;
158+
156159
// These need to be kept in sync with the enum in SIInstrFlags.
157160
let TSFlags{0} = SALU;
158161
let TSFlags{1} = VALU;
@@ -234,6 +237,8 @@ class InstSI <dag outs, dag ins, string asm = "",
234237

235238
let TSFlags{60} = TiedSourceNotRead;
236239

240+
let TSFlags{61} = IsNeverUniform;
241+
237242
let SchedRW = [Write32Bit];
238243

239244
let AsmVariantName = AMDGPUAsmVariants.Default;

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8414,7 +8414,14 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
84148414

84158415
InstructionUniformity
84168416
SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
8417+
8418+
if (isNeverUniform(MI))
8419+
return InstructionUniformity::NeverUniform;
8420+
84178421
unsigned opcode = MI.getOpcode();
8422+
if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
8423+
return InstructionUniformity::AlwaysUniform;
8424+
84188425
if (MI.isCopy()) {
84198426
const MachineOperand &srcOp = MI.getOperand(1);
84208427
if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
@@ -8456,12 +8463,6 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
84568463
return InstructionUniformity::Default;
84578464
}
84588465

8459-
if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
8460-
return InstructionUniformity::AlwaysUniform;
8461-
8462-
if (opcode == AMDGPU::V_WRITELANE_B32)
8463-
return InstructionUniformity::NeverUniform;
8464-
84658466
const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
84668467
const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
84678468

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -781,6 +781,10 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
781781
return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
782782
}
783783

784+
static bool isNeverUniform(const MachineInstr &MI){
785+
return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
786+
}
787+
784788
static bool doesNotReadTiedSource(const MachineInstr &MI) {
785789
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
786790
}

llvm/lib/Target/AMDGPU/VOP2Instructions.td

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -764,11 +764,10 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag,
764764
let isConvergent = 1, Uses = []<Register> in {
765765
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
766766
[(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
767-
768-
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
767+
let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
769768
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
770769
[(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
771-
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
770+
} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
772771
} // End isConvergent = 1
773772

774773
let isReMaterializable = 1 in {

llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/always-uniform.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
1+
# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
2+
23
# readlane, readfirstlane is always uniform
34

45
---

llvm/test/Analysis/DivergenceAnalysis/AMDGPU/MIR/never-uniform.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
1+
# RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s
22
# loads from flat non uniform
33
---
44
name: flatloads

0 commit comments

Comments
 (0)