Skip to content

[AMDGPU] Reduce duplication in FLAT atomic definitions #85383

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 15, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 73 additions & 70 deletions llvm/lib/Target/AMDGPU/FLATInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1787,45 +1787,46 @@ def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
// FLAT_Real_ci records for two FLAT stores. Each passes a numeric value
// followed by the pseudo whose name matches the record name minus "_ci".
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;

// Defines the two FLAT_Real_ci records for one FLAT atomic, `<NAME>_ci` and
// `<NAME>_RTN_ci`, both using the 7-bit value `op`.
// NOTE(review): two multiclass headers appear back to back below. This looks
// like a diff rendering in which the first header (taking an explicit `ps`)
// is the old signature and the second is its replacement -- confirm against
// the real file before relying on either.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
multiclass FLAT_Real_Atomics_ci <bits<7> op> {
// Resolve the pseudo from the defm prefix (NAME) instead of taking it as an
// argument.
defvar ps = !cast<FLAT_Pseudo>(NAME);
// Record for the pseudo named by ps.PseudoInstr.
def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
// Record for the pseudo whose name is ps.PseudoInstr with "_RTN" appended.
def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

// Instantiations of FLAT_Real_Atomics_ci in its two-argument form: the first
// argument is the `op` value, the second repeats the defm prefix as the pseudo.
// The values run 0x30-0x3d and 0x50-0x5d (the `_X2` entries); 0x34 and 0x54
// are not used in this list.
// NOTE(review): this appears to be the pre-change side of a diff; the same
// names are instantiated again below with a single argument -- confirm only
// one set exists in the real file.
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
// Instantiations of FLAT_Real_Atomics_ci in its one-argument form: only the
// `op` value is passed, and the multiclass resolves the pseudo from the defm
// prefix via NAME. The values run 0x30-0x3d and 0x50-0x5d (the `_X2`
// entries); 0x34 and 0x54 are not used in this list.
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31>;
defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32>;
defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33>;
defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35>;
defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36>;
defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37>;
defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38>;
defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39>;
defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a>;
defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b>;
defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c>;
defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d>;

// CI Only flat instructions
// Six floating-point-named atomics (FCMPSWAP/FMIN/FMAX and their `_X2`
// forms) instantiated through FLAT_Real_Atomics_ci.
// NOTE(review): each name appears twice -- first with an explicit pseudo
// argument, then with the `op` value alone. This looks like the old and new
// sides of a diff shown together; confirm only one set exists in the real file.
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e>;
defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f>;
defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e>;
defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f>;
defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60>;


//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -1925,8 +1926,9 @@ def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
// FLAT_Real_vi records for the two FLAT_LOAD_SHORT_D16 loads. Each passes a
// numeric value followed by the pseudo whose name matches the record name
// minus "_vi".
def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// Defines the two FLAT_Real_vi records for one FLAT atomic, `<NAME>_vi` and
// `<NAME>_RTN_vi`, both using the 7-bit value `op`.
// `has_sccb` is forwarded as the third FLAT_Real_vi argument; when omitted it
// defaults to the `has_sccb` field of the pseudo named by the defm prefix.
// NOTE(review): two header lines appear back to back below. This looks like
// a diff rendering in which the first (taking an explicit `ps`) is the old
// parameter list and the second is its replacement -- confirm against the
// real file before relying on either.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
multiclass FLAT_Real_Atomics_vi <bits<7> op,
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
// Resolve the pseudo from the defm prefix (NAME) instead of taking it as an
// argument.
defvar ps = !cast<FLAT_Pseudo>(NAME);
// Record for the pseudo named by ps.PseudoInstr.
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
// Record for the pseudo whose name is ps.PseudoInstr with "_RTN" appended.
def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
}
Expand All @@ -1939,32 +1941,32 @@ multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
}


// Instantiations of FLAT_Real_Atomics_vi in its two-argument form: the first
// argument is the `op` value, the second repeats the defm prefix as the pseudo.
// `has_sccb` is left at its default. The values run 0x40-0x4c and 0x60-0x6c
// (the `_X2` entries).
// NOTE(review): this appears to be the pre-change side of a diff; the same
// names are instantiated again below with a single argument -- confirm only
// one set exists in the real file.
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
// Instantiations of FLAT_Real_Atomics_vi in its one-argument form: only the
// `op` value is passed, the multiclass resolves the pseudo from the defm
// prefix via NAME, and `has_sccb` is left at its default. The values run
// 0x40-0x4c and 0x60-0x6c (the `_X2` entries).
defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>;
defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41>;
defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42>;
defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43>;
defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44>;
defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45>;
defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46>;
defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47>;
defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48>;
defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49>;
defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a>;
defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b>;
defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61>;
defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62>;
defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67>;
defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68>;
defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69>;
defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a>;
defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b>;
defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c>;

// Instantiate FLAT_Real_AllAddr_vi (defined outside this view) for the two
// global byte loads, passing only a numeric value; no pseudo is named
// explicitly.
defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
Expand Down Expand Up @@ -2060,9 +2062,9 @@ let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
}

let SubtargetPredicate = isGFX90AOnly in {
defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, 0>;
defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, 0>;
defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, 0>;
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
Expand All @@ -2073,7 +2075,8 @@ multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// Defines the two FLAT_Real_gfx940 records for one FLAT atomic,
// `<NAME>_gfx940` and `<NAME>_RTN_gfx940`, both using the 7-bit value `op`.
// NOTE(review): two multiclass headers appear back to back below. This looks
// like a diff rendering in which the first (taking an explicit `ps`) is the
// old signature and the second is its replacement -- confirm against the
// real file before relying on either.
multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> {
// Resolve the pseudo from the defm prefix (NAME) instead of taking it as an
// argument.
defvar ps = !cast<FLAT_Pseudo>(NAME);
// Record for the pseudo named by ps.PseudoInstr.
def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
// Record for the pseudo whose name is ps.PseudoInstr with "_RTN" appended.
def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
Expand All @@ -2089,15 +2092,15 @@ let SubtargetPredicate = isGFX940Plus in {
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>;

defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>;
defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>;
defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>;
defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End SubtargetPredicate = isGFX940Plus

Expand Down