-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Autogenerate dst bytesel asm. NFCI. #143429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
rampitec
merged 2 commits into
main
from
users/rampitec/06-09-_amdgpu_autogenerate_dst_bytesel_asm._nfci
Jun 9, 2025
Merged
[AMDGPU] Autogenerate dst bytesel asm. NFCI. #143429
rampitec
merged 2 commits into
main
from
users/rampitec/06-09-_amdgpu_autogenerate_dst_bytesel_asm._nfci
Jun 9, 2025
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Somehow we ended up with different code here and downstream after merges, and being specific about SrcMods is more correct.
Needed for future t16 support.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
This was referenced Jun 9, 2025
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) Changes: Needed for future t16 support. Full diff: https://github.com/llvm/llvm-project/pull/143429.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9c1d82b50c1a5..a78440dc7a1f4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2034,11 +2034,13 @@ class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp, bit HasOMod,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod,
+ bit HasFP8ByteSel = 0, bit HasFP8DstByteSel = 0> {
dag ret = getInsVOP3Base<Src0RC, Src1RC,
Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
- Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret;
+ Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1,
+ HasFP8ByteSel, HasFP8DstByteSel>.ret;
}
class getInsDPPBase <RegisterOperand OldRC, RegisterOperand Src0RC, RegisterOperand Src1RC,
@@ -2244,7 +2246,8 @@ class getAsmVOP3OpSel <int NumSrcArgs,
bit HasOMod,
bit Src0HasMods,
bit Src1HasMods,
- bit Src2HasMods> {
+ bit Src2HasMods,
+ bit HasByteSel = 0> {
string dst = "$vdst";
string isrc0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
@@ -2263,9 +2266,10 @@ class getAsmVOP3OpSel <int NumSrcArgs,
string src1 = !if(Src1HasMods, fsrc1, isrc1);
string src2 = !if(Src2HasMods, fsrc2, isrc2);
+ string bytesel = !if(HasByteSel, "$byte_sel", "");
string clamp = !if(HasClamp, "$clamp", "");
string omod = !if(HasOMod, "$omod", "");
- string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
+ string ret = dst#", "#src0#src1#src2#"$op_sel"#bytesel#clamp#omod;
}
class getAsmDPP <bit HasDst, int NumSrcArgs, bit HasModifiers, ValueType DstVT = i32> {
@@ -2630,7 +2634,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
Src0PackedMod, Src1PackedMod, Src2PackedMod>.ret;
field dag InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64,
NumSrcArgs, HasClamp, HasOMod,
- Src0Mod, Src1Mod, Src2Mod>.ret;
+ Src0Mod, Src1Mod, Src2Mod,
+ HasFP8ByteSel, HasFP8DstByteSel>.ret;
field dag InsDPP = !if(HasExtDPP,
getInsDPP<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs,
HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret,
@@ -2680,7 +2685,8 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
HasOMod,
HasSrc0FloatMods,
HasSrc1FloatMods,
- HasSrc2FloatMods>.ret;
+ HasSrc2FloatMods,
+ HasFP8ByteSel>.ret;
field string AsmVOP3DPP = getAsmVOP3DPP<AsmVOP3Base>.ret;
field string AsmVOP3DPP16 = getAsmVOP3DPP16<AsmVOP3Base>.ret;
field string AsmVOP3DPP8 = getAsmVOP3DPP8<AsmVOP3Base>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index a2672d71cb43c..046cce73ff761 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1055,6 +1055,7 @@ class VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOPProfile P> : VOP3_Profil
class VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
let HasFP8DstByteSel = 1;
+ let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have byte_sel operand.
}
class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
@@ -1063,6 +1064,7 @@ class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOPProfile P> : VOP3_
FP32InputMods:$src2_modifiers, Src2RC64:$src2,
VGPR_32:$vdst_in, op_sel0:$op_sel);
let HasFP8DstByteSel = 1;
+ let HasFP8ByteSel = 0; // It works as a dst-bytesel, but does not have byte_sel operand.
}
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 952ee2fe2c955..4cd845aaa5497 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1478,6 +1478,9 @@ class VOP3_Profile_Base<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VO
let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+ let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
+ let HasFP8DstByteSel = P.HasFP8DstByteSel;
+ let HasOMod = P.HasOMod;
let HasModifiers =
!if (Features.IsMAI, 0,
@@ -1494,6 +1497,9 @@ class VOP3_Profile_True16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> :
let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+ let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
+ let HasFP8DstByteSel = P.HasFP8DstByteSel;
+ let HasOMod = P.HasOMod;
let HasModifiers =
!if (Features.IsMAI, 0,
@@ -1506,6 +1512,9 @@ class VOP3_Profile_Fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR> :
let HasOpSel = !if(Features.HasOpSel, 1, P.HasOpSel);
let IsMAI = !if(Features.IsMAI, 1, P.IsMAI);
let IsPacked = !if(Features.IsPacked, 1, P.IsPacked);
+ let HasFP8SrcByteSel = P.HasFP8SrcByteSel;
+ let HasFP8DstByteSel = P.HasFP8DstByteSel;
+ let HasOMod = P.HasOMod;
let HasModifiers =
!if (Features.IsMAI, 0,
|
arsenm
approved these changes
Jun 9, 2025
Base automatically changed from
users/rampitec/06-09-_amdgpu_fix_getasmvop3base_srcmods_arguments._nfci
to
main
June 9, 2025 22:32
rorth
pushed a commit
to rorth/llvm-project
that referenced
this pull request
Jun 11, 2025
Needed for future t16 support.
DhruvSrivastavaX
pushed a commit
to DhruvSrivastavaX/lldb-for-aix
that referenced
this pull request
Jun 12, 2025
Needed for future t16 support.
tomtor
pushed a commit
to tomtor/llvm-project
that referenced
this pull request
Jun 14, 2025
Needed for future t16 support.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Needed for future t16 support.