-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU][True16][CodeGen] build_vector pattern in true16 #118904
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3359,6 +3359,8 @@ def : GCNPat < | |
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32) | ||
>; | ||
|
||
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in | ||
let True16Predicate = p in { | ||
def : GCNPat < | ||
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))), | ||
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32) | ||
|
@@ -3368,6 +3370,7 @@ def : GCNPat < | |
(vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))), | ||
(S_LSHL_B32 SReg_32:$src1, (i32 16)) | ||
>; | ||
} | ||
|
||
def : GCNPat < | ||
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))), | ||
|
@@ -3377,6 +3380,8 @@ def : GCNPat < | |
} | ||
|
||
let SubtargetPredicate = HasVOP3PInsts in { | ||
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in | ||
let True16Predicate = p in | ||
def : GCNPat < | ||
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))), | ||
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0)))) | ||
|
@@ -3406,12 +3411,24 @@ def : GCNPat < | |
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) | ||
>; | ||
|
||
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in | ||
let True16Predicate = p in | ||
// Take the lower 16 bits from each VGPR_32 and concat them | ||
def : GCNPat < | ||
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))), | ||
(V_PERM_B32_e64 VGPR_32:$b, VGPR_32:$a, (S_MOV_B32 (i32 0x05040100))) | ||
>; | ||
|
||
let True16Predicate = UseRealTrue16Insts in { | ||
def : GCNPat < | ||
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$a), (Ty VGPR_16:$b))), | ||
(REG_SEQUENCE VGPR_32, VGPR_16:$a, lo16, VGPR_16:$b, hi16) | ||
>; | ||
def : GCNPat < | ||
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$src0), (Ty undef))), | ||
(REG_SEQUENCE VGPR_32, $src0, lo16, (IMPLICIT_DEF), hi16) | ||
broxigarchen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
>; | ||
} | ||
|
||
// Take the lower 16 bits from V[0] and the upper 16 bits from V[1] | ||
// Special case, can use V_BFI (0xffff literal likely more reusable than 0x70601000) | ||
|
@@ -3437,6 +3454,8 @@ def : GCNPat < | |
|
||
// Take the upper 16 bits from V[0] and the lower 16 bits from V[1] | ||
// Special case, can use V_ALIGNBIT (always uses encoded literal) | ||
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in | ||
let True16Predicate = p in { | ||
def : GCNPat < | ||
(vecTy (DivergentBinFrag<build_vector> | ||
(Ty !if(!eq(Ty, i16), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps the PERM_B32 pat at new line 3456 should also be disabled in Real True16 mode? It seems redundant with the new REG_SEQUENCE pat. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Took a closer look at this. I think these vector patterns might all be moving to just fake16/non-16 mode. I will probably get a patch up downstream first. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I checked the other patterns; for now I am just removing them from true16. The codegen does work and selects the REG_SEQUENCE, but it generates additional right shifts, which is worse than the old pattern right now. I think better selections exist, but they need a bit more work. This might involve adding more … We could merge this patch first and then follow up on this later. |
||
|
@@ -3457,7 +3476,7 @@ def : GCNPat < | |
(Ty (bitconvert (i16 (trunc (srl VGPR_32:$b, (i32 16)))))))))), | ||
(V_PERM_B32_e64 VGPR_32:$b, VGPR_32:$a, (S_MOV_B32 (i32 0x07060302))) | ||
>; | ||
|
||
} | ||
|
||
} // end foreach Ty | ||
|
||
|
Large diffs are not rendered by default.
Uh oh!
There was an error while loading. Please reload this page.