Skip to content

Commit cb037d0

Browse files
authored
[AMDGPU] Combine build_vector patterns for i16 and f16. NFCI. (#91806)
1 parent 7f3e378 commit cb037d0

File tree

1 file changed

+17
-34
lines changed

1 file changed

+17
-34
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 17 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3166,49 +3166,33 @@ def : GCNPat <
31663166
(v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
31673167
>;
31683168

3169-
def : GCNPat <
3170-
(v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 undef))),
3171-
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
3172-
>;
3173-
3174-
def : GCNPat <
3175-
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 undef))),
3176-
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
3177-
>;
3169+
foreach vecTy = [v2i16, v2f16] in {
31783170

3179-
def : GCNPat <
3180-
(v2f16 (build_vector f16:$src0, (f16 undef))),
3181-
(COPY $src0)
3182-
>;
3171+
defvar Ty = vecTy.ElementType;
31833172

31843173
def : GCNPat <
3185-
(v2i16 (UniformBinFrag<build_vector> (i16 undef), (i16 SReg_32:$src1))),
3186-
(S_LSHL_B32 SReg_32:$src1, (i32 16))
3174+
(vecTy (UniformBinFrag<build_vector> (Ty SReg_32:$src0), (Ty undef))),
3175+
(COPY_TO_REGCLASS SReg_32:$src0, SReg_32)
31873176
>;
31883177

31893178
def : GCNPat <
3190-
(v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 VGPR_32:$src1))),
3191-
(v2i16 (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
3179+
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$src0), (Ty undef))),
3180+
(COPY_TO_REGCLASS VGPR_32:$src0, VGPR_32)
31923181
>;
31933182

3194-
31953183
def : GCNPat <
3196-
(v2f16 (UniformBinFrag<build_vector> (f16 undef), (f16 SReg_32:$src1))),
3184+
(vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
31973185
(S_LSHL_B32 SReg_32:$src1, (i32 16))
31983186
>;
31993187

32003188
def : GCNPat <
3201-
(v2f16 (DivergentBinFrag<build_vector> (f16 undef), (f16 VGPR_32:$src1))),
3202-
(v2f16 (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
3189+
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
3190+
(vecTy (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
32033191
>;
3192+
} // End foreach vecTy = [v2i16, v2f16]
32043193
}
32053194

32063195
let SubtargetPredicate = HasVOP3PInsts in {
3207-
def : GCNPat <
3208-
(v2i16 (UniformBinFrag<build_vector> (i16 SReg_32:$src0), (i16 SReg_32:$src1))),
3209-
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
3210-
>;
3211-
32123196
def : GCNPat <
32133197
(v2i16 (DivergentBinFrag<build_vector> (i16 VGPR_32:$src0), (i16 VGPR_32:$src1))),
32143198
(v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0))))
@@ -3227,18 +3211,17 @@ def : GCNPat <
32273211
(S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1)
32283212
>;
32293213

3230-
def : GCNPat <
3231-
(v2f16 (UniformBinFrag<build_vector> (f16 SReg_32:$src0), (f16 SReg_32:$src1))),
3232-
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
3233-
>;
32343214

3215+
foreach vecTy = [v2i16, v2f16] in {
32353216

3236-
3237-
foreach Ty = [i16, f16] in {
3238-
3239-
defvar vecTy = !if(!eq(Ty, i16), v2i16, v2f16);
3217+
defvar Ty = vecTy.ElementType;
32403218
defvar immzeroTy = !if(!eq(Ty, i16), immzero, fpimmzero);
32413219

3220+
def : GCNPat <
3221+
(vecTy (UniformBinFrag<build_vector> (Ty SReg_32:$src0), (Ty SReg_32:$src1))),
3222+
(S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1)
3223+
>;
3224+
32423225
// Take the lower 16 bits from each VGPR_32 and concat them
32433226
def : GCNPat <
32443227
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_32:$a), (Ty VGPR_32:$b))),

0 commit comments

Comments
 (0)