Skip to content

Commit efc7bbb

Browse files
authored
[AMDGPU] Make v2bf16 BUILD_VECTOR legal (#92022)
BUILD_VECTOR of v2bf16 needs no bf16-specific handling; it is no different from the v2i16 or v2f16 case.
1 parent 4ecf2ca commit efc7bbb

File tree

4 files changed

+1698
-10
lines changed

4 files changed

+1698
-10
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -233,9 +233,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
233233
// sources.
234234
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
235235
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
236-
237-
setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Promote);
238-
AddPromotedToType(ISD::BUILD_VECTOR, MVT::v2bf16, MVT::v2i16);
239236
}
240237

241238
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
@@ -744,9 +741,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
744741
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
745742
MVT::v8i32, Expand);
746743

747-
if (!Subtarget->hasVOP3PInsts())
748-
setOperationAction(ISD::BUILD_VECTOR,
749-
{MVT::v2i16, MVT::v2f16, MVT::v2bf16}, Custom);
744+
setOperationAction(ISD::BUILD_VECTOR, {MVT::v2i16, MVT::v2f16, MVT::v2bf16},
745+
Subtarget->hasVOP3PInsts() ? Legal : Custom);
750746

751747
setOperationAction(ISD::FNEG, MVT::v2f16, Legal);
752748
// This isn't really legal, but this avoids the legalizer unrolling it (and

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3166,7 +3166,7 @@ def : GCNPat <
31663166
(v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), VGPR_32:$src1))
31673167
>;
31683168

3169-
foreach vecTy = [v2i16, v2f16] in {
3169+
foreach vecTy = [v2i16, v2f16, v2bf16] in {
31703170

31713171
defvar Ty = vecTy.ElementType;
31723172

@@ -3212,7 +3212,7 @@ def : GCNPat <
32123212
>;
32133213

32143214

3215-
foreach vecTy = [v2i16, v2f16] in {
3215+
foreach vecTy = [v2i16, v2f16, v2bf16] in {
32163216

32173217
defvar Ty = vecTy.ElementType;
32183218
defvar immzeroTy = !if(!eq(Ty, i16), immzero, fpimmzero);

llvm/test/CodeGen/AMDGPU/bf16-conversions.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,15 +55,17 @@ define amdgpu_ps float @v_test_cvt_v2f32_v2bf16_s(<2 x float> inreg %src) {
5555
; GCN-NEXT: s_add_i32 s5, s2, 0x7fff
5656
; GCN-NEXT: v_cmp_u_f32_e64 s[2:3], s1, s1
5757
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], exec
58-
; GCN-NEXT: s_cselect_b32 s2, s4, s5
58+
; GCN-NEXT: s_cselect_b32 s1, s4, s5
59+
; GCN-NEXT: s_lshr_b32 s2, s1, 16
5960
; GCN-NEXT: s_bfe_u32 s1, s0, 0x10010
6061
; GCN-NEXT: s_add_i32 s1, s1, s0
6162
; GCN-NEXT: s_or_b32 s3, s0, 0x400000
6263
; GCN-NEXT: s_add_i32 s4, s1, 0x7fff
6364
; GCN-NEXT: v_cmp_u_f32_e64 s[0:1], s0, s0
6465
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec
6566
; GCN-NEXT: s_cselect_b32 s0, s3, s4
66-
; GCN-NEXT: s_pack_hh_b32_b16 s0, s0, s2
67+
; GCN-NEXT: s_lshr_b32 s0, s0, 16
68+
; GCN-NEXT: s_pack_ll_b32_b16 s0, s0, s2
6769
; GCN-NEXT: v_mov_b32_e32 v0, s0
6870
; GCN-NEXT: ; return to shader part epilog
6971
%res = fptrunc <2 x float> %src to <2 x bfloat>

0 commit comments

Comments
 (0)