Skip to content

Commit f9fec40

Browse files
authored
AMDGPU: Make v32bf16 a legal type (#76679)
Depends on #76678
1 parent 25e0dc9 commit f9fec40

File tree

3 files changed: +39 -2 lines changed

3 files changed

+39
-2
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -173,6 +173,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
173173
addRegisterClass(MVT::v16bf16, &AMDGPU::SGPR_256RegClass);
174174
addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
175175
addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
176+
addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
176177
}
177178

178179
addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -719,11 +720,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
719720
AddPromotedToType(ISD::LOAD, MVT::v32i16, MVT::v16i32);
720721
setOperationAction(ISD::LOAD, MVT::v32f16, Promote);
721722
AddPromotedToType(ISD::LOAD, MVT::v32f16, MVT::v16i32);
723+
setOperationAction(ISD::LOAD, MVT::v32bf16, Promote);
724+
AddPromotedToType(ISD::LOAD, MVT::v32bf16, MVT::v16i32);
722725

723726
setOperationAction(ISD::STORE, MVT::v32i16, Promote);
724727
AddPromotedToType(ISD::STORE, MVT::v32i16, MVT::v16i32);
725728
setOperationAction(ISD::STORE, MVT::v32f16, Promote);
726729
AddPromotedToType(ISD::STORE, MVT::v32f16, MVT::v16i32);
730+
setOperationAction(ISD::STORE, MVT::v32bf16, Promote);
731+
AddPromotedToType(ISD::STORE, MVT::v32bf16, MVT::v16i32);
727732

728733
setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
729734
MVT::v2i32, Expand);

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1801,6 +1801,38 @@ def : BitConvert <v8f64, v16f32, VReg_512>;
18011801
def : BitConvert <v16f32, v8i64, VReg_512>;
18021802
def : BitConvert <v16f32, v8f64, VReg_512>;
18031803

1804+
1805+
1806+
def : BitConvert <v32bf16, v32i16, VReg_512>;
1807+
def : BitConvert <v32i16, v32bf16, VReg_512>;
1808+
def : BitConvert <v32bf16, v32i16, SReg_512>;
1809+
def : BitConvert <v32i16, v32bf16, SReg_512>;
1810+
1811+
def : BitConvert <v32bf16, v32f16, VReg_512>;
1812+
def : BitConvert <v32f16, v32bf16, VReg_512>;
1813+
def : BitConvert <v32bf16, v32f16, SReg_512>;
1814+
def : BitConvert <v32f16, v32bf16, SReg_512>;
1815+
1816+
def : BitConvert <v32bf16, v16i32, VReg_512>;
1817+
def : BitConvert <v16i32, v32bf16, VReg_512>;
1818+
def : BitConvert <v32bf16, v16i32, SReg_512>;
1819+
def : BitConvert <v16i32, v32bf16, SReg_512>;
1820+
1821+
def : BitConvert <v32bf16, v16f32, VReg_512>;
1822+
def : BitConvert <v16f32, v32bf16, VReg_512>;
1823+
def : BitConvert <v32bf16, v16f32, SReg_512>;
1824+
def : BitConvert <v16f32, v32bf16, SReg_512>;
1825+
1826+
def : BitConvert <v32bf16, v8f64, VReg_512>;
1827+
def : BitConvert <v8f64, v32bf16, VReg_512>;
1828+
def : BitConvert <v32bf16, v8f64, SReg_512>;
1829+
def : BitConvert <v8f64, v32bf16, SReg_512>;
1830+
1831+
def : BitConvert <v32bf16, v8i64, VReg_512>;
1832+
def : BitConvert <v8i64, v32bf16, VReg_512>;
1833+
def : BitConvert <v32bf16, v8i64, SReg_512>;
1834+
def : BitConvert <v8i64, v32bf16, SReg_512>;
1835+
18041836
// 1024-bit bitcast
18051837
def : BitConvert <v32i32, v32f32, VReg_1024>;
18061838
def : BitConvert <v32f32, v32i32, VReg_1024>;

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -916,7 +916,7 @@ defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
916916
defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;
917917

918918
let GlobalPriority = true in {
919-
defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512Regs, TTMP_512Regs>;
919+
defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], SGPR_512Regs, TTMP_512Regs>;
920920
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
921921
}
922922

@@ -970,7 +970,7 @@ defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
970970
defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;
971971

972972
let GlobalPriority = true in {
973-
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], (add VGPR_512)>;
973+
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], (add VGPR_512)>;
974974
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
975975
}
976976

0 commit comments

Comments
 (0)