Commit 9e41f53
AMDGPU: Make v4i16/v4f16 legal
Some image loads return these types, and it is awkward to work around their not being legal.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@334835 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent: 064db43
24 files changed: +627 / -267 lines (4 of the 24 changed files are excerpted below)
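The hunks below touch the calling convention, the generic DAG lowering, and the buffer/image selection patterns. The switch that actually flips the two types to legal lives in SIISelLowering.cpp, which is among the 24 changed files but is not part of this excerpt. As a rough sketch of what such registration conventionally looks like in an LLVM target constructor (the register class and operation actions here are assumptions, not quotes from the commit):

// Hypothetical sketch of the unexcerpted SIISelLowering.cpp change, placed
// inside the SITargetLowering constructor. A type becomes legal once it is
// tied to a register class; computeRegisterProperties() derives the rest.
addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass); // assumed class
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass); // assumed class

// Operations without native v4x16 support still need explicit actions;
// CONCAT_VECTORS is presumably marked Custom so that it reaches the
// LowerCONCAT_VECTORS change shown below.
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f16, Custom);

computeRegisterProperties(STI.getRegisterInfo()); // STI: the subtarget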

lib/Target/AMDGPU/AMDGPUCallingConv.td (2 additions, 2 deletions)

@@ -127,7 +127,7 @@ def CC_AMDGPU_Func : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>,
+  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
   CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
   CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
   CCIfType<[v4i32, v4f32, v2i64, v2f64], CCAssignToStack<16, 4>>,
@@ -144,7 +144,7 @@ def RetCC_AMDGPU_Func : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64], CCCustom<"allocateVGPRTuple">>
+  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>
 ]>;

 def CC_AMDGPU : CallingConv<[
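In effect, a v4i16/v4f16 argument or return value is now handled like any other 64-bit value: allocateVGPRTuple (defined in AMDGPUISelLowering.cpp, the next file) hands it a pair of VGPRs rather than letting it fall through to the stack-assignment rules. A toy model of that pair allocation, written from scratch for illustration; none of these names are LLVM's:

// Toy model of the new CCIfType behavior: 64-bit packed types such as
// v4i16/v4f16 receive a VGPR *pair* instead of being split up or spilled.
// Real AMDGPU register pairs can overlap; this sketch keeps them disjoint.
#include <cstdio>
#include <optional>

struct RegPairAllocator {
  unsigned NextPair = 0;
  unsigned MaxPairs; // the real allocator offers up to 31 VGPR pairs
  explicit RegPairAllocator(unsigned Max) : MaxPairs(Max) {}

  // Returns the first VGPR of an allocated pair, or nothing when exhausted
  // (at which point the real calling convention assigns a stack slot).
  std::optional<unsigned> allocate64() {
    if (NextPair == MaxPairs)
      return std::nullopt;
    return 2 * NextPair++;
  }
};

int main() {
  RegPairAllocator Alloc(/*Max=*/31);
  const char *Args[] = {"v4f16 %a", "v4i16 %b", "f64 %c"};
  for (const char *A : Args)
    if (auto R = Alloc.allocate64())
      std::printf("%s -> VGPR%u_VGPR%u\n", A, *R, *R + 1);
  return 0;
}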

lib/Target/AMDGPU/AMDGPUISelLowering.cpp (16 additions, 2 deletions)

@@ -73,7 +73,9 @@ static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
   case MVT::i64:
   case MVT::f64:
   case MVT::v2i32:
-  case MVT::v2f32: {
+  case MVT::v2f32:
+  case MVT::v4i16:
+  case MVT::v4f16: {
     // Up to SGPR0-SGPR39
     return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
                           &AMDGPU::SGPR_64RegClass, 20);
@@ -94,7 +96,9 @@ static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
   case MVT::i64:
   case MVT::f64:
   case MVT::v2i32:
-  case MVT::v2f32: {
+  case MVT::v2f32:
+  case MVT::v4i16:
+  case MVT::v4f16: {
     return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
                           &AMDGPU::VReg_64RegClass, 31);
   }
@@ -1234,6 +1238,16 @@ SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                   SelectionDAG &DAG) const {
   SmallVector<SDValue, 8> Args;

+  EVT VT = Op.getValueType();
+  if (VT == MVT::v4i16 || VT == MVT::v4f16) {
+    SDLoc SL(Op);
+    SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(0));
+    SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Op.getOperand(1));
+
+    SDValue BV = DAG.getBuildVector(MVT::v2i32, SL, { Lo, Hi });
+    return DAG.getNode(ISD::BITCAST, SL, VT, BV);
+  }
+
   for (const SDUse &U : Op->ops())
     DAG.ExtractVectorElements(U.get(), Args);
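The new early-out in LowerCONCAT_VECTORS leans on the packed layout: each v2x16 operand already fills exactly one 32-bit register, so concatenating two of them into a v4x16 amounts to placing two 32-bit words side by side. A standalone C++ sketch of the same bit-level operation (my illustration, not commit code), assuming the little-endian layout AMDGPU uses:

// Illustration of the lowering above, outside LLVM: bitcast each v2f16 half
// to a 32-bit word, build the 64-bit pair, and reinterpret it as v4f16.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint32_t Lo = 0x3C003800; // bitcast of the first v2f16 operand
  uint32_t Hi = 0x42004000; // bitcast of the second v2f16 operand

  // build_vector v2i32 (Lo, Hi) followed by a bitcast to v4f16:
  uint64_t Concat = (uint64_t)Hi << 32 | Lo;

  uint16_t Elt[4]; // the four f16 bit patterns of the result
  std::memcpy(Elt, &Concat, sizeof(Elt));
  for (int I = 0; I != 4; ++I)
    std::printf("elt%d = 0x%04x\n", I, Elt[I]);
  return 0;
}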

lib/Target/AMDGPU/BUFInstructions.td (4 additions, 8 deletions)

@@ -1084,8 +1084,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
 let SubtargetPredicate = HasPackedD16VMem in {
   defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, f16, "BUFFER_LOAD_FORMAT_D16_X">;
   defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2f16, "BUFFER_LOAD_FORMAT_D16_XY">;
-  defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, i32, "BUFFER_LOAD_FORMAT_D16_XY">;
-  defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v2i32, "BUFFER_LOAD_FORMAT_D16_XYZW">;
+  defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format_d16, v4f16, "BUFFER_LOAD_FORMAT_D16_XYZW">;
 } // End HasPackedD16VMem.

 defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
@@ -1145,8 +1144,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
 let SubtargetPredicate = HasPackedD16VMem in {
   defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, f16, "BUFFER_STORE_FORMAT_D16_X">;
   defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2f16, "BUFFER_STORE_FORMAT_D16_XY">;
-  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, i32, "BUFFER_STORE_FORMAT_D16_XY">;
-  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v2i32, "BUFFER_STORE_FORMAT_D16_XYZW">;
+  defm : MUBUF_StoreIntrinsicPat<SIbuffer_store_format_d16, v4f16, "BUFFER_STORE_FORMAT_D16_XYZW">;
 } // End HasPackedD16VMem.

 defm : MUBUF_StoreIntrinsicPat<SIbuffer_store, f32, "BUFFER_STORE_DWORD">;
@@ -1571,8 +1569,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
 let SubtargetPredicate = HasPackedD16VMem in {
   defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, f16, "TBUFFER_LOAD_FORMAT_D16_X">;
   defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2f16, "TBUFFER_LOAD_FORMAT_D16_XY">;
-  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, i32, "TBUFFER_LOAD_FORMAT_D16_XY">;
-  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v2i32, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
+  defm : MTBUF_LoadIntrinsicPat<SItbuffer_load_d16, v4f16, "TBUFFER_LOAD_FORMAT_D16_XYZW">;
 } // End HasPackedD16VMem.

 multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
@@ -1633,8 +1630,7 @@ let SubtargetPredicate = HasUnpackedD16VMem in {
 let SubtargetPredicate = HasPackedD16VMem in {
   defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, f16, "TBUFFER_STORE_FORMAT_D16_X">;
   defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2f16, "TBUFFER_STORE_FORMAT_D16_XY">;
-  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, i32, "TBUFFER_STORE_FORMAT_D16_XY">;
-  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v2i32, "TBUFFER_STORE_FORMAT_D16_XYZW">;
+  defm : MTBUF_StoreIntrinsicPat<SItbuffer_store_d16, v4f16, "TBUFFER_STORE_FORMAT_D16_XYZW">;
 } // End HasPackedD16VMem.

 //===----------------------------------------------------------------------===//
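These pattern swaps read naturally once the packed/unpacked split is spelled out: on HasUnpackedD16VMem targets a D16 load delivers one 16-bit component per 32-bit register, while on HasPackedD16VMem targets two components share a register, so a 4-component D16 result is exactly the 64-bit v4f16 that is now legal (two registers, hence the XYZW opcodes no longer needing an integer stand-in type). A standalone sketch of the two layouts, my illustration rather than anything from the commit:

// Illustration only: packed vs. unpacked D16 result layouts. Component
// values are raw f16 bit patterns; no real hardware is involved here.
#include <cstdint>
#include <cstdio>

int main() {
  const uint16_t Comp[4] = {0x3800, 0x3C00, 0x4000, 0x4200};

  // Unpacked (e.g. gfx80): one component per 32-bit register -> v4i32.
  uint32_t Unpacked[4];
  for (int I = 0; I != 4; ++I)
    Unpacked[I] = Comp[I];

  // Packed: two components per 32-bit register -> v4f16 in two registers.
  uint32_t Packed[2];
  for (int I = 0; I != 2; ++I)
    Packed[I] = (uint32_t)Comp[2 * I + 1] << 16 | Comp[2 * I];

  std::printf("unpacked: %08x %08x %08x %08x\n",
              Unpacked[0], Unpacked[1], Unpacked[2], Unpacked[3]);
  std::printf("packed:   %08x %08x\n", Packed[0], Packed[1]);
  return 0;
}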

lib/Target/AMDGPU/MIMGInstructions.td (9 additions, 24 deletions)

@@ -594,12 +594,6 @@ foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
   def intr#_pat_v4 : ImageDimPattern<intr, "_V4", v4f32>;
 }

-// v2f16 and v4f16 are used as data types to signal that D16 should be used.
-// However, they are not (always) legal types, and the SelectionDAG requires us
-// to legalize them before running any patterns. So we legalize them by
-// converting to an int type of equal size and using an internal 'd16helper'
-// intrinsic instead which signifies both the use of D16 and actually allows
-// this integer-based return type.
 multiclass ImageDimD16Helper<AMDGPUImageDimIntrinsic I,
                              AMDGPUImageDimIntrinsic d16helper> {
   let SubtargetPredicate = HasUnpackedD16VMem in {
@@ -611,7 +605,7 @@ multiclass ImageDimD16Helper<AMDGPUImageDimIntrinsic I,
   let SubtargetPredicate = HasPackedD16VMem in {
     def _packed_v1 : ImageDimPattern<I, "_V1", f16, "_D16">;
     def _packed_v2 : ImageDimPattern<I, "_V1", v2f16, "_D16">;
-    def _packed_v4 : ImageDimPattern<d16helper, "_V2", v2i32, "_D16">;
+    def _packed_v4 : ImageDimPattern<I, "_V2", v4f16, "_D16">;
   } // End HasPackedD16VMem.
 }
@@ -653,10 +647,7 @@ foreach intr = AMDGPUImageDimGatherIntrinsics in {
   } // End HasUnpackedD16VMem.

   let SubtargetPredicate = HasPackedD16VMem in {
-    def intr#_packed_v4 :
-        ImageDimPattern<!cast<AMDGPUImageDimIntrinsic>(
-            "int_SI_image_d16helper_" # intr.P.OpMod # intr.P.Dim.Name),
-            "_V2", v2i32, "_D16">;
+    def intr#_packed_v4 : ImageDimPattern<intr, "_V2", v4f16, "_D16">;
   } // End HasPackedD16VMem.
 }
@@ -703,6 +694,7 @@ multiclass ImageSamplePatterns<SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasPackedD16VMem in {
     defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
     defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
   } // End HasPackedD16VMem.
 }
@@ -712,16 +704,15 @@ multiclass ImageSampleAltPatterns<SDPatternOperator name, string opcode> {
     defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
     defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
   } // End HasUnpackedD16VMem.
-
-  let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
-  } // End HasPackedD16VMem.
 }

 // ImageGather4 patterns.
 multiclass ImageGather4Patterns<SDPatternOperator name, string opcode> {
   defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+
+  let SubtargetPredicate = HasPackedD16VMem in {
+    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
+  } // End HasPackedD16VMem.
 }

 // ImageGather4 alternative patterns for illegal vector half Types.
@@ -730,9 +721,6 @@ multiclass ImageGather4AltPatterns<SDPatternOperator name, string opcode> {
     defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
   } // End HasUnpackedD16VMem.

-  let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageSampleDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
-  } // End HasPackedD16VMem.
 }

 // ImageLoad for amdgcn.
@@ -766,6 +754,7 @@ multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasPackedD16VMem in {
     defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
     defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
+    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
   } // End HasPackedD16VMem.
 }
@@ -775,11 +764,6 @@ multiclass ImageLoadAltPatterns<SDPatternOperator name, string opcode> {
     defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16_gfx80">;
     defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4i32, "_D16_gfx80">;
   } // End HasUnPackedD16VMem.
-
-  let SubtargetPredicate = HasPackedD16VMem in {
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
-    defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2i32, "_D16">;
-  } // End HasPackedD16VMem.
 }

 // ImageStore for amdgcn.
@@ -813,6 +797,7 @@ multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
   let SubtargetPredicate = HasPackedD16VMem in {
     defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f16, "_D16">;
     defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), v2f16, "_D16">;
+    defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v4f16, "_D16">;
   } // End HasPackedD16VMem.
 }
