
Commit 6aef78e

Authored and committed by git apple-llvm automerger

Merge commit '872e899b7563' from llvm.org/master into apple/master

2 parents: 5d6a63b + 872e899

File tree: 4 files changed, +480 -3 lines changed


llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Lines changed: 69 additions & 3 deletions

@@ -2771,10 +2771,72 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
   return true;
 }
 
-bool AMDGPULegalizerInfo::legalizeIntrinsic(
+bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
     MachineInstr &MI, MachineIRBuilder &B,
-    GISelChangeObserver &Observer) const {
+    GISelChangeObserver &Observer,
+    const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const {
+  // We are only processing the operands of d16 image operations on subtargets
+  // that use the unpacked register layout.
+  if (!ST.hasUnpackedD16VMem())
+    return true;
+
+  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
+
+  if (BaseOpcode->Atomic) // No d16 atomics
+    return true;
+
+  MachineRegisterInfo *MRI = B.getMRI();
+  const LLT S32 = LLT::scalar(32);
+  const LLT S16 = LLT::scalar(16);
+
+  if (BaseOpcode->Store) {
+    Register VData = MI.getOperand(1).getReg();
+    LLT Ty = MRI->getType(VData);
+    if (!Ty.isVector() || Ty.getElementType() != S16)
+      return true;
+
+    B.setInstr(MI);
+
+    Observer.changingInstr(MI);
+    MI.getOperand(1).setReg(handleD16VData(B, *MRI, VData));
+    Observer.changedInstr(MI);
+    return true;
+  }
+
+  // Must be an image load.
+  Register DstReg = MI.getOperand(0).getReg();
+  LLT Ty = MRI->getType(DstReg);
+  if (!Ty.isVector() || Ty.getElementType() != S16)
+    return true;
+
+  B.setInsertPt(*MI.getParent(), ++MI.getIterator());
+
+  LLT WidenedTy = Ty.changeElementType(S32);
+  Register WideDstReg = MRI->createGenericVirtualRegister(WidenedTy);
+
+  Observer.changingInstr(MI);
+  MI.getOperand(0).setReg(WideDstReg);
+  Observer.changedInstr(MI);
+
+  // FIXME: Just vector trunc should be sufficient, but legalization is
+  // currently broken.
+  auto Unmerge = B.buildUnmerge(S32, WideDstReg);
+
+  int NumOps = Unmerge->getNumOperands() - 1;
+  SmallVector<Register, 4> RemergeParts(NumOps);
+  for (int I = 0; I != NumOps; ++I)
+    RemergeParts[I] = B.buildTrunc(S16, Unmerge.getReg(I)).getReg(0);
+
+  B.buildBuildVector(DstReg, RemergeParts);
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+                                            MachineIRBuilder &B,
+                                            GISelChangeObserver &Observer) const {
   MachineRegisterInfo &MRI = *B.getMRI();
+
   // Replace the use G_BRCOND with the exec manipulate and branch pseudos.
   auto IntrID = MI.getIntrinsicID();
   switch (IntrID) {
@@ -2935,9 +2997,13 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(
     return legalizeAtomicIncDec(MI, B, true);
   case Intrinsic::amdgcn_atomic_dec:
     return legalizeAtomicIncDec(MI, B, false);
-  default:
+  default: {
+    if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+            AMDGPU::getImageDimIntrinsicInfo(IntrID))
+      return legalizeImageIntrinsic(MI, B, Observer, ImageDimIntr);
     return true;
   }
+  }
 
   return true;
 }
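
For readers unfamiliar with the layouts this hunk is reconciling, here is a short standalone sketch (plain C++, not LLVM API code; the sample values and variable names are illustrative assumptions, not part of the patch) of the packed vs. unpacked d16 register layouts and of the truncate-and-remerge step the legalizer inserts after widening an image load:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // A <4 x half> image-load result, shown as raw 16-bit lanes
  // (f16 bit patterns for 1.0, 2.0, 3.0, 4.0).
  const uint16_t Halves[4] = {0x3C00, 0x4000, 0x4200, 0x4400};

  // Packed layout (the gfx810 RUN line in the new test): two halves share one
  // 32-bit register, so the intrinsic can produce <4 x s16> directly.
  std::vector<uint32_t> Packed;
  for (int I = 0; I < 4; I += 2)
    Packed.push_back(uint32_t(Halves[I]) | (uint32_t(Halves[I + 1]) << 16));

  // Unpacked layout (ST.hasUnpackedD16VMem(), the tonga RUN line): each half
  // sits in the low 16 bits of its own 32-bit register, which is why the
  // legalizer widens the intrinsic result to <4 x s32>.
  std::vector<uint32_t> Unpacked(Halves, Halves + 4);

  // The inserted G_UNMERGE_VALUES + G_TRUNC + G_BUILD_VECTOR sequence then
  // drops the high halves and rebuilds the <4 x s16> value the IR expects.
  uint16_t Remerged[4];
  for (int I = 0; I < 4; ++I)
    Remerged[I] = static_cast<uint16_t>(Unpacked[I]); // trunc s32 -> s16

  std::printf("packed regs: %zu, unpacked regs: %zu, lane0: 0x%04x\n",
              Packed.size(), Unpacked.size(),
              static_cast<unsigned>(Remerged[0]));
  return 0;
}

On packed subtargets no rewrite is needed because the hardware already returns the first layout; on unpacked subtargets only the second layout exists, hence the widen-then-truncate round trip (and the FIXME noting that a plain vector truncate would be preferable once legalization supports it).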

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Lines changed: 5 additions & 0 deletions

@@ -126,6 +126,11 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
   bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
                             Intrinsic::ID IID) const;
 
+  bool legalizeImageIntrinsic(
+      MachineInstr &MI, MachineIRBuilder &B,
+      GISelChangeObserver &Observer,
+      const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const;
+
   bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
                             bool IsInc) const;

Lines changed: 201 additions & 0 deletions (new test file)

; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=PACKED %s

define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 1, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
  ; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
  ; UNPACKED: $vgpr0 = COPY [[ANYEXT]](s32)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 1, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 2 from custom "TargetCustom8")
  ; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16)
  ; PACKED: $vgpr0 = COPY [[ANYEXT]](s32)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %tex
}

define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v2f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[INT:%[0-9]+]]:_(<2 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 3, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](<2 x s32>)
  ; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
  ; UNPACKED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; UNPACKED: $vgpr0 = COPY [[BUILD_VECTOR1]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; PACKED-LABEL: name: image_load_v2f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[INT:%[0-9]+]]:_(<2 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 3, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 4 from custom "TargetCustom8")
  ; PACKED: $vgpr0 = COPY [[INT]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %tex
}

define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v3f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[INT:%[0-9]+]]:_(<3 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](<3 x s32>)
  ; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
  ; UNPACKED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
  ; UNPACKED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
  ; UNPACKED: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[DEF]](s16)
  ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>)
  ; UNPACKED: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0
  ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; UNPACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0
  ; UNPACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
  ; UNPACKED: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
  ; UNPACKED: $vgpr0 = COPY [[EXTRACT1]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[EXTRACT2]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v3f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[INT:%[0-9]+]]:_(<3 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 7, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 6 from custom "TargetCustom8", align 8)
  ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
  ; PACKED: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[INT]](<3 x s16>), 0
  ; PACKED: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
  ; PACKED: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 32
  ; PACKED: $vgpr0 = COPY [[EXTRACT]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[EXTRACT1]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <3 x half> %tex
}

define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
  ; UNPACKED-LABEL: name: image_load_v4f16
  ; UNPACKED: bb.1 (%ir-block.0):
  ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; UNPACKED: [[INT:%[0-9]+]]:_(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 15, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](<4 x s32>)
  ; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32)
  ; UNPACKED: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32)
  ; UNPACKED: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32)
  ; UNPACKED: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32)
  ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
  ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC2]](s16), [[TRUNC3]](s16)
  ; UNPACKED: $vgpr0 = COPY [[BUILD_VECTOR1]](<2 x s16>)
  ; UNPACKED: $vgpr1 = COPY [[BUILD_VECTOR2]](<2 x s16>)
  ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  ; PACKED-LABEL: name: image_load_v4f16
  ; PACKED: bb.1 (%ir-block.0):
  ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; PACKED: [[INT:%[0-9]+]]:_(<4 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.2d), 15, [[COPY8]](s32), [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 8 from custom "TargetCustom8")
  ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INT]](<4 x s16>)
  ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>)
  ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>)
  ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x half> %tex
}

declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }
