Skip to content

Commit fe05683

Browse files
authored
[AMDGPU] Require aligned VGPRs for gfx1250 (#145561)
1 parent f624ba2 commit fe05683

File tree

2 files changed

+149
-1
lines changed

2 files changed

+149
-1
lines changed

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1290,7 +1290,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
12901290
/// Return true if the subtarget generation is exactly GFX12. Judging by the
/// name, this marks targets subject to the VALU-read-SGPR hazard (the hazard
/// itself is not described in this hunk).
bool hasVALUReadSGPRHazard() const { return getGeneration() == GFX12; }
12911291

12921292
/// Return true if operations acting on VGPR tuples require even alignment.
1293-
bool needsAlignedVGPRs() const { return GFX90AInsts; }
1293+
// True when either the GFX90AInsts or the GFX1250Insts feature flag is set.
bool needsAlignedVGPRs() const { return GFX90AInsts || GFX1250Insts; }
12941294

12951295
/// Return true if the target has the S_PACK_HL_B32_B16 instruction.
12961296
bool hasSPackHL() const { return GFX11Insts; }
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=greedy,0 -stop-after=virtregrewriter,2 -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,GFX1250 %s
2+
# Using unaligned vector tuples is OK as long as they aren't used
3+
# in a real instruction.
4+
5+
---
6+
# GCN-LABEL: name: alloc_vgpr_64
7+
# GFX1250: $vgpr4_vgpr5 = GLOBAL_LOAD
8+
name: alloc_vgpr_64
9+
tracksRegLiveness: true
10+
liveins:
11+
- { reg: '$vgpr0_vgpr1' }
12+
- { reg: '$vgpr2' }
13+
body: |
14+
bb.0:
15+
liveins: $vgpr0_vgpr1, $vgpr2
16+
17+
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
18+
%1:vgpr_32 = COPY $vgpr2
19+
%2:vreg_64_align2 = GLOBAL_LOAD_DWORDX2 %0, 0, 0, implicit $exec
20+
GLOBAL_STORE_DWORDX2 %0, %2, 0, 0, implicit $exec
21+
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
22+
...
23+
24+
---
25+
# GCN-LABEL: name: alloc_vgpr_96
26+
# GFX1250: $vgpr4_vgpr5_vgpr6 = GLOBAL_LOAD
27+
name: alloc_vgpr_96
28+
tracksRegLiveness: true
29+
liveins:
30+
- { reg: '$vgpr0_vgpr1' }
31+
- { reg: '$vgpr2' }
32+
body: |
33+
bb.0:
34+
liveins: $vgpr0_vgpr1, $vgpr2
35+
36+
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
37+
%1:vgpr_32 = COPY $vgpr2
38+
%2:vreg_96_align2 = GLOBAL_LOAD_DWORDX3 %0, 0, 0, implicit $exec
39+
GLOBAL_STORE_DWORDX3 %0, %2, 0, 0, implicit $exec
40+
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
41+
...
42+
43+
---
44+
# GCN-LABEL: name: alloc_vgpr_128
45+
# GFX1250: $vgpr4_vgpr5_vgpr6_vgpr7 = GLOBAL_LOAD
46+
name: alloc_vgpr_128
47+
tracksRegLiveness: true
48+
liveins:
49+
- { reg: '$vgpr0_vgpr1' }
50+
- { reg: '$vgpr2' }
51+
body: |
52+
bb.0:
53+
liveins: $vgpr0_vgpr1, $vgpr2
54+
55+
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
56+
%1:vgpr_32 = COPY $vgpr2
57+
%2:vreg_128_align2 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
58+
GLOBAL_STORE_DWORDX4 %0, %2, 0, 0, implicit $exec
59+
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
60+
...
61+
62+
---
63+
# GCN-LABEL: name: alloc_vgpr_160
64+
# GFX1250: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 = IMAGE_LOAD
65+
name: alloc_vgpr_160
66+
tracksRegLiveness: true
67+
liveins:
68+
- { reg: '$vgpr0_vgpr1' }
69+
- { reg: '$vgpr2' }
70+
body: |
71+
bb.0:
72+
liveins: $vgpr0_vgpr1, $vgpr2
73+
74+
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
75+
%1:vgpr_32 = COPY $vgpr2
76+
%2:vreg_160_align2 = IMAGE_LOAD_V5_V1 %0.sub0, undef %3:sgpr_256, 0, 0, 0, 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
77+
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
78+
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
79+
...
80+
81+
---
82+
# GCN-LABEL: name: alloc_vgpr_256
83+
# GFX1250: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = COPY
84+
name: alloc_vgpr_256
85+
tracksRegLiveness: true
86+
liveins:
87+
- { reg: '$vgpr0_vgpr1' }
88+
- { reg: '$vgpr2' }
89+
body: |
90+
bb.0:
91+
liveins: $vgpr0_vgpr1, $vgpr2
92+
93+
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
94+
%1:vgpr_32 = COPY $vgpr2
95+
%3:sgpr_256 = IMPLICIT_DEF
96+
%2:vreg_256_align2 = COPY %3:sgpr_256
97+
%4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
98+
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
99+
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
100+
...
101+
102+
---
103+
# GCN-LABEL: name: alloc_vgpr_512
104+
# GFX1250: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 = IMPLICIT_DEF
105+
name: alloc_vgpr_512
106+
tracksRegLiveness: true
107+
liveins:
108+
- { reg: '$vgpr0_vgpr1' }
109+
- { reg: '$vgpr2' }
110+
body: |
111+
bb.0:
112+
liveins: $vgpr0_vgpr1, $vgpr2
113+
114+
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
115+
%1:vgpr_32 = COPY $vgpr2
116+
%2:vreg_512_align2 = IMPLICIT_DEF
117+
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
118+
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
119+
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
120+
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
121+
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
122+
...
123+
124+
---
125+
# GCN-LABEL: name: alloc_vgpr_1024
126+
# GFX1250: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35 = IMPLICIT_DEF
127+
name: alloc_vgpr_1024
128+
tracksRegLiveness: true
129+
liveins:
130+
- { reg: '$vgpr0_vgpr1' }
131+
- { reg: '$vgpr2' }
132+
body: |
133+
bb.0:
134+
liveins: $vgpr0_vgpr1, $vgpr2
135+
136+
%0:vreg_64_align2 = COPY $vgpr0_vgpr1
137+
%1:vgpr_32 = COPY $vgpr2
138+
%2:vreg_1024_align2 = IMPLICIT_DEF
139+
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
140+
GLOBAL_STORE_DWORDX4 %0, %2.sub4_sub5_sub6_sub7, 0, 0, implicit $exec
141+
GLOBAL_STORE_DWORDX4 %0, %2.sub8_sub9_sub10_sub11, 0, 0, implicit $exec
142+
GLOBAL_STORE_DWORDX4 %0, %2.sub12_sub13_sub14_sub15, 0, 0, implicit $exec
143+
GLOBAL_STORE_DWORDX4 %0, %2.sub16_sub17_sub18_sub19, 0, 0, implicit $exec
144+
GLOBAL_STORE_DWORDX4 %0, %2.sub20_sub21_sub22_sub23, 0, 0, implicit $exec
145+
GLOBAL_STORE_DWORDX4 %0, %2.sub24_sub25_sub26_sub27, 0, 0, implicit $exec
146+
GLOBAL_STORE_DWORDX4 %0, %2.sub28_sub29_sub30_sub31, 0, 0, implicit $exec
147+
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
148+
...

0 commit comments

Comments
 (0)