Skip to content

Commit dd05129

Browse files
authored
[AMDGPU] Enable GCNRewritePartialRegUses pass by default. (#72975)
Let's try once again after #69957 has landed.
1 parent 26616c6 commit dd05129

28 files changed

+3141
-3270
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ static cl::opt<bool> EnableMaxIlpSchedStrategy(
352352

353353
static cl::opt<bool> EnableRewritePartialRegUses(
354354
"amdgpu-enable-rewrite-partial-reg-uses",
355-
cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
355+
cl::desc("Enable rewrite partial reg uses pass"), cl::init(true),
356356
cl::Hidden);
357357

358358
static cl::opt<bool> EnableHipStdPar(

llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll

Lines changed: 182 additions & 306 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll

Lines changed: 144 additions & 144 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll

Lines changed: 77 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -9,43 +9,34 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
99
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
1010
; GCN-NEXT: v_lshlrev_b32_e32 v64, 8, v0
1111
; GCN-NEXT: s_waitcnt lgkmcnt(0)
12-
; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
1312
; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
14-
; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:16
15-
; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:32
16-
; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:48
17-
; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:64
18-
; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:80
19-
; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:96
20-
; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:112
21-
; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:128
13+
; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:16
14+
; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:32
15+
; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:48
16+
; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:64
17+
; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:80
18+
; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:96
19+
; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:112
20+
; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:128
21+
; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:144
2222
; GCN-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:160
2323
; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:176
2424
; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:192
2525
; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:208
2626
; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:224
2727
; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:240
28-
; GCN-NEXT: s_waitcnt vmcnt(15)
29-
; GCN-NEXT: v_mov_b32_e32 v5, 0x3e7
30-
; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144
31-
; GCN-NEXT: s_waitcnt vmcnt(15)
28+
; GCN-NEXT: s_waitcnt vmcnt(6)
29+
; GCN-NEXT: v_mov_b32_e32 v37, 0x3e7
3230
; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
33-
; GCN-NEXT: s_waitcnt vmcnt(15)
34-
; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16
35-
; GCN-NEXT: s_waitcnt vmcnt(15)
36-
; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32
37-
; GCN-NEXT: s_waitcnt vmcnt(15)
38-
; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48
39-
; GCN-NEXT: s_waitcnt vmcnt(15)
40-
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64
41-
; GCN-NEXT: s_waitcnt vmcnt(15)
42-
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80
43-
; GCN-NEXT: s_waitcnt vmcnt(15)
44-
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96
45-
; GCN-NEXT: s_waitcnt vmcnt(15)
46-
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112
47-
; GCN-NEXT: s_waitcnt vmcnt(15)
48-
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:128
31+
; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
32+
; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
33+
; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
34+
; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
35+
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
36+
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
37+
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
38+
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
39+
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
4940
; GCN-NEXT: s_waitcnt vmcnt(15)
5041
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
5142
; GCN-NEXT: s_waitcnt vmcnt(15)
@@ -67,39 +58,45 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
6758
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
6859
; GFX10-NEXT: s_clause 0xf
6960
; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
70-
; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:16
71-
; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:32
72-
; GFX10-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:48
73-
; GFX10-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:64
74-
; GFX10-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:80
75-
; GFX10-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:96
76-
; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:112
77-
; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:160
78-
; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:176
79-
; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:192
80-
; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:208
81-
; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:224
82-
; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:240
83-
; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:128
84-
; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
85-
; GFX10-NEXT: s_waitcnt vmcnt(0)
86-
; GFX10-NEXT: v_mov_b32_e32 v5, 0x3e7
61+
; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:16
62+
; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:32
63+
; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:48
64+
; GFX10-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:64
65+
; GFX10-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:80
66+
; GFX10-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:96
67+
; GFX10-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:112
68+
; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:128
69+
; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:144
70+
; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:160
71+
; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:176
72+
; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:192
73+
; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:208
74+
; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:224
75+
; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:240
76+
; GFX10-NEXT: s_waitcnt vmcnt(6)
77+
; GFX10-NEXT: v_mov_b32_e32 v37, 0x3e7
8778
; GFX10-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
88-
; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16
89-
; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32
90-
; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48
91-
; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64
92-
; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80
93-
; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96
94-
; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112
95-
; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:128
96-
; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144
97-
; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:160
98-
; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:176
99-
; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:192
100-
; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:208
101-
; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:224
102-
; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:240
79+
; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
80+
; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
81+
; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
82+
; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
83+
; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
84+
; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
85+
; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
86+
; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
87+
; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
88+
; GFX10-NEXT: s_waitcnt vmcnt(5)
89+
; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
90+
; GFX10-NEXT: s_waitcnt vmcnt(4)
91+
; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:176
92+
; GFX10-NEXT: s_waitcnt vmcnt(3)
93+
; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:192
94+
; GFX10-NEXT: s_waitcnt vmcnt(2)
95+
; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:208
96+
; GFX10-NEXT: s_waitcnt vmcnt(1)
97+
; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:224
98+
; GFX10-NEXT: s_waitcnt vmcnt(0)
99+
; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:240
103100
; GFX10-NEXT: s_endpgm
104101
;
105102
; GFX11-LABEL: v_insert_v64i32_37:
@@ -109,34 +106,34 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
109106
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
110107
; GFX11-NEXT: s_clause 0xf
111108
; GFX11-NEXT: global_load_b128 v[0:3], v64, s[0:1]
112-
; GFX11-NEXT: global_load_b128 v[8:11], v64, s[0:1] offset:16
113-
; GFX11-NEXT: global_load_b128 v[12:15], v64, s[0:1] offset:32
114-
; GFX11-NEXT: global_load_b128 v[16:19], v64, s[0:1] offset:48
115-
; GFX11-NEXT: global_load_b128 v[20:23], v64, s[0:1] offset:64
116-
; GFX11-NEXT: global_load_b128 v[24:27], v64, s[0:1] offset:80
117-
; GFX11-NEXT: global_load_b128 v[28:31], v64, s[0:1] offset:96
118-
; GFX11-NEXT: global_load_b128 v[32:35], v64, s[0:1] offset:112
119-
; GFX11-NEXT: global_load_b128 v[36:39], v64, s[0:1] offset:128
120-
; GFX11-NEXT: global_load_b128 v[4:7], v64, s[0:1] offset:144
109+
; GFX11-NEXT: global_load_b128 v[4:7], v64, s[0:1] offset:16
110+
; GFX11-NEXT: global_load_b128 v[8:11], v64, s[0:1] offset:32
111+
; GFX11-NEXT: global_load_b128 v[12:15], v64, s[0:1] offset:48
112+
; GFX11-NEXT: global_load_b128 v[16:19], v64, s[0:1] offset:64
113+
; GFX11-NEXT: global_load_b128 v[20:23], v64, s[0:1] offset:80
114+
; GFX11-NEXT: global_load_b128 v[24:27], v64, s[0:1] offset:96
115+
; GFX11-NEXT: global_load_b128 v[28:31], v64, s[0:1] offset:112
116+
; GFX11-NEXT: global_load_b128 v[32:35], v64, s[0:1] offset:128
117+
; GFX11-NEXT: global_load_b128 v[36:39], v64, s[0:1] offset:144
121118
; GFX11-NEXT: global_load_b128 v[40:43], v64, s[0:1] offset:160
122119
; GFX11-NEXT: global_load_b128 v[44:47], v64, s[0:1] offset:176
123120
; GFX11-NEXT: global_load_b128 v[48:51], v64, s[0:1] offset:192
124121
; GFX11-NEXT: global_load_b128 v[52:55], v64, s[0:1] offset:208
125122
; GFX11-NEXT: global_load_b128 v[56:59], v64, s[0:1] offset:224
126123
; GFX11-NEXT: global_load_b128 v[60:63], v64, s[0:1] offset:240
127124
; GFX11-NEXT: s_waitcnt vmcnt(6)
128-
; GFX11-NEXT: v_mov_b32_e32 v5, 0x3e7
125+
; GFX11-NEXT: v_mov_b32_e32 v37, 0x3e7
129126
; GFX11-NEXT: s_clause 0x9
130127
; GFX11-NEXT: global_store_b128 v64, v[0:3], s[2:3]
131-
; GFX11-NEXT: global_store_b128 v64, v[8:11], s[2:3] offset:16
132-
; GFX11-NEXT: global_store_b128 v64, v[12:15], s[2:3] offset:32
133-
; GFX11-NEXT: global_store_b128 v64, v[16:19], s[2:3] offset:48
134-
; GFX11-NEXT: global_store_b128 v64, v[20:23], s[2:3] offset:64
135-
; GFX11-NEXT: global_store_b128 v64, v[24:27], s[2:3] offset:80
136-
; GFX11-NEXT: global_store_b128 v64, v[28:31], s[2:3] offset:96
137-
; GFX11-NEXT: global_store_b128 v64, v[32:35], s[2:3] offset:112
138-
; GFX11-NEXT: global_store_b128 v64, v[36:39], s[2:3] offset:128
139-
; GFX11-NEXT: global_store_b128 v64, v[4:7], s[2:3] offset:144
128+
; GFX11-NEXT: global_store_b128 v64, v[4:7], s[2:3] offset:16
129+
; GFX11-NEXT: global_store_b128 v64, v[8:11], s[2:3] offset:32
130+
; GFX11-NEXT: global_store_b128 v64, v[12:15], s[2:3] offset:48
131+
; GFX11-NEXT: global_store_b128 v64, v[16:19], s[2:3] offset:64
132+
; GFX11-NEXT: global_store_b128 v64, v[20:23], s[2:3] offset:80
133+
; GFX11-NEXT: global_store_b128 v64, v[24:27], s[2:3] offset:96
134+
; GFX11-NEXT: global_store_b128 v64, v[28:31], s[2:3] offset:112
135+
; GFX11-NEXT: global_store_b128 v64, v[32:35], s[2:3] offset:128
136+
; GFX11-NEXT: global_store_b128 v64, v[36:39], s[2:3] offset:144
140137
; GFX11-NEXT: s_waitcnt vmcnt(5)
141138
; GFX11-NEXT: global_store_b128 v64, v[40:43], s[2:3] offset:160
142139
; GFX11-NEXT: s_waitcnt vmcnt(4)

0 commit comments

Comments
 (0)