@@ -127,12 +127,13 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
127
127
; GCN-SDAG: ; %bb.0:
128
128
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
129
129
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
130
- ; GCN-SDAG-NEXT: s_clause 0xc
131
- ; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:48
132
- ; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:44
133
- ; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:40
134
- ; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:36
135
- ; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:32
130
+ ; GCN-SDAG-NEXT: s_clause 0xd
131
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:52
132
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:48
133
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:44
134
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:40
135
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:36
136
+ ; GCN-SDAG-NEXT: scratch_store_b32 off, v45, s32 offset:32
136
137
; GCN-SDAG-NEXT: scratch_store_b32 off, v56, s32 offset:28
137
138
; GCN-SDAG-NEXT: scratch_store_b32 off, v57, s32 offset:24
138
139
; GCN-SDAG-NEXT: scratch_store_b32 off, v58, s32 offset:20
@@ -141,57 +142,50 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
141
142
; GCN-SDAG-NEXT: scratch_store_b32 off, v61, s32 offset:8
142
143
; GCN-SDAG-NEXT: scratch_store_b32 off, v62, s32 offset:4
143
144
; GCN-SDAG-NEXT: scratch_store_b32 off, v63, s32
144
- ; GCN-SDAG-NEXT: global_load_b128 v[5:8], v[0:1], off offset:224
145
+ ; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:224
146
+ ; GCN-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
145
147
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
146
- ; GCN-SDAG-NEXT: scratch_store_b128 off, v[5:8], s32 offset:68 ; 16-byte Folded Spill
147
- ; GCN-SDAG-NEXT: global_load_b128 v[5:8], v[0:1], off offset:240
148
+ ; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:56 ; 16-byte Folded Spill
149
+ ; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:240
148
150
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
149
- ; GCN-SDAG-NEXT: scratch_store_b128 off, v[5:8], s32 offset:84 ; 16-byte Folded Spill
150
- ; GCN-SDAG-NEXT: s_clause 0xc
151
- ; GCN-SDAG-NEXT: global_load_b128 v[13:16], v[0:1], off offset:192
152
- ; GCN-SDAG-NEXT: global_load_b128 v[17:20], v[0:1], off offset:208
153
- ; GCN-SDAG-NEXT: global_load_b128 v[21:24], v[0:1], off offset:160
154
- ; GCN-SDAG-NEXT: global_load_b128 v[25:28], v[0:1], off offset:176
155
- ; GCN-SDAG-NEXT: global_load_b128 v[29:32], v[0:1], off offset:128
156
- ; GCN-SDAG-NEXT: global_load_b128 v[33:36], v[0:1], off offset:144
157
- ; GCN-SDAG-NEXT: global_load_b128 v[48:51], v[0:1], off offset:96
158
- ; GCN-SDAG-NEXT: global_load_b128 v[52:55], v[0:1], off offset:112
159
- ; GCN-SDAG-NEXT: global_load_b128 v[37:40], v[0:1], off offset:64
160
- ; GCN-SDAG-NEXT: global_load_b128 v[41:44], v[0:1], off offset:80
161
- ; GCN-SDAG-NEXT: global_load_b128 v[56:59], v[0:1], off offset:32
162
- ; GCN-SDAG-NEXT: global_load_b128 v[60:63], v[0:1], off offset:48
163
- ; GCN-SDAG-NEXT: global_load_b128 v[5:8], v[0:1], off
151
+ ; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:72 ; 16-byte Folded Spill
152
+ ; GCN-SDAG-NEXT: s_clause 0xd
153
+ ; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:192
154
+ ; GCN-SDAG-NEXT: global_load_b128 v[14:17], v[0:1], off offset:208
155
+ ; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:160
156
+ ; GCN-SDAG-NEXT: global_load_b128 v[22:25], v[0:1], off offset:176
157
+ ; GCN-SDAG-NEXT: global_load_b128 v[26:29], v[0:1], off offset:128
158
+ ; GCN-SDAG-NEXT: global_load_b128 v[30:33], v[0:1], off offset:144
159
+ ; GCN-SDAG-NEXT: global_load_b128 v[34:37], v[0:1], off offset:96
160
+ ; GCN-SDAG-NEXT: global_load_b128 v[48:51], v[0:1], off offset:112
161
+ ; GCN-SDAG-NEXT: global_load_b128 v[52:55], v[0:1], off offset:64
162
+ ; GCN-SDAG-NEXT: global_load_b128 v[38:41], v[0:1], off offset:80
163
+ ; GCN-SDAG-NEXT: global_load_b128 v[42:45], v[0:1], off offset:32
164
+ ; GCN-SDAG-NEXT: global_load_b128 v[56:59], v[0:1], off offset:48
165
+ ; GCN-SDAG-NEXT: global_load_b128 v[60:63], v[0:1], off
166
+ ; GCN-SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16
167
+ ; GCN-SDAG-NEXT: scratch_load_b128 v[6:9], off, s32 offset:56 th:TH_LOAD_LU ; 16-byte Folded Reload
164
168
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
165
- ; GCN-SDAG-NEXT: scratch_store_b128 off, v[5:8], s32 offset:52 ; 16-byte Folded Spill
166
- ; GCN-SDAG-NEXT: global_load_b128 v[5:8], v[0:1], off offset:16
167
- ; GCN-SDAG-NEXT: scratch_load_b128 v[9:12], off, s32 offset:68 th:TH_LOAD_LU ; 16-byte Folded Reload
168
- ; GCN-SDAG-NEXT: s_wait_loadcnt 0x1
169
- ; GCN-SDAG-NEXT: s_wait_xcnt 0x1
170
- ; GCN-SDAG-NEXT: v_mov_b32_e32 v0, v7
171
- ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
172
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[9:12], off offset:224
173
- ; GCN-SDAG-NEXT: scratch_load_b128 v[9:12], off, s32 offset:84 th:TH_LOAD_LU ; 16-byte Folded Reload
174
- ; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
175
- ; GCN-SDAG-NEXT: s_clause 0xc
176
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[9:12], off offset:240
177
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[13:16], off offset:192
178
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[17:20], off offset:208
179
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[21:24], off offset:160
180
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[25:28], off offset:176
181
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[29:32], off offset:128
182
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[33:36], off offset:144
183
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[48:51], off offset:96
184
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[52:55], off offset:112
185
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[37:40], off offset:64
186
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[41:44], off offset:80
187
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[56:59], off offset:32
188
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[60:63], off offset:48
189
- ; GCN-SDAG-NEXT: scratch_load_b128 v[9:12], off, s32 offset:52 th:TH_LOAD_LU ; 16-byte Folded Reload
169
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:224
170
+ ; GCN-SDAG-NEXT: scratch_load_b128 v[6:9], off, s32 offset:72 th:TH_LOAD_LU ; 16-byte Folded Reload
190
171
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
191
- ; GCN-SDAG-NEXT: s_clause 0x1
192
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[9:12], off
193
- ; GCN-SDAG-NEXT: global_store_b128 v[3:4], v[5:8], off offset:16
194
- ; GCN-SDAG-NEXT: s_clause 0xc
172
+ ; GCN-SDAG-NEXT: s_clause 0xe
173
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[6:9], off offset:240
174
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[10:13], off offset:192
175
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[14:17], off offset:208
176
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[18:21], off offset:160
177
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[22:25], off offset:176
178
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[26:29], off offset:128
179
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[30:33], off offset:144
180
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[34:37], off offset:96
181
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[48:51], off offset:112
182
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[52:55], off offset:64
183
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[38:41], off offset:80
184
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[42:45], off offset:32
185
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[56:59], off offset:48
186
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[60:63], off
187
+ ; GCN-SDAG-NEXT: global_store_b128 v[4:5], v[0:3], off offset:16
188
+ ; GCN-SDAG-NEXT: s_clause 0xd
195
189
; GCN-SDAG-NEXT: scratch_load_b32 v63, off, s32
196
190
; GCN-SDAG-NEXT: scratch_load_b32 v62, off, s32 offset:4
197
191
; GCN-SDAG-NEXT: scratch_load_b32 v61, off, s32 offset:8
@@ -200,24 +194,30 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
200
194
; GCN-SDAG-NEXT: scratch_load_b32 v58, off, s32 offset:20
201
195
; GCN-SDAG-NEXT: scratch_load_b32 v57, off, s32 offset:24
202
196
; GCN-SDAG-NEXT: scratch_load_b32 v56, off, s32 offset:28
203
- ; GCN-SDAG-NEXT: scratch_load_b32 v44, off, s32 offset:32
204
- ; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32 offset:36
205
- ; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:40
206
- ; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:44
207
- ; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:48
197
+ ; GCN-SDAG-NEXT: scratch_load_b32 v45, off, s32 offset:32
198
+ ; GCN-SDAG-NEXT: scratch_load_b32 v44, off, s32 offset:36
199
+ ; GCN-SDAG-NEXT: scratch_load_b32 v43, off, s32 offset:40
200
+ ; GCN-SDAG-NEXT: scratch_load_b32 v42, off, s32 offset:44
201
+ ; GCN-SDAG-NEXT: scratch_load_b32 v41, off, s32 offset:48
202
+ ; GCN-SDAG-NEXT: scratch_load_b32 v40, off, s32 offset:52
203
+ ; GCN-SDAG-NEXT: s_wait_xcnt 0xe
204
+ ; GCN-SDAG-NEXT: v_mov_b32_e32 v0, v2
208
205
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
209
206
; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
210
207
;
211
208
; GCN-GISEL-LABEL: test_v64i32_load_store:
212
209
; GCN-GISEL: ; %bb.0:
213
210
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
214
211
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
215
- ; GCN-GISEL-NEXT: s_clause 0xc
216
- ; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:48
217
- ; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:44
218
- ; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:40
219
- ; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:36
220
- ; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:32
212
+ ; GCN-GISEL-NEXT: s_clause 0xf
213
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:60
214
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:56
215
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:52
216
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:48
217
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:44
218
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 offset:40
219
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v46, s32 offset:36
220
+ ; GCN-GISEL-NEXT: scratch_store_b32 off, v47, s32 offset:32
221
221
; GCN-GISEL-NEXT: scratch_store_b32 off, v56, s32 offset:28
222
222
; GCN-GISEL-NEXT: scratch_store_b32 off, v57, s32 offset:24
223
223
; GCN-GISEL-NEXT: scratch_store_b32 off, v58, s32 offset:20
@@ -226,56 +226,53 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
226
226
; GCN-GISEL-NEXT: scratch_store_b32 off, v61, s32 offset:8
227
227
; GCN-GISEL-NEXT: scratch_store_b32 off, v62, s32 offset:4
228
228
; GCN-GISEL-NEXT: scratch_store_b32 off, v63, s32
229
- ; GCN-GISEL-NEXT: global_load_b128 v[5:8], v[0:1], off offset:32
229
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x8
230
+ ; GCN-GISEL-NEXT: v_dual_mov_b32 v46, v3 :: v_dual_mov_b32 v47, v4
231
+ ; GCN-GISEL-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32
230
232
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
231
- ; GCN-GISEL-NEXT: scratch_store_b128 off, v[5:8], s32 offset:52 ; 16-byte Folded Spill
232
- ; GCN-GISEL-NEXT: global_load_b128 v[5:8], v[0:1], off offset:48
233
+ ; GCN-GISEL-NEXT: scratch_store_b128 off, v[2:5], s32 offset:80 ; 16-byte Folded Spill
234
+ ; GCN-GISEL-NEXT: s_clause 0xe
235
+ ; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:48
236
+ ; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off offset:64
237
+ ; GCN-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:80
238
+ ; GCN-GISEL-NEXT: global_load_b128 v[18:21], v[0:1], off offset:96
239
+ ; GCN-GISEL-NEXT: global_load_b128 v[22:25], v[0:1], off offset:112
240
+ ; GCN-GISEL-NEXT: global_load_b128 v[26:29], v[0:1], off offset:128
241
+ ; GCN-GISEL-NEXT: global_load_b128 v[30:33], v[0:1], off offset:144
242
+ ; GCN-GISEL-NEXT: global_load_b128 v[34:37], v[0:1], off offset:160
243
+ ; GCN-GISEL-NEXT: global_load_b128 v[48:51], v[0:1], off offset:176
244
+ ; GCN-GISEL-NEXT: global_load_b128 v[52:55], v[0:1], off offset:192
245
+ ; GCN-GISEL-NEXT: global_load_b128 v[38:41], v[0:1], off offset:208
246
+ ; GCN-GISEL-NEXT: global_load_b128 v[42:45], v[0:1], off offset:224
247
+ ; GCN-GISEL-NEXT: global_load_b128 v[56:59], v[0:1], off
248
+ ; GCN-GISEL-NEXT: global_load_b128 v[60:63], v[0:1], off offset:16
249
+ ; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:240
233
250
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
234
- ; GCN-GISEL-NEXT: scratch_store_b128 off, v[5:8], s32 offset:68 ; 16-byte Folded Spill
235
- ; GCN-GISEL-NEXT: s_clause 0xd
236
- ; GCN-GISEL-NEXT: global_load_b128 v[13:16], v[0:1], off offset:64
237
- ; GCN-GISEL-NEXT: global_load_b128 v[17:20], v[0:1], off offset:80
238
- ; GCN-GISEL-NEXT: global_load_b128 v[21:24], v[0:1], off offset:96
239
- ; GCN-GISEL-NEXT: global_load_b128 v[25:28], v[0:1], off offset:112
240
- ; GCN-GISEL-NEXT: global_load_b128 v[29:32], v[0:1], off offset:128
241
- ; GCN-GISEL-NEXT: global_load_b128 v[33:36], v[0:1], off offset:144
242
- ; GCN-GISEL-NEXT: global_load_b128 v[48:51], v[0:1], off offset:160
243
- ; GCN-GISEL-NEXT: global_load_b128 v[52:55], v[0:1], off offset:176
244
- ; GCN-GISEL-NEXT: global_load_b128 v[37:40], v[0:1], off offset:192
245
- ; GCN-GISEL-NEXT: global_load_b128 v[41:44], v[0:1], off offset:208
246
- ; GCN-GISEL-NEXT: global_load_b128 v[56:59], v[0:1], off offset:224
247
- ; GCN-GISEL-NEXT: global_load_b128 v[60:63], v[0:1], off
248
- ; GCN-GISEL-NEXT: global_load_b128 v[5:8], v[0:1], off offset:16
249
- ; GCN-GISEL-NEXT: global_load_b128 v[9:12], v[0:1], off offset:240
250
- ; GCN-GISEL-NEXT: s_wait_loadcnt 0x1
251
- ; GCN-GISEL-NEXT: s_wait_xcnt 0x0
252
- ; GCN-GISEL-NEXT: v_mov_b32_e32 v0, v7
253
- ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
254
- ; GCN-GISEL-NEXT: scratch_store_b128 off, v[9:12], s32 offset:84 ; 16-byte Folded Spill
255
- ; GCN-GISEL-NEXT: scratch_load_b128 v[9:12], off, s32 offset:52 th:TH_LOAD_LU ; 16-byte Folded Reload
251
+ ; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 ; 16-byte Folded Spill
252
+ ; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:80 th:TH_LOAD_LU ; 16-byte Folded Reload
256
253
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
257
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[9:12], off offset:32
258
- ; GCN-GISEL-NEXT: scratch_load_b128 v[9:12], off, s32 offset:68 th:TH_LOAD_LU ; 16-byte Folded Reload
254
+ ; GCN-GISEL-NEXT: s_clause 0xe
255
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[0:3], off offset:32
256
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[6:9], off offset:48
257
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[10:13], off offset:64
258
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[14:17], off offset:80
259
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[18:21], off offset:96
260
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[22:25], off offset:112
261
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[26:29], off offset:128
262
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[30:33], off offset:144
263
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[34:37], off offset:160
264
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[48:51], off offset:176
265
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[52:55], off offset:192
266
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[38:41], off offset:208
267
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[42:45], off offset:224
268
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[56:59], off
269
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[60:63], off offset:16
270
+ ; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:64 th:TH_LOAD_LU ; 16-byte Folded Reload
259
271
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
260
- ; GCN-GISEL-NEXT: s_clause 0xd
261
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[9:12], off offset:48
262
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[13:16], off offset:64
263
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[17:20], off offset:80
264
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[21:24], off offset:96
265
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[25:28], off offset:112
266
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[29:32], off offset:128
267
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[33:36], off offset:144
268
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[48:51], off offset:160
269
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[52:55], off offset:176
270
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[37:40], off offset:192
271
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[41:44], off offset:208
272
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[56:59], off offset:224
273
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[60:63], off
274
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[5:8], off offset:16
275
- ; GCN-GISEL-NEXT: scratch_load_b128 v[8:11], off, s32 offset:84 th:TH_LOAD_LU ; 16-byte Folded Reload
276
- ; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
277
- ; GCN-GISEL-NEXT: global_store_b128 v[3:4], v[8:11], off offset:240
278
- ; GCN-GISEL-NEXT: s_clause 0xc
272
+ ; GCN-GISEL-NEXT: global_store_b128 v[46:47], v[0:3], off offset:240
273
+ ; GCN-GISEL-NEXT: s_wait_xcnt 0x0
274
+ ; GCN-GISEL-NEXT: v_mov_b32_e32 v0, v62
275
+ ; GCN-GISEL-NEXT: s_clause 0xf
279
276
; GCN-GISEL-NEXT: scratch_load_b32 v63, off, s32
280
277
; GCN-GISEL-NEXT: scratch_load_b32 v62, off, s32 offset:4
281
278
; GCN-GISEL-NEXT: scratch_load_b32 v61, off, s32 offset:8
@@ -284,11 +281,14 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
284
281
; GCN-GISEL-NEXT: scratch_load_b32 v58, off, s32 offset:20
285
282
; GCN-GISEL-NEXT: scratch_load_b32 v57, off, s32 offset:24
286
283
; GCN-GISEL-NEXT: scratch_load_b32 v56, off, s32 offset:28
287
- ; GCN-GISEL-NEXT: scratch_load_b32 v44, off, s32 offset:32
288
- ; GCN-GISEL-NEXT: scratch_load_b32 v43, off, s32 offset:36
289
- ; GCN-GISEL-NEXT: scratch_load_b32 v42, off, s32 offset:40
290
- ; GCN-GISEL-NEXT: scratch_load_b32 v41, off, s32 offset:44
291
- ; GCN-GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:48
284
+ ; GCN-GISEL-NEXT: scratch_load_b32 v47, off, s32 offset:32
285
+ ; GCN-GISEL-NEXT: scratch_load_b32 v46, off, s32 offset:36
286
+ ; GCN-GISEL-NEXT: scratch_load_b32 v45, off, s32 offset:40
287
+ ; GCN-GISEL-NEXT: scratch_load_b32 v44, off, s32 offset:44
288
+ ; GCN-GISEL-NEXT: scratch_load_b32 v43, off, s32 offset:48
289
+ ; GCN-GISEL-NEXT: scratch_load_b32 v42, off, s32 offset:52
290
+ ; GCN-GISEL-NEXT: scratch_load_b32 v41, off, s32 offset:56
291
+ ; GCN-GISEL-NEXT: scratch_load_b32 v40, off, s32 offset:60
292
292
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
293
293
; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
294
294
%vec = load <64 x i32 >, ptr addrspace (1 ) %ptr
0 commit comments