@@ -178,18 +178,18 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
178
178
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
179
179
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5
180
180
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
181
- ; GFX9-NEXT: s_add_u32 s2, 4 , 0
182
- ; GFX9-NEXT: v_mov_b32_e32 v0, 15
183
- ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
181
+ ; GFX9-NEXT: s_mov_b32 vcc_hi , 0
182
+ ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
183
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
184
184
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
185
185
; GFX9-NEXT: s_and_b32 s0, s0, 15
186
186
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
187
- ; GFX9-NEXT: s_add_u32 s1, 0x104, s1
188
- ; GFX9-NEXT: scratch_load_dword v1, off, s2 glc
189
187
; GFX9-NEXT: s_waitcnt vmcnt(0)
190
- ; GFX9-NEXT: s_add_u32 s0, 0x104, s0
188
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 15
189
+ ; GFX9-NEXT: s_add_u32 s1, 0x104, s1
191
190
; GFX9-NEXT: scratch_store_dword off, v0, s1
192
191
; GFX9-NEXT: s_waitcnt vmcnt(0)
192
+ ; GFX9-NEXT: s_add_u32 s0, 0x104, s0
193
193
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
194
194
; GFX9-NEXT: s_waitcnt vmcnt(0)
195
195
; GFX9-NEXT: s_endpgm
@@ -201,8 +201,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
201
201
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
202
202
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
203
203
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
204
- ; GFX10-NEXT: s_add_u32 s1, 4, 0
205
- ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
204
+ ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc
206
205
; GFX10-NEXT: s_waitcnt vmcnt(0)
207
206
; GFX10-NEXT: v_mov_b32_e32 v0, 15
208
207
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -237,8 +236,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
237
236
; GFX9: ; %bb.0: ; %bb
238
237
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
239
238
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
240
- ; GFX9-NEXT: s_add_u32 s0, 4 , 0
241
- ; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
239
+ ; GFX9-NEXT: s_mov_b32 vcc_hi , 0
240
+ ; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc
242
241
; GFX9-NEXT: s_waitcnt vmcnt(0)
243
242
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
244
243
; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0
@@ -263,11 +262,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() {
263
262
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
264
263
; GFX10-NEXT: v_mov_b32_e32 v2, 0x104
265
264
; GFX10-NEXT: v_mov_b32_e32 v3, 15
266
- ; GFX10-NEXT: s_add_u32 s0, 4, 0
267
265
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
268
266
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
269
267
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
270
- ; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
268
+ ; GFX10-NEXT: scratch_load_dword v2, off, off offset:4 glc dlc
271
269
; GFX10-NEXT: s_waitcnt vmcnt(0)
272
270
; GFX10-NEXT: scratch_store_dword v0, v3, off
273
271
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -296,8 +294,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
296
294
; GFX9-LABEL: store_load_vindex_small_offset_foo:
297
295
; GFX9: ; %bb.0: ; %bb
298
296
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299
- ; GFX9-NEXT: s_add_u32 s0, s32, 0
300
- ; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
297
+ ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
301
298
; GFX9-NEXT: s_waitcnt vmcnt(0)
302
299
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100
303
300
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
@@ -323,10 +320,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
323
320
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
324
321
; GFX10-NEXT: v_mov_b32_e32 v3, 15
325
322
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
326
- ; GFX10-NEXT: s_add_u32 s0, s32, 0
327
323
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
328
324
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
329
- ; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
325
+ ; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc
330
326
; GFX10-NEXT: s_waitcnt vmcnt(0)
331
327
; GFX10-NEXT: scratch_store_dword v0, v3, off
332
328
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -355,18 +351,18 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
355
351
; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24
356
352
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5
357
353
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0
358
- ; GFX9-NEXT: s_add_u32 s2, 4 , 0
359
- ; GFX9-NEXT: v_mov_b32_e32 v0, 15
360
- ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
354
+ ; GFX9-NEXT: s_mov_b32 vcc_hi , 0
355
+ ; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc
356
+ ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
361
357
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
362
358
; GFX9-NEXT: s_and_b32 s0, s0, 15
363
359
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
364
- ; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
365
- ; GFX9-NEXT: scratch_load_dword v1, off, s2 glc
366
360
; GFX9-NEXT: s_waitcnt vmcnt(0)
367
- ; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
361
+ ; GFX9-NEXT: v_mov_b32_e32 v0, 15
362
+ ; GFX9-NEXT: s_add_u32 s1, 0x4004, s1
368
363
; GFX9-NEXT: scratch_store_dword off, v0, s1
369
364
; GFX9-NEXT: s_waitcnt vmcnt(0)
365
+ ; GFX9-NEXT: s_add_u32 s0, 0x4004, s0
370
366
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
371
367
; GFX9-NEXT: s_waitcnt vmcnt(0)
372
368
; GFX9-NEXT: s_endpgm
@@ -378,8 +374,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
378
374
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2
379
375
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3
380
376
; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24
381
- ; GFX10-NEXT: s_add_u32 s1, 4, 0
382
- ; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
377
+ ; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc
383
378
; GFX10-NEXT: s_waitcnt vmcnt(0)
384
379
; GFX10-NEXT: v_mov_b32_e32 v0, 15
385
380
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
@@ -414,8 +409,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
414
409
; GFX9: ; %bb.0: ; %bb
415
410
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
416
411
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
417
- ; GFX9-NEXT: s_add_u32 s0, 4 , 0
418
- ; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
412
+ ; GFX9-NEXT: s_mov_b32 vcc_hi , 0
413
+ ; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc
419
414
; GFX9-NEXT: s_waitcnt vmcnt(0)
420
415
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
421
416
; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0
@@ -440,11 +435,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() {
440
435
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
441
436
; GFX10-NEXT: v_mov_b32_e32 v2, 0x4004
442
437
; GFX10-NEXT: v_mov_b32_e32 v3, 15
443
- ; GFX10-NEXT: s_add_u32 s0, 4, 0
444
438
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
445
439
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
446
440
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
447
- ; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
441
+ ; GFX10-NEXT: scratch_load_dword v2, off, off offset:4 glc dlc
448
442
; GFX10-NEXT: s_waitcnt vmcnt(0)
449
443
; GFX10-NEXT: scratch_store_dword v0, v3, off
450
444
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -473,8 +467,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
473
467
; GFX9-LABEL: store_load_vindex_large_offset_foo:
474
468
; GFX9: ; %bb.0: ; %bb
475
469
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476
- ; GFX9-NEXT: s_add_u32 s0, s32, 0
477
- ; GFX9-NEXT: scratch_load_dword v1, off, s0 glc
470
+ ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
478
471
; GFX9-NEXT: s_waitcnt vmcnt(0)
479
472
; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000
480
473
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
@@ -500,10 +493,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
500
493
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
501
494
; GFX10-NEXT: v_mov_b32_e32 v3, 15
502
495
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
503
- ; GFX10-NEXT: s_add_u32 s0, s32, 0
504
496
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
505
497
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
506
- ; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc
498
+ ; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc
507
499
; GFX10-NEXT: s_waitcnt vmcnt(0)
508
500
; GFX10-NEXT: scratch_store_dword v0, v3, off
509
501
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -531,11 +523,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
531
523
; GFX9: ; %bb.0: ; %bb
532
524
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3
533
525
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0
526
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
534
527
; GFX9-NEXT: v_mov_b32_e32 v0, 13
535
- ; GFX9-NEXT: s_add_u32 s0, 4 , 0
536
- ; GFX9-NEXT: scratch_store_dword off, v0, s0
528
+ ; GFX9-NEXT: s_mov_b32 vcc_hi , 0
529
+ ; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4
537
530
; GFX9-NEXT: s_waitcnt vmcnt(0)
538
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
539
531
; GFX9-NEXT: v_mov_b32_e32 v0, 15
540
532
; GFX9-NEXT: s_add_u32 s0, 4, s0
541
533
; GFX9-NEXT: scratch_store_dword off, v0, s0
@@ -553,9 +545,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() {
553
545
; GFX10-NEXT: v_mov_b32_e32 v0, 13
554
546
; GFX10-NEXT: v_mov_b32_e32 v1, 15
555
547
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
556
- ; GFX10-NEXT: s_add_u32 s1, 4, 0
557
548
; GFX10-NEXT: s_add_u32 s0, 4, s0
558
- ; GFX10-NEXT: scratch_store_dword off, v0, s1
549
+ ; GFX10-NEXT: scratch_store_dword off, v0, off offset:4
559
550
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
560
551
; GFX10-NEXT: scratch_store_dword off, v1, s0
561
552
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
@@ -577,11 +568,10 @@ define void @store_load_large_imm_offset_foo() {
577
568
; GFX9-LABEL: store_load_large_imm_offset_foo:
578
569
; GFX9: ; %bb.0: ; %bb
579
570
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
580
572
; GFX9-NEXT: v_mov_b32_e32 v0, 13
581
- ; GFX9-NEXT: s_add_u32 s0, s32, 0
582
- ; GFX9-NEXT: scratch_store_dword off, v0, s0
573
+ ; GFX9-NEXT: scratch_store_dword off, v0, s32
583
574
; GFX9-NEXT: s_waitcnt vmcnt(0)
584
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
585
575
; GFX9-NEXT: v_mov_b32_e32 v0, 15
586
576
; GFX9-NEXT: s_add_u32 s0, s32, s0
587
577
; GFX9-NEXT: scratch_store_dword off, v0, s0
@@ -597,9 +587,8 @@ define void @store_load_large_imm_offset_foo() {
597
587
; GFX10-NEXT: v_mov_b32_e32 v0, 13
598
588
; GFX10-NEXT: v_mov_b32_e32 v1, 15
599
589
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
600
- ; GFX10-NEXT: s_add_u32 s1, s32, 0
601
590
; GFX10-NEXT: s_add_u32 s0, s32, s0
602
- ; GFX10-NEXT: scratch_store_dword off, v0, s1
591
+ ; GFX10-NEXT: scratch_store_dword off, v0, s32
603
592
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
604
593
; GFX10-NEXT: scratch_store_dword off, v1, s0
605
594
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
0 commit comments