@@ -500,9 +500,9 @@ define void @store_load_vindex_foo(i32 %idx) {
500
500
; GFX9: ; %bb.0: ; %bb
501
501
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502
502
; GFX9-NEXT: v_mov_b32_e32 v1, s32
503
- ; GFX9-NEXT: v_mov_b32_e32 v3, 15
504
503
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
505
- ; GFX9-NEXT: v_and_b32_e32 v0, v0, v3
504
+ ; GFX9-NEXT: v_mov_b32_e32 v3, 15
505
+ ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
506
506
; GFX9-NEXT: scratch_store_dword v2, v3, off
507
507
; GFX9-NEXT: s_waitcnt vmcnt(0)
508
508
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
@@ -514,24 +514,24 @@ define void @store_load_vindex_foo(i32 %idx) {
514
514
; GFX10: ; %bb.0: ; %bb
515
515
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516
516
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
517
- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
518
- ; GFX10-NEXT: v_mov_b32_e32 v2, s32
519
- ; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
520
- ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
521
- ; GFX10-NEXT: v_lshl_add_u32 v2, v3 , 2, v2
522
- ; GFX10-NEXT: scratch_store_dword v0, v1 , off
517
+ ; GFX10-NEXT: v_mov_b32_e32 v1, s32
518
+ ; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
519
+ ; GFX10-NEXT: v_mov_b32_e32 v3, 15
520
+ ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
521
+ ; GFX10-NEXT: v_lshl_add_u32 v1, v2 , 2, v1
522
+ ; GFX10-NEXT: scratch_store_dword v0, v3 , off
523
523
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
524
- ; GFX10-NEXT: scratch_load_dword v0, v2 , off glc dlc
524
+ ; GFX10-NEXT: scratch_load_dword v0, v1 , off glc dlc
525
525
; GFX10-NEXT: s_waitcnt vmcnt(0)
526
526
; GFX10-NEXT: s_setpc_b64 s[30:31]
527
527
;
528
528
; GFX9-PAL-LABEL: store_load_vindex_foo:
529
529
; GFX9-PAL: ; %bb.0: ; %bb
530
530
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531
531
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s32
532
- ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
533
532
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
534
- ; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3
533
+ ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
534
+ ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0
535
535
; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off
536
536
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
537
537
; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
@@ -543,14 +543,14 @@ define void @store_load_vindex_foo(i32 %idx) {
543
543
; GFX10-PAL: ; %bb.0: ; %bb
544
544
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545
545
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
546
- ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
547
- ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s32
548
- ; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
549
- ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
550
- ; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3 , 2, v2
551
- ; GFX10-PAL-NEXT: scratch_store_dword v0, v1 , off
546
+ ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s32
547
+ ; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0
548
+ ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15
549
+ ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
550
+ ; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2 , 2, v1
551
+ ; GFX10-PAL-NEXT: scratch_store_dword v0, v3 , off
552
552
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
553
- ; GFX10-PAL-NEXT: scratch_load_dword v0, v2 , off glc dlc
553
+ ; GFX10-PAL-NEXT: scratch_load_dword v0, v1 , off glc dlc
554
554
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
555
555
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
556
556
bb:
@@ -1247,9 +1247,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
1247
1247
; GFX9-NEXT: s_waitcnt vmcnt(0)
1248
1248
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100
1249
1249
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
1250
- ; GFX9-NEXT: v_mov_b32_e32 v3, 15
1251
1250
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
1252
- ; GFX9-NEXT: v_and_b32_e32 v0, v0, v3
1251
+ ; GFX9-NEXT: v_mov_b32_e32 v3, 15
1252
+ ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
1253
1253
; GFX9-NEXT: scratch_store_dword v2, v3, off
1254
1254
; GFX9-NEXT: s_waitcnt vmcnt(0)
1255
1255
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
@@ -1261,17 +1261,17 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
1261
1261
; GFX10: ; %bb.0: ; %bb
1262
1262
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1263
1263
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1264
- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
1265
1264
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100
1266
- ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
1267
- ; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
1268
- ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
1269
- ; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2
1270
- ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc
1265
+ ; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
1266
+ ; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo
1267
+ ; GFX10-NEXT: v_mov_b32_e32 v3, 15
1268
+ ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
1269
+ ; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1
1270
+ ; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc
1271
1271
; GFX10-NEXT: s_waitcnt vmcnt(0)
1272
- ; GFX10-NEXT: scratch_store_dword v0, v1 , off
1272
+ ; GFX10-NEXT: scratch_store_dword v0, v3 , off
1273
1273
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1274
- ; GFX10-NEXT: scratch_load_dword v0, v2 , off glc dlc
1274
+ ; GFX10-NEXT: scratch_load_dword v0, v1 , off glc dlc
1275
1275
; GFX10-NEXT: s_waitcnt vmcnt(0)
1276
1276
; GFX10-NEXT: s_setpc_b64 s[30:31]
1277
1277
;
@@ -1282,9 +1282,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
1282
1282
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
1283
1283
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x100
1284
1284
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
1285
- ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
1286
1285
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
1287
- ; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3
1286
+ ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
1287
+ ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0
1288
1288
; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off
1289
1289
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
1290
1290
; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
@@ -1296,17 +1296,17 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
1296
1296
; GFX10-PAL: ; %bb.0: ; %bb
1297
1297
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298
1298
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
1299
- ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
1300
1299
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100
1301
- ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
1302
- ; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
1303
- ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
1304
- ; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2
1305
- ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc
1300
+ ; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0
1301
+ ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo
1302
+ ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15
1303
+ ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
1304
+ ; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1
1305
+ ; GFX10-PAL-NEXT: scratch_load_dword v2, off, s32 glc dlc
1306
1306
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
1307
- ; GFX10-PAL-NEXT: scratch_store_dword v0, v1 , off
1307
+ ; GFX10-PAL-NEXT: scratch_store_dword v0, v3 , off
1308
1308
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
1309
- ; GFX10-PAL-NEXT: scratch_load_dword v0, v2 , off glc dlc
1309
+ ; GFX10-PAL-NEXT: scratch_load_dword v0, v1 , off glc dlc
1310
1310
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
1311
1311
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
1312
1312
bb:
@@ -2019,9 +2019,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
2019
2019
; GFX9-NEXT: s_waitcnt vmcnt(0)
2020
2020
; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004
2021
2021
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
2022
- ; GFX9-NEXT: v_mov_b32_e32 v3, 15
2023
2022
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
2024
- ; GFX9-NEXT: v_and_b32_e32 v0, v0, v3
2023
+ ; GFX9-NEXT: v_mov_b32_e32 v3, 15
2024
+ ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
2025
2025
; GFX9-NEXT: scratch_store_dword v2, v3, off
2026
2026
; GFX9-NEXT: s_waitcnt vmcnt(0)
2027
2027
; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v1
@@ -2033,17 +2033,17 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
2033
2033
; GFX10: ; %bb.0: ; %bb
2034
2034
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2035
2035
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2036
- ; GFX10-NEXT: v_mov_b32_e32 v1, 15
2037
2036
; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004
2038
- ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
2039
- ; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
2040
- ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
2041
- ; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2
2042
- ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc
2037
+ ; GFX10-NEXT: v_and_b32_e32 v2, 15, v0
2038
+ ; GFX10-NEXT: v_mov_b32_e32 v1, vcc_lo
2039
+ ; GFX10-NEXT: v_mov_b32_e32 v3, 15
2040
+ ; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v1
2041
+ ; GFX10-NEXT: v_lshl_add_u32 v1, v2, 2, v1
2042
+ ; GFX10-NEXT: scratch_load_dword v2, off, s32 offset:4 glc dlc
2043
2043
; GFX10-NEXT: s_waitcnt vmcnt(0)
2044
- ; GFX10-NEXT: scratch_store_dword v0, v1 , off
2044
+ ; GFX10-NEXT: scratch_store_dword v0, v3 , off
2045
2045
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2046
- ; GFX10-NEXT: scratch_load_dword v0, v2 , off glc dlc
2046
+ ; GFX10-NEXT: scratch_load_dword v0, v1 , off glc dlc
2047
2047
; GFX10-NEXT: s_waitcnt vmcnt(0)
2048
2048
; GFX10-NEXT: s_setpc_b64 s[30:31]
2049
2049
;
@@ -2054,9 +2054,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
2054
2054
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
2055
2055
; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004
2056
2056
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
2057
- ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
2058
2057
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
2059
- ; GFX9-PAL-NEXT: v_and_b32_e32 v0, v0, v3
2058
+ ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
2059
+ ; GFX9-PAL-NEXT: v_and_b32_e32 v0, 15, v0
2060
2060
; GFX9-PAL-NEXT: scratch_store_dword v2, v3, off
2061
2061
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
2062
2062
; GFX9-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
@@ -2068,17 +2068,17 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
2068
2068
; GFX10-PAL: ; %bb.0: ; %bb
2069
2069
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070
2070
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
2071
- ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
2072
2071
; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004
2073
- ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
2074
- ; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
2075
- ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
2076
- ; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2
2077
- ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc
2072
+ ; GFX10-PAL-NEXT: v_and_b32_e32 v2, 15, v0
2073
+ ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, vcc_lo
2074
+ ; GFX10-PAL-NEXT: v_mov_b32_e32 v3, 15
2075
+ ; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v1
2076
+ ; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v2, 2, v1
2077
+ ; GFX10-PAL-NEXT: scratch_load_dword v2, off, s32 offset:4 glc dlc
2078
2078
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
2079
- ; GFX10-PAL-NEXT: scratch_store_dword v0, v1 , off
2079
+ ; GFX10-PAL-NEXT: scratch_store_dword v0, v3 , off
2080
2080
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
2081
- ; GFX10-PAL-NEXT: scratch_load_dword v0, v2 , off glc dlc
2081
+ ; GFX10-PAL-NEXT: scratch_load_dword v0, v1 , off glc dlc
2082
2082
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
2083
2083
; GFX10-PAL-NEXT: s_setpc_b64 s[30:31]
2084
2084
bb:
0 commit comments