@@ -15,9 +15,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
15
15
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
16
16
; GFX9-NEXT: s_lshl_b32 s1, s0, 2
17
17
; GFX9-NEXT: s_and_b32 s0, s0, 15
18
+ ; GFX9-NEXT: s_add_i32 s1, s1, 0
18
19
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
19
20
; GFX9-NEXT: scratch_store_dword off, v0, s1
20
21
; GFX9-NEXT: s_waitcnt vmcnt(0)
22
+ ; GFX9-NEXT: s_add_i32 s0, s0, 0
21
23
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
22
24
; GFX9-NEXT: s_waitcnt vmcnt(0)
23
25
; GFX9-NEXT: s_endpgm
@@ -34,6 +36,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
34
36
; GFX10-NEXT: s_and_b32 s1, s0, 15
35
37
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
36
38
; GFX10-NEXT: s_lshl_b32 s1, s1, 2
39
+ ; GFX10-NEXT: s_add_i32 s0, s0, 0
40
+ ; GFX10-NEXT: s_add_i32 s1, s1, 0
37
41
; GFX10-NEXT: scratch_store_dword off, v0, s0
38
42
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
39
43
; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc
@@ -47,9 +51,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
47
51
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
48
52
; GFX940-NEXT: s_lshl_b32 s1, s0, 2
49
53
; GFX940-NEXT: s_and_b32 s0, s0, 15
54
+ ; GFX940-NEXT: s_add_i32 s1, s1, 0
50
55
; GFX940-NEXT: s_lshl_b32 s0, s0, 2
51
56
; GFX940-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
52
57
; GFX940-NEXT: s_waitcnt vmcnt(0)
58
+ ; GFX940-NEXT: s_add_i32 s0, s0, 0
53
59
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
54
60
; GFX940-NEXT: s_waitcnt vmcnt(0)
55
61
; GFX940-NEXT: s_endpgm
@@ -62,6 +68,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
62
68
; GFX11-NEXT: s_and_b32 s1, s0, 15
63
69
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
64
70
; GFX11-NEXT: s_lshl_b32 s1, s1, 2
71
+ ; GFX11-NEXT: s_add_i32 s0, s0, 0
72
+ ; GFX11-NEXT: s_add_i32 s1, s1, 0
65
73
; GFX11-NEXT: scratch_store_b32 off, v0, s0 dlc
66
74
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
67
75
; GFX11-NEXT: scratch_load_b32 v0, off, s1 glc dlc
@@ -76,6 +84,8 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
76
84
; GFX12-NEXT: s_and_b32 s1, s0, 15
77
85
; GFX12-NEXT: s_lshl_b32 s0, s0, 2
78
86
; GFX12-NEXT: s_lshl_b32 s1, s1, 2
87
+ ; GFX12-NEXT: s_add_co_i32 s0, s0, 0
88
+ ; GFX12-NEXT: s_add_co_i32 s1, s1, 0
79
89
; GFX12-NEXT: scratch_store_b32 off, v0, s0 scope:SCOPE_SYS
80
90
; GFX12-NEXT: s_wait_storecnt 0x0
81
91
; GFX12-NEXT: scratch_load_b32 v0, off, s1 scope:SCOPE_SYS
@@ -1032,13 +1042,13 @@ define void @store_load_large_imm_offset_foo() {
1032
1042
; GFX9-LABEL: store_load_large_imm_offset_foo:
1033
1043
; GFX9: ; %bb.0: ; %bb
1034
1044
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035
- ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1036
1045
; GFX9-NEXT: v_mov_b32_e32 v0, 13
1037
- ; GFX9-NEXT: s_add_i32 s1, s32, s0
1046
+ ; GFX9-NEXT: s_movk_i32 s0, 0x3e80
1047
+ ; GFX9-NEXT: s_add_i32 s1, s32, 4
1038
1048
; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
1039
1049
; GFX9-NEXT: s_waitcnt vmcnt(0)
1040
1050
; GFX9-NEXT: v_mov_b32_e32 v0, 15
1041
- ; GFX9-NEXT: s_add_i32 s0, s1, 4
1051
+ ; GFX9-NEXT: s_add_i32 s0, s0, s1
1042
1052
; GFX9-NEXT: scratch_store_dword off, v0, s0
1043
1053
; GFX9-NEXT: s_waitcnt vmcnt(0)
1044
1054
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
@@ -1049,10 +1059,10 @@ define void @store_load_large_imm_offset_foo() {
1049
1059
; GFX10: ; %bb.0: ; %bb
1050
1060
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051
1061
; GFX10-NEXT: v_mov_b32_e32 v0, 13
1052
- ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1053
1062
; GFX10-NEXT: v_mov_b32_e32 v1, 15
1054
- ; GFX10-NEXT: s_add_i32 s1, s32, s0
1055
- ; GFX10-NEXT: s_add_i32 s0, s1, 4
1063
+ ; GFX10-NEXT: s_movk_i32 s0, 0x3e80
1064
+ ; GFX10-NEXT: s_add_i32 s1, s32, 4
1065
+ ; GFX10-NEXT: s_add_i32 s0, s0, s1
1056
1066
; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
1057
1067
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1058
1068
; GFX10-NEXT: scratch_store_dword off, v1, s0
@@ -1064,13 +1074,13 @@ define void @store_load_large_imm_offset_foo() {
1064
1074
; GFX940-LABEL: store_load_large_imm_offset_foo:
1065
1075
; GFX940: ; %bb.0: ; %bb
1066
1076
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1067
- ; GFX940-NEXT: s_movk_i32 s0, 0x3e80
1068
1077
; GFX940-NEXT: v_mov_b32_e32 v0, 13
1069
- ; GFX940-NEXT: s_add_i32 s1, s32, s0
1078
+ ; GFX940-NEXT: s_movk_i32 s0, 0x3e80
1079
+ ; GFX940-NEXT: s_add_i32 s1, s32, 4
1070
1080
; GFX940-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1
1071
1081
; GFX940-NEXT: s_waitcnt vmcnt(0)
1072
1082
; GFX940-NEXT: v_mov_b32_e32 v0, 15
1073
- ; GFX940-NEXT: s_add_i32 s0, s1, 4
1083
+ ; GFX940-NEXT: s_add_i32 s0, s0, s1
1074
1084
; GFX940-NEXT: scratch_store_dword off, v0, s0 sc0 sc1
1075
1085
; GFX940-NEXT: s_waitcnt vmcnt(0)
1076
1086
; GFX940-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
@@ -1082,9 +1092,9 @@ define void @store_load_large_imm_offset_foo() {
1082
1092
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1083
1093
; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15
1084
1094
; GFX11-NEXT: s_movk_i32 s0, 0x3e80
1085
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1086
- ; GFX11-NEXT: s_add_i32 s1, s32, s0
1087
- ; GFX11-NEXT: s_add_i32 s0, s1, 4
1095
+ ; GFX11-NEXT: s_add_i32 s1, s32, 4
1096
+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1097
+ ; GFX11-NEXT: s_add_i32 s0, s0, s1
1088
1098
; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc
1089
1099
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
1090
1100
; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc
0 commit comments