@@ -9,43 +9,34 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
9
9
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
10
10
; GCN-NEXT: v_lshlrev_b32_e32 v64, 8, v0
11
11
; GCN-NEXT: s_waitcnt lgkmcnt(0)
12
- ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
13
12
; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
14
- ; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:16
15
- ; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:32
16
- ; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:48
17
- ; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:64
18
- ; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:80
19
- ; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:96
20
- ; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:112
21
- ; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:128
13
+ ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:16
14
+ ; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:32
15
+ ; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:48
16
+ ; GCN-NEXT: global_load_dwordx4 v[16:19], v64, s[0:1] offset:64
17
+ ; GCN-NEXT: global_load_dwordx4 v[20:23], v64, s[0:1] offset:80
18
+ ; GCN-NEXT: global_load_dwordx4 v[24:27], v64, s[0:1] offset:96
19
+ ; GCN-NEXT: global_load_dwordx4 v[28:31], v64, s[0:1] offset:112
20
+ ; GCN-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] offset:128
21
+ ; GCN-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:144
22
22
; GCN-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:160
23
23
; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:176
24
24
; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:192
25
25
; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:208
26
26
; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:224
27
27
; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:240
28
- ; GCN-NEXT: s_waitcnt vmcnt(15)
29
- ; GCN-NEXT: v_mov_b32_e32 v5, 0x3e7
30
- ; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144
31
- ; GCN-NEXT: s_waitcnt vmcnt(15)
28
+ ; GCN-NEXT: s_waitcnt vmcnt(6)
29
+ ; GCN-NEXT: v_mov_b32_e32 v37, 0x3e7
32
30
; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
33
- ; GCN-NEXT: s_waitcnt vmcnt(15)
34
- ; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16
35
- ; GCN-NEXT: s_waitcnt vmcnt(15)
36
- ; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32
37
- ; GCN-NEXT: s_waitcnt vmcnt(15)
38
- ; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48
39
- ; GCN-NEXT: s_waitcnt vmcnt(15)
40
- ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64
41
- ; GCN-NEXT: s_waitcnt vmcnt(15)
42
- ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80
43
- ; GCN-NEXT: s_waitcnt vmcnt(15)
44
- ; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96
45
- ; GCN-NEXT: s_waitcnt vmcnt(15)
46
- ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112
47
- ; GCN-NEXT: s_waitcnt vmcnt(15)
48
- ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:128
31
+ ; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
32
+ ; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
33
+ ; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
34
+ ; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
35
+ ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
36
+ ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
37
+ ; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
38
+ ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
39
+ ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
49
40
; GCN-NEXT: s_waitcnt vmcnt(15)
50
41
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
51
42
; GCN-NEXT: s_waitcnt vmcnt(15)
@@ -67,39 +58,45 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
67
58
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
68
59
; GFX10-NEXT: s_clause 0xf
69
60
; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1]
70
- ; GFX10-NEXT: global_load_dwordx4 v[8:11 ], v64, s[0:1] offset:16
71
- ; GFX10-NEXT: global_load_dwordx4 v[12:15 ], v64, s[0:1] offset:32
72
- ; GFX10-NEXT: global_load_dwordx4 v[16:19 ], v64, s[0:1] offset:48
73
- ; GFX10-NEXT: global_load_dwordx4 v[20:23 ], v64, s[0:1] offset:64
74
- ; GFX10-NEXT: global_load_dwordx4 v[24:27 ], v64, s[0:1] offset:80
75
- ; GFX10-NEXT: global_load_dwordx4 v[28:31 ], v64, s[0:1] offset:96
76
- ; GFX10-NEXT: global_load_dwordx4 v[32:35 ], v64, s[0:1] offset:112
77
- ; GFX10-NEXT: global_load_dwordx4 v[36:39 ], v64, s[0:1] offset:160
78
- ; GFX10-NEXT: global_load_dwordx4 v[40:43 ], v64, s[0:1] offset:176
79
- ; GFX10-NEXT: global_load_dwordx4 v[44:47 ], v64, s[0:1] offset:192
80
- ; GFX10-NEXT: global_load_dwordx4 v[48:51 ], v64, s[0:1] offset:208
81
- ; GFX10-NEXT: global_load_dwordx4 v[52:55 ], v64, s[0:1] offset:224
82
- ; GFX10-NEXT: global_load_dwordx4 v[56:59 ], v64, s[0:1] offset:240
83
- ; GFX10-NEXT: global_load_dwordx4 v[60:63 ], v64, s[0:1] offset:128
84
- ; GFX10-NEXT: global_load_dwordx4 v[4:7 ], v64, s[0:1] offset:144
85
- ; GFX10-NEXT: s_waitcnt vmcnt(0 )
86
- ; GFX10-NEXT: v_mov_b32_e32 v5 , 0x3e7
61
+ ; GFX10-NEXT: global_load_dwordx4 v[4:7 ], v64, s[0:1] offset:16
62
+ ; GFX10-NEXT: global_load_dwordx4 v[8:11 ], v64, s[0:1] offset:32
63
+ ; GFX10-NEXT: global_load_dwordx4 v[12:15 ], v64, s[0:1] offset:48
64
+ ; GFX10-NEXT: global_load_dwordx4 v[16:19 ], v64, s[0:1] offset:64
65
+ ; GFX10-NEXT: global_load_dwordx4 v[20:23 ], v64, s[0:1] offset:80
66
+ ; GFX10-NEXT: global_load_dwordx4 v[24:27 ], v64, s[0:1] offset:96
67
+ ; GFX10-NEXT: global_load_dwordx4 v[28:31 ], v64, s[0:1] offset:112
68
+ ; GFX10-NEXT: global_load_dwordx4 v[32:35 ], v64, s[0:1] offset:128
69
+ ; GFX10-NEXT: global_load_dwordx4 v[36:39 ], v64, s[0:1] offset:144
70
+ ; GFX10-NEXT: global_load_dwordx4 v[40:43 ], v64, s[0:1] offset:160
71
+ ; GFX10-NEXT: global_load_dwordx4 v[44:47 ], v64, s[0:1] offset:176
72
+ ; GFX10-NEXT: global_load_dwordx4 v[48:51 ], v64, s[0:1] offset:192
73
+ ; GFX10-NEXT: global_load_dwordx4 v[52:55 ], v64, s[0:1] offset:208
74
+ ; GFX10-NEXT: global_load_dwordx4 v[56:59 ], v64, s[0:1] offset:224
75
+ ; GFX10-NEXT: global_load_dwordx4 v[60:63 ], v64, s[0:1] offset:240
76
+ ; GFX10-NEXT: s_waitcnt vmcnt(6 )
77
+ ; GFX10-NEXT: v_mov_b32_e32 v37 , 0x3e7
87
78
; GFX10-NEXT: global_store_dwordx4 v64, v[0:3], s[2:3]
88
- ; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:16
89
- ; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:32
90
- ; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:48
91
- ; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:64
92
- ; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:80
93
- ; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:96
94
- ; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:112
95
- ; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:128
96
- ; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:144
97
- ; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:160
98
- ; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:176
99
- ; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:192
100
- ; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:208
101
- ; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:224
102
- ; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:240
79
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[4:7], s[2:3] offset:16
80
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[8:11], s[2:3] offset:32
81
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[12:15], s[2:3] offset:48
82
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:64
83
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:80
84
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:96
85
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:112
86
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] offset:128
87
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:144
88
+ ; GFX10-NEXT: s_waitcnt vmcnt(5)
89
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:160
90
+ ; GFX10-NEXT: s_waitcnt vmcnt(4)
91
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[44:47], s[2:3] offset:176
92
+ ; GFX10-NEXT: s_waitcnt vmcnt(3)
93
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[48:51], s[2:3] offset:192
94
+ ; GFX10-NEXT: s_waitcnt vmcnt(2)
95
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:208
96
+ ; GFX10-NEXT: s_waitcnt vmcnt(1)
97
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:224
98
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
99
+ ; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:240
103
100
; GFX10-NEXT: s_endpgm
104
101
;
105
102
; GFX11-LABEL: v_insert_v64i32_37:
@@ -109,34 +106,34 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
109
106
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
110
107
; GFX11-NEXT: s_clause 0xf
111
108
; GFX11-NEXT: global_load_b128 v[0:3], v64, s[0:1]
112
- ; GFX11-NEXT: global_load_b128 v[8:11 ], v64, s[0:1] offset:16
113
- ; GFX11-NEXT: global_load_b128 v[12:15 ], v64, s[0:1] offset:32
114
- ; GFX11-NEXT: global_load_b128 v[16:19 ], v64, s[0:1] offset:48
115
- ; GFX11-NEXT: global_load_b128 v[20:23 ], v64, s[0:1] offset:64
116
- ; GFX11-NEXT: global_load_b128 v[24:27 ], v64, s[0:1] offset:80
117
- ; GFX11-NEXT: global_load_b128 v[28:31 ], v64, s[0:1] offset:96
118
- ; GFX11-NEXT: global_load_b128 v[32:35 ], v64, s[0:1] offset:112
119
- ; GFX11-NEXT: global_load_b128 v[36:39 ], v64, s[0:1] offset:128
120
- ; GFX11-NEXT: global_load_b128 v[4:7 ], v64, s[0:1] offset:144
109
+ ; GFX11-NEXT: global_load_b128 v[4:7 ], v64, s[0:1] offset:16
110
+ ; GFX11-NEXT: global_load_b128 v[8:11 ], v64, s[0:1] offset:32
111
+ ; GFX11-NEXT: global_load_b128 v[12:15 ], v64, s[0:1] offset:48
112
+ ; GFX11-NEXT: global_load_b128 v[16:19 ], v64, s[0:1] offset:64
113
+ ; GFX11-NEXT: global_load_b128 v[20:23 ], v64, s[0:1] offset:80
114
+ ; GFX11-NEXT: global_load_b128 v[24:27 ], v64, s[0:1] offset:96
115
+ ; GFX11-NEXT: global_load_b128 v[28:31 ], v64, s[0:1] offset:112
116
+ ; GFX11-NEXT: global_load_b128 v[32:35 ], v64, s[0:1] offset:128
117
+ ; GFX11-NEXT: global_load_b128 v[36:39 ], v64, s[0:1] offset:144
121
118
; GFX11-NEXT: global_load_b128 v[40:43], v64, s[0:1] offset:160
122
119
; GFX11-NEXT: global_load_b128 v[44:47], v64, s[0:1] offset:176
123
120
; GFX11-NEXT: global_load_b128 v[48:51], v64, s[0:1] offset:192
124
121
; GFX11-NEXT: global_load_b128 v[52:55], v64, s[0:1] offset:208
125
122
; GFX11-NEXT: global_load_b128 v[56:59], v64, s[0:1] offset:224
126
123
; GFX11-NEXT: global_load_b128 v[60:63], v64, s[0:1] offset:240
127
124
; GFX11-NEXT: s_waitcnt vmcnt(6)
128
- ; GFX11-NEXT: v_mov_b32_e32 v5 , 0x3e7
125
+ ; GFX11-NEXT: v_mov_b32_e32 v37 , 0x3e7
129
126
; GFX11-NEXT: s_clause 0x9
130
127
; GFX11-NEXT: global_store_b128 v64, v[0:3], s[2:3]
131
- ; GFX11-NEXT: global_store_b128 v64, v[8:11 ], s[2:3] offset:16
132
- ; GFX11-NEXT: global_store_b128 v64, v[12:15 ], s[2:3] offset:32
133
- ; GFX11-NEXT: global_store_b128 v64, v[16:19 ], s[2:3] offset:48
134
- ; GFX11-NEXT: global_store_b128 v64, v[20:23 ], s[2:3] offset:64
135
- ; GFX11-NEXT: global_store_b128 v64, v[24:27 ], s[2:3] offset:80
136
- ; GFX11-NEXT: global_store_b128 v64, v[28:31 ], s[2:3] offset:96
137
- ; GFX11-NEXT: global_store_b128 v64, v[32:35 ], s[2:3] offset:112
138
- ; GFX11-NEXT: global_store_b128 v64, v[36:39 ], s[2:3] offset:128
139
- ; GFX11-NEXT: global_store_b128 v64, v[4:7 ], s[2:3] offset:144
128
+ ; GFX11-NEXT: global_store_b128 v64, v[4:7 ], s[2:3] offset:16
129
+ ; GFX11-NEXT: global_store_b128 v64, v[8:11 ], s[2:3] offset:32
130
+ ; GFX11-NEXT: global_store_b128 v64, v[12:15 ], s[2:3] offset:48
131
+ ; GFX11-NEXT: global_store_b128 v64, v[16:19 ], s[2:3] offset:64
132
+ ; GFX11-NEXT: global_store_b128 v64, v[20:23 ], s[2:3] offset:80
133
+ ; GFX11-NEXT: global_store_b128 v64, v[24:27 ], s[2:3] offset:96
134
+ ; GFX11-NEXT: global_store_b128 v64, v[28:31 ], s[2:3] offset:112
135
+ ; GFX11-NEXT: global_store_b128 v64, v[32:35 ], s[2:3] offset:128
136
+ ; GFX11-NEXT: global_store_b128 v64, v[36:39 ], s[2:3] offset:144
140
137
; GFX11-NEXT: s_waitcnt vmcnt(5)
141
138
; GFX11-NEXT: global_store_b128 v64, v[40:43], s[2:3] offset:160
142
139
; GFX11-NEXT: s_waitcnt vmcnt(4)
0 commit comments