@@ -34,7 +34,7 @@ body: |
 ; GFX67-NEXT: {{ $}}
 ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
 ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
 ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
@@ -44,7 +44,7 @@ body: |
 ; GFX12-NEXT: {{ $}}
 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
 %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 %1:_(s32) = G_CONSTANT i32 0
@@ -64,7 +64,7 @@ body: |
 ; GFX67-NEXT: {{ $}}
 ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
 ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
 ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
@@ -75,7 +75,7 @@ body: |
 ; GFX12-NEXT: {{ $}}
 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
 ; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>)
 %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -96,7 +96,7 @@ body: |
 ; GFX67-NEXT: {{ $}}
 ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
 ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
 ; GFX67-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>)
@@ -107,7 +107,7 @@ body: |
 ; GFX12-NEXT: {{ $}}
 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX12-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
 ; GFX12-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>)
 %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
@@ -128,7 +128,7 @@ body: |
 ; GCN-NEXT: {{ $}}
 ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4)
+; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32)
 ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>)
 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32)
 ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>)
@@ -150,7 +150,7 @@ body: |
 ; GCN-NEXT: {{ $}}
 ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4)
+; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 32)
 ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>)
 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64)
 ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>)
@@ -172,7 +172,7 @@ body: |
 ; GFX67-NEXT: {{ $}}
 ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
 ; GFX67-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; GFX67-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
@@ -219,7 +219,7 @@ body: |
 ; GFX12-NEXT: {{ $}}
 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX12-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
 ; GFX12-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
 ; GFX12-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32)
@@ -272,7 +272,7 @@ body: |
 ; GFX67-NEXT: {{ $}}
 ; GFX67-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX67-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX67-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX67-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>)
 ; GFX67-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
 ; GFX67-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>)
@@ -282,7 +282,7 @@ body: |
 ; GFX12-NEXT: {{ $}}
 ; GFX12-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 ; GFX12-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4)
+; GFX12-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 16)
 ; GFX12-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](<3 x s32>)
 %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
 %1:_(s32) = G_CONSTANT i32 0