7
7
; GCN: s_load_dword [[PTR:s[0-9]+]]
8
8
; GCN: s_mov_b32 m0, [[PTR]]
9
9
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
10
+ ; GCN-NOT: buffer_wbinvl1
10
11
; GCN: {{.*}}store{{.*}} [[RESULT]]
11
12
define amdgpu_kernel void @ds_append_lds (i32 addrspace (3 )* %lds , i32 addrspace (1 )* %out ) #0 {
12
13
%val = call i32 @llvm.amdgcn.ds.append.p3i32 (i32 addrspace (3 )* %lds , i1 false )
@@ -18,6 +19,7 @@ define amdgpu_kernel void @ds_append_lds(i32 addrspace(3)* %lds, i32 addrspace(1
18
19
; GCN: s_load_dword [[PTR:s[0-9]+]]
19
20
; GCN: s_mov_b32 m0, [[PTR]]
20
21
; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532{{$}}
22
+ ; GCN-NOT: buffer_wbinvl1
21
23
; GCN: {{.*}}store{{.*}} [[RESULT]]
22
24
define amdgpu_kernel void @ds_append_lds_max_offset (i32 addrspace (3 )* %lds , i32 addrspace (1 )* %out ) #0 {
23
25
%gep = getelementptr inbounds i32 , i32 addrspace (3 )* %lds , i32 16383
@@ -36,6 +38,7 @@ define amdgpu_kernel void @ds_append_lds_max_offset(i32 addrspace(3)* %lds, i32
36
38
; CIPLUS: s_mov_b32 m0, [[PTR]]
37
39
; CIPLUS: ds_append [[RESULT:v[0-9]+]] offset:16{{$}}
38
40
41
+ ; GCN-NOT: buffer_wbinvl1
39
42
; GCN: {{.*}}store{{.*}} [[RESULT]]
40
43
define amdgpu_kernel void @ds_append_no_fold_offset_si (i32 addrspace (3 )* addrspace (4 )* %lds.ptr , i32 addrspace (1 )* %out ) #0 {
41
44
%lds = load i32 addrspace (3 )*, i32 addrspace (3 )* addrspace (4 )* %lds.ptr , align 4
@@ -53,6 +56,7 @@ define amdgpu_kernel void @ds_append_no_fold_offset_si(i32 addrspace(3)* addrspa
53
56
54
57
; GCN: s_mov_b32 m0, [[PTR]]
55
58
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
59
+ ; GCN-NOT: buffer_wbinvl1
56
60
; GCN: {{.*}}store{{.*}} [[RESULT]]
57
61
define amdgpu_kernel void @ds_append_lds_over_max_offset (i32 addrspace (3 )* %lds , i32 addrspace (1 )* %out ) #0 {
58
62
%gep = getelementptr inbounds i32 , i32 addrspace (3 )* %lds , i32 16384
@@ -65,6 +69,7 @@ define amdgpu_kernel void @ds_append_lds_over_max_offset(i32 addrspace(3)* %lds,
65
69
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
66
70
; GCN: s_mov_b32 m0, [[READLANE]]
67
71
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
72
+ ; GCN-NOT: buffer_wbinvl1
68
73
; GCN: {{.*}}store{{.*}} [[RESULT]]
69
74
define void @ds_append_lds_vgpr_addr (i32 addrspace (3 )* %lds , i32 addrspace (1 )* %out ) #0 {
70
75
%val = call i32 @llvm.amdgcn.ds.append.p3i32 (i32 addrspace (3 )* %lds , i1 false )
@@ -76,6 +81,7 @@ define void @ds_append_lds_vgpr_addr(i32 addrspace(3)* %lds, i32 addrspace(1)* %
76
81
; GCN: s_load_dword [[PTR:s[0-9]+]]
77
82
; GCN: s_mov_b32 m0, [[PTR]]
78
83
; GCN: ds_append [[RESULT:v[0-9]+]] gds{{$}}
84
+ ; GCN-NOT: buffer_wbinvl1
79
85
; GCN: {{.*}}store{{.*}} [[RESULT]]
80
86
define amdgpu_kernel void @ds_append_gds (i32 addrspace (2 )* %gds , i32 addrspace (1 )* %out ) #0 {
81
87
%val = call i32 @llvm.amdgcn.ds.append.p2i32 (i32 addrspace (2 )* %gds , i1 false )
@@ -87,6 +93,7 @@ define amdgpu_kernel void @ds_append_gds(i32 addrspace(2)* %gds, i32 addrspace(1
87
93
; GCN: s_load_dword [[PTR:s[0-9]+]]
88
94
; GCN: s_mov_b32 m0, [[PTR]]
89
95
; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532 gds{{$}}
96
+ ; GCN-NOT: buffer_wbinvl1
90
97
; GCN: {{.*}}store{{.*}} [[RESULT]]
91
98
define amdgpu_kernel void @ds_append_gds_max_offset (i32 addrspace (2 )* %gds , i32 addrspace (1 )* %out ) #0 {
92
99
%gep = getelementptr inbounds i32 , i32 addrspace (2 )* %gds , i32 16383
@@ -96,6 +103,7 @@ define amdgpu_kernel void @ds_append_gds_max_offset(i32 addrspace(2)* %gds, i32
96
103
}
97
104
98
105
; GCN-LABEL: {{^}}ds_append_gds_over_max_offset:
106
+ ; GCN-NOT: buffer_wbinvl1
99
107
define amdgpu_kernel void @ds_append_gds_over_max_offset (i32 addrspace (2 )* %gds , i32 addrspace (1 )* %out ) #0 {
100
108
%gep = getelementptr inbounds i32 , i32 addrspace (2 )* %gds , i32 16384
101
109
%val = call i32 @llvm.amdgcn.ds.append.p2i32 (i32 addrspace (2 )* %gep , i1 false )
@@ -107,6 +115,7 @@ define amdgpu_kernel void @ds_append_gds_over_max_offset(i32 addrspace(2)* %gds,
107
115
; GCN: s_load_dword [[PTR:s[0-9]+]]
108
116
; GCN: s_mov_b32 m0, [[PTR]]
109
117
; GCN: ds_append [[RESULT:v[0-9]+]]{{$}}
118
+ ; GCN-NOT: buffer_wbinvl1
110
119
; NOTGFX9: s_mov_b32 m0, -1
111
120
; GFX9-NOT: m0
112
121
; GCN: _store_dword
0 commit comments