Skip to content

Commit 7a354aa

Browse files
enferex authored and yuxuanchen1997 committed
[SelectionDAG] Preserve volatile undef stores. (#99918)
Summary: This patch preserves stores of `undef` values that are `volatile`-qualified. Previously, these store nodes would be discarded. The motivation behind this change is to adhere to the [LangRef](https://llvm.org/docs/LangRef.html#volatile-memory-accesses). Although that document is written mostly in terms of LLVM IR, it seems reasonable to infer that the volatile constraints also apply to SDNodes.

> Certain memory accesses, such as [load](https://llvm.org/docs/LangRef.html#i-load)’s, [store](https://llvm.org/docs/LangRef.html#i-store)’s, and [llvm.memcpy](https://llvm.org/docs/LangRef.html#int-memcpy)’s may be marked volatile. The optimizers must not change the number of volatile operations or change their order of execution relative to other volatile operations. The optimizers may change the order of volatile operations relative to non-volatile operations. This is not Java’s “volatile” and has no cross-thread synchronization behavior.

Source: https://llvm.org/docs/LangRef.html#volatile-memory-accesses

Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250702
1 parent d910ddb commit 7a354aa

File tree

7 files changed

+99
-31
lines changed

7 files changed

+99
-31
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21413,7 +21413,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
2141321413
}
2141421414

2141521415
// Turn 'store undef, Ptr' -> nothing.
21416-
if (Value.isUndef() && ST->isUnindexed())
21416+
if (Value.isUndef() && ST->isUnindexed() && !ST->isVolatile())
2141721417
return Chain;
2141821418

2141921419
// Try to infer better alignment information than the store already has.

llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ define amdgpu_kernel void @marked_kernel_use_other_sgpr(ptr addrspace(1) %ptr) #
255255
%queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
256256
%implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
257257
%dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
258-
store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
259258
ret void
260259
}
261260

llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ define amdgpu_kernel void @store_bitcast_constant_v8i32_to_v16i16(ptr addrspace(
7373
define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
7474
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 999) #1
7575
%bc = bitcast i64 %undef to <2 x i32>
76-
store volatile <2 x i32> %bc, ptr addrspace(1) %out
76+
store <2 x i32> %bc, ptr addrspace(1) %out
7777
ret void
7878
}
7979

@@ -83,7 +83,7 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractel
8383
%undef = call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 9999) #1
8484
%bc = bitcast i64 %undef to <2 x i32>
8585
%elt1 = extractelement <2 x i32> %bc, i32 1
86-
store volatile i32 %elt1, ptr addrspace(1) %out
86+
store i32 %elt1, ptr addrspace(1) %out
8787
ret void
8888
}
8989

llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ define void @func_use_lds_global() {
9898
ret void
9999
}
100100

101-
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (): local memory global used by non-kernel function
102-
define void @func_use_lds_global_constexpr_cast() {
101+
; ERR: warning: <unknown>:0:0: in function func_use_lds_global_constexpr_cast void (ptr addrspace(1)): local memory global used by non-kernel function
102+
define void @func_use_lds_global_constexpr_cast(ptr addrspace(1) %out) {
103103
; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast:
104104
; GFX8-SDAG: ; %bb.0:
105105
; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -153,7 +153,7 @@ define void @func_use_lds_global_constexpr_cast() {
153153
; GISEL-NEXT: s_setpc_b64 s[30:31]
154154
; GISEL-NEXT: .LBB1_2:
155155
; GISEL-NEXT: s_endpgm
156-
store volatile i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) poison, align 4
156+
store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) %out, align 4
157157
ret void
158158
}
159159

llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-SDAG %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s
44
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-SDAG %s
55
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH-GISEL %s
66
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
@@ -126,10 +126,21 @@ define amdgpu_cs void @caller() {
126126
declare amdgpu_gfx void @callee(i32)
127127

128128
define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(1) %outy, ptr addrspace(1) %outz) {
129-
; GFX9-LABEL: workgroup_ids_gfx:
130-
; GFX9: ; %bb.0:
131-
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132-
; GFX9-NEXT: s_setpc_b64 s[30:31]
129+
; GFX9-SDAG-LABEL: workgroup_ids_gfx:
130+
; GFX9-SDAG: ; %bb.0:
131+
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132+
; GFX9-SDAG-NEXT: global_store_dword v[0:1], v0, off
133+
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
134+
; GFX9-SDAG-NEXT: global_store_dword v[2:3], v0, off
135+
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
136+
; GFX9-SDAG-NEXT: global_store_dword v[4:5], v0, off
137+
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
138+
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
139+
;
140+
; GFX9-GISEL-LABEL: workgroup_ids_gfx:
141+
; GFX9-GISEL: ; %bb.0:
142+
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143+
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
133144
;
134145
; GFX9ARCH-SDAG-LABEL: workgroup_ids_gfx:
135146
; GFX9ARCH-SDAG: ; %bb.0:

llvm/test/CodeGen/AMDGPU/mem-builtins.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,64 +9,64 @@ declare hidden i32 @strnlen(ptr nocapture, i32) #1
99
declare hidden i32 @strcmp(ptr nocapture, ptr nocapture) #1
1010

1111

12-
; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr): unsupported call to function memcmp
12+
; ERROR: error: <unknown>:0:0: in function test_memcmp void (ptr addrspace(1), ptr addrspace(1), ptr, ptr addrspace(1)): unsupported call to function memcmp
1313

1414
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4
1515
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+12
16-
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p) #0 {
16+
define amdgpu_kernel void @test_memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, ptr nocapture %p, ptr addrspace(1) %out) #0 {
1717
entry:
1818
%cmp = tail call i32 @memcmp(ptr addrspace(1) %x, ptr addrspace(1) %y, i64 2)
19-
store volatile i32 %cmp, ptr addrspace(1) undef
19+
store i32 %cmp, ptr addrspace(1) %out
2020
ret void
2121
}
2222

23-
; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64): unsupported call to function memchr
23+
; ERROR: error: <unknown>:0:0: in function test_memchr void (ptr addrspace(1), i32, i64, ptr addrspace(1)): unsupported call to function memchr
2424

2525
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4
2626
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+12
27-
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len) #0 {
27+
define amdgpu_kernel void @test_memchr(ptr addrspace(1) %src, i32 %char, i64 %len, ptr addrspace(1) %out) #0 {
2828
%res = call ptr addrspace(1) @memchr(ptr addrspace(1) %src, i32 %char, i64 %len)
29-
store volatile ptr addrspace(1) %res, ptr addrspace(1) undef
29+
store ptr addrspace(1) %res, ptr addrspace(1) %out
3030
ret void
3131
}
3232

33-
; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr): unsupported call to function strcpy
33+
; ERROR: error: <unknown>:0:0: in function test_strcpy void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcpy
3434

3535
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4
3636
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+12
37-
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src) #0 {
37+
define amdgpu_kernel void @test_strcpy(ptr %dst, ptr %src, ptr addrspace(1) %out) #0 {
3838
%res = call ptr @strcpy(ptr %dst, ptr %src)
39-
store volatile ptr %res, ptr addrspace(1) undef
39+
store ptr %res, ptr addrspace(1) %out
4040
ret void
4141
}
4242

43-
; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr): unsupported call to function strcmp
43+
; ERROR: error: <unknown>:0:0: in function test_strcmp void (ptr, ptr, ptr addrspace(1)): unsupported call to function strcmp
4444

4545
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4
4646
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+12
47-
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1) #0 {
47+
define amdgpu_kernel void @test_strcmp(ptr %src0, ptr %src1, ptr addrspace(1) %out) #0 {
4848
%res = call i32 @strcmp(ptr %src0, ptr %src1)
49-
store volatile i32 %res, ptr addrspace(1) undef
49+
store i32 %res, ptr addrspace(1) %out
5050
ret void
5151
}
5252

53-
; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr): unsupported call to function strlen
53+
; ERROR: error: <unknown>:0:0: in function test_strlen void (ptr, ptr addrspace(1)): unsupported call to function strlen
5454

5555
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4
5656
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+12
57-
define amdgpu_kernel void @test_strlen(ptr %src) #0 {
57+
define amdgpu_kernel void @test_strlen(ptr %src, ptr addrspace(1) %out) #0 {
5858
%res = call i32 @strlen(ptr %src)
59-
store volatile i32 %res, ptr addrspace(1) undef
59+
store i32 %res, ptr addrspace(1) %out
6060
ret void
6161
}
6262

63-
; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32): unsupported call to function strnlen
63+
; ERROR: error: <unknown>:0:0: in function test_strnlen void (ptr, i32, ptr addrspace(1)): unsupported call to function strnlen
6464

6565
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4
6666
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+12
67-
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size) #0 {
67+
define amdgpu_kernel void @test_strnlen(ptr %src, i32 %size, ptr addrspace(1) %out) #0 {
6868
%res = call i32 @strnlen(ptr %src, i32 %size)
69-
store volatile i32 %res, ptr addrspace(1) undef
69+
store i32 %res, ptr addrspace(1) %out
7070
ret void
7171
}
7272

llvm/test/CodeGen/NVPTX/store-undef.ll

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,3 +90,61 @@ define void @test_store_def(i64 %param0, i32 %param1, ptr %out) {
9090
store %struct.T %S2, ptr %out
9191
ret void
9292
}
93+
94+
define void @test_store_volatile_undef(ptr %out, <8 x i32> %vec) {
95+
; CHECK-LABEL: test_store_volatile_undef(
96+
; CHECK: {
97+
; CHECK-NEXT: .reg .b32 %r<23>;
98+
; CHECK-NEXT: .reg .b64 %rd<5>;
99+
; CHECK-EMPTY:
100+
; CHECK-NEXT: // %bb.0:
101+
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_undef_param_0];
102+
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
103+
; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
104+
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2;
105+
; CHECK-NEXT: ld.param.v4.u32 {%r7, %r8, %r9, %r10}, [test_store_volatile_undef_param_1];
106+
; CHECK-NEXT: ld.param.v4.u32 {%r11, %r12, %r13, %r14}, [test_store_volatile_undef_param_1+16];
107+
; CHECK-NEXT: st.volatile.v4.u32 [%rd3], {%r11, %r12, %r13, %r14};
108+
; CHECK-NEXT: st.volatile.v4.u32 [%rd4], {%r7, %r8, %r9, %r10};
109+
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r15, %r16, %r17, %r18};
110+
; CHECK-NEXT: st.volatile.v4.u32 [%rd1], {%r19, %r20, %r21, %r22};
111+
; CHECK-NEXT: ret;
112+
store volatile %struct.T undef, ptr %out
113+
store volatile <8 x i32> %vec, ptr undef
114+
store volatile <8 x i32> undef, ptr %out
115+
ret void
116+
}
117+
118+
define void @test_store_volatile_of_poison(ptr %out) {
119+
; CHECK-LABEL: test_store_volatile_of_poison(
120+
; CHECK: {
121+
; CHECK-NEXT: .reg .b32 %r<7>;
122+
; CHECK-NEXT: .reg .b64 %rd<3>;
123+
; CHECK-EMPTY:
124+
; CHECK-NEXT: // %bb.0:
125+
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_of_poison_param_0];
126+
; CHECK-NEXT: st.volatile.v4.u32 [%rd1+16], {%r1, %r2, %r3, %r4};
127+
; CHECK-NEXT: st.volatile.v2.u32 [%rd1+8], {%r5, %r6};
128+
; CHECK-NEXT: st.volatile.u64 [%rd1], %rd2;
129+
; CHECK-NEXT: ret;
130+
store volatile %struct.T poison, ptr %out
131+
ret void
132+
}
133+
134+
define void @test_store_volatile_to_poison(%struct.T %param) {
135+
; CHECK-LABEL: test_store_volatile_to_poison(
136+
; CHECK: {
137+
; CHECK-NEXT: .reg .b32 %r<7>;
138+
; CHECK-NEXT: .reg .b64 %rd<5>;
139+
; CHECK-EMPTY:
140+
; CHECK-NEXT: // %bb.0:
141+
; CHECK-NEXT: ld.param.u64 %rd1, [test_store_volatile_to_poison_param_0];
142+
; CHECK-NEXT: ld.param.v2.u32 {%r1, %r2}, [test_store_volatile_to_poison_param_0+8];
143+
; CHECK-NEXT: ld.param.v4.u32 {%r3, %r4, %r5, %r6}, [test_store_volatile_to_poison_param_0+16];
144+
; CHECK-NEXT: st.volatile.v4.u32 [%rd2], {%r3, %r4, %r5, %r6};
145+
; CHECK-NEXT: st.volatile.v2.u32 [%rd3], {%r1, %r2};
146+
; CHECK-NEXT: st.volatile.u64 [%rd4], %rd1;
147+
; CHECK-NEXT: ret;
148+
store volatile %struct.T %param, ptr poison
149+
ret void
150+
}

0 commit comments

Comments
 (0)