-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Add baseline tests for gep flag handling #110814
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesWe need to know the address computation won't overflow on Full diff: https://github.com/llvm/llvm-project/pull/110814.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
new file mode 100644
index 00000000000000..782894976c711c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; gfx8 required knowing no overflow happened to fold the addressing mode
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_noflags_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_noflags_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_inbounds_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_inbounds_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_nuw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_nuw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_nusw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_nusw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_nusw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr nusw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_inbounds_nuw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_inbounds_nuw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_nusw_nuw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_nusw_nuw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr nusw nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_inbounds_nuw_alloca_nonpow2_scale(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_movk_i32 s4, 0x84
+; GFX8-NEXT: v_mul_lo_u32 v0, v0, s4
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_movk_i32 s4, 0x84
+; GFX9-NEXT: v_mul_lo_u32 v0, v0, s4
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_add_u32_e32 v0, v2, v0
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [5 x [33 x i32]], align 4, addrspace(5)
+ %gep1 = getelementptr inbounds nuw [5 x [33 x i32]], ptr addrspace(5) %alloca, i32 0, i32 %idx, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
|
@llvm/pr-subscribers-llvm-selectiondag Author: Matt Arsenault (arsenm) ChangesWe need to know the address computation won't overflow on Full diff: https://github.com/llvm/llvm-project/pull/110814.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
new file mode 100644
index 00000000000000..782894976c711c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
@@ -0,0 +1,197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; gfx8 required knowing no overflow happened to fold the addressing mode
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+
+define void @gep_noflags_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_noflags_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_noflags_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_inbounds_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_inbounds_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_inbounds_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_nuw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_nuw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_nuw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_nusw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_nusw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_nusw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr nusw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_inbounds_nuw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_inbounds_nuw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_nusw_nuw_alloca:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_nusw_nuw_alloca:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 2, v2
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [32 x i32], addrspace(5)
+ %gep0 = getelementptr nusw nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
+ %gep1 = getelementptr i32, ptr addrspace(5) %gep0, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1
+ ret void
+}
+
+define void @gep_inbounds_nuw_alloca_nonpow2_scale(i32 %idx, i32 %val) #0 {
+; GFX8-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_movk_i32 s4, 0x84
+; GFX8-NEXT: v_mul_lo_u32 v0, v0, s4
+; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: gep_inbounds_nuw_alloca_nonpow2_scale:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_movk_i32 s4, 0x84
+; GFX9-NEXT: v_mul_lo_u32 v0, v0, s4
+; GFX9-NEXT: v_lshrrev_b32_e64 v2, 6, s32
+; GFX9-NEXT: v_add_u32_e32 v0, v2, v0
+; GFX9-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %alloca = alloca [5 x [33 x i32]], align 4, addrspace(5)
+ %gep1 = getelementptr inbounds nuw [5 x [33 x i32]], ptr addrspace(5) %alloca, i32 0, i32 %idx, i32 4
+ store volatile i32 %val, ptr addrspace(5) %gep1, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
|
95b7c37
to
f3c253f
Compare
We need to know the address computation won't overflow on older subtargets to match the addressing mode of stack instructions.
f3c253f
to
c17153f
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/6680 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/6609 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/6592 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/8406 Here is the relevant piece of the build log for the reference
|
Looks like these are failing: http://45.33.8.238/linux/149769/step_11.txt Please take a look and revert for now if it takes a while to fix. |
Is the operand order in several instructions not deterministic? The differences when failing are like several instances of
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/4492 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/11342 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/9463 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/6855 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/9784 Here is the relevant piece of the build log for the reference
|
We need to know the address computation won't overflow on
older subtargets to match the addressing mode of stack instructions.