Skip to content

AMDGPU: Replace undef global initializers in tests with poison #131051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ define amdgpu_kernel void @mul_32bit_ptr(ptr addrspace(1) %out, ptr addrspace(3)
ret void
}

@g_lds = addrspace(3) global float undef, align 4
@g_lds = addrspace(3) global float poison, align 4

; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
; SI: v_mov_b32_e32 [[PTR:v[0-9]+]], 0{{$}}
Expand All @@ -93,7 +93,7 @@ define amdgpu_kernel void @infer_ptr_alignment_global_offset(ptr addrspace(1) %o


@ptr = addrspace(3) global ptr addrspace(3) poison
@dst = addrspace(3) global [16383 x i32] undef
@dst = addrspace(3) global [16383 x i32] poison

; FUNC-LABEL: {{^}}global_ptr:
; SI: ds_write_b32
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
; FIXME: Merge with other test. DS offset folding doesn't work due to
; register bank copies, and no return optimization is missing.

@lds0 = internal addrspace(3) global [512 x i32] undef
@lds1 = internal addrspace(3) global [512 x i64] undef, align 8
@lds0 = internal addrspace(3) global [512 x i32] poison
@lds1 = internal addrspace(3) global [512 x i64] poison, align 8

declare i32 @llvm.amdgcn.workitem.id.x() #0

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
; FIXME: Merge with other test. DS offset folding doesn't work due to
; register bank copies, and no return optimization is missing.

@lds0 = internal addrspace(3) global [512 x i32] undef, align 4
@lds1 = internal addrspace(3) global [512 x i64] undef, align 8
@lds0 = internal addrspace(3) global [512 x i32] poison, align 4
@lds1 = internal addrspace(3) global [512 x i64] poison, align 8

declare i32 @llvm.amdgcn.workitem.id.x() #0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
; ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p5) = G_GLOBAL_VALUE @external_private (in function: fn_external_private)

@external_private = external addrspace(5) global i32, align 4
@internal_private = internal addrspace(5) global i32 undef, align 4
@internal_private = internal addrspace(5) global i32 poison, align 4

define ptr addrspace(5) @fn_external_private() {
ret ptr addrspace(5) @external_private
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s

@lds0 = addrspace(3) global [512 x float] undef
@lds1 = addrspace(3) global [256 x float] undef
@lds2 = addrspace(3) global [4096 x float] undef
@lds3 = addrspace(3) global [67 x i8] undef
@lds0 = addrspace(3) global [512 x float] poison
@lds1 = addrspace(3) global [256 x float] poison
@lds2 = addrspace(3) global [4096 x float] poison
@lds3 = addrspace(3) global [67 x i8] poison

@dynamic_shared0 = external addrspace(3) global [0 x float]
@dynamic_shared1 = external addrspace(3) global [0 x double]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stop-after=irtranslator -o - %s | FileCheck %s

@var = global i32 undef
@var = global i32 poison

define i32 @test() {
; CHECK-LABEL: name: test
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
; TODO: Replace with existing DAG tests

@lds_512_4 = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
@lds_4_8 = addrspace(3) global i32 undef, align 8
@lds_512_4 = internal unnamed_addr addrspace(3) global [128 x i32] poison, align 4
@lds_4_8 = addrspace(3) global i32 poison, align 8

define amdgpu_kernel void @use_lds_globals(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
; CHECK-LABEL: use_lds_globals:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
; FIXME: Merge with DAG test

@lds.external = external unnamed_addr addrspace(3) global [0 x i32]
@lds.defined = unnamed_addr addrspace(3) global [8 x i32] undef, align 8
@lds.defined = unnamed_addr addrspace(3) global [8 x i32] poison, align 8

; GCN-LABEL: {{^}}test_basic:
; GCN: s_add_u32 s0, lds.defined@abs32@lo, s0 ; encoding: [0xff,0x00,0x00,0x80,A,A,A,A]
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ bb2:

; FIXME: These aren't localized because these were legalized before
; the localizer, and are no longer G_GLOBAL_VALUE.
@gv0 = addrspace(1) global i32 undef, align 4
@gv1 = addrspace(1) global i32 undef, align 4
@gv2 = addrspace(1) global i32 undef, align 4
@gv3 = addrspace(1) global i32 undef, align 4
@gv0 = addrspace(1) global i32 poison, align 4
@gv1 = addrspace(1) global i32 poison, align 4
@gv2 = addrspace(1) global i32 poison, align 4
@gv3 = addrspace(1) global i32 poison, align 4

define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
Expand Down Expand Up @@ -159,10 +159,10 @@ bb2:
ret void
}

@static.gv0 = internal addrspace(1) global i32 undef, align 4
@static.gv1 = internal addrspace(1) global i32 undef, align 4
@static.gv2 = internal addrspace(1) global i32 undef, align 4
@static.gv3 = internal addrspace(1) global i32 undef, align 4
@static.gv0 = internal addrspace(1) global i32 poison, align 4
@static.gv1 = internal addrspace(1) global i32 poison, align 4
@static.gv2 = internal addrspace(1) global i32 poison, align 4
@static.gv3 = internal addrspace(1) global i32 poison, align 4

define void @localize_internal_globals(i1 %cond) {
; GFX9-LABEL: localize_internal_globals:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@

declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0

@lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
@lds.i32 = unnamed_addr addrspace(3) global i32 poison, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] poison, align 4

@global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
@global.i32 = unnamed_addr addrspace(1) global i32 poison, align 4
@global.arr = unnamed_addr addrspace(1) global [256 x i32] poison, align 4

;.
; HSA: @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
; HSA: @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
; HSA: @global.i32 = unnamed_addr addrspace(1) global i32 undef, align 4
; HSA: @global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
; HSA: @lds.i32 = unnamed_addr addrspace(3) global i32 poison, align 4
; HSA: @lds.arr = unnamed_addr addrspace(3) global [256 x i32] poison, align 4
; HSA: @global.i32 = unnamed_addr addrspace(1) global i32 poison, align 4
; HSA: @global.arr = unnamed_addr addrspace(1) global [256 x i32] poison, align 4
;.
define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4))

@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
@lds.arr = unnamed_addr addrspace(3) global [256 x i32] poison, align 4

@gv_flatptr_from_lds = unnamed_addr addrspace(2) global ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
; CHECK: .quad constant.arr+32
; CHECK: .size gv_flatptr_from_constant, 8

@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4
@global.arr = unnamed_addr addrspace(1) global [256 x i32] poison, align 4
@constant.arr = external unnamed_addr addrspace(4) global [256 x i32], align 4

@gv_flatptr_from_global = unnamed_addr addrspace(4) global ptr addrspace(0) getelementptr ([256 x i32], ptr addrspace(0) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(0)), i64 0, i64 8), align 4
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/addrspacecast-known-non-null.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ define void @cast_alloca() {
ret void
}

@lds = internal unnamed_addr addrspace(3) global i8 undef, align 4
@lds = internal unnamed_addr addrspace(3) global i8 poison, align 4

; CHECK-LABEL: {{^}}cast_lds_gv:
; CHECK: s_mov_b64 s[{{[0-9]+}}:[[HIREG:[0-9]+]]], src_shared_base
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ define void @test_8_3(ptr %p) {
ret void
}

@shm = internal addrspace(3) global [2 x i8] undef, align 4
@shm = internal addrspace(3) global [2 x i8] poison, align 4

; CHECK-LABEL: Function: test_8_4
; CHECK: NoAlias: i8* %p, i8 addrspace(3)* %p1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ define amdgpu_kernel void @constant_from_offset_cast_global_null() {
ret void
}

@gv = unnamed_addr addrspace(1) global [64 x i8] undef, align 4
@gv = unnamed_addr addrspace(1) global [64 x i8] poison, align 4

define amdgpu_kernel void @constant_from_offset_cast_global_gv() {
; GFX9-LABEL: @constant_from_offset_cast_global_gv(
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
ret float %add
}

@lds = internal addrspace(3) global [64 x float] undef
@lds = internal addrspace(3) global [64 x float] poison

define amdgpu_gfx float @simple_lds(float %arg0) #0 {
%val = load float, ptr addrspace(3) @lds
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

declare i32 @llvm.amdgcn.workitem.id.x()

@local_var32 = addrspace(3) global i32 undef, align 4
@local_var64 = addrspace(3) global i64 undef, align 8
@local_var32 = addrspace(3) global i32 poison, align 4
@local_var64 = addrspace(3) global i64 poison, align 8

; Show what the atomic optimization pass will do for local pointers.

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/divergence-at-use.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 - < %s | FileCheck %s

@local = addrspace(3) global i32 undef
@local = addrspace(3) global i32 poison

define amdgpu_kernel void @reducible() {
; CHECK-LABEL: reducible:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

declare i32 @llvm.amdgcn.workitem.id.x() #0

@lds.obj = addrspace(3) global [256 x i32] undef, align 4
@lds.obj = addrspace(3) global [256 x i32] poison, align 4

define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
; CI-LABEL: write_ds_sub0_offset0_global:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/ds_read2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
; FIXME: We don't get cases where the address was an SGPR because we
; get a copy to the address register for each one.

@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
@lds = addrspace(3) global [512 x float] poison, align 4
@lds.f64 = addrspace(3) global [512 x double] poison, align 8

define amdgpu_kernel void @simple_read2_f32(ptr addrspace(1) %out) #0 {
; CI-LABEL: simple_read2_f32:
Expand Down Expand Up @@ -921,7 +921,7 @@ define amdgpu_kernel void @misaligned_read2_f64(ptr addrspace(1) %out, ptr addrs
ret void
}

@foo = addrspace(3) global [4 x i32] undef, align 4
@foo = addrspace(3) global [4 x i32] poison, align 4

define amdgpu_kernel void @load_constant_adjacent_offsets(ptr addrspace(1) %out) {
; CI-LABEL: load_constant_adjacent_offsets:
Expand Down Expand Up @@ -983,7 +983,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(ptr addrspace(1) %out)
ret void
}

@bar = addrspace(3) global [4 x i64] undef, align 4
@bar = addrspace(3) global [4 x i64] poison, align 4

define amdgpu_kernel void @load_misaligned64_constant_offsets(ptr addrspace(1) %out) {
; CI-LABEL: load_misaligned64_constant_offsets:
Expand Down Expand Up @@ -1017,7 +1017,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(ptr addrspace(1) %
ret void
}

@bar.large = addrspace(3) global [4096 x i64] undef, align 4
@bar.large = addrspace(3) global [4096 x i64] poison, align 4

define amdgpu_kernel void @load_misaligned64_constant_large_offsets(ptr addrspace(1) %out) {
; CI-LABEL: load_misaligned64_constant_large_offsets:
Expand Down Expand Up @@ -1053,8 +1053,8 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(ptr addrspac
ret void
}

@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] poison, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] poison, align 4

define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(ptr addrspace(1) %C, i32 %lda, i32 %ldb) #0 {
; CI-LABEL: sgemm_inner_loop_read2_sequence:
Expand Down Expand Up @@ -1440,7 +1440,7 @@ define amdgpu_ps <2 x float> @ds_read_interp_read(i32 inreg %prims, ptr addrspac
ret <2 x float> %r1
}

@v2i32_align1 = internal addrspace(3) global [100 x <2 x i32>] undef, align 1
@v2i32_align1 = internal addrspace(3) global [100 x <2 x i32>] poison, align 1

define amdgpu_kernel void @read2_v2i32_align1_odd_offset(ptr addrspace(1) %out) {
; CI-LABEL: read2_v2i32_align1_odd_offset:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/ds_read2_offset_order.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s

@lds = addrspace(3) global [512 x float] undef, align 4
@lds = addrspace(3) global [512 x float] poison, align 4

; offset0 is larger than offset1

Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt,-enable-ds128 < %s | FileCheck --check-prefix=CI %s

@lds = addrspace(3) global [512 x float] undef, align 4
@lds.v2 = addrspace(3) global [512 x <2 x float>] undef, align 4
@lds.v3 = addrspace(3) global [512 x <3 x float>] undef, align 4
@lds.v4 = addrspace(3) global [512 x <4 x float>] undef, align 4
@lds.v8 = addrspace(3) global [512 x <8 x float>] undef, align 4
@lds.v16 = addrspace(3) global [512 x <16 x float>] undef, align 4
@lds = addrspace(3) global [512 x float] poison, align 4
@lds.v2 = addrspace(3) global [512 x <2 x float>] poison, align 4
@lds.v3 = addrspace(3) global [512 x <3 x float>] poison, align 4
@lds.v4 = addrspace(3) global [512 x <4 x float>] poison, align 4
@lds.v8 = addrspace(3) global [512 x <8 x float>] poison, align 4
@lds.v16 = addrspace(3) global [512 x <16 x float>] poison, align 4

; CI-LABEL: {{^}}simple_read2_v2f32_superreg_align4:
; CI: ds_read2_b32 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} offset1:1{{$}}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
@lds = addrspace(3) global [512 x float] poison, align 4
@lds.f64 = addrspace(3) global [512 x double] poison, align 8


; GCN-LABEL: @simple_read2st64_f32_0_1
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/ds_write2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,-unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-ALIGNED %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt,+unaligned-access-mode < %s | FileCheck -enable-var-scope -check-prefixes=GFX9,GFX9-UNALIGNED %s

@lds = addrspace(3) global [512 x float] undef, align 4
@lds.f64 = addrspace(3) global [512 x double] undef, align 8
@lds = addrspace(3) global [512 x float] poison, align 4
@lds.f64 = addrspace(3) global [512 x double] poison, align 8

define amdgpu_kernel void @simple_write2_one_val_f32(ptr addrspace(1) %C, ptr addrspace(1) %in) #0 {
; CI-LABEL: simple_write2_one_val_f32:
Expand Down Expand Up @@ -764,7 +764,7 @@ define amdgpu_kernel void @simple_write2_two_val_f64(ptr addrspace(1) %C, ptr ad
ret void
}

@foo = addrspace(3) global [4 x i32] undef, align 4
@foo = addrspace(3) global [4 x i32] poison, align 4

define amdgpu_kernel void @store_constant_adjacent_offsets() {
; CI-LABEL: store_constant_adjacent_offsets:
Expand Down Expand Up @@ -808,7 +808,7 @@ define amdgpu_kernel void @store_constant_disjoint_offsets() {
ret void
}

@bar = addrspace(3) global [4 x i64] undef, align 4
@bar = addrspace(3) global [4 x i64] poison, align 4

define amdgpu_kernel void @store_misaligned64_constant_offsets() {
; CI-LABEL: store_misaligned64_constant_offsets:
Expand All @@ -834,7 +834,7 @@ define amdgpu_kernel void @store_misaligned64_constant_offsets() {
ret void
}

@bar.large = addrspace(3) global [4096 x i64] undef, align 4
@bar.large = addrspace(3) global [4096 x i64] poison, align 4

define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
; CI-LABEL: store_misaligned64_constant_large_offsets:
Expand Down Expand Up @@ -862,8 +862,8 @@ define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
ret void
}

@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] poison, align 4
@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] poison, align 4

define amdgpu_kernel void @write2_sgemm_sequence(ptr addrspace(1) %C, i32 %lda, i32 %ldb, ptr addrspace(1) %in) #0 {
; CI-LABEL: write2_sgemm_sequence:
Expand Down Expand Up @@ -1000,7 +1000,7 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(ptr addrspace(3)
ret void
}

@v2i32_align1 = internal addrspace(3) global [100 x <2 x i32>] undef, align 1
@v2i32_align1 = internal addrspace(3) global [100 x <2 x i32>] poison, align 1

define amdgpu_kernel void @write2_v2i32_align1_odd_offset() {
; CI-LABEL: write2_v2i32_align1_odd_offset:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

@lds = addrspace(3) global [512 x float] undef, align 4
@lds = addrspace(3) global [512 x float] poison, align 4

; GCN-LABEL: @simple_write2st64_one_val_f32_0_1
; CI-DAG: s_mov_b32 m0
Expand Down
Loading
Loading