Skip to content

Commit e524f50

Browse files
committed
Revert "In visitSTORE, always use FindBetterChain, rather than only when UseAA is enabled."
This reverts commit r282600 due to test failures with MCJIT. llvm-svn: 282604
1 parent 5217f84 commit e524f50

File tree

70 files changed

+2088
-1735
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

70 files changed

+2088
-1735
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 271 additions & 120 deletions
Large diffs are not rendered by default.

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
824824
MinFunctionAlignment = 0;
825825
PrefFunctionAlignment = 0;
826826
PrefLoopAlignment = 0;
827-
GatherAllAliasesMaxDepth = 18;
827+
GatherAllAliasesMaxDepth = 6;
828828
MinStackArgumentAlignment = 1;
829829
MinimumJumpTableEntries = 4;
830830
// TODO: the default will be switched to 0 in the next commit, along

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,16 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
446446
setSelectIsExpensive(false);
447447
PredictableSelectIsExpensive = false;
448448

449+
// We want to find all load dependencies for long chains of stores to enable
450+
// merging into very wide vectors. The problem is with vectors with > 4
451+
// elements. MergeConsecutiveStores will attempt to merge these because x8/x16
452+
// vectors are a legal type, even though we have to split the loads
453+
// usually. When we can more precisely specify load legality per address
454+
// space, we should be able to make FindBetterChain/MergeConsecutiveStores
455+
// smarter so that they can figure out what to do in 2 iterations without all
456+
// N > 4 stores on the same chain.
457+
GatherAllAliasesMaxDepth = 16;
458+
449459
// FIXME: Need to really handle these.
450460
MaxStoresPerMemcpy = 4096;
451461
MaxStoresPerMemmove = 4096;

llvm/test/CodeGen/AArch64/argument-blocks.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
6262
; but should go in an 8-byte aligned slot.
6363
define void @test_varargs_stackalign() {
6464
; CHECK-LABEL: test_varargs_stackalign:
65-
; CHECK-DARWINPCS: str {{x[0-9]+}}, [sp, #16]
65+
; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
6666

6767
call void(...) @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
6868
ret void

llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,17 @@
66
define void @fn9(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9, ...) nounwind noinline ssp {
77
; CHECK-LABEL: fn9:
88
; 9th fixed argument
9-
; CHECK: add x[[ADDR:[0-9]+]], sp, #72
9+
; CHECK: ldr {{w[0-9]+}}, [sp, #64]
10+
; CHECK: add [[ARGS:x[0-9]+]], sp, #72
11+
; CHECK: add {{x[0-9]+}}, [[ARGS]], #8
1012
; First vararg
11-
; CHECK-DAG: ldr {{w[0-9]+}}, [sp, #72]
13+
; CHECK: ldr {{w[0-9]+}}, [sp, #72]
14+
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
1215
; Second vararg
13-
; CHECK-DAG: ldr {{w[0-9]+}}, [x[[ADDR]]]
16+
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
17+
; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #8
1418
; Third vararg
15-
; CHECK-DAG: ldr {{w[0-9]+}}, [x[[ADDR]]], #8
19+
; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}]
1620
%1 = alloca i32, align 4
1721
%2 = alloca i32, align 4
1822
%3 = alloca i32, align 4

llvm/test/CodeGen/AArch64/arm64-abi.ll

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,10 @@ declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32,
205205
define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
206206
entry:
207207
; CHECK-LABEL: test8
208-
; CHECK: str w8, [sp]
208+
; CHECK: strb {{w[0-9]+}}, [sp, #3]
209+
; CHECK: strb wzr, [sp, #2]
210+
; CHECK: strb {{w[0-9]+}}, [sp, #1]
211+
; CHECK: strb wzr, [sp]
209212
; CHECK: bl
210213
; FAST-LABEL: test8
211214
; FAST: strb {{w[0-9]+}}, [sp]

llvm/test/CodeGen/AArch64/arm64-memset-inline.ll

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,11 @@ entry:
99
ret void
1010
}
1111

12-
; FIXME: This shouldn't need to load in a zero value to store
13-
; (e.g. stp xzr,xzr [sp, #16])
14-
1512
define void @t2() nounwind ssp {
1613
entry:
1714
; CHECK-LABEL: t2:
18-
; CHECK: movi v0.2d, #0000000000000000
19-
; CHECK: stur q0, [sp, #16]
2015
; CHECK: strh wzr, [sp, #32]
16+
; CHECK: stp xzr, xzr, [sp, #16]
2117
; CHECK: str xzr, [sp, #8]
2218
%buf = alloca [26 x i8], align 1
2319
%0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0

llvm/test/CodeGen/AArch64/arm64-stur.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,11 @@ define void @foo5(i8* %p, i32 %val) nounwind {
4747
ret void
4848
}
4949

50-
;; FIXME: Again, with the writing of a quadword zero...
51-
5250
define void @foo(%struct.X* nocapture %p) nounwind optsize ssp {
5351
; CHECK-LABEL: foo:
5452
; CHECK-NOT: str
55-
; CHECK: stur q0, [x0, #4]
56-
; CHECK-FIXME: stur xzr, [x0, #12]
57-
; CHECK-FIXME-NEXT: stur xzr, [x0, #4]
53+
; CHECK: stur xzr, [x0, #12]
54+
; CHECK-NEXT: stur xzr, [x0, #4]
5855
; CHECK-NEXT: ret
5956
%B = getelementptr inbounds %struct.X, %struct.X* %p, i64 0, i32 1
6057
%val = bitcast i64* %B to i8*

llvm/test/CodeGen/AArch64/merge-store.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@
44
@g0 = external global <3 x float>, align 16
55
@g1 = external global <3 x float>, align 4
66

7-
; CHECK: ldr q[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]], :lo12:g0
8-
;; TODO: this next line seems like a redundant no-op move?
9-
; CHECK: ins v0.s[1], v0.s[1]
7+
; CHECK: ldr s[[R0:[0-9]+]], {{\[}}[[R1:x[0-9]+]]{{\]}}, #4
8+
; CHECK: ld1{{\.?s?}} { v[[R0]]{{\.?s?}} }[1], {{\[}}[[R1]]{{\]}}
109
; CHECK: str d[[R0]]
1110

1211
define void @blam() {

llvm/test/CodeGen/AArch64/vector_merge_dep_check.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: llc < %s | FileCheck %s
1+
; RUN: llc --combiner-alias-analysis=false < %s | FileCheck %s
2+
; RUN: llc --combiner-alias-analysis=true < %s | FileCheck %s
23

34
; This test checks that we do not merge stores together which have
45
; dependencies through their non-chain operands (e.g. one store is the

llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,12 @@ define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out
8888
; SI-DAG: v_cvt_f32_ubyte2_e32
8989
; SI-DAG: v_cvt_f32_ubyte3_e32
9090

91+
; SI-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 24
92+
; SI-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16
9193
; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 16
9294
; SI-DAG: v_lshlrev_b32_e32 v{{[0-9]+}}, 8
9395
; SI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xffff,
96+
; SI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff00,
9497
; SI-DAG: v_add_i32
9598

9699
; SI: buffer_store_dwordx4

llvm/test/CodeGen/AMDGPU/debugger-insert-nops.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,13 @@
1-
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK
2-
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECKNOP
1+
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-insert-nops -verify-machineinstrs < %s | FileCheck %s
32

4-
; This test expects that we have one instance for each line in some order with "s_nop 0" instances after each.
5-
6-
; Check that each line appears at least once
7-
; CHECK-DAG: test01.cl:2:3
8-
; CHECK-DAG: test01.cl:3:3
9-
; CHECK-DAG: test01.cl:4:3
3+
; CHECK: test01.cl:2:{{[0-9]+}}
4+
; CHECK-NEXT: s_nop 0
105

6+
; CHECK: test01.cl:3:{{[0-9]+}}
7+
; CHECK-NEXT: s_nop 0
118

12-
; Check that each of each of the lines consists of the line output, followed by "s_nop 0"
13-
; CHECKNOP: test01.cl:{{[234]}}:3
14-
; CHECKNOP-NEXT: s_nop 0
15-
; CHECKNOP: test01.cl:{{[234]}}:3
16-
; CHECKNOP-NEXT: s_nop 0
17-
; CHECKNOP: test01.cl:{{[234]}}:3
18-
; CHECKNOP-NEXT: s_nop 0
9+
; CHECK: test01.cl:4:{{[0-9]+}}
10+
; CHECK-NEXT: s_nop 0
1911

2012
; CHECK: test01.cl:5:{{[0-9]+}}
2113
; CHECK-NEXT: s_nop 0
@@ -29,7 +21,7 @@ entry:
2921
call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !17, metadata !18), !dbg !19
3022
%0 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !20
3123
%arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 0, !dbg !20
32-
store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !20
24+
store i32 1, i32 addrspace(1)* %arrayidx, align 4, !dbg !21
3325
%1 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 4, !dbg !22
3426
%arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 1, !dbg !22
3527
store i32 2, i32 addrspace(1)* %arrayidx1, align 4, !dbg !23

llvm/test/CodeGen/AMDGPU/merge-stores.ll

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
2-
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
1+
; RUN: llc -march=amdgcn -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
2+
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
3+
4+
; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
5+
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis -amdgpu-load-store-vectorizer=0 < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
36

47
; This test is mostly to test DAG store merging, so disable the vectorizer.
58
; Run with devices with different unaligned load restrictions.
@@ -146,10 +149,17 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
146149
ret void
147150
}
148151

152+
; FIXME: Should be able to merge this
149153
; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
150-
; GCN: buffer_store_dwordx2
151-
; GCN: buffer_store_dword v
152-
; GCN: buffer_store_dword v
154+
; GCN-NOAA: buffer_store_dword v
155+
; GCN-NOAA: buffer_store_dword v
156+
; GCN-NOAA: buffer_store_dword v
157+
; GCN-NOAA: buffer_store_dword v
158+
159+
; GCN-AA: buffer_store_dwordx2
160+
; GCN-AA: buffer_store_dword v
161+
; GCN-AA: buffer_store_dword v
162+
153163
; GCN: s_endpgm
154164
define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
155165
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
@@ -468,9 +478,17 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1
468478
ret void
469479
}
470480

481+
; This works once AA is enabled on the subtarget
471482
; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
472483
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
473-
; GCN: buffer_store_dwordx4 [[LOAD]]
484+
485+
; GCN-NOAA: buffer_store_dword v
486+
; GCN-NOAA: buffer_store_dword v
487+
; GCN-NOAA: buffer_store_dword v
488+
; GCN-NOAA: buffer_store_dword v
489+
490+
; GCN-AA: buffer_store_dwordx4 [[LOAD]]
491+
474492
; GCN: s_endpgm
475493
define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
476494
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1

llvm/test/CodeGen/AMDGPU/private-element-size.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@
3232
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:24{{$}}
3333
; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:28{{$}}
3434

35-
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
36-
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
37-
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
38-
; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
35+
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen{{$}}
36+
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:4{{$}}
37+
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:8{{$}}
38+
; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], s9 offen offset:12{{$}}
3939
define void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
4040
entry:
4141
%tid = call i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,9 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
156156

157157
; FUNC-LABEL: @reorder_local_offsets
158158
; CI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:100 offset1:102
159-
; CI-DAG: ds_write2_b32 {{v[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:3 offset1:100
160-
; CI-DAG: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
159+
; CI: ds_write2_b32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} offset0:3 offset1:100
160+
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
161+
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:408
161162
; CI: buffer_store_dword
162163
; CI: s_endpgm
163164
define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
@@ -179,12 +180,12 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
179180
}
180181

181182
; FUNC-LABEL: @reorder_global_offsets
182-
; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
183-
; CI-DAG: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
184-
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
185-
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
186-
; CI-DAG: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
187-
; CI: buffer_store_dword
183+
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
184+
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
185+
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
186+
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
187+
; CI: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:408
188+
; CI: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
188189
; CI: s_endpgm
189190
define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
190191
%ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 3

llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,6 @@
33
; RUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA -check-prefix=HSA %s
44
; RUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA -check-prefix=HSA %s
55

6-
; FIXME: this fails because the load generated from extractelement is
7-
;; now properly recognized as forwardable to the value stored in
8-
;; insertelement, and thus the loads/stores drop away entirely. This
9-
;; makes the intended test, of running out of registers, not occur.
10-
11-
;; XFAIL: *
12-
136
; This ends up using all 256 registers and requires register
147
; scavenging which will fail to find an unused register.
158

llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
11
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
22
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
33

4-
;; FIXME: this fails because the load generated from extractelement is
5-
;; now properly recognized as forwardable to the value stored in
6-
;; insertelement, and thus the loads/stores drop away entirely. This
7-
;; makes the intended test, of running out of registers, not occur.
8-
;; XFAIL: *
9-
104
; This ends up using all 255 registers and requires register
115
; scavenging which will fail to find an unused register.
126

llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ define void @test_byval_8_bytes_alignment(i32 %i, ...) {
1212
entry:
1313
; CHECK: sub sp, sp, #12
1414
; CHECK: sub sp, sp, #4
15-
; CHECK: add r0, sp, #4
16-
; CHECK: stm sp, {r0, r1, r2, r3}
15+
; CHECK: stmib sp, {r1, r2, r3}
1716
%g = alloca i8*
1817
%g1 = bitcast i8** %g to i8*
1918
call void @llvm.va_start(i8* %g1)

llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,12 @@ entry:
5151
; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
5252

5353

54-
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
5554
; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
5655
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
5756
; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32
5857
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
59-
; REALIGN: orr r[[R1:[0-9]+]], r[[R1]], #16
58+
; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16
59+
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
6060
; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
6161

6262
; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48

llvm/test/CodeGen/ARM/ifcvt10.ll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ entry:
99
; CHECK-LABEL: t:
1010
; CHECK: vpop {d8}
1111
; CHECK-NOT: vpopne
12+
; CHECK: pop {r7, pc}
13+
; CHECK: vpop {d8}
1214
; CHECK: pop {r7, pc}
1315
br i1 undef, label %if.else, label %if.then
1416

llvm/test/CodeGen/ARM/memset-inline.ll

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,9 @@
33
define void @t1(i8* nocapture %c) nounwind optsize {
44
entry:
55
; CHECK-LABEL: t1:
6-
7-
;; FIXME: like with arm64-memset-inline.ll, learning how to merge
8-
;; stores made this code worse, since it now uses a vector move,
9-
;; instead of just using an strd instruction taking two registers.
10-
11-
; CHECK: vmov.i32 d16, #0x0
12-
; CHECK: vst1.32 {d16}, [r0:64]!
136
; CHECK: movs r1, #0
14-
; CHECK: str r1, [r0]
7+
; CHECK: strd r1, r1, [r0]
8+
; CHECK: str r1, [r0, #8]
159
call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
1610
ret void
1711
}

llvm/test/CodeGen/ARM/static-addr-hoisting.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ define void @multiple_store() {
66
; CHECK: movs [[VAL:r[0-9]+]], #42
77
; CHECK: movt r[[BASE1]], #15
88

9-
; CHECK-DAG: str [[VAL]], [r[[BASE1]]]
10-
; CHECK-DAG: str [[VAL]], [r[[BASE1]], #24]
11-
; CHECK-DAG: str.w [[VAL]], [r[[BASE1]], #42]
9+
; CHECK: str [[VAL]], [r[[BASE1]]]
10+
; CHECK: str [[VAL]], [r[[BASE1]], #24]
11+
; CHECK: str.w [[VAL]], [r[[BASE1]], #42]
1212

1313
; CHECK: movw r[[BASE2:[0-9]+]], #20394
1414
; CHECK: movt r[[BASE2]], #18

0 commit comments

Comments
 (0)