Skip to content

Commit 7d5e2cb

Browse files
committed
R600: Run more tests with promote alloca disabled.
Re-run tests changed in r211110 to test both paths. Also fix broken check line. llvm-svn: 212895
1 parent d0b6f3e commit 7d5e2cb

File tree

4 files changed

+57
-22
lines changed

4 files changed

+57
-22
lines changed

llvm/test/CodeGen/R600/array-ptr-calc-i32.ll

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
1+
; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
2+
; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
23

34
declare i32 @llvm.SI.tid() nounwind readnone
45
declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
@@ -9,13 +10,17 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
910
; be 32-bits.
1011

1112
; SI-LABEL: @test_private_array_ptr_calc:
13+
1214
; SI: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
15+
16+
; SI-ALLOCA: V_MOVRELD_B32_e32 {{v[0-9]+}}, [[PTRREG]]
1317
;
1418
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
1519
; alloca to a vector. It currently fails because it does not know how
1620
; to interpret:
1721
; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
18-
; SI: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
22+
23+
; SI-PROMOTE: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
1924
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
2025
%alloca = alloca [4 x i32], i32 4, align 16
2126
%tid = call i32 @llvm.SI.tid() readnone

llvm/test/CodeGen/R600/indirect-private-64.ll

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
1-
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
1+
; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
2+
; RUN: llc -march=r600 -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
3+
24

35
declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
46

57
; SI-LABEL: @private_access_f64_alloca:
6-
; SI: DS_WRITE_B64
7-
; SI: DS_READ_B64
8+
9+
; SI-ALLOCA: V_MOVRELD_B32_e32
10+
; SI-ALLOCA: V_MOVRELD_B32_e32
11+
; SI-ALLOCA: V_MOVRELS_B32_e32
12+
; SI-ALLOCA: V_MOVRELS_B32_e32
13+
14+
; SI-PROMOTE: DS_WRITE_B64
15+
; SI-PROMOTE: DS_READ_B64
816
define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
917
%val = load double addrspace(1)* %in, align 8
1018
%array = alloca double, i32 16, align 8
@@ -17,10 +25,16 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
1725
}
1826

1927
; SI-LABEL: @private_access_v2f64_alloca:
20-
; SI: DS_WRITE_B64
21-
; SI: DS_WRITE_B64
22-
; SI: DS_READ_B64
23-
; SI: DS_READ_B64
28+
29+
; SI-ALLOCA: V_MOVRELD_B32_e32
30+
; SI-ALLOCA: V_MOVRELD_B32_e32
31+
; SI-ALLOCA: V_MOVRELS_B32_e32
32+
; SI-ALLOCA: V_MOVRELS_B32_e32
33+
34+
; SI-PROMOTE: DS_WRITE_B64
35+
; SI-PROMOTE: DS_WRITE_B64
36+
; SI-PROMOTE: DS_READ_B64
37+
; SI-PROMOTE: DS_READ_B64
2438
define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
2539
%val = load <2 x double> addrspace(1)* %in, align 16
2640
%array = alloca <2 x double>, i32 16, align 16
@@ -33,8 +47,14 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
3347
}
3448

3549
; SI-LABEL: @private_access_i64_alloca:
36-
; SI: DS_WRITE_B64
37-
; SI: DS_READ_B64
50+
51+
; SI-ALLOCA: V_MOVRELD_B32_e32
52+
; SI-ALLOCA: V_MOVRELD_B32_e32
53+
; SI-ALLOCA: V_MOVRELS_B32_e32
54+
; SI-ALLOCA: V_MOVRELS_B32_e32
55+
56+
; SI-PROMOTE: DS_WRITE_B64
57+
; SI-PROMOTE: DS_READ_B64
3858
define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
3959
%val = load i64 addrspace(1)* %in, align 8
4060
%array = alloca i64, i32 16, align 8
@@ -47,10 +67,20 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
4767
}
4868

4969
; SI-LABEL: @private_access_v2i64_alloca:
50-
; SI: DS_WRITE_B64
51-
; SI: DS_WRITE_B64
52-
; SI: DS_READ_B64
53-
; SI: DS_READ_B64
70+
71+
; SI-ALLOCA: V_MOVRELD_B32_e32
72+
; SI-ALLOCA: V_MOVRELD_B32_e32
73+
; SI-ALLOCA: V_MOVRELD_B32_e32
74+
; SI-ALLOCA: V_MOVRELD_B32_e32
75+
; SI-ALLOCA: V_MOVRELS_B32_e32
76+
; SI-ALLOCA: V_MOVRELS_B32_e32
77+
; SI-ALLOCA: V_MOVRELS_B32_e32
78+
; SI-ALLOCA: V_MOVRELS_B32_e32
79+
80+
; SI-PROMOTE: DS_WRITE_B64
81+
; SI-PROMOTE: DS_WRITE_B64
82+
; SI-PROMOTE: DS_READ_B64
83+
; SI-PROMOTE: DS_READ_B64
5484
define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
5585
%val = load <2 x i64> addrspace(1)* %in, align 16
5686
%array = alloca <2 x i64>, i32 16, align 16

llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
2-
31
; XFAIL: *
2+
; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s
43

54
; 64-bit select was originally lowered with a build_pair, and this
65
; could be simplified to 1 cndmask instead of 2, but that broke when
@@ -16,8 +15,8 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
1615

1716
; FIXME: Fix truncating store for local memory
1817
; SI-LABEL: @trunc_load_alloca_i64:
19-
; SI: DS_READ_B32
20-
; SI-NOT: DS_READ_B64
18+
; SI: V_MOVRELS_B32
19+
; SI-NOT: V_MOVRELS_B32
2120
; SI: S_ENDPGM
2221
define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
2322
%idx = add i32 %a, %b

llvm/test/CodeGen/R600/vector-alloca.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
2-
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
1+
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
2+
; RUN: llc -march=r600 -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
3+
; RUN: llc -march=r600 -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
34

45
; FUNC-LABEL: @vector_read
56
; EG: MOV
@@ -53,7 +54,7 @@ entry:
5354
; This test should be optimized to:
5455
; store i32 0, i32 addrspace(1)* %out
5556
; FUNC-LABEL: @bitcast_gep
56-
; CHECK: STORE_RAW
57+
; EG: STORE_RAW
5758
define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
5859
entry:
5960
%0 = alloca [4 x i32]

0 commit comments

Comments
 (0)