3
3
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
4
4
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
5
5
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
6
+ ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
7
+
8
+ ; OPT-LABEL: @vector_read(
9
+ ; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
10
+ ; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
6
11
7
12
; FUNC-LABEL: {{^}}vector_read:
8
13
; EG: MOV
12
17
; EG: MOVA_INT
13
18
define void @vector_read (i32 addrspace (1 )* %out , i32 %index ) {
14
19
entry:
15
- %0 = alloca [4 x i32 ]
16
- %x = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 0
17
- %y = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 1
18
- %z = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 2
19
- %w = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 3
20
+ %tmp = alloca [4 x i32 ]
21
+ %x = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 0
22
+ %y = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 1
23
+ %z = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 2
24
+ %w = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 3
20
25
store i32 0 , i32* %x
21
26
store i32 1 , i32* %y
22
27
store i32 2 , i32* %z
23
28
store i32 3 , i32* %w
24
- %1 = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 %index
25
- %2 = load i32 , i32* %1
26
- store i32 %2 , i32 addrspace (1 )* %out
29
+ %tmp1 = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 %index
30
+ %tmp2 = load i32 , i32* %tmp1
31
+ store i32 %tmp2 , i32 addrspace (1 )* %out
27
32
ret void
28
33
}
29
34
35
+ ; OPT-LABEL: @vector_write(
36
+ ; OPT: %0 = insertelement <4 x i32> zeroinitializer, i32 1, i32 %w_index
37
+ ; OPT: %1 = extractelement <4 x i32> %0, i32 %r_index
38
+ ; OPT: store i32 %1, i32 addrspace(1)* %out, align 4
39
+
30
40
; FUNC-LABEL: {{^}}vector_write:
31
41
; EG: MOV
32
42
; EG: MOV
@@ -36,42 +46,95 @@ entry:
36
46
; EG: MOVA_INT
37
47
define void @vector_write (i32 addrspace (1 )* %out , i32 %w_index , i32 %r_index ) {
38
48
entry:
39
- %0 = alloca [4 x i32 ]
40
- %x = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 0
41
- %y = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 1
42
- %z = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 2
43
- %w = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 3
49
+ %tmp = alloca [4 x i32 ]
50
+ %x = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 0
51
+ %y = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 1
52
+ %z = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 2
53
+ %w = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 3
44
54
store i32 0 , i32* %x
45
55
store i32 0 , i32* %y
46
56
store i32 0 , i32* %z
47
57
store i32 0 , i32* %w
48
- %1 = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 %w_index
49
- store i32 1 , i32* %1
50
- %2 = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 %r_index
51
- %3 = load i32 , i32* %2
52
- store i32 %3 , i32 addrspace (1 )* %out
58
+ %tmp1 = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 %w_index
59
+ store i32 1 , i32* %tmp1
60
+ %tmp2 = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 %r_index
61
+ %tmp3 = load i32 , i32* %tmp2
62
+ store i32 %tmp3 , i32 addrspace (1 )* %out
53
63
ret void
54
64
}
55
65
56
66
; This test should be optimized to:
57
67
; store i32 0, i32 addrspace(1)* %out
68
+
69
+ ; OPT-LABEL: @bitcast_gep(
70
+ ; OPT: store i32 0, i32 addrspace(1)* %out, align 4
71
+
58
72
; FUNC-LABEL: {{^}}bitcast_gep:
59
73
; EG: STORE_RAW
60
74
define void @bitcast_gep (i32 addrspace (1 )* %out , i32 %w_index , i32 %r_index ) {
61
75
entry:
62
- %0 = alloca [4 x i32 ]
63
- %x = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 0
64
- %y = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 1
65
- %z = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 2
66
- %w = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 3
76
+ %tmp = alloca [4 x i32 ]
77
+ %x = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 0
78
+ %y = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 1
79
+ %z = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 2
80
+ %w = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 3
67
81
store i32 0 , i32* %x
68
82
store i32 0 , i32* %y
69
83
store i32 0 , i32* %z
70
84
store i32 0 , i32* %w
71
- %1 = getelementptr [4 x i32 ], [4 x i32 ]* %0 , i32 0 , i32 1
72
- %2 = bitcast i32* %1 to [4 x i32 ]*
73
- %3 = getelementptr [4 x i32 ], [4 x i32 ]* %2 , i32 0 , i32 0
74
- %4 = load i32 , i32* %3
75
- store i32 %4 , i32 addrspace (1 )* %out
85
+ %tmp1 = getelementptr [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 1
86
+ %tmp2 = bitcast i32* %tmp1 to [4 x i32 ]*
87
+ %tmp3 = getelementptr [4 x i32 ], [4 x i32 ]* %tmp2 , i32 0 , i32 0
88
+ %tmp4 = load i32 , i32* %tmp3
89
+ store i32 %tmp4 , i32 addrspace (1 )* %out
90
+ ret void
91
+ }
92
+
93
+ ; OPT-LABEL: @vector_read_bitcast_gep(
94
+ ; OPT: %0 = extractelement <4 x i32> <i32 1065353216, i32 1, i32 2, i32 3>, i32 %index
95
+ ; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
96
+ define void @vector_read_bitcast_gep (i32 addrspace (1 )* %out , i32 %index ) {
97
+ entry:
98
+ %tmp = alloca [4 x i32 ]
99
+ %x = getelementptr inbounds [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 0
100
+ %y = getelementptr inbounds [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 1
101
+ %z = getelementptr inbounds [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 2
102
+ %w = getelementptr inbounds [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 3
103
+ %bc = bitcast i32* %x to float *
104
+ store float 1 .0 , float * %bc
105
+ store i32 1 , i32* %y
106
+ store i32 2 , i32* %z
107
+ store i32 3 , i32* %w
108
+ %tmp1 = getelementptr inbounds [4 x i32 ], [4 x i32 ]* %tmp , i32 0 , i32 %index
109
+ %tmp2 = load i32 , i32* %tmp1
110
+ store i32 %tmp2 , i32 addrspace (1 )* %out
111
+ ret void
112
+ }
113
+
114
+ ; FIXME: Should be able to promote this. Instcombine should fold the
115
+ ; cast in the hasOneUse case so it might not matter in practice
116
+
117
+ ; OPT-LABEL: @vector_read_bitcast_alloca(
118
+ ; OPT: alloca [4 x float]
119
+ ; OPT: store float
120
+ ; OPT: store float
121
+ ; OPT: store float
122
+ ; OPT: store float
123
+ ; OPT: load float
124
+ define void @vector_read_bitcast_alloca (float addrspace (1 )* %out , i32 %index ) {
125
+ entry:
126
+ %tmp = alloca [4 x i32 ]
127
+ %tmp.bc = bitcast [4 x i32 ]* %tmp to [4 x float ]*
128
+ %x = getelementptr inbounds [4 x float ], [4 x float ]* %tmp.bc , i32 0 , i32 0
129
+ %y = getelementptr inbounds [4 x float ], [4 x float ]* %tmp.bc , i32 0 , i32 1
130
+ %z = getelementptr inbounds [4 x float ], [4 x float ]* %tmp.bc , i32 0 , i32 2
131
+ %w = getelementptr inbounds [4 x float ], [4 x float ]* %tmp.bc , i32 0 , i32 3
132
+ store float 0 .0 , float * %x
133
+ store float 1 .0 , float * %y
134
+ store float 2 .0 , float * %z
135
+ store float 4 .0 , float * %w
136
+ %tmp1 = getelementptr inbounds [4 x float ], [4 x float ]* %tmp.bc , i32 0 , i32 %index
137
+ %tmp2 = load float , float * %tmp1
138
+ store float %tmp2 , float addrspace (1 )* %out
76
139
ret void
77
140
}
0 commit comments