@@ -24,10 +24,11 @@ function ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::
24
24
for (n,ind) ∈ enumerate (@view (indv[start: end ]))
25
25
index_types <<= 8
26
26
indices <<= 8
27
- if mref. loopindex [n]
27
+ if mref. loopedindex [n]
28
28
index_types |= LoopIndex
29
+ indices |= getloopid (ls, ind)
29
30
else
30
- parent = getop ( opdict, ind, nothing )
31
+ parent = get (ls . opdict, ind, nothing )
31
32
if parent === nothing
32
33
index_types |= SymbolicIndex
33
34
indices |= findindoradd! (arraysymbolinds, ind)
@@ -41,14 +42,19 @@ function ArrayRefStruct(ls::LoopSet, mref::ArrayReferenceMeta, arraysymbolinds::
41
42
end
42
43
43
44
"""
    OperationStruct

Compact, fixed-size description of a single operation.

# Fields
- `loopdeps`, `reduceddeps`, `childdeps`, `parents`: dependency information, each
  packed into a `UInt64`.
- `node_type`: the `OperationType` of the operation.
- `array`: `UInt8` id of the array the operation refers to.
- `symid`: `UInt8` id of the operation's variable name.
"""
struct OperationStruct
    # The instruction itself is deliberately not stored here; callers carry it
    # alongside the struct instead of inside it.
    loopdeps::UInt64
    reduceddeps::UInt64
    childdeps::UInt64
    parents::UInt64
    node_type::OperationType
    array::UInt8
    symid::UInt8
end
54
# Node-type predicates: each returns `true` when the `node_type` of the given
# `OperationStruct` equals the corresponding operation kind.
isload(os::OperationStruct) = os.node_type == memload
isstore(os::OperationStruct) = os.node_type == memstore
iscompute(os::OperationStruct) = os.node_type == compute
isconstant(os::OperationStruct) = os.node_type == constant
52
58
function findmatchingarray (ls:: LoopSet , array:: Symbol )
53
59
id = 0x01
54
60
for as ∈ ls. refs_aliasing_syms
@@ -80,19 +86,19 @@ function parents_uint(ls::LoopSet, op::Operation)
80
86
p = zero (UInt64)
81
87
for parent ∈ parents (op)
82
88
p <<= 8
83
- p |= identifier (op )
89
+ p |= identifier (parent )
84
90
end
85
91
p
86
92
end
87
"""
    OperationStruct!(varnames::Vector{Symbol}, ls::LoopSet, op::Operation)

Build the `OperationStruct` describing `op` within the loop set `ls`.

The name of `op` is looked up in `varnames` via `findindoradd!`, and the result is
stored as the struct's `symid`. As the `!` suggests, `varnames` may be mutated by
that call (presumably the name is appended when it is not yet present; confirm
against `findindoradd!`).
"""
function OperationStruct!(varnames::Vector{Symbol}, ls::LoopSet, op::Operation)
    ld = loopdeps_uint(ls, op)
    rd = reduceddeps_uint(ls, op)
    cd = childdeps_uint(ls, op)
    p = parents_uint(ls, op)
    # Operations that do not access memory get the array id 0x00.
    array = accesses_memory(op) ? findmatchingarray(ls, vptr(op.ref)) : 0x00
    symid = findindoradd!(varnames, name(op))
    return OperationStruct(ld, rd, cd, p, op.node_type, array, symid)
end
98
104
## Turn a LoopSet into a type object that can be used to reconstruct the LoopSet.
@@ -112,12 +118,12 @@ function loop_boundaries(ls::LoopSet)
112
118
else
113
119
Expr (:call , Expr (:call , :(:), loop. startsym, loop. stopsym))
114
120
end
115
- push! (lbd, lexpr)
121
+ push! (lbd. args , lexpr)
116
122
end
117
123
lbd
118
124
end
119
125
120
- function argmeta_and_costs_description (ls:: LoopSet , arraysymbolinds)
126
+ function argmeta_and_consts_description (ls:: LoopSet , arraysymbolinds)
121
127
Expr (
122
128
:curly , :Tuple ,
123
129
length (arraysymbolinds),
@@ -130,14 +136,22 @@ function argmeta_and_costs_description(ls::LoopSet, arraysymbolinds)
130
136
)
131
137
end
132
138
133
- function loopset_return_value (ls:: LoopSet )
139
+ function loopset_return_value (ls:: LoopSet , :: Val{extract} ) where {extract}
134
140
if length (ls. outer_reductions) == 1
135
- Expr (:call , :extract_data , Symbol (mangledvar (operations (ls)[ls. outer_reductions[1 ]]), 0 ))
141
+ if extract
142
+ Expr (:call , :extract_data , Symbol (mangledvar (operations (ls)[ls. outer_reductions[1 ]]), 0 ))
143
+ else
144
+ Symbol (mangledvar (operations (ls)[ls. outer_reductions[1 ]]), 0 )
145
+ end
136
146
elseif length (ls. outer_reductions) > 1
137
147
ret = Expr (:tuple )
138
148
ops = operations (ls)
139
149
for or ∈ ls. outer_reductions
140
- push! (ret. args, Expr (:call , :extract_data , Symbol (mangledvar (ops[or]), 0 )))
150
+ if extract
151
+ push! (ret. args, Expr (:call , :extract_data , Symbol (mangledvar (ops[or]), 0 )))
152
+ else
153
+ push! (ret. args, Symbol (mangledvar (ops[or]), 0 ))
154
+ end
141
155
end
142
156
ret
143
157
else
@@ -149,14 +163,20 @@ end
149
163
# Try to condense in a type-stable manner
150
164
function generate_call (ls:: LoopSet )
151
165
operation_descriptions = Expr (:curly , :Tuple )
152
- foreach (op -> push! (operation_descriptions. args, OperationStruct (ls, op)), operations (ls))
166
+ varnames = Symbol[]
167
+ for op ∈ operations (ls)
168
+ instr = instruction (op)
169
+ push! (operation_descriptions. args, QuoteNode (instr. mod))
170
+ push! (operation_descriptions. args, QuoteNode (instr. instr))
171
+ push! (operation_descriptions. args, OperationStruct! (varnames, ls, op))
172
+ end
153
173
arraysymbolinds = Symbol[]
154
174
arrayref_descriptions = Expr (:curly , :Tuple )
155
175
foreach (ref -> push! (arrayref_descriptions. args, ArrayRefStruct (ls, ref, arraysymbolinds)), ls. refs_aliasing_syms)
156
176
argmeta = argmeta_and_consts_description (ls, arraysymbolinds)
157
177
loop_bounds = loop_boundaries (ls)
158
178
159
- q = Expr (:call , :_avx! , operation_descriptions, arrayref_descriptions, argmeta, loop_bounds)
179
+ q = Expr (:call , lv ( :_avx_! ) , operation_descriptions, arrayref_descriptions, argmeta, loop_bounds)
160
180
161
181
foreach (ref -> push! (q. args, vptr (ref)), ls. refs_aliasing_syms)
162
182
foreach (is -> push! (q. args, last (is)), ls. preamble_symsym)
@@ -166,16 +186,29 @@ end
166
186
167
187
"""
    setup_call(ls::LoopSet)

Assemble the expression that invokes the generated call for `ls` and return
`ls.preamble` with that expression appended to it.

When `ls` has outer reductions, the call is assigned to the value produced by
`loopset_return_value(ls, Val(false))`, the mangled accumulator symbols are
declared `local` in the preamble, and one statement per reduction combines the
accumulator back into the user-visible variable.
"""
function setup_call(ls::LoopSet)
    call = generate_call(ls)
    hasouterreductions = !isempty(ls.outer_reductions)
    if hasouterreductions
        retv = loopset_return_value(ls, Val(false))
        call = Expr(:(=), retv, call)
    end
    q = Expr(:block, gc_preserve(ls, call))
    outer_reducts = Expr(:local)
    for or ∈ ls.outer_reductions
        op = ls.operations[or]
        var = name(op)
        out = Symbol(mangledvar(op), 0)
        push!(outer_reducts.args, out)
        # var = combine(out, var), where `combine` depends on the reduction's instruction.
        combine = lv(reduction_scalar_combine(instruction(op)))
        push!(q.args, Expr(:(=), var, Expr(:call, combine, out, var)))
    end
    # Only emit the `local` declaration when there is something to declare.
    hasouterreductions && pushpreamble!(ls, outer_reducts)
    append!(ls.preamble.args, q.args)
    return ls.preamble
end
180
210
211
"""
    @_avx expr

Construct a `LoopSet` from the expression `expr`, pass it to `setup_call`, and
return the resulting expression escaped so that it is evaluated in the caller's
scope.
"""
macro _avx(q)
    return esc(setup_call(LoopSet(q)))
end
181
214
0 commit comments