Skip to content

Commit ee20262

Browse files
committed
Bump version and reuse a couple of preallocated vectors.
1 parent 537325b commit ee20262

File tree

4 files changed

+8
-11
lines changed

4 files changed

+8
-11
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.6.2"
4+
version = "0.6.3"
55

66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

src/determinestrategy.jl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,7 @@ end
6767
function evaluate_cost_unroll(
6868
ls::LoopSet, order::Vector{Symbol}, max_cost = typemax(Float64), vectorized::Symbol = first(order)
6969
)
70-
# included_vars = Set{UInt}()
71-
included_vars = fill(false, length(operations(ls)))
70+
included_vars = fill!(resize!(ls.included_vars, length(operations(ls))), false)
7271
nested_loop_syms = Symbol[]#Set{Symbol}()
7372
total_cost = 0.0
7473
iter = 1.0
@@ -368,9 +367,9 @@ function evaluate_cost_tile(
368367
unrolled = order[2]
369368
ops = operations(ls)
370369
nops = length(ops)
371-
included_vars = fill(false, nops)
370+
included_vars = fill!(resize!(ls.included_vars, nops), false)
372371
unrolledtiled = fill(false, 2, nops)
373-
descendentsininnerloop = fill(false, nops)
372+
descendentsininnerloop = fill!(resize!(ls.place_after_loop, nops), false)
374373
innerloop = last(order)
375374
iters = fill(-99.9, nops)
376375
nested_loop_syms = Symbol[]# Set{Symbol}()

src/graphs.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,22 +142,22 @@ Base.@propagate_inbounds Base.getindex(lo::LoopOrder, i...) = lo.oporder[LinearI
142142

143143
# Must make it easy to iterate
144144
# outer_reductions is a vector of indices (within operation vectors) of the reduction operation, e.g. the vmuladd op in a dot product
145+
# O(N) search is faster at small sizes
145146
struct LoopSet
146147
loopsymbols::Vector{Symbol}
147148
loops::Vector{Loop}
148149
opdict::Dict{Symbol,Operation}
149150
operations::Vector{Operation} # Split them to make it easier to iterate over just a subset
150151
outer_reductions::Vector{Int} # IDs of reduction operations that need to be reduced at end.
151152
loop_order::LoopOrder
152-
# stridesets::Dict{ShortVector{Symbol},ShortVector{Symbol}}
153153
preamble::Expr
154154
preamble_symsym::Vector{Tuple{Int,Symbol}}
155155
preamble_symint::Vector{Tuple{Int,Int}}
156156
preamble_symfloat::Vector{Tuple{Int,Float64}}
157157
preamble_zeros::Vector{Int}
158158
preamble_ones::Vector{Int}
159159
includedarrays::Vector{Symbol}
160-
syms_aliasing_refs::Vector{Symbol} # O(N) search is faster at small sizes
160+
syms_aliasing_refs::Vector{Symbol}
161161
refs_aliasing_syms::Vector{ArrayReferenceMeta}
162162
cost_vec::Matrix{Float64}
163163
reg_pres::Matrix{Int}

src/operation_evaluation_order.jl

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,8 @@ function fillorder!(ls::LoopSet, order::Vector{Symbol}, loopistiled::Bool)
6161
end
6262
ops = operations(ls)
6363
nops = length(ops)
64-
included_vars = resize!(ls.included_vars, nops)
65-
fill!(included_vars, false)
66-
place_after_loop = resize!(ls.place_after_loop, nops)
67-
fill!(ls.place_after_loop, true)
64+
included_vars = fill!(resize!(ls.included_vars, nops), false)
65+
place_after_loop = fill!(resize!(ls.place_after_loop, nops), true)
6866
# to go inside out, we just have to include all those not-yet included depending on the current sym
6967
empty!(lo)
7068
for _n ∈ 1:nloops

0 commit comments

Comments
 (0)