Skip to content

WIP: support for CartesianIndices #66

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,6 @@ version = "0.1.0"

[[VectorizationBase]]
deps = ["CpuId", "LinearAlgebra"]
git-tree-sha1 = "0ca41b27f0a918c8ee51bd495818fb7a12b9a19f"
git-tree-sha1 = "76e8817f7732d9a127191f5bcd5fe3a5eed0fb3e"
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
version = "0.9.0"
version = "0.9.2"
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ OffsetArrays = "1"
SIMDPirates = "0.7.1"
SLEEFPirates = "0.4"
UnPack = "0"
VectorizationBase = "0.9"
VectorizationBase = "0.9.2"
julia = "1.1"

[extras]
Expand Down
1 change: 1 addition & 0 deletions src/LoopVectorization.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector
mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valadd, valsub, _MM,
maybestaticlength, maybestaticsize, staticm1, subsetview, vzero, stridedpointer_for_broadcast,
Static, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange, unwrap, maybestaticrange,
AbstractColumnMajorStridedPointer, AbstractRowMajorStridedPointer, AbstractSparseStridedPointer, AbstractStaticStridedPointer,
PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct,
maybestaticfirst, maybestaticlast
using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod,
Expand Down
1 change: 0 additions & 1 deletion src/add_loads.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ function add_load!(
ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int
)
length(mpref.loopdependencies) == 0 && return add_constant!(ls, var, mpref, elementbytes)
ref = mpref.mref
op = Operation( ls, var, elementbytes, :getindex, memload, mpref )
add_load!(ls, op, true, false)
end
Expand Down
12 changes: 6 additions & 6 deletions src/condense_loopset.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@
# Bitwise-OR an unsigned integer with the `UInt8` value of an `IndexType`.
# NOTE(review): presumably used when packing `IndexType` values into the 8-bit fields
# described in the `ArrayRefStruct` docstring — confirm against callers.
Base.:|(u::Unsigned, it::IndexType) = u | UInt8(it)
# Compare an unsigned integer with an `IndexType`: `u % UInt8` keeps only the lowest
# 8 bits of `u`, so any higher bits of `u` are ignored in the comparison.
Base.:(==)(u::Unsigned, it::IndexType) = (u % UInt8) == UInt8(it)

"""
`ArrayRefStruct` stores a representation of an array-reference expression such as `A[i,j]`.
It supports array references with up to 8 indices; the data for each consecutive index is packed into the corresponding 8-bit field
of `index_types` (storing the enum `IndexType`), `indices` (the `id` for each index symbol), and `offsets` (currently unused).
"""
struct ArrayRefStruct{array,ptr}
index_types::UInt64
indices::UInt64
Expand Down Expand Up @@ -387,13 +392,8 @@ function setup_call(ls::LoopSet, inline = Int8(2), U = zero(Int8), T = zero(Int8
# Creating an anonymous function and calling it also achieves the outlining, while still
# inlining the generated function into the loop preamble.
if inline == Int8(2)
if num_loops(ls) == 1
iszero(U) ? lower(ls) : lower(ls, U, -one(U))
else
setup_call_inline(ls, U, T)
end
setup_call_inline(ls, U, T)
else
setup_call_noinline(ls, U, T)
end
end

7 changes: 3 additions & 4 deletions src/determinestrategy.jl
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ function maybedemotesize(T::Int, N::Int, U::Int, Uloop::Loop, maxTbase::Int)
end
function solve_tilesize(
ls::LoopSet, unrolled::Symbol, tiled::Symbol,
cost_vec::AbstractVector{Float64},
cost_vec::AbstractVector{Float64},
reg_pressure::AbstractVector{Int},
W::Int, vectorized::Symbol
)
Expand Down Expand Up @@ -440,7 +440,7 @@ function evaluate_cost_tile(
# Need to check if fusion is possible
size_T = biggest_type_size(ls)
W, Wshift = VectorizationBase.pick_vector_width_shift(length(ls, vectorized), size_T)::Tuple{Int,Int}
# costs =
# costs =
# cost_mat[1] / ( unrolled * tiled)
# cost_mat[2] / ( tiled)
# cost_mat[3] / ( unrolled)
Expand Down Expand Up @@ -574,7 +574,7 @@ function choose_unroll_order(ls::LoopSet, lowest_cost::Float64 = Inf)
iter = iterate(lo, state)
iter === nothing && return best_order, best_vec, lowest_cost
new_order, state = iter
end
end
end
function choose_tile(ls::LoopSet)
lo = LoopOrders(ls)
Expand Down Expand Up @@ -632,4 +632,3 @@ function register_pressure(ls::LoopSet)
tU * tT * rp[1] + tU * rp[2] + rp[3] + rp[4]
end
end

6 changes: 4 additions & 2 deletions src/graphs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,11 @@ Base.@propagate_inbounds Base.getindex(lo::LoopOrder, i...) = lo.oporder[LinearI
# O(N) search is faster at small sizes
struct LoopSet
loopsymbols::Vector{Symbol}
loopsymbol_offsets::Vector{Int} # symbol loopsymbols[i] corresponds to loops[lso[i]+1:lso[i+1]] (CartesianIndex handling)
loops::Vector{Loop}
opdict::Dict{Symbol,Operation}
operations::Vector{Operation} # Split them to make it easier to iterate over just a subset
operation_offsets::Vector{Int}
outer_reductions::Vector{Int} # IDs of reduction operations that need to be reduced at end.
loop_order::LoopOrder
preamble::Expr
Expand Down Expand Up @@ -281,9 +283,9 @@ includesarray(ls::LoopSet, array::Symbol) = array ∈ ls.includedarrays

function LoopSet(mod::Symbol)# = :LoopVectorization)
LoopSet(
Symbol[], Loop[],
Symbol[], [0], Loop[],
Dict{Symbol,Operation}(),
Operation[],
Operation[], [0],
Int[],
LoopOrder(),
Expr(:block),
Expand Down
5 changes: 1 addition & 4 deletions src/lower_load.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ function pushvectorload!(q::Expr, op::Operation, var::Symbol, td::UnrollArgs, U:
end
push!(q.args, Expr(:(=), name, instrcall))
end
function lower_load_scalar!(
function lower_load_scalar!(
q::Expr, op::Operation, vectorized::Symbol, W::Symbol, unrolled::Symbol, tiled::Symbol, U::Int,
suffix::Union{Nothing,Int}, mask::Union{Nothing,Symbol,Unsigned} = nothing
)
Expand Down Expand Up @@ -60,6 +60,3 @@ function lower_load!(
lower_load_scalar!(q, op, vectorized, W, unrolled, tiled, U, suffix, mask)
end
end



1 change: 0 additions & 1 deletion src/lower_memory_common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,3 @@ function name_memoffset(var::Symbol, op::Operation, td::UnrollArgs, W::Symbol, v
end
name, mo
end

6 changes: 4 additions & 2 deletions src/lowering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,10 @@ function setup_preamble!(ls::LoopSet, us::UnrollSpecification)
vectorized = order[vectorizedloopnum]
# println("Setup preamble")
W = ls.W; typeT = ls.T
length(ls.includedarrays) == 0 || push!(ls.preamble.args, Expr(:(=), typeT, determine_eltype(ls)))
push!(ls.preamble.args, Expr(:(=), W, determine_width(ls, vectorized)))
if length(ls.includedarrays) > 0
push!(ls.preamble.args, Expr(:(=), typeT, determine_eltype(ls)))
push!(ls.preamble.args, Expr(:(=), W, determine_width(ls, vectorized)))
end
lower_licm_constants!(ls)
pushpreamble!(ls, definemask(getloop(ls, vectorized), W))#, U > 1 && unrolledloopnum == vectorizedloopnum))
for op ∈ operations(ls)
Expand Down
2 changes: 1 addition & 1 deletion src/memory_ops_common.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ function array_reference_meta!(ls::LoopSet, array::Symbol, rawindices, elementby
else
indop = get(ls.opdict, ind, nothing)
if indop !== nothing && !isconstant(indop)
pushparent!(parents, loopdependencies, reduceddeps, parent)
pushparent!(parents, loopdependencies, reduceddeps, parent) # FIXME where does `parent` come from?
# var = get(ls.opdict, ind, nothing)
push!(indices, name(parent)); ninds += 1
push!(loopedindex, false)
Expand Down
5 changes: 1 addition & 4 deletions src/operations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ These names will be further processed if op is tiled and/or unrolled.
if tiled ∈ loopdependencies(op) # `suffix` is tilenumber
mvar = Symbol(op, suffix, :_)
end
if unrolled ∈ loopdependencies(op) # `u` is unroll number
if unrolled ∈ loopdependencies(op) # `u` is unroll number
mvar = Symbol(op, u)
end
```
Expand Down Expand Up @@ -240,6 +240,3 @@ getindices(op::Operation) = op.ref.ref.indices
# # access stride info?
# op.numerical_metadata[symposition(op,sym)]
# end



Loading