Skip to content

Commit 4123024

Browse files
authored
Merge pull request #66 from timholy/teh/cartesianindices
WIP: support for CartesianIndices
2 parents 89395b7 + 51b37e2 commit 4123024

16 files changed

+295
-107
lines changed

Manifest.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,6 @@ version = "0.1.0"
6969

7070
[[VectorizationBase]]
7171
deps = ["CpuId", "LinearAlgebra"]
72-
git-tree-sha1 = "0ca41b27f0a918c8ee51bd495818fb7a12b9a19f"
72+
git-tree-sha1 = "76e8817f7732d9a127191f5bcd5fe3a5eed0fb3e"
7373
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
74-
version = "0.9.0"
74+
version = "0.9.2"

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ OffsetArrays = "1"
1616
SIMDPirates = "0.7.1"
1717
SLEEFPirates = "0.4"
1818
UnPack = "0"
19-
VectorizationBase = "0.9"
19+
VectorizationBase = "0.9.2"
2020
julia = "1.1"
2121

2222
[extras]

src/LoopVectorization.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector
55
mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valadd, valsub, _MM,
66
maybestaticlength, maybestaticsize, staticm1, subsetview, vzero, stridedpointer_for_broadcast,
77
Static, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange, unwrap, maybestaticrange,
8+
AbstractColumnMajorStridedPointer, AbstractRowMajorStridedPointer, AbstractSparseStridedPointer, AbstractStaticStridedPointer,
89
PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct,
910
maybestaticfirst, maybestaticlast
1011
using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod,

src/add_loads.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ function add_load!(
2424
ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int
2525
)
2626
length(mpref.loopdependencies) == 0 && return add_constant!(ls, var, mpref, elementbytes)
27-
ref = mpref.mref
2827
op = Operation( ls, var, elementbytes, :getindex, memload, mpref )
2928
add_load!(ls, op, true, false)
3029
end

src/condense_loopset.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,11 @@
44
Base.:|(u::Unsigned, it::IndexType) = u | UInt8(it)
55
Base.:(==)(u::Unsigned, it::IndexType) = (u % UInt8) == UInt8(it)
66

7+
"""
8+
`ArrayRefStruct` stores a representation of an array-reference expression such as `A[i,j]`.
9+
It supports array-references with up to 8 indexes, where the data for each consecutive index is packed into corresponding 8-bit fields
10+
of `index_types` (storing the enum `IndexType`), `indices` (the `id` for each index symbol), and `offsets` (currently unused).
11+
"""
712
struct ArrayRefStruct{array,ptr}
813
index_types::UInt64
914
indices::UInt64
@@ -387,13 +392,8 @@ function setup_call(ls::LoopSet, inline = Int8(2), U = zero(Int8), T = zero(Int8
387392
# Creating an anonymous function and calling it also achieves the outlining, while still
388393
# inlining the generated function into the loop preamble.
389394
if inline == Int8(2)
390-
if num_loops(ls) == 1
391-
iszero(U) ? lower(ls) : lower(ls, U, -one(U))
392-
else
393-
setup_call_inline(ls, U, T)
394-
end
395+
setup_call_inline(ls, U, T)
395396
else
396397
setup_call_noinline(ls, U, T)
397398
end
398399
end
399-

src/determinestrategy.jl

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ function maybedemotesize(T::Int, N::Int, U::Int, Uloop::Loop, maxTbase::Int)
348348
end
349349
function solve_tilesize(
350350
ls::LoopSet, unrolled::Symbol, tiled::Symbol,
351-
cost_vec::AbstractVector{Float64},
351+
cost_vec::AbstractVector{Float64},
352352
reg_pressure::AbstractVector{Int},
353353
W::Int, vectorized::Symbol
354354
)
@@ -440,7 +440,7 @@ function evaluate_cost_tile(
440440
# Need to check if fusion is possible
441441
size_T = biggest_type_size(ls)
442442
W, Wshift = VectorizationBase.pick_vector_width_shift(length(ls, vectorized), size_T)::Tuple{Int,Int}
443-
# costs =
443+
# costs =
444444
# cost_mat[1] / ( unrolled * tiled)
445445
# cost_mat[2] / ( tiled)
446446
# cost_mat[3] / ( unrolled)
@@ -574,7 +574,7 @@ function choose_unroll_order(ls::LoopSet, lowest_cost::Float64 = Inf)
574574
iter = iterate(lo, state)
575575
iter === nothing && return best_order, best_vec, lowest_cost
576576
new_order, state = iter
577-
end
577+
end
578578
end
579579
function choose_tile(ls::LoopSet)
580580
lo = LoopOrders(ls)
@@ -632,4 +632,3 @@ function register_pressure(ls::LoopSet)
632632
tU * tT * rp[1] + tU * rp[2] + rp[3] + rp[4]
633633
end
634634
end
635-

src/graphs.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,9 +174,11 @@ Base.@propagate_inbounds Base.getindex(lo::LoopOrder, i...) = lo.oporder[LinearI
174174
# O(N) search is faster at small sizes
175175
struct LoopSet
176176
loopsymbols::Vector{Symbol}
177+
loopsymbol_offsets::Vector{Int} # loopsymbols[i] corresponds to loops[loopsymbol_offsets[i]+1:loopsymbol_offsets[i+1]] (CartesianIndex handling)
177178
loops::Vector{Loop}
178179
opdict::Dict{Symbol,Operation}
179180
operations::Vector{Operation} # Split them to make it easier to iterate over just a subset
181+
operation_offsets::Vector{Int}
180182
outer_reductions::Vector{Int} # IDs of reduction operations that need to be reduced at end.
181183
loop_order::LoopOrder
182184
preamble::Expr
@@ -281,9 +283,9 @@ includesarray(ls::LoopSet, array::Symbol) = array ∈ ls.includedarrays
281283

282284
function LoopSet(mod::Symbol)# = :LoopVectorization)
283285
LoopSet(
284-
Symbol[], Loop[],
286+
Symbol[], [0], Loop[],
285287
Dict{Symbol,Operation}(),
286-
Operation[],
288+
Operation[], [0],
287289
Int[],
288290
LoopOrder(),
289291
Expr(:block),

src/lower_load.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ function pushvectorload!(q::Expr, op::Operation, var::Symbol, td::UnrollArgs, U:
88
end
99
push!(q.args, Expr(:(=), name, instrcall))
1010
end
11-
function lower_load_scalar!(
11+
function lower_load_scalar!(
1212
q::Expr, op::Operation, vectorized::Symbol, W::Symbol, unrolled::Symbol, tiled::Symbol, U::Int,
1313
suffix::Union{Nothing,Int}, mask::Union{Nothing,Symbol,Unsigned} = nothing
1414
)
@@ -60,6 +60,3 @@ function lower_load!(
6060
lower_load_scalar!(q, op, vectorized, W, unrolled, tiled, U, suffix, mask)
6161
end
6262
end
63-
64-
65-

src/lower_memory_common.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,4 +117,3 @@ function name_memoffset(var::Symbol, op::Operation, td::UnrollArgs, W::Symbol, v
117117
end
118118
name, mo
119119
end
120-

src/lowering.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,8 +344,10 @@ function setup_preamble!(ls::LoopSet, us::UnrollSpecification)
344344
vectorized = order[vectorizedloopnum]
345345
# println("Setup preamble")
346346
W = ls.W; typeT = ls.T
347-
length(ls.includedarrays) == 0 || push!(ls.preamble.args, Expr(:(=), typeT, determine_eltype(ls)))
348-
push!(ls.preamble.args, Expr(:(=), W, determine_width(ls, vectorized)))
347+
if length(ls.includedarrays) > 0
348+
push!(ls.preamble.args, Expr(:(=), typeT, determine_eltype(ls)))
349+
push!(ls.preamble.args, Expr(:(=), W, determine_width(ls, vectorized)))
350+
end
349351
lower_licm_constants!(ls)
350352
pushpreamble!(ls, definemask(getloop(ls, vectorized), W))#, U > 1 && unrolledloopnum == vectorizedloopnum))
351353
for op ∈ operations(ls)

src/memory_ops_common.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ function array_reference_meta!(ls::LoopSet, array::Symbol, rawindices, elementby
6666
else
6767
indop = get(ls.opdict, ind, nothing)
6868
if indop !== nothing && !isconstant(indop)
69-
pushparent!(parents, loopdependencies, reduceddeps, parent)
69+
pushparent!(parents, loopdependencies, reduceddeps, parent) # FIXME where does `parent` come from?
7070
# var = get(ls.opdict, ind, nothing)
7171
push!(indices, name(parent)); ninds += 1
7272
push!(loopedindex, false)

src/operations.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ These names will be further processed if op is tiled and/or unrolled.
174174
if tiled ∈ loopdependencies(op) # `suffix` is tilenumber
175175
mvar = Symbol(op, suffix, :_)
176176
end
177-
if unrolled ∈ loopdependencies(op) # `u` is unroll number
177+
if unrolled ∈ loopdependencies(op) # `u` is unroll number
178178
mvar = Symbol(op, u)
179179
end
180180
```
@@ -240,6 +240,3 @@ getindices(op::Operation) = op.ref.ref.indices
240240
# # access stride info?
241241
# op.numerical_metadata[symposition(op,sym)]
242242
# end
243-
244-
245-

0 commit comments

Comments
 (0)