Skip to content

Commit 89395b7

Browse files
committed
Removed precompile body and reduced remainder code generation in the presence of non-vectorized static loops.
1 parent f02efaa commit 89395b7

File tree

6 files changed

+17
-356
lines changed

6 files changed

+17
-356
lines changed

Manifest.toml

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,6 @@ git-tree-sha1 = "6a35d9446b40ae5004cd7bd0f1ae3505528c7fd6"
3636
uuid = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
3737
version = "1.0.3"
3838

39-
[[OrderedCollections]]
40-
deps = ["Random", "Serialization", "Test"]
41-
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
42-
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
43-
version = "1.1.0"
44-
45-
[[Parameters]]
46-
deps = ["OrderedCollections"]
47-
git-tree-sha1 = "b62b2558efb1eef1fa44e4be5ff58a515c287e38"
48-
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
49-
version = "0.12.0"
50-
5139
[[Random]]
5240
deps = ["Serialization"]
5341
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -74,6 +62,11 @@ uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
7462
deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
7563
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
7664

65+
[[UnPack]]
66+
git-tree-sha1 = "e0cb9715adda456f7657e45377fd3063bf87179a"
67+
uuid = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
68+
version = "0.1.0"
69+
7770
[[VectorizationBase]]
7871
deps = ["CpuId", "LinearAlgebra"]
7972
git-tree-sha1 = "0ca41b27f0a918c8ee51bd495818fb7a12b9a19f"

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@ version = "0.6.22"
66
[deps]
77
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
88
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
9-
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
109
SIMDPirates = "21efa798-c60a-11e8-04d3-e1a92915a26a"
1110
SLEEFPirates = "476501e8-09a2-5ece-8869-fb82de89a1fa"
11+
UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
1212
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1313

1414
[compat]
1515
OffsetArrays = "1"
16-
Parameters = "0"
1716
SIMDPirates = "0.7.1"
1817
SLEEFPirates = "0.4"
18+
UnPack = "0"
1919
VectorizationBase = "0.9"
2020
julia = "1.1"
2121

benchmark/driver.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ include(joinpath(LOOPVECBENCHDIR, "benchmarkflops.jl"))
1010
include(joinpath(LOOPVECBENCHDIR, "plotbenchmarks.jl"))
1111

1212

13-
addprocs((Sys.CPU_THREADS >> 1)-1); nprocs()
13+
addprocs((Sys.CPU_THREADS >> 1)-1); nworkers()
1414

1515
@everywhere begin
1616
pkgdir(pkg::String) = abspath(joinpath(dirname(Base.find_package(pkg)), ".."))

src/LoopVectorization.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module LoopVectorization
22

3-
using VectorizationBase, SIMDPirates, SLEEFPirates, Parameters
3+
using VectorizationBase, SIMDPirates, SLEEFPirates, UnPack
44
using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector_load_expr,
55
mask, masktable, pick_vector_width_val, valmul, valrem, valmuladd, valadd, valsub, _MM,
66
maybestaticlength, maybestaticsize, staticm1, subsetview, vzero, stridedpointer_for_broadcast,

src/lowering.jl

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
156156
vectorized = order[vectorizedloopnum]
157157
nisunrolled = isunrolled(us, n)
158158
nisvectorized = isvectorized(us, n)
159+
loopisstatic = isstaticloop(loop) & (!nisvectorized)
159160

160161
remmask = inclmask | nisvectorized
161162
Ureduct = (n == num_loops(ls)) ? calc_Ureduct(ls, us) : -1
@@ -178,8 +179,12 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
178179
end
179180
remblock = init_remblock(loop, loopsym)
180181
push!(q.args, remblock)
181-
UFt = 1
182-
while true
182+
UFt = if loopisstatic
183+
length(loop) % UF
184+
else
185+
1
186+
end
187+
while !iszero(UFt)
183188
comparison = if nisvectorized
184189
itercount = if loop.stopexact
185190
Expr(:call, :-, loop.stophint, Expr(:call, lv(:valmul), ls.W, UFt))
@@ -196,7 +201,7 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
196201
remblocknew = Expr(:elseif, comparison, lower_block(ls, ust, n, remmask, UFt))
197202
push!(remblock.args, remblocknew)
198203
remblock = remblocknew
199-
if !(UFt < UF - 1 + nisvectorized) || UFt == Ureduct
204+
if !(UFt < UF - 1 + nisvectorized) || UFt == Ureduct || loopisstatic
200205
break
201206
else
202207
UFt += 1

0 commit comments

Comments
 (0)