Skip to content

Commit c02d009

Browse files
committed
don't consider shuffles that require absurd unroll factors
1 parent 4fcf851 commit c02d009

File tree

2 files changed

+3
-17
lines changed

2 files changed

+3
-17
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.108"
4+
version = "0.12.109"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"

src/modeling/determinestrategy.jl

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ function cost(
111111
# cannot shuffle false means reject curly
112112
# either false means shuffle
113113
dont_shuffle =
114-
rejectinterleave(op) && (cannot_shuffle(op, u₁, u₂, contigind, indices))
114+
(Wshift > 3) || (rejectinterleave(op) && (cannot_shuffle(op, u₁, u₂, contigind, indices)))
115115
if dont_shuffle
116116
# offset = 0.0 # gather/scatter, alignment doesn't matter
117117
r = 1 << shifter
@@ -301,7 +301,6 @@ function unroll_no_reductions(ls, order, vloopsym)
301301
u = if compute_rt ≤ 1
302302
4
303303
elseif compute_rt > memory_rt
304-
# @show load_rt, store_rt, compute_rt, compute_l, rpc, rpp
305304
# if compute_rt > 40
306305
# max(VectorizationBase.nextpow2( min( 4, round(Int, compute_rt / memory_rt) ) ), 1)
307306
# else
@@ -312,7 +311,6 @@ function unroll_no_reductions(ls, order, vloopsym)
312311
else
313312
max(1, min(4, round(Int, 1.75compute_rt / load_rt)))
314313
end
315-
# @show load_rt, store_rt, compute_rt, compute_l, u, rpc, rpp
316314
# u = min(u, max(1, (reg_count(ls) ÷ max(1,round(Int,rp)))))
317315
# commented out here is to decide to align loops
318316
# if memory_rt > compute_rt && isone(u) && (length(order) > 1) && (last(order) === vloopsym) && length(getloop(ls, last(order))) > 8W
@@ -381,7 +379,6 @@ function determine_unroll_factor(
381379
end
382380
recip_throughput =
383381
max(compute_recip_throughput, load_recip_throughput, store_recip_throughput)
384-
# @show latency, recip_throughput
385382
recip_throughput, latency
386383
end
387384
function count_reductions(ls::LoopSet)
@@ -992,7 +989,6 @@ function load_elimination_cost_factor!(
992989
# cost_vec[1] -= rt
993990
# cost_vec[1] -= 0.5625 * iters
994991
# cost_vec[1] -= 0.5625 * iters / 2
995-
# @show rto, 0.8rt, op
996992
# reg_pressure[1] += 0.25rp
997993
reg_pressure[1] += 0.25rp
998994
cost_vec[2] += rt
@@ -1156,7 +1152,6 @@ function evaluate_cost_tile!(
11561152
@unpack u₁loopsym, u₂loopsym, vloopsym = unrollsyms
11571153
cacheunrolled!(ls, u₁loopsym, u₂loopsym, vloopsym)
11581154
# println("\n")
1159-
# @show order unrollsyms
11601155
# u₂loopsym = order[1]
11611156
# u₁loopsym = order[2]
11621157
ops = operations(ls)
@@ -1248,9 +1243,6 @@ function evaluate_cost_tile!(
12481243
inner₁ = u₁reached | depends_on_u₂
12491244
inner₂ = u₂reached | depends_on_u₁
12501245
# if isconstantop(op)
1251-
# if iscompute(op)
1252-
# @show inner₁, depends_on_u₁, inner₂, depends_on_u₂, op
1253-
# end
12541246
reduced_by_unrolling[1, 2, id] = inner₁ & !depends_on_u₁
12551247
reduced_by_unrolling[2, 2, id] = inner₂ & !depends_on_u₂
12561248
# else
@@ -1285,7 +1277,6 @@ function evaluate_cost_tile!(
12851277
size_T,
12861278
opisininnerloop,
12871279
)
1288-
# println("constoffelim")
12891280
continue
12901281
elseif load_elimination_cost_factor!(
12911282
cost_vec,
@@ -1298,11 +1289,8 @@ function evaluate_cost_tile!(
12981289
Wshift,
12991290
size_T,
13001291
)
1301-
# println("loadelim")
1302-
# A[i,j-1], A[i,j]
13031292
continue
13041293
end
1305-
#elseif isconstant(op)
13061294
end
13071295
rt, lat, rp = cost(ls, op, (u₁loopsym, u₂loopsym), vloopsym, Wshift, size_T)
13081296
if isload(op) & (!prefetch_good_idea)
@@ -1315,9 +1303,8 @@ function evaluate_cost_tile!(
13151303
else #FIXME: hack to not go crazy
13161304
max(zero(rp), rp - one(rp))
13171305
end
1318-
rto = rt
1306+
# rto = rt
13191307
rt *= iters[id]
1320-
# @show (u₁reducesrt, u₂reducesrt), (u₁reducesrp, u₂reducesrp), rto, rt, lat, rp, op
13211308
if isstore(op) & (!u₁reducesrt) & (!u₂reducesrt)
13221309
irreducible_storecosts += rt
13231310
end
@@ -1340,7 +1327,6 @@ function evaluate_cost_tile!(
13401327
end
13411328
end
13421329
end
1343-
# @show u₁reducesrp, u₂reducesrp, rp, op
13441330
update_reg_pres!(reg_pressure, rp, u₁reducesrp, u₂reducesrp)
13451331
# end
13461332
# update_costs!(reg_pressure, rp, u₁reducesrp, u₂reducesrp)

0 commit comments

Comments
 (0)