Skip to content

Commit 3b5c7ce

Browse files
committed
Tests pass currently.
1 parent 93afab1 commit 3b5c7ce

File tree

2 files changed

+34
-4
lines changed

2 files changed

+34
-4
lines changed

src/add_constants.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,17 @@ end
66
function add_constant!(ls::LoopSet, var, elementbytes::Int = 8)
77
sym = gensym(:temp)
88
op = Operation(length(operations(ls)), sym, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS)
9-
pushpreamble!(ls, Expr(:(=), mangledvar(op), var))
10-
pushpreamble!(ls, op, mangledvar(op))
9+
temp = gensym(:intermediateconst)
10+
pushpreamble!(ls, Expr(:(=), temp, var))
11+
pushpreamble!(ls, op, temp)
1112
pushop!(ls, op, sym)
1213
end
1314
function add_constant!(ls::LoopSet, var::Symbol, mpref::ArrayReferenceMetaPosition, elementbytes::Int)
1415
op = Operation(length(operations(ls)), var, elementbytes, LOOPCONSTANT, constant, NODEPENDENCY, Symbol[], NOPARENTS, mpref.mref)
1516
add_vptr!(ls, op)
16-
pushpreamble!(ls, Expr(:(=), mangledvar(op), Expr(:call, lv(:load), mpref.mref.ptr, mem_offset(op, UnrollArgs(zero(Int32), Symbol(""), Symbol(""), nothing)))))
17-
pushpreamble!(ls, op, mangledvar(op))
17+
temp = gensym(:intermediateconstref)
18+
pushpreamble!(ls, Expr(:(=), temp, Expr(:call, lv(:load), mpref.mref.ptr, mem_offset(op, UnrollArgs(zero(Int32), Symbol(""), Symbol(""), nothing)))))
19+
pushpreamble!(ls, op, temp)
1820
pushop!(ls, op, var)
1921
end
2022
# This version has loop dependencies. var gets assigned to sym when lowering.

test/runtests.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,16 @@ using LinearAlgebra
8686
C[m,n] += ΔCₘₙ * factor
8787
end)
8888
lsAmuladd = LoopVectorization.LoopSet(Amuladdq);
89+
lsAmuladd.operations
8990
@test LoopVectorization.choose_order(lsAmuladd) == (Symbol[:n,:m,:k], :m, Unum, Tnum)
91+
92+
# @macroexpand @avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
93+
# ΔCₘₙ = zero(eltype(C))
94+
# for k ∈ 1:size(A,2)
95+
# ΔCₘₙ += A[m,k] * B[k,n]
96+
# end
97+
# C[m,n] += ΔCₘₙ * factor
98+
# end
9099

91100
function AmulB_avx1!(C, A, B)
92101
@_avx for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
@@ -770,6 +779,8 @@ end
770779
G[d1,κ] = z
771780
end
772781
end
782+
# exit()
783+
# using LoopVectorization
773784
function AtmulvB_avx3!(G, B,κ)
774785
d = size(G,1)
775786
@_avx for d1=1:d
@@ -779,7 +790,24 @@ end
779790
end
780791
end
781792
end
793+
# N = 97; B = rand(N, N);
794+
# G1 = Matrix{Float64}(undef, N, 1);
795+
# AtmulvB_avx3!(G1, B, 1)
782796

797+
@macroexpand @_avx for d1=1:d
798+
G[d1,κ] = B[1,d1]*B[1,κ]
799+
for d2=2:d
800+
G[d1,κ] += B[d2,d1]*B[d2,κ]
801+
end
802+
end
803+
pq = :(for d1=1:d
804+
G[d1,κ] = B[1,d1]*B[1,κ]
805+
for d2=2:d
806+
G[d1,κ] += B[d2,d1]*B[d2,κ]
807+
end
808+
end)
809+
lsp = LoopVectorization.LoopSet(pq);
810+
lsp.preamble_symsym
783811

784812
M, K, N = 51, 49, 61
785813
for T ∈ (Float32, Float64, Int32, Int64)

0 commit comments

Comments
 (0)