
Commit 214b83f

WIP on the documentation
1 parent 9483210 commit 214b83f

File tree

8 files changed, +134 -31 lines

docs/create_kernel_plots.jl

Lines changed: 6 additions & 5 deletions
@@ -6,36 +6,37 @@ using KernelFunctions
 
 default(lw=3.0,titlefontsize=28,tickfontsize=18)
 
-x₀ = 0.0; l=0.1
+x₀ = 0.0; l = 0.1
 n_grid = 101
 fill(x₀,n_grid,1)
 xrange = reshape(collect(range(-3,3,length=n_grid)),:,1)
 
-k = SqExponentialKernel(1.0)
+k = transform(SqExponentialKernel(),1.0)
 K1 = kernelmatrix(k,xrange,obsdim=1)
 p = heatmap(K1,yflip=true,colorbar=false,framestyle=:none,background_color=RGBA(0.0,0.0,0.0,0.0))
 savefig(joinpath(@__DIR__,"src","assets","heatmap_sqexp.png"))
 
 
-k = Matern32Kernel(FunctionTransform(x->(sin.(x)).^2))
+k = @kernel Matern32Kernel FunctionTransform(x->(sin.(x)).^2)
 K2 = kernelmatrix(k,xrange,obsdim=1)
 p = heatmap(K2,yflip=true,colorbar=false,framestyle=:none,background_color=RGBA(0.0,0.0,0.0,0.0))
 savefig(joinpath(@__DIR__,"src","assets","heatmap_matern.png"))
 
 
-k = PolynomialKernel(LowRankTransform(randn(3,1)),2.0,0.0)
+k = transform(PolynomialKernel(c=0.0,d=2.0),LowRankTransform(randn(3,1)))
 K3 = kernelmatrix(k,xrange,obsdim=1)
 p = heatmap(K3,yflip=true,colorbar=false,framestyle=:none,background_color=RGBA(0.0,0.0,0.0,0.0))
 savefig(joinpath(@__DIR__,"src","assets","heatmap_poly.png"))
 
-k = 0.5*SqExponentialKernel()*LinearKernel(0.5) + 0.4*Matern32Kernel(FunctionTransform(x->sin.(x)))
+k = 0.5*SqExponentialKernel()*transform(LinearKernel(),0.5) + 0.4*(@kernel Matern32Kernel() FunctionTransform(x->sin.(x)))
 K4 = kernelmatrix(k,xrange,obsdim=1)
 p = heatmap(K4,yflip=true,colorbar=false,framestyle=:none,background_color=RGBA(0.0,0.0,0.0,0.0))
 savefig(joinpath(@__DIR__,"src","assets","heatmap_prodsum.png"))
 
 plot(heatmap.([K1,K2,K3,K4],yflip=true,colorbar=false)...,layout=(2,2))
 savefig(joinpath(@__DIR__,"src","assets","heatmap_combination.png"))
 
+##
 
 for k in [SqExponentialKernel,ExponentialKernel]
 K = kernelmatrix(k(),xrange,obsdim=1)

docs/src/assets/heatmap_sqexp.png

1.58 KB

docs/src/kernels.md

Lines changed: 51 additions & 4 deletions
@@ -24,6 +24,7 @@ The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is def
 
 ### Gamma Exponential Kernel
 
+The [Gamma Exponential Kernel](@ref KernelFunctions.GammaExponentialKernel) is defined as
 ```math
 k(x,x';\gamma) = \exp\left(-\|x-x'\|^{2\gamma}\right)
 ```
@@ -32,18 +33,24 @@ The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is def
 
 ### Matern Kernel
 
+The [Matern Kernel](@ref KernelFunctions.MaternKernel) is defined as
+
 ```math
 k(x,x';\nu) = \frac{2^{1-\nu}}{\Gamma(\nu)}\left(\sqrt{2\nu}|x-x'|\right)^\nu K_\nu\left(\sqrt{2\nu}|x-x'|\right)
 ```
 
 ### Matern 3/2 Kernel
 
+The [Matern 3/2 Kernel](@ref KernelFunctions.Matern32Kernel) is defined as
+
 ```math
 k(x,x') = \left(1+\sqrt{3}|x-x'|\right)\exp\left(-\sqrt{3}|x-x'|\right)
 ```
 
 ### Matern 5/2 Kernel
 
+The [Matern 5/2 Kernel](@ref KernelFunctions.Matern52Kernel) is defined as
+
 ```math
 k(x,x') = \left(1+\sqrt{5}|x-x'|+\frac{5}{3}\|x-x'\|^2\right)\exp\left(-\sqrt{5}|x-x'|\right)
 ```
@@ -52,12 +59,16 @@ The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is def
 
 ### Rational Quadratic Kernel
 
+The [Rational Quadratic Kernel](@ref KernelFunctions.RationalQuadraticKernel) is defined as
+
 ```math
 k(x,x';\alpha) = \left(1+\frac{\|x-x'\|^2}{\alpha}\right)^{-\alpha}
 ```
 
 ### Gamma Rational Quadratic Kernel
 
+The [Gamma Rational Quadratic Kernel](@ref KernelFunctions.GammaRationalQuadraticKernel) is defined as
+
 ```math
 k(x,x';\alpha,\gamma) = \left(1+\frac{\|x-x'\|^{2\gamma}}{\alpha}\right)^{-\alpha}
 ```
@@ -66,12 +77,16 @@ The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is def
 
 ### LinearKernel
 
+The [Linear Kernel](@ref KernelFunctions.LinearKernel) is defined as
+
 ```math
 k(x,x';c) = \langle x,x'\rangle + c
 ```
 
 ### PolynomialKernel
 
+The [Polynomial Kernel](@ref KernelFunctions.PolynomialKernel) is defined as
+
 ```math
 k(x,x';c,d) = \left(\langle x,x'\rangle + c\right)^d
 ```
@@ -80,28 +95,60 @@ The [Square Exponential Kernel](@ref KernelFunctions.SqExponentialKernel) is def
 
 ### ConstantKernel
 
+The [Constant Kernel](@ref KernelFunctions.ConstantKernel) is defined as
+
 ```math
 k(x,x';c) = c
 ```
 
 ### WhiteKernel
 
+The [White Kernel](@ref KernelFunctions.WhiteKernel) is defined as
+
 ```math
 k(x,x') = \delta(x-x')
 ```
 
 ### ZeroKernel
 
+The [Zero Kernel](@ref KernelFunctions.ZeroKernel) is defined as
+
 ```math
 k(x,x') = 0
 ```
 
 # Composite Kernels
 
-## TransformedKernel
+### TransformedKernel
+
+The [Transformed Kernel](@ref KernelFunctions.TransformedKernel) is a kernel where inputs are transformed via a function `f`
+
+```math
+k(x,x';f,\widetilde{k}) = \widetilde{k}(f(x),f(x'))
+```
 
-## ScaledKernel
+where ``\widetilde{k}`` is another kernel.
 
-## KernelSum
+### ScaledKernel
 
-## KernelProduct
+The [Scaled Kernel](@ref KernelFunctions.ScaledKernel) is defined as
+
+```math
+k(x,x';\sigma^2,\widetilde{k}) = \sigma^2\widetilde{k}(x,x')
+```
+
+### KernelSum
+
+The [Kernel Sum](@ref KernelFunctions.KernelSum) is defined as a sum of kernels
+
+```math
+k(x,x';\{w_i\},\{k_i\}) = \sum_i w_i k_i(x,x')
+```
+
+### KernelProduct
+
+The [Kernel Product](@ref KernelFunctions.KernelProduct) is defined as a product of kernels
+
+```math
+k(x,x';\{k_i\}) = \prod_i k_i(x,x')
+```
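
For orientation, here is a minimal usage sketch of the composite kernels documented above, built only from constructors that appear elsewhere in this commit (`transform`, `ScaledKernel`, `*`, `+`); treat the exact signatures as assumptions rather than settled API:

```julia
using KernelFunctions

# TransformedKernel: the base kernel sees f(x) instead of x
k_transformed = transform(SqExponentialKernel(), FunctionTransform(x -> sin.(x)))

# ScaledKernel: premultiply a base kernel by a variance σ²
k_scaled = ScaledKernel(SqExponentialKernel(), 3.0)   # same as 3.0 * SqExponentialKernel()

# KernelSum and KernelProduct via the `+` and `*` operators
k_sum     = 0.5 * SqExponentialKernel() + 0.2 * Matern32Kernel()
k_product = SqExponentialKernel() * LinearKernel()

# All of these behave like any other kernel, e.g. in kernelmatrix
kernelmatrix(k_sum, rand(10, 2), obsdim = 1)
```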

docs/src/transform.md

Lines changed: 1 addition & 13 deletions
@@ -6,16 +6,4 @@ You can also create a pipeline of `Transform` via `TransformChain`. For example
 
 One can apply a transformation on a matrix or a vector via `KernelFunctions.apply(t::Transform,v::AbstractVecOrMat)`
 
-## Transforms :
-```@meta
-CurrentModule = KernelFunctions
-```
-
-```@docs
-IdentityTransform
-ScaleTransform
-ARDTransform
-LowRankTransform
-FunctionTransform
-ChainTransform
-```
+Check the list on the [API page](@ref Transforms)
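
A short sketch of applying a `Transform` on its own, using `ScaleTransform` and the `KernelFunctions.apply(t, v)` signature mentioned in the page above (assumed here, not independently checked):

```julia
using KernelFunctions

t = ScaleTransform(2.0)          # rescales every input by 2.0
v = rand(5)                      # a vector of inputs
A = rand(5, 3)                   # a matrix of inputs

KernelFunctions.apply(t, v)      # transformed vector
KernelFunctions.apply(t, A)      # transformed matrix (AbstractVecOrMat per the docstring)
```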

docs/src/userguide.md

Lines changed: 64 additions & 8 deletions
@@ -7,26 +7,82 @@ For example to create a square exponential kernel
 ```julia
 k = SqExponentialKernel()
 ```
-All kernels can take as argument a `Transform` object (see [Transform](@ref)) which is directly going to act on the inputs before it's processes.
-But it's also possible to simply give a scalar or a vector if all you are interested in is to modify the lengthscale, respectively for all dimensions or independently for each dimension.
+Instead of having lengthscale(s) for each kernel, we use `Transform` objects (see [Transform](@ref)), which act directly on the inputs before they are passed to the kernel.
+For example, to premultiply the input by 2.0, any of the following options creates the kernel:
+```julia
+k = transform(SqExponentialKernel(),ScaleTransform(2.0)) # returns a TransformedKernel
+k = @kernel SqExponentialKernel() l=2.0 # Will be available soon
+k = TransformedKernel(SqExponentialKernel(),ScaleTransform(2.0))
+```
+Check the [`Transform`](@ref) page to see the other options.
+To premultiply the kernel by a variance, you can use `*` or create a `ScaledKernel`:
+```julia
+k = 3.0*SqExponentialKernel()
+k = ScaledKernel(SqExponentialKernel(),3.0)
+@kernel 3.0*SqExponentialKernel()
+```
+
+## Using a kernel function
+
+To compute the kernel function on two vectors you can call
+```julia
+k = SqExponentialKernel()
+x1 = rand(3); x2 = rand(3)
+kappa(k,x1,x2) == k(x1,x2) # Syntactic sugar
+```
 
-## Kernel matrix creation
+## Creating a kernel matrix
 
-Matrix are created via the `kernelmatrix` function or `kerneldiagmatrix`.
+Kernel matrices can be created via the `kernelmatrix` function, or `kerneldiagmatrix` for only the diagonal.
 An important argument is `obsdim`, which specifies the input layout: `# samples X # features` (`obsdim=1`) or `# features X # samples` (`obsdim=2`) (similarly to [Distances.jl](https://github.com/JuliaStats/Distances.jl))
 For example:
 ```julia
 k = SqExponentialKernel()
 A = rand(10,5)
 kernelmatrix(k,A,obsdim=1) # Return a 10x10 matrix
 kernelmatrix(k,A,obsdim=2) # Return a 5x5 matrix
+k(A,obsdim=1) # Syntactic sugar
 ```
 
 We also support specific kernel matrix outputs:
-- For a positive-definite matrix object`PDMat` from [`PDMats.jl`](https://github.com/JuliaStats/PDMats.jl). Call `kernelpdmat(k,A,obsdim=1)`, it will create a matrix and in case of bad conditionning will add some diagonal noise until the matrix is considered PSD, it will then return a `PDMat` object. For this method to work in your code you need to include `using PDMats` first
-- For a Kronecker matrix, we rely on [`Kronecker.jl`](https://github.com/MichielStock/Kronecker.jl). We give two methods : `kernelkronmat(k,[x,y,z])` where `x` `y` and `z` are vectors which will return a `KroneckerProduct`, and `kernelkronmat(k,x,dims)` where `x` is a vector and dims and the number of features. Make sure that `k` is a vector compatible with such constructions (with `iskroncompatible`). Both method will return a . For those methods to work in your code you need to include `using Kronecker` first
+- For a positive-definite matrix object `PDMat` from [`PDMats.jl`](https://github.com/JuliaStats/PDMats.jl), you can call the following:
+```julia
+using PDMats
+k = SqExponentialKernel()
+K = kernelpdmat(k,A,obsdim=1) # PDMat
+```
+It will create a matrix and, in case of bad conditioning, add some diagonal noise until the matrix is considered PSD; it will then return a `PDMat` object. For this method to work you need to include `using PDMats` first.
+- For a Kronecker matrix, we rely on [`Kronecker.jl`](https://github.com/MichielStock/Kronecker.jl). Here are two examples:
+```julia
+using Kronecker
+x = range(0,1,length=10)
+y = range(0,1,length=50)
+K = kernelkronmat(k,[x,y]) # Kronecker matrix
+K = kernelkronmat(k,x,5) # Kronecker matrix
+```
+Make sure that `k` is a kernel compatible with such constructions (check with `iskroncompatible`). Both methods will return a `KroneckerProduct`. For those methods to work you need to include `using Kronecker` first.
+- For a Nystrom approximation: `kernelmatrix(nystrom(k, X, ρ, obsdim = 1))` where `ρ` is the proportion of samples used.
 
-## Kernel manipulation
+## Composite kernels
 
 One can create combinations of kernels via `KernelSum` and `KernelProduct` or using simple operators `+` and `*`.
-For
+For example:
+```julia
+k1 = SqExponentialKernel()
+k2 = Matern32Kernel()
+k = 0.5*k1 + 0.2*k2 # KernelSum
+k = k1*k2 # KernelProduct
+```
+
+## Kernel Parameters
+
+What if you want to differentiate through the kernel parameters? Even in a highly nested structure such as:
+```julia
+k = transform(0.5*SqExponentialKernel()*MaternKernel()+0.2*(transform(LinearKernel(),2.0)+PolynomialKernel()),[0.1,0.5])
+```
+One can get the array of parameters to optimize via `params` from `Flux.jl`
+```julia
+using Flux
+params(k)
+```
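
To make the last point concrete, here is a sketch of taking gradients with respect to those parameters. The kernel construction is the one from the snippet above; the implicit-parameter `Flux.gradient(f, ps)` form and the sum-of-kernel-matrix loss are illustrative assumptions, not part of this commit:

```julia
using KernelFunctions, Flux

# Nested kernel from the example above
k = transform(0.5*SqExponentialKernel()*MaternKernel() + 0.2*(transform(LinearKernel(), 2.0) + PolynomialKernel()), [0.1, 0.5])

X = rand(20, 2)                      # 20 samples with 2 features (obsdim = 1)
ps = Flux.params(k)                  # collect every trainable kernel parameter

# Toy objective: sum of the kernel matrix entries
loss() = sum(kernelmatrix(k, X, obsdim = 1))

# Implicit-parameter form of Flux/Zygote gradients (assumed available here)
gs = Flux.gradient(loss, ps)
```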

src/KernelFunctions.jl

Lines changed: 4 additions & 1 deletion
@@ -1,3 +1,6 @@
+"""
+KernelFunctions. [Github](https://github.com/theogf/KernelFunctions.jl) [Documentation](https://theogf.github.io/KernelFunctions.jl/dev/)
+"""
 module KernelFunctions
 
 export kernelmatrix, kernelmatrix!, kerneldiagmatrix, kerneldiagmatrix!, kappa
@@ -58,7 +61,7 @@ include("zygote_adjoints.jl")
 function __init__()
 @require Kronecker="2c470bb0-bcc8-11e8-3dad-c9649493f05e" include("matrix/kernelkroneckermat.jl")
 @require PDMats="90014a1f-27ba-587c-ab20-58faa44d9150" include("matrix/kernelpdmat.jl")
-@require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" include("trainable.jl")
+@require Flux="587475ba-b771-5e3f-ad9e-33799f191a9c" include("trainable.jl")
 end
 
 end

src/kernels/scaledkernel.jl

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
+"""
+`ScaledKernel(k::Kernel,σ²::Real)`
+Return a kernel premultiplied by the variance `σ²` : `σ² k(x,x')`
+"""
 struct ScaledKernel{Tk<:Kernel, Tσ²<:Real} <: Kernel
 kernel::Tk
 σ²::Vector{Tσ²}
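
A brief usage sketch matching this docstring; the calling syntax and the `σ² k(x,x')` identity in the final line are assumed from the documentation added in this commit, not verified against the implementation:

```julia
using KernelFunctions

k_base = SqExponentialKernel()
k = ScaledKernel(k_base, 3.0)      # premultiply by the variance σ² = 3.0

x, y = rand(3), rand(3)
k(x, y) ≈ 3.0 * k_base(x, y)       # expected to hold per the docstring
```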

src/kernels/transformedkernel.jl

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
+"""
+`TransformedKernel(k::Kernel,t::Transform)`
+Return a kernel where inputs are pretransformed by `t` : `k(t(x),t(x'))`
+"""
 struct TransformedKernel{Tk<:Kernel,Tr<:Transform} <: Kernel
 kernel::Tk
 transform::Tr
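
Similarly, a small sketch of the behaviour this docstring describes; the equality is the documented identity `k(t(x),t(x'))`, assumed rather than tested here:

```julia
using KernelFunctions

t = ScaleTransform(2.0)
k_base = SqExponentialKernel()
k = TransformedKernel(k_base, t)   # inputs go through t before reaching k_base

x, y = rand(3), rand(3)
k(x, y) ≈ k_base(KernelFunctions.apply(t, x), KernelFunctions.apply(t, y))
```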

0 commit comments