diff --git a/README.md b/README.md index d008951..1436379 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,8 @@ julia> RecursiveFactorization.lu!(copy(A), Vector{Int}(undef, size(A, 2))); # in #### Performance: For small to medium sized matrices, it is beneficial to use -`RecursiveFactorization` over `OpenBLAS`. The benchmark script is available in -`perf/lu.jl` +`RecursiveFactorization` over `OpenBLAS`. The +[SciMLBenchmarks LU factorization benchmark](https://docs.sciml.ai/SciMLBenchmarksOutput/stable/LinearSolve/LUFactorization/) +compares `RFLUFactorization` against the standard alternatives. ![lubench](https://user-images.githubusercontent.com/17304743/81491200-555b1a80-9259-11ea-95c1-ae98b36f3779.png) diff --git a/perf/lu.jl b/perf/lu.jl deleted file mode 100644 index a998cfb..0000000 --- a/perf/lu.jl +++ /dev/null @@ -1,81 +0,0 @@ -using BenchmarkTools, Random -using LinearAlgebra, RecursiveFactorization, VectorizationBase -nc = min(Int(VectorizationBase.num_cores()), Threads.nthreads()) -BLAS.set_num_threads(nc) -BenchmarkTools.DEFAULT_PARAMETERS.seconds = 0.5 - -function luflop(m, n = m; innerflop = 2) - sum(1:min(m, n)) do k - invflop = 1 - scaleflop = isempty((k + 1):m) ? 0 : sum((k + 1):m) - updateflop = isempty((k + 1):n) ? 0 : - sum((k + 1):n) do j - isempty((k + 1):m) ? 0 : sum((k + 1):m) do i - innerflop - end - end - invflop + scaleflop + updateflop - end -end - -bas_mflops = Float64[] -rec_mflops = Float64[] -rec4_mflops = Float64[] -rec800_mflops = Float64[] -ref_mflops = Float64[] -ns = 4:8:500 -for n in ns - @info "$n × $n" - rng = MersenneTwister(123) - global A = rand(rng, n, n) - bt = @belapsed LinearAlgebra.lu!(B) setup=(B = copy(A)) - push!(bas_mflops, luflop(n) / bt / 1e9) - - rt = @belapsed RecursiveFactorization.lu!(B) setup=(B = copy(A)) - push!(rec_mflops, luflop(n) / rt / 1e9) - - rt4 = @belapsed RecursiveFactorization.lu!(B; threshold = 4) setup=(B = copy(A)) - push!(rec4_mflops, luflop(n) / rt4 / 1e9) - - rt800 = @belapsed RecursiveFactorization.lu!(B; threshold = 800) setup=(B = copy(A)) - push!(rec800_mflops, luflop(n) / rt800 / 1e9) - - ref = @belapsed LinearAlgebra.generic_lufact!(B) setup=(B = copy(A)) - push!(ref_mflops, luflop(n) / ref / 1e9) -end - -using DataFrames, VegaLite -blaslib = if VERSION ≥ v"1.7.0-beta2" - config = BLAS.get_config().loaded_libs - occursin("mkl_rt", config[1].libname) ? :MKL : :OpenBLAS -else - BLAS.vendor() === :mkl ? :MKL : :OpenBLAS -end -df = DataFrame(Size = ns, - Reference = ref_mflops) -setproperty!(df, blaslib, bas_mflops) -setproperty!(df, Symbol("RF with default threshold"), rec_mflops) -setproperty!(df, Symbol("RF fully recursive"), rec4_mflops) -setproperty!(df, Symbol("RF fully iterative"), rec800_mflops) -df = stack(df, - [Symbol("RF with default threshold"), - Symbol("RF fully recursive"), - Symbol("RF fully iterative"), - blaslib, - :Reference], variable_name = :Library, value_name = :GFLOPS) -plt = df |> @vlplot(:line, color={:Library, scale = {scheme = "category10"}}, - x={:Size}, y={:GFLOPS}, - width=1000, height=600) -save(joinpath(homedir(), "Pictures", - "lu_float64_$(VERSION)_$(Sys.CPU_NAME)_$(nc)cores_$blaslib.png"), plt) - -#= -using Plot -plt = plot(ns, bas_mflops, legend=:bottomright, lab="OpenBLAS", title="LU Factorization Benchmark", marker=:auto, dpi=300) -plot!(plt, ns, rec_mflops, lab="RecursiveFactorization", marker=:auto) -plot!(plt, ns, ref_mflops, lab="Reference", marker=:auto) -xaxis!(plt, "size (N x N)") -yaxis!(plt, "GFLOPS") -savefig("lubench.png") -savefig("lubench.pdf") -=# \ No newline at end of file