From 25dbbbe9b5632c6f0a0039eb41834eac93c4a4ff Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Fri, 12 Jan 2024 13:49:25 -0500 Subject: [PATCH 1/8] allow providing fft plans --- .github/workflows/UnitTest.yml | 21 +++------ Project.toml | 3 ++ demo.jl | 79 ++++++++++++++++++++++++++++++++++ src/ImageFiltering.jl | 13 +++++- src/imfilter.jl | 54 +++++++++++++++++++++-- test/2d.jl | 40 ++++++++++------- test/gabor.jl | 14 +++--- test/nd.jl | 7 ++- test/runtests.jl | 16 ++++--- 9 files changed, 199 insertions(+), 48 deletions(-) create mode 100644 demo.jl diff --git a/.github/workflows/UnitTest.yml b/.github/workflows/UnitTest.yml index 36f9a763..b2410dde 100644 --- a/.github/workflows/UnitTest.yml +++ b/.github/workflows/UnitTest.yml @@ -7,8 +7,6 @@ on: branches: - master pull_request: - schedule: - - cron: '20 00 1 * *' jobs: test: @@ -20,23 +18,18 @@ jobs: os: [ubuntu-latest, windows-latest, macOS-latest] steps: - - uses: actions/checkout@v1.0.0 + - uses: actions/checkout@v4 - name: "Set up Julia" uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.julia-version }} - - name: Cache artifacts - uses: actions/cache@v1 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- + - uses: julia-actions/cache@v1 + + - run: | + import Pkg + Pkg.add(Pkg.PackageSpec(url="https://github.com/HolyLab/RFFT.jl", rev="ib/add_copy")) + shell: julia --project --color=yes {0} - name: "Unit Test" uses: julia-actions/julia-runtest@v1 diff --git a/Project.toml b/Project.toml index f281ce96..021fb11b 100644 --- a/Project.toml +++ b/Project.toml @@ -4,6 +4,7 @@ author = ["Tim Holy ", "Jan Weidner "] version = "0.7.8" [deps] +AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" CatIndices = "aafaddc9-749c-510e-ac4f-586e18779b91" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -15,6 +16,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +RFFT = "3bd9afcd-55df-531a-9b34-dc642dce7b95" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -31,6 +33,7 @@ ImageCore = "0.10" OffsetArrays = "1.9" PrecompileTools = "1" Reexport = "1.1" +RFFT = "0.1.1" StaticArrays = "0.10, 0.11, 0.12, 1.0" Statistics = "1" TiledIteration = "0.2, 0.3, 0.4, 0.5" diff --git a/demo.jl b/demo.jl new file mode 100644 index 00000000..7743112e --- /dev/null +++ b/demo.jl @@ -0,0 +1,79 @@ +using ImageFiltering, FFTW, LinearAlgebra, Profile, Random +# using ProfileView +using ComputationalResources + +FFTW.set_num_threads(parse(Int, get(ENV, "FFTW_NUM_THREADS", "1"))) +BLAS.set_num_threads(parse(Int, get(ENV, "BLAS_NUM_THREADS", string(Threads.nthreads() ÷ 2)))) + +function benchmark(mats) + kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) + Threads.@threads for mat in mats + frame_filtered = deepcopy(mat[:, :, 1]) + r_cached = CPU1(ImageFiltering.planned_fft(frame_filtered, kernel)) + for i in axes(mat, 3) + frame = @view mat[:, :, i] + imfilter!(r_cached, frame_filtered, frame, kernel) + end + return + end +end + +function test(mats) + kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) + for mat in mats + f1 = deepcopy(mat[:, :, 1]) + r_cached = CPU1(ImageFiltering.planned_fft(f1, kernel)) + f2 = deepcopy(mat[:, :, 1]) + r_noncached = CPU1(Algorithm.FFT()) + for i in axes(mat, 3) + frame = @view mat[:, :, i] + @info "imfilter! noncached" + imfilter!(r_noncached, f2, frame, kernel) + @info "imfilter! cached" + imfilter!(r_cached, f1, frame, kernel) + @show f1[1:4] f2[1:4] + f1 ≈ f2 || error("f1 !≈ f2") + end + return + end +end + +function profile() + Random.seed!(1) + nmats = 10 + mats = [rand(Float32, rand(80:100), rand(80:100), rand(2000:3000)) for _ in 1:nmats] + GC.gc(true) + + # benchmark(mats) + + # for _ in 1:3 + # @time "warm run of benchmark(mats)" benchmark(mats) + # end + + test(mats) + + # Profile.clear() + # @profile benchmark(mats) + + # Profile.print(IOContext(stdout, :displaysize => (24, 200)); C=true, combine=true, mincount=100) + # # ProfileView.view() + # GC.gc(true) +end + +profile() + +using ImageFiltering +using ImageFiltering.RFFT + +function mwe() + a = rand(Float64, 10, 10) + out1 = rfft(a) + + buf = RFFT.RCpair{Float64}(undef, size(a)) + rfft_plan = RFFT.plan_rfft!(buf) + copy!(buf, a) + out2 = complex(rfft_plan(buf)) + + return out1 ≈ out2 +end +mwe() \ No newline at end of file diff --git a/src/ImageFiltering.jl b/src/ImageFiltering.jl index bbb164e2..582e018f 100644 --- a/src/ImageFiltering.jl +++ b/src/ImageFiltering.jl @@ -1,12 +1,14 @@ module ImageFiltering using FFTW +using RFFT using ImageCore, FFTViews, OffsetArrays, StaticArrays, ComputationalResources, TiledIteration # Where possible we avoid a direct dependency to reduce the number of [compat] bounds # using FixedPointNumbers: Normed, N0f8 # reexported by ImageCore using ImageCore.MappedArrays using Statistics, LinearAlgebra using Base: Indices, tail, fill_to_length, @pure, depwarn, @propagate_inbounds +import Base: copy! using OffsetArrays: IdentityUnitRange # using the one in OffsetArrays makes this work with multiple Julia versions using SparseArrays # only needed to fix an ambiguity in borderarray using Reexport @@ -30,7 +32,8 @@ export Kernel, KernelFactors, imgradients, padarray, centered, kernelfactors, reflect, freqkernel, spacekernel, findlocalminima, findlocalmaxima, - blob_LoG, BlobLoG + blob_LoG, BlobLoG, + planned_fft FixedColorant{T<:Normed} = Colorant{T} StaticOffsetArray{T,N,A<:StaticArray} = OffsetArray{T,N,A} @@ -50,10 +53,16 @@ function Base.transpose(A::StaticOffsetArray{T,2}) where T end module Algorithm + import FFTW # deliberately don't export these, but it's expected that they # will be used as Algorithm.FFT(), etc. abstract type Alg end - "Filter using the Fast Fourier Transform" struct FFT <: Alg end + "Filter using the Fast Fourier Transform" struct FFT <: Alg + plan1::Union{Function,Nothing} + plan2::Union{Function,Nothing} + plan3::Union{Function,Nothing} + end + FFT() = FFT(nothing, nothing, nothing) "Filter using a direct algorithm" struct FIR <: Alg end "Cache-efficient filtering using tiles" struct FIRTiled{N} <: Alg tilesize::Dims{N} diff --git a/src/imfilter.jl b/src/imfilter.jl index 283352b8..ab9b2f57 100644 --- a/src/imfilter.jl +++ b/src/imfilter.jl @@ -826,7 +826,7 @@ function _imfilter_fft!(r::AbstractCPU{FFT}, for I in CartesianIndices(axes(kern)) krn[I] = kern[I] end - Af = filtfft(A, krn) + Af = filtfft(A, krn, r.settings.plan1, r.settings.plan2, r.settings.plan3) if map(first, axes(out)) == map(first, axes(Af)) R = CartesianIndices(axes(out)) copyto!(out, R, Af, R) @@ -837,13 +837,61 @@ function _imfilter_fft!(r::AbstractCPU{FFT}, src = view(FFTView(Af), axes(dest)...) copyto!(dest, src) end - out + return out +end + +function buffered_planned_rfft(a::AbstractArray{T}) where {T} + buf = RFFT.RCpair{T}(undef, size(a)) + plan = RFFT.plan_rfft!(buf; flags=FFTW.MEASURE) + return function (arr::AbstractArray{T}) where {T} + copy!(buf, OffsetArrays.no_offset_view(arr)) + return plan(buf) + end +end +function buffered_planned_irfft(a::AbstractArray{T}) where {T} + buf = RFFT.RCpair{T}(undef, size(a)) + plan = RFFT.plan_irfft!(buf; flags=FFTW.MEASURE) + return function (arr::AbstractArray{T}) where {T} + copy!(buf, OffsetArrays.no_offset_view(arr)) + return plan(buf) + end end +function planned_fft(A::AbstractArray{T,N}, + kernel::ProcessedKernel, + border::BorderSpecAny=Pad(:replicate) + ) where {T<:AbstractFloat,N} + bord = border(kernel, A, Algorithm.FFT()) + _A = padarray(T, A, bord) + bfp1 = buffered_planned_rfft(_A) + kern = samedims(_A, kernelconv(kernel...)) + krn = FFTView(zeros(eltype(kern), map(length, axes(_A)))) + bfp2 = buffered_planned_rfft(krn) + bfp3 = buffered_planned_irfft(_A) + return Algorithm.FFT(bfp1, bfp2, bfp3) +end +planned_fft(A::AbstractArray, kernel, border::AbstractString) = planned_fft(A, kernel, borderinstance(border)) +planned_fft(A::AbstractArray, kernel::Union{ArrayLike,Laplacian}, border::BorderSpecAny) = planned_fft(A, factorkernel(kernel), border) + +function filtfft(A, krn, planned_rfft1::Function, planned_rfft2::Function, planned_irfft::Function) + B = complex(planned_rfft1(A)) + B .*= conj!(complex(planned_rfft2(krn))) + return real(planned_irfft(complex(B))) +end +# TODO: this does not work. See TODO below +function filtfft(A::AbstractArray{C}, krn, planned_rfft1::Function, planned_rfft2::Function, planned_irfft::Function) where {C<:Colorant} + Av, dims = channelview_dims(A) + kernrs = kreshape(C, krn) + B = complex(planned_rfft1(Av, dims)) # TODO: dims is not supported by planned_rfft1 + B .*= conj!(complex(planned_rfft2(kernrs))) + Avf = real(planned_irfft(complex(B))) + return colorview(base_colorant_type(C){eltype(Avf)}, Avf) +end +filtfft(A, krn, ::Nothing, ::Nothing, ::Nothing) = filtfft(A, krn) function filtfft(A, krn) B = rfft(A) B .*= conj!(rfft(krn)) - irfft(B, length(axes(A, 1))) + return irfft(B, length(axes(A, 1))) end function filtfft(A::AbstractArray{C}, krn) where {C<:Colorant} Av, dims = channelview_dims(A) diff --git a/test/2d.jl b/test/2d.jl index 37d8fd29..25504630 100644 --- a/test/2d.jl +++ b/test/2d.jl @@ -36,6 +36,15 @@ using ImageFiltering: borderinstance end end +function supported_algs(img, kernel, border) + if eltype(img) isa AbstractFloat + (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT(), planned_fft(img, kernel, border)) + else + # TODO: extend planned_fft to support other types + (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + end +end + @testset "FIR/FFT" begin f32type(img) = f32type(eltype(img)) f32type(::Type{C}) where {C<:Colorant} = base_colorant_type(C){Float32} @@ -50,6 +59,7 @@ end # Dense inseparable kernel kern = [0.1 0.2; 0.4 0.5] kernel = OffsetArray(kern, -1:0, 1:2) + border = Inner() for img in (imgf, imgi, imgg, imgc) targetimg = zeros(typeof(img[1]*kern[1]), size(img)) targetimg[3:4,2:3] = rot180(kern) .* img[3,4] @@ -66,7 +76,7 @@ end @test @inferred(imfilter(f32type(img), img, kernel, border)) ≈ float32.(targetimg) fill!(ret, zero(eltype(ret))) @test @inferred(imfilter!(ret, img, kernel, border)) ≈ targetimg - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) @test @inferred(imfilter(img, kernel, border, alg)) ≈ targetimg @test @inferred(imfilter(img, (kernel,), border, alg)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border, alg)) ≈ float32.(targetimg) @@ -76,12 +86,12 @@ end @test_throws MethodError imfilter!(CPU1(Algorithm.FIR()), ret, img, kernel, border, Algorithm.FFT()) end targetimg_inner = OffsetArray(targetimg[2:end, 1:end-2], 2:5, 1:5) - @test @inferred(imfilter(img, kernel, Inner())) ≈ targetimg_inner - @test @inferred(imfilter(f32type(img), img, kernel, Inner())) ≈ float32.(targetimg_inner) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) - @test @inferred(imfilter(img, kernel, Inner(), alg)) ≈ targetimg_inner - @test @inferred(imfilter(f32type(img), img, kernel, Inner(), alg)) ≈ float32.(targetimg_inner) - @test @inferred(imfilter(CPU1(alg), img, kernel, Inner())) ≈ targetimg_inner + @test @inferred(imfilter(img, kernel, border)) ≈ targetimg_inner + @test @inferred(imfilter(f32type(img), img, kernel, border)) ≈ float32.(targetimg_inner) + for alg in supported_algs(img, kernel, border) + @test @inferred(imfilter(img, kernel, border, alg)) ≈ targetimg_inner + @test @inferred(imfilter(f32type(img), img, kernel, border, alg)) ≈ float32.(targetimg_inner) + @test @inferred(imfilter(CPU1(alg), img, kernel, border)) ≈ targetimg_inner end end # Factored kernel @@ -96,7 +106,7 @@ end for border in ("replicate", "circular", "symmetric", "reflect", Fill(zero(eltype(img)))) @test @inferred(imfilter(img, kernel, border)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border)) ≈ float32.(targetimg) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) @test @inferred(imfilter(img, kernel, border, alg)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border, alg)) ≈ float32.(targetimg) end @@ -106,7 +116,7 @@ end targetimg_inner = OffsetArray(targetimg[2:end, 1:end-2], 2:5, 1:5) @test @inferred(imfilter(img, kernel, Inner())) ≈ targetimg_inner @test @inferred(imfilter(f32type(img), img, kernel, Inner())) ≈ float32.(targetimg_inner) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) @test @inferred(imfilter(img, kernel, Inner(), alg)) ≈ targetimg_inner @test @inferred(imfilter(f32type(img), img, kernel, Inner(), alg)) ≈ float32.(targetimg_inner) end @@ -122,7 +132,7 @@ end for border in ("replicate", "circular", "symmetric", "reflect", Fill(zero(eltype(img)))) @test @inferred(imfilter(img, kernel, border)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border)) ≈ float32.(targetimg) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) if alg == Algorithm.FFT() && eltype(img) == Int @test @inferred(imfilter(Float64, img, kernel, border, alg)) ≈ targetimg else @@ -134,7 +144,7 @@ end targetimg_inner = OffsetArray(targetimg[2:end-1, 2:end-1], 2:4, 2:6) @test @inferred(imfilter(img, kernel, Inner())) ≈ targetimg_inner @test @inferred(imfilter(f32type(img), img, kernel, Inner())) ≈ float32.(targetimg_inner) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) if alg == Algorithm.FFT() && eltype(img) == Int @test @inferred(imfilter(Float64, img, kernel, Inner(), alg)) ≈ targetimg_inner else @@ -184,7 +194,7 @@ end targetimg = target1(img, kern, border) @test @inferred(imfilter(img, kernel, border)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border)) ≈ float32.(targetimg) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) @test @inferred(imfilter(img, kernel, border, alg)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border, alg)) ≈ float32.(targetimg) end @@ -195,7 +205,7 @@ end targetimg = zerona!(copy(targetimg0)) @test @inferred(zerona!(imfilter(img, kernel, border))) ≈ targetimg @test @inferred(zerona!(imfilter(f32type(img), img, kernel, border))) ≈ float32.(targetimg) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) @test @inferred(zerona!(imfilter(img, kernel, border, alg), nanflag)) ≈ targetimg @test @inferred(zerona!(imfilter(f32type(img), img, kernel, border, alg), nanflag)) ≈ float32.(targetimg) end @@ -208,7 +218,7 @@ end targetimg = target1(img, kern, border) @test @inferred(imfilter(img, kernel, border)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border)) ≈ float32.(targetimg) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) @test @inferred(imfilter(img, kernel, border, alg)) ≈ targetimg @test @inferred(imfilter(f32type(img), img, kernel, border, alg)) ≈ float32.(targetimg) end @@ -219,7 +229,7 @@ end targetimg = zerona!(copy(targetimg0)) @test @inferred(zerona!(imfilter(img, kernel, border))) ≈ targetimg @test @inferred(zerona!(imfilter(f32type(img), img, kernel, border))) ≈ float32.(targetimg) - for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) + for alg in supported_algs(img, kernel, border) @test @inferred(zerona!(imfilter(img, kernel, border, alg), nanflag)) ≈ targetimg @test @inferred(zerona!(imfilter(f32type(img), img, kernel, border, alg), nanflag)) ≈ float32.(targetimg) end diff --git a/test/gabor.jl b/test/gabor.jl index 5c77508e..faa49cb4 100644 --- a/test/gabor.jl +++ b/test/gabor.jl @@ -7,11 +7,15 @@ using ImageFiltering, Test, Statistics size_y = 6*σy+1 γ = σx/σy # zero size forces default kernel width, with warnings - @info "Four warnings are expected" - kernel = Kernel.gabor(0,0,σx,0,5,γ,0) - @test isequal(size(kernel[1]),(size_x,size_y)) - kernel = Kernel.gabor(0,0,σx,π,5,γ,0) - @test isequal(size(kernel[1]),(size_x,size_y)) + + @test_logs (:warn, r"The input parameter size_") match_mode=:any begin + kernel = Kernel.gabor(0,0,σx,0,5,γ,0) + @test isequal(size(kernel[1]),(size_x,size_y)) + end + @test_logs (:warn, r"The input parameter size_") match_mode=:any begin + kernel = Kernel.gabor(0,0,σx,π,5,γ,0) + @test isequal(size(kernel[1]),(size_x,size_y)) + end for x in 0:4, y in 0:4, z in 0:4, t in 0:4 σx1 = 2*x+1 diff --git a/test/nd.jl b/test/nd.jl index 8842743f..76c1006d 100644 --- a/test/nd.jl +++ b/test/nd.jl @@ -50,6 +50,8 @@ Base.zero(::Type{WrappedFloat}) = WrappedFloat(0.0) v = fill(0xff, 10) kern = centered(fill(0xff, 3)) @info "Two warnings are expected" + # TODO: use @test_logs (:warn, r"Likely overflow or conversion error detected") match_mode=:any + # julia has an internal error currently on 1.10.2 that this hits https://github.com/JuliaLang/julia/pull/50759 @test_throws InexactError imfilter(v, kern) vout = imfilter(UInt32, v, kern) @test eltype(vout) == UInt32 @@ -106,8 +108,8 @@ Base.zero(::Type{WrappedFloat}) = WrappedFloat(0.0) around_i = [abs(i-j) <= 15 for j in eachindex(v)] @test all(isequal(x), wf[around_i]) @test wf[.!around_i] ≈ vf[.!around_i] - end - + end + # Issue #110 img = reinterpret(WrappedFloat, rand(128)) kern = centered(rand(31)) @@ -129,6 +131,7 @@ end img = trues(10,10,10) kernel = centered(trues(3,3,3)/27) for border in ("replicate", "circular", "symmetric", "reflect", Fill(true)) + # TODO: add support for boolean images in planned_fft for alg in (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) @test imfilter(img, kernel, border) ≈ img end diff --git a/test/runtests.jl b/test/runtests.jl index cfb5ae59..955179dc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,6 +6,13 @@ using ImageQualityIndexes import StaticArrays using Random +function typestring(::Type{T}) where T # from https://github.com/JuliaImages/ImageCore.jl/pull/133 + buf = IOBuffer() + show(buf, T) + String(take!(buf)) +end + +@testset "ImageFiltering" verbose=true begin @testset "Project meta quality checks" begin # Ambiguity test if Base.VERSION >= v"1.6.0-DEV.1005" # julia #37616 @@ -26,12 +33,6 @@ using Random end end -function typestring(::Type{T}) where T # from https://github.com/JuliaImages/ImageCore.jl/pull/133 - buf = IOBuffer() - show(buf, T) - String(take!(buf)) -end - include("compat.jl") include("border.jl") include("nd.jl") @@ -49,7 +50,6 @@ include("models.jl") CUDA_INSTALLED = false try - global CUDA_INSTALLED # This errors with `IOError` when nvidia driver is not available, # in which case we don't even need to try `using CUDA` run(pipeline(`nvidia-smi`, stdout=devnull, stderr=devnull)) @@ -71,3 +71,5 @@ else @warn "CUDA test: disabled" end nothing + +end From a948fae6abbb948d1977da9bad8cfef246fc5cd4 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 11 Mar 2024 15:22:17 -0400 Subject: [PATCH 2/8] simplify demo --- demo.jl | 64 +++++++++++++++++++++++---------------------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/demo.jl b/demo.jl index 7743112e..6075b1b4 100644 --- a/demo.jl +++ b/demo.jl @@ -1,11 +1,10 @@ using ImageFiltering, FFTW, LinearAlgebra, Profile, Random -# using ProfileView using ComputationalResources FFTW.set_num_threads(parse(Int, get(ENV, "FFTW_NUM_THREADS", "1"))) BLAS.set_num_threads(parse(Int, get(ENV, "BLAS_NUM_THREADS", string(Threads.nthreads() ÷ 2)))) -function benchmark(mats) +function benchmark_new(mats) kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) Threads.@threads for mat in mats frame_filtered = deepcopy(mat[:, :, 1]) @@ -17,6 +16,18 @@ function benchmark(mats) return end end +function benchmark_old(mats) + kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) + Threads.@threads for mat in mats + frame_filtered = deepcopy(mat[:, :, 1]) + r_noncached = CPU1(Algorithm.FFT()) + for i in axes(mat, 3) + frame = @view mat[:, :, i] + imfilter!(r_noncached, frame_filtered, frame, kernel) + end + return + end +end function test(mats) kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) @@ -26,11 +37,8 @@ function test(mats) f2 = deepcopy(mat[:, :, 1]) r_noncached = CPU1(Algorithm.FFT()) for i in axes(mat, 3) - frame = @view mat[:, :, i] - @info "imfilter! noncached" - imfilter!(r_noncached, f2, frame, kernel) - @info "imfilter! cached" - imfilter!(r_cached, f1, frame, kernel) + imfilter!(r_noncached, f2, deepcopy(mat[:, :, i]), kernel) + imfilter!(r_cached, f1, deepcopy(mat[:, :, i]), kernel) @show f1[1:4] f2[1:4] f1 ≈ f2 || error("f1 !≈ f2") end @@ -38,42 +46,22 @@ function test(mats) end end -function profile() +function run() Random.seed!(1) nmats = 10 - mats = [rand(Float32, rand(80:100), rand(80:100), rand(2000:3000)) for _ in 1:nmats] - GC.gc(true) + mats = [rand(Float64, rand(80:100), rand(80:100), rand(2000:3000)) for _ in 1:nmats] - # benchmark(mats) + benchmark_new(mats) + for _ in 1:3 + @time "warm run of benchmark_new(mats)" benchmark_new(mats) + end - # for _ in 1:3 - # @time "warm run of benchmark(mats)" benchmark(mats) - # end + benchmark_old(mats) + for _ in 1:3 + @time "warm run of benchmark_old(mats)" benchmark_old(mats) + end test(mats) - - # Profile.clear() - # @profile benchmark(mats) - - # Profile.print(IOContext(stdout, :displaysize => (24, 200)); C=true, combine=true, mincount=100) - # # ProfileView.view() - # GC.gc(true) end -profile() - -using ImageFiltering -using ImageFiltering.RFFT - -function mwe() - a = rand(Float64, 10, 10) - out1 = rfft(a) - - buf = RFFT.RCpair{Float64}(undef, size(a)) - rfft_plan = RFFT.plan_rfft!(buf) - copy!(buf, a) - out2 = complex(rfft_plan(buf)) - - return out1 ≈ out2 -end -mwe() \ No newline at end of file +run() From 6ef477e1ada42a45875f569bc11a3f72c773610c Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 7 May 2024 09:15:26 -0400 Subject: [PATCH 3/8] RFFT -> RealFFTs & tidyup --- .github/workflows/UnitTest.yml | 7 +--- Project.toml | 5 +-- demo.jl | 67 ---------------------------------- src/ImageFiltering.jl | 2 +- src/imfilter.jl | 8 ++-- 5 files changed, 8 insertions(+), 81 deletions(-) delete mode 100644 demo.jl diff --git a/.github/workflows/UnitTest.yml b/.github/workflows/UnitTest.yml index b2410dde..6d0f2912 100644 --- a/.github/workflows/UnitTest.yml +++ b/.github/workflows/UnitTest.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - julia-version: ['1.6', '1', 'nightly'] + julia-version: ['1.6', '1'] os: [ubuntu-latest, windows-latest, macOS-latest] steps: @@ -26,11 +26,6 @@ jobs: - uses: julia-actions/cache@v1 - - run: | - import Pkg - Pkg.add(Pkg.PackageSpec(url="https://github.com/HolyLab/RFFT.jl", rev="ib/add_copy")) - shell: julia --project --color=yes {0} - - name: "Unit Test" uses: julia-actions/julia-runtest@v1 diff --git a/Project.toml b/Project.toml index 021fb11b..9ac19586 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,6 @@ author = ["Tim Holy ", "Jan Weidner "] version = "0.7.8" [deps] -AbstractFFTs = "621f4979-c628-5d54-868e-fcf4e3e8185c" CatIndices = "aafaddc9-749c-510e-ac4f-586e18779b91" ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" @@ -15,8 +14,8 @@ ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +RealFFTs = "3645faec-0534-4e91-afef-021db0981eec" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" -RFFT = "3bd9afcd-55df-531a-9b34-dc642dce7b95" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" @@ -33,7 +32,7 @@ ImageCore = "0.10" OffsetArrays = "1.9" PrecompileTools = "1" Reexport = "1.1" -RFFT = "0.1.1" +RealFFTs = "1" StaticArrays = "0.10, 0.11, 0.12, 1.0" Statistics = "1" TiledIteration = "0.2, 0.3, 0.4, 0.5" diff --git a/demo.jl b/demo.jl deleted file mode 100644 index 6075b1b4..00000000 --- a/demo.jl +++ /dev/null @@ -1,67 +0,0 @@ -using ImageFiltering, FFTW, LinearAlgebra, Profile, Random -using ComputationalResources - -FFTW.set_num_threads(parse(Int, get(ENV, "FFTW_NUM_THREADS", "1"))) -BLAS.set_num_threads(parse(Int, get(ENV, "BLAS_NUM_THREADS", string(Threads.nthreads() ÷ 2)))) - -function benchmark_new(mats) - kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) - Threads.@threads for mat in mats - frame_filtered = deepcopy(mat[:, :, 1]) - r_cached = CPU1(ImageFiltering.planned_fft(frame_filtered, kernel)) - for i in axes(mat, 3) - frame = @view mat[:, :, i] - imfilter!(r_cached, frame_filtered, frame, kernel) - end - return - end -end -function benchmark_old(mats) - kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) - Threads.@threads for mat in mats - frame_filtered = deepcopy(mat[:, :, 1]) - r_noncached = CPU1(Algorithm.FFT()) - for i in axes(mat, 3) - frame = @view mat[:, :, i] - imfilter!(r_noncached, frame_filtered, frame, kernel) - end - return - end -end - -function test(mats) - kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) - for mat in mats - f1 = deepcopy(mat[:, :, 1]) - r_cached = CPU1(ImageFiltering.planned_fft(f1, kernel)) - f2 = deepcopy(mat[:, :, 1]) - r_noncached = CPU1(Algorithm.FFT()) - for i in axes(mat, 3) - imfilter!(r_noncached, f2, deepcopy(mat[:, :, i]), kernel) - imfilter!(r_cached, f1, deepcopy(mat[:, :, i]), kernel) - @show f1[1:4] f2[1:4] - f1 ≈ f2 || error("f1 !≈ f2") - end - return - end -end - -function run() - Random.seed!(1) - nmats = 10 - mats = [rand(Float64, rand(80:100), rand(80:100), rand(2000:3000)) for _ in 1:nmats] - - benchmark_new(mats) - for _ in 1:3 - @time "warm run of benchmark_new(mats)" benchmark_new(mats) - end - - benchmark_old(mats) - for _ in 1:3 - @time "warm run of benchmark_old(mats)" benchmark_old(mats) - end - - test(mats) -end - -run() diff --git a/src/ImageFiltering.jl b/src/ImageFiltering.jl index 582e018f..860c07e0 100644 --- a/src/ImageFiltering.jl +++ b/src/ImageFiltering.jl @@ -1,7 +1,7 @@ module ImageFiltering using FFTW -using RFFT +using RealFFTs using ImageCore, FFTViews, OffsetArrays, StaticArrays, ComputationalResources, TiledIteration # Where possible we avoid a direct dependency to reduce the number of [compat] bounds # using FixedPointNumbers: Normed, N0f8 # reexported by ImageCore diff --git a/src/imfilter.jl b/src/imfilter.jl index ab9b2f57..a7fd760d 100644 --- a/src/imfilter.jl +++ b/src/imfilter.jl @@ -841,16 +841,16 @@ function _imfilter_fft!(r::AbstractCPU{FFT}, end function buffered_planned_rfft(a::AbstractArray{T}) where {T} - buf = RFFT.RCpair{T}(undef, size(a)) - plan = RFFT.plan_rfft!(buf; flags=FFTW.MEASURE) + buf = RealFFTs.RCpair{T}(undef, size(a)) + plan = RealFFTs.plan_rfft!(buf; flags=FFTW.MEASURE) return function (arr::AbstractArray{T}) where {T} copy!(buf, OffsetArrays.no_offset_view(arr)) return plan(buf) end end function buffered_planned_irfft(a::AbstractArray{T}) where {T} - buf = RFFT.RCpair{T}(undef, size(a)) - plan = RFFT.plan_irfft!(buf; flags=FFTW.MEASURE) + buf = RealFFTs.RCpair{T}(undef, size(a)) + plan = RealFFTs.plan_irfft!(buf; flags=FFTW.MEASURE) return function (arr::AbstractArray{T}) where {T} copy!(buf, OffsetArrays.no_offset_view(arr)) return plan(buf) From 2988c21ae086fb00f349d6a027ca262ea6b2bcd3 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 7 May 2024 09:27:59 -0400 Subject: [PATCH 4/8] use test_logs for overflow warnings --- test/nd.jl | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/nd.jl b/test/nd.jl index 76c1006d..b57ae3f6 100644 --- a/test/nd.jl +++ b/test/nd.jl @@ -49,10 +49,11 @@ Base.zero(::Type{WrappedFloat}) = WrappedFloat(0.0) # Element-type widening (issue #17) v = fill(0xff, 10) kern = centered(fill(0xff, 3)) - @info "Two warnings are expected" - # TODO: use @test_logs (:warn, r"Likely overflow or conversion error detected") match_mode=:any - # julia has an internal error currently on 1.10.2 that this hits https://github.com/JuliaLang/julia/pull/50759 - @test_throws InexactError imfilter(v, kern) + + @test_logs (:warn, r"Likely overflow or conversion error detected") begin + @test_throws InexactError imfilter(v, kern) + end + vout = imfilter(UInt32, v, kern) @test eltype(vout) == UInt32 @test all(x->x==0x0002fa03, vout) @@ -121,7 +122,9 @@ end # issue #17 img = fill(typemax(Int16), 10, 10) kern = centered(Int16[1 2 2 2 1]) - @test_throws InexactError imfilter(img, kern) + @test_logs (:warn, r"Likely overflow or conversion error detected") begin + @test_throws InexactError imfilter(img, kern) + end ret = imfilter(Int32, img, kern) @test eltype(ret) == Int32 @test all(x->x==262136, ret) From 270e59f7d2d4c7ff0a7304eaf710a168afeeb0e0 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 7 May 2024 09:29:03 -0400 Subject: [PATCH 5/8] bump julia compat to 1.10 --- .github/workflows/UnitTest.yml | 2 +- Project.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/UnitTest.yml b/.github/workflows/UnitTest.yml index 6d0f2912..9460199e 100644 --- a/.github/workflows/UnitTest.yml +++ b/.github/workflows/UnitTest.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - julia-version: ['1.6', '1'] + julia-version: ['1.10', '1'] os: [ubuntu-latest, windows-latest, macOS-latest] steps: diff --git a/Project.toml b/Project.toml index 9ac19586..17a6ff43 100644 --- a/Project.toml +++ b/Project.toml @@ -36,7 +36,7 @@ RealFFTs = "1" StaticArrays = "0.10, 0.11, 0.12, 1.0" Statistics = "1" TiledIteration = "0.2, 0.3, 0.4, 0.5" -julia = "1.6" +julia = "1.10" [extras] AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9" From 9a290c9c67ce85d6988e6c9ad30f1f37cd788066 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 7 May 2024 09:51:20 -0400 Subject: [PATCH 6/8] expand TODO based on comment --- src/imfilter.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/imfilter.jl b/src/imfilter.jl index a7fd760d..e1a3a44f 100644 --- a/src/imfilter.jl +++ b/src/imfilter.jl @@ -878,11 +878,17 @@ function filtfft(A, krn, planned_rfft1::Function, planned_rfft2::Function, plann B .*= conj!(complex(planned_rfft2(krn))) return real(planned_irfft(complex(B))) end -# TODO: this does not work. See TODO below +# TODO: this Colorant method does not work. See TODO below function filtfft(A::AbstractArray{C}, krn, planned_rfft1::Function, planned_rfft2::Function, planned_irfft::Function) where {C<:Colorant} Av, dims = channelview_dims(A) kernrs = kreshape(C, krn) B = complex(planned_rfft1(Av, dims)) # TODO: dims is not supported by planned_rfft1 + # Quoting Tim Holy in https://github.com/JuliaImages/ImageFiltering.jl/pull/271/files#r1559210348 + # I don't think dims can be a point of flexibility: these plans are specific to the + # memory layout of the array. (The planning explores various implementations and picks + # the fastest discovered; performance is strongly dependent on memory layout, so the + # choice for one layout may not be the same as another.) You'd have to create a plan + # specifically to the colorant array-type. B .*= conj!(complex(planned_rfft2(kernrs))) Avf = real(planned_irfft(complex(B))) return colorview(base_colorant_type(C){eltype(Avf)}, Avf) From 7ff1a55c35ac4030ea325ce89f7d3550da6c2733 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 7 May 2024 10:05:23 -0400 Subject: [PATCH 7/8] add to docs --- docs/src/index.md | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index fd57e3f7..7f168f1d 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -13,7 +13,7 @@ processing. The main functions provided by this package are: -| Function | Action | +| Function | Action | |:-------------------------|:---------------| |[`imfilter`](@ref) | Filter a one, two or multidimensional array img with a kernel by computing their correlation | |[`imfilter!`](@ref) | Filter an array img with kernel kernel by computing their correlation, storing the result in imgfilt | @@ -25,7 +25,7 @@ The main functions provided by this package are: |[`findlocalminima`](@ref) | Returns the coordinates of elements whose value is smaller than all of their immediate neighbors | |[`findlocalmaxima`](@ref) | Returns the coordinates of elements whose value is larger than all of their immediate neighbors | -Common kernels (filters) are organized in the `Kernel` and `KernelFactors` modules. +Common kernels (filters) are organized in the `Kernel` and `KernelFactors` modules. A common task in image processing and computer vision is computing image *gradients* (derivatives), for which there is the dedicated @@ -80,6 +80,24 @@ transform (FFT). By default, this choice is made based on kernel size. You can manually specify the algorithm using [`Algorithm.FFT()`](@ref) or [`Algorithm.FIR()`](@ref). +#### Reusing FFT plans + +It is possible to reuse FFT plans if the operation is going to be done on the +same array type and dimensions i.e. on each image of an image stack + +```julia +using ImageFiltering, ComputationalResources +imgstack = rand(Float64, 200, 100, 10) +imgstack_filtered = similar(imgstack) + +kernel = ImageFiltering.factorkernel(Kernel.LoG(1)) +fft_planned = CPU1(ImageFiltering.planned_fft(imgstack_filtered[:,:,1], kernel)) + +for i in axes(imgstack, 3) + imfilter!(fft_planned, imgstack_filtered[:,:,i], imgstack[:,:,i], kernel) +end +``` + ### Feature: Multithreading If you launch Julia with `JULIA_NUM_THREADS=n` (where `n > 1`), then From 9db8caf73954b4753af27d1460319cd139d9048e Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 7 May 2024 10:25:47 -0400 Subject: [PATCH 8/8] fix test coverage --- test/2d.jl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/test/2d.jl b/test/2d.jl index 25504630..9331699f 100644 --- a/test/2d.jl +++ b/test/2d.jl @@ -36,13 +36,12 @@ using ImageFiltering: borderinstance end end -function supported_algs(img, kernel, border) - if eltype(img) isa AbstractFloat - (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT(), planned_fft(img, kernel, border)) - else - # TODO: extend planned_fft to support other types - (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) - end +# TODO: extend planned_fft to support more types than just floats +function supported_algs(img::AbstractArray{T}, kernel, border) where {T<:AbstractFloat} + return (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT(), planned_fft(img, kernel, border)) +end +function supported_algs(img::AbstractArray, kernel, border) + return (Algorithm.FIR(), Algorithm.FIRTiled(), Algorithm.FFT()) end @testset "FIR/FFT" begin