Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for CUDA (New Subpackage) #156

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions ImageTransformationsCUDA/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name = "ImageTransformationsCUDA"
uuid = "c180182a-82a0-4af3-89e8-12346a81d86b"
authors = ["Chantal Chen <[email protected]>"]
version = "0.0.1"

[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298"
ImageTransformations = "02fcd773-0e25-5acc-982a-7f6622650795"
Interpolations = "a98d9a8b-a2ab-59e6-89dd-64a1c18fca59"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881"
Rotations = "6038ab10-8711-5258-84ad-4b1120ba62dc"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[compat]
CUDA = "3"
CoordinateTransformations = "0.6.2"
ImageTransformations = "0.9.4"
Interpolations = "0.13.6"
OffsetArrays = "1"
Rotations = "1"
StaticArrays = "1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
21 changes: 21 additions & 0 deletions ImageTransformationsCUDA/src/ImageTransformationsCUDA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""

This package provides analogous ImageTransformation functions for CuArrays.

- `warp`: Transforms coordinates of a CuArray, returning an OffsetArray with a CuArray within.
"""
module ImageTransformationsCUDA

using ImageTransformations

using OffsetArrays, Rotations, StaticArrays, CoordinateTransformations
using CUDA

export

warp

include("warpCUDA.jl")
include("interpolationsCUDA.jl")

end # module
75 changes: 75 additions & 0 deletions ImageTransformationsCUDA/src/interpolationsCUDA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
using ImageTransformations: _default_fillvalue

#Julia has heuristics for deciding on when it's no longer worth specializing. "I" forces julia to specialize in the full tuple type of inds.
function access_value(img::C, inds::I, ::Type{T}) where {T,C, I<:Union{Tuple,SVector}} #Getting the wrong type for T because broadcasting over Ref(img) doesn't return a concrete type.

inds_upper = ceil.(Int, Tuple(inds))
inds_lower = floor.(Int, Tuple(inds))

return checkbounds(Bool, img, inds_upper...) && checkbounds(Bool, img, inds_lower...) ? recursive_bspline(SVector(inds...), img, T) : _default_fillvalue(T) #this line doesn't work because of potential type instability
end

#No need to interpolate on CartesianIndices. Seems to be type unstable?
function access_value(img::C, inds::CartesianIndex, ::Type{T}) where {T,C} #Getting the wrong type for T because broadcasting over Ref(img) doesn't return a concrete type.
return checkbounds(Bool, img, inds) ? T(img[inds]) : _default_fillvalue(T) #this line doesn't work because of potential type instability
end

_getweights(ind::T) where {T} = (1+floor(ind)-ind, ind -floor(ind)) #this returns 0,0 for whole numbers when it needs to return 1, 0

_weightcalc(w::Tuple{T,T}, a::Tuple{S,S}) where{T,S} = sum(w.*a)

_getinds(ind::T) where {T} = (floor(ind), ceil(ind))



"""
BSpline(Linear()) interpolation for CUDA

recursive_bspline(inds::SVector, img, ::Type) = weighted interpolation value

"""
function recursive_bspline(ind_list::SVector{N, S}, a::AbstractArray, ::Type{T}) where {N, S, T} #this only works if you cut down the array to the correct 2x2x2 window!

ind = last(ind_list)
a_1 = viewdim(a,floor(Int, ind))

if ind == floor(ind)
return T(recursive_bspline(pop(ind_list), a_1, T))
else
a_2 = viewdim(a,(floor(Int, ind)+1))
weights = _getweights(ind)
return T(weights[1]*recursive_bspline(pop(ind_list), a_1, T) + weights[2]*recursive_bspline(pop(ind_list), a_2, T))
end
end

function recursive_bspline(weightlist::SVector{0, S}, a::AbstractArray, ::Type{T}) where {S, T}
return a[1] #type
end

viewdim(a::AbstractArray{T,3}, ind) where{T} = view(a, :, :, ind)
viewdim(a::AbstractArray{T,2}, ind) where{T} = view(a, :, ind)
viewdim(a::AbstractArray{T,1}, ind) where{T} = view(a, ind)


#What if I adjusted this to do the BSpline Function without needing recursion?
# using Interpolations
# using Interpolations: padded_similar, copy_with_padding

# function Interpolations.copy_with_padding(::Type{TC}, A::CuArray, it::Interpolations.DimSpec{Interpolations.InterpolationType}) where {TC}
# indsA = axes(A)
# indspad = Interpolations.padded_axes(indsA, it)
# coefs = padded_similar(TC, indspad, A)
# if indspad == indsA
# coefs = copyto!(coefs, A)
# else
# fill!(coefs, zero(TC))
# Interpolations.ct!(coefs, indsA, A, indsA)
# end
# coefs
# end

# Interpolations.padded_similar(::Type{TC}, inds::Tuple{Vararg{Base.OneTo{Int}}}, A::CuArray) where TC = CuArray{TC}(undef, length.(inds))

#this is working out to be a lot more trouble than it's worth I have no idea why get_index doesn't work here

# I don't even know where to begin here. I guess GPU indexing doesn't work?
36 changes: 36 additions & 0 deletions ImageTransformationsCUDA/src/warpCUDA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
using LinearAlgebra

using ImageTransformations: warp, warp!
# @Tim should I import warp and warp! instead, or continue using the ImageTransformation.warp syntax for clarity
using ImageTransformations: autorange, try_static


function ImageTransformations.warp(img::Union{CuArray{T,N},OffsetArray{T,N,<:CuArray}}, tform, inds::Tuple = autorange(img, inv(tform))) where {T, N}
out = OffsetArray(CuArray{T}(undef, map(length, inds)), inds); #Can't make CuArray of OffsetArray
warp!(out, img, try_static(tform, img))
end

function ImageTransformations.warp!(out, img::OffsetArray{T,N,<:CuArray}, tform) where {T,N}
warp!(out, img.parent, tform, img.offsets)
end

function ImageTransformations.warp!(out, img::CuArray{T,N}, tform, in_offsets = ntuple(i->0, N)) where {T,N} #why is this now unstable?!
img_inds = map(out->out.I, CartesianIndices(axes(out.parent)))
tform_offset = offset_calc(out, in_offsets, tform)

tformindex = CuArray(tform_offset.(SVector.(img_inds)))
Comment on lines +18 to +21
Copy link
Member

@johnnychen94 johnnychen94 Jun 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This calculates all image indexes in the single thread CPU and converts them to GPU. I believe this is unnecessarily inefficient -- I think we need to figure out how to directly generate the target indexes on GPU. Maybe manually write a small CUDA kernel for this.

# TODO write an extra function here that converts tformindex to CartesianIndices if possible.
# return tformindex, out.offsets, T #for checking

return out = OffsetArray(access_value.(Ref(img), tformindex, T), out.offsets...)
end

#calculates translation array with input and output offset stripped off.
#tform(xi + Δxi) = xo + Δxo
#tform.linear(xi) + tform(Δxi) - Δxo = xo
#tform2(xi) = (tform.linear, (tform(Δxi) - Δxo))(xi) = xo
function offset_calc(out::OffsetArray{T,N,<:CuArray}, in_offsets, tform) where {T,N} # doesn't work for 3D!
in_translation = AffineMap(I, -1 .*[in_offsets...]) #diagm(ones(T, N))
out_translation = AffineMap(I, [out.offsets...])
return in_translation∘tform∘out_translation
end
22 changes: 22 additions & 0 deletions ImageTransformationsCUDA/test/runtests.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
using OffsetArrays, Rotations, StaticArrays, CoordinateTransformations, LinearAlgebra
using ImageTransformations, ImageTransformationsCUDA, Interpolations
using CUDA
using Test

# helper function from ImageTransformations/test.jl to compare NaN
nearlysame(x, y) = x ≈ y || (isnan(x) & isnan(y))
nearlysame(A::AbstractArray, B::AbstractArray) = all(map(nearlysame, A, B))

tests = [
"warpCUDA.jl",
]

@testset "ImageTransformations" begin
for t in tests
@testset "$t" begin
include(t)
end
end
end

nothing
72 changes: 72 additions & 0 deletions ImageTransformationsCUDA/test/warpCUDA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import ImageTransformations: warp

# TODO design more tests, esp with color.

@testset "Conditional tests for warping on GPU" begin #should break this into 2D, 3D, etc.
#Float32 2D
x = zeros(Float32, 5,5)
x[2,2] = 1.0
x[2,3] = 2.0
y = convert(CuArray, x)

tform = AffineMap(RotMatrix(pi/2), [6,0])

z = warp(y, tform);
@test isa(z, OffsetArray{Float32, 2, <:CuArray})
@test nearlysame(OffsetArray(Array(z.parent), z.offsets),warp(x, tform))

#Float32 3D
a = zeros(3,3,3)
a[1,2,3] = 1.0
b = CuArray(a)

tform3DNull = AffineMap(RotXYZ(0,0,0), [0.0,0.0,0.0]);
c = warp(b, tform3DNull) #3D is broken now.

@test nearlysame(warp(a, tform3DNull), OffsetArray(Array(c.parent), c.offsets))

#introduce offsets
d = zeros(6,6)
d[1,2] = 1.0
e = CuArray(d)

tform_trans = AffineMap(RotMatrix(0), [1, 2]) #just translation
f = warp(e, tform_trans);

@test nearlysame(warp(d, tform_trans), OffsetArray(Array(f.parent), f.offsets)) #OffsetArrays doesn't work with CuArray

#Basic rotation
tform_lin = AffineMap([0 -1; 1 0], [0,0])
g = warp(e, tform_lin);

@test nearlysame(warp(d, tform_lin), OffsetArray(Array(g.parent), g.offsets))#OffsetArrays doesn't work with CuArray

h = OffsetArray(d, -1, 0)
i = OffsetArray(e, -1, 0);

#Basic translation
j = warp(i, tform_trans);
@test nearlysame(warp(h, tform_trans), OffsetArray(Array(j.parent), j.offsets)) #OffsetArrays doesn't work with CuArray

#With BSpline Interpolation
tform_trans_float = AffineMap(RotMatrix(0), [0.1, 0.2])
k = warp(e, tform_trans_float)

@test nearlysame(warp(d, tform_trans_float), OffsetArray(Array(k.parent), k.offsets))

tform_lin_float = AffineMap(RotMatrix(pi/4), [0,0])
l = warp(e, tform_lin_float);

@test nearlysame(warp(d, tform_lin_float), OffsetArray(Array(l.parent), l.offsets))

m = rand(3,3,3)
n = CuArray(m)

tform3D_complex = AffineMap(RotXYZ(0.1, 0.1, 0.1), [0.3,0.2,0.1])

o = warp(n, tform3D_complex);

@test nearlysame(warp(m, tform3D_complex), OffsetArray(Array(o.parent), o.offsets))

end