Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added save compatible version of filenames, arrays and images function #30

Closed
wants to merge 31 commits into from
Closed
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
3121308
added process function
siddharthlal25 Jul 7, 2020
b2a69d4
minor fix to signatures
siddharthlal25 Jul 7, 2020
022f9b7
minor doc fix
siddharthlal25 Jul 7, 2020
1b2c4a0
added basic tests
siddharthlal25 Jul 7, 2020
b4fde15
added saving tests
siddharthlal25 Jul 7, 2020
2c4a4aa
removed generated_data folder from gitignore
siddharthlal25 Jul 7, 2020
fff6af7
fixed test issue
siddharthlal25 Jul 7, 2020
404656c
fixed file naming bug and added name checking tests
siddharthlal25 Jul 7, 2020
28d732f
modified function to close file after operations
siddharthlal25 Jul 8, 2020
255efa2
modified docs
siddharthlal25 Jul 8, 2020
0ed0b73
modified constructors and added tests
siddharthlal25 Jul 12, 2020
27b614d
added save compatible version of filenames
siddharthlal25 Jul 12, 2020
811fb14
fixed docs of `fitscollection`
siddharthlal25 Jul 13, 2020
7b0a157
switched to `write_fits` and `generate_filename`
siddharthlal25 Jul 13, 2020
cb2fa43
switched from df to collection
siddharthlal25 Jul 13, 2020
ef93510
changed names of internal variables
siddharthlal25 Jul 13, 2020
3e862d5
Merge branch 'devel' of https://github.com/siddharthlal25/CCDReductio…
siddharthlal25 Jul 13, 2020
aeb6090
modified API of mapping version and minor change in tests
siddharthlal25 Jul 15, 2020
1d34a34
switched from `write_fits` to `write_data`
siddharthlal25 Jul 15, 2020
4066765
modified `generate_filename`
siddharthlal25 Jul 15, 2020
37c000f
added a minor test
siddharthlal25 Jul 15, 2020
ca21271
modified docs
siddharthlal25 Jul 15, 2020
ef1881f
minor change in tests
siddharthlal25 Jul 15, 2020
c095b6e
remove data generated after testing
siddharthlal25 Jul 16, 2020
8be44c8
fixed issue of closing files in iterative version of `images`
siddharthlal25 Jul 16, 2020
6064f26
minor test fix
siddharthlal25 Jul 16, 2020
79e97ac
closed all open handles during testing
siddharthlal25 Jul 16, 2020
ef0b18d
added documentation for mapping version of functions
siddharthlal25 Jul 16, 2020
ab98374
added docs for iterative version of functions
siddharthlal25 Jul 17, 2020
0207990
doc fix
siddharthlal25 Jul 17, 2020
a7dd75f
added docs and minor fix
siddharthlal25 Jul 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 147 additions & 13 deletions src/collection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,66 @@ end

parse_name(filename, ext, ::Val{true}) = filename


# Utility function for building the path at which a processed file is saved.
#
# The file name is taken from `path` and split into name and extension with
# `parse_name_ext`, searching for `"." * ext`. `save_prefix` and `save_suffix`
# are attached to the name with `save_delim` as delimiter (each is skipped when
# it is `nothing`), and the result is joined onto `save_location`. When no
# extension is found in the file name, ".fits" is appended.
#
# Throws an `ArgumentError` when `save_location` is `nothing`.
function generate_filename(path, save_location, save_prefix, save_suffix, save_delim, ext)
    # fail with a clear message instead of a `MethodError` raised by `joinpath`
    if isnothing(save_location)
        throw(ArgumentError("`save_location` must be a directory path, got `nothing`"))
    end

    # get the filename
    filename = last(splitdir(path))

    # splitting name and extension; the extension that was found gets its own
    # variable so that the `ext` argument is not rebound to a different type
    modified_name, found_ext = parse_name_ext(filename, "." * ext)

    # adding prefix and suffix with delimiter
    if !isnothing(save_prefix)
        modified_name = string(save_prefix, save_delim, modified_name)
    end
    if !isnothing(save_suffix)
        modified_name = string(modified_name, save_delim, save_suffix)
    end

    # adding extension to modified_name, falling back to ".fits" when the
    # original file name carried no matching extension
    final_ext = found_ext == "" ? ".fits" : found_ext
    return joinpath(save_location, modified_name * final_ext)
end


# Utility function to return filename and extension separately.
#
# `ext` is the pattern (string or regex) searched for in `filename`. The split
# happens at the start of the last match: everything before it is returned as
# the name, everything from it to the end of `filename` as the extension (so
# the extension includes the "." when the pattern starts with one).
# Returns `(filename, "")` when `ext` does not occur in `filename`.
function parse_name_ext(filename, ext)
    matches = findall(ext, filename)
    isempty(matches) && return (filename, "")
    breaking_index = first(last(matches))
    # `prevind` instead of `breaking_index - 1`: string indices are byte
    # offsets, so plain subtraction lands inside a multi-byte character
    # (e.g. "é.fits") and throws a `StringIndexError`
    name_end = prevind(filename, breaking_index)
    return filename[1:name_end], filename[breaking_index:end]
end


"""
    write_data(file_path, data)

Write `data` to `file_path` in FITS format.

The order of the dimensions of `data` is reversed with `permutedims` before writing.
"""
function write_data(file_path, data)
    # reversed axis order, i.e. `ndims(data):-1:1`
    axis_order = reverse(1:ndims(data))
    reordered = permutedims(data, axis_order)
    # the `do` form of `FITS` is used so the file is opened in write mode only
    # for the duration of the block
    FITS(file_path, "w") do fits_file
        write(fits_file, reordered)
    end
end

#---------------------------------------------------------------------------------------
@doc raw"""
fitscollection(dir; recursive=true, abspath=true, keepext=true, ext=r"fits(\.tar\.gz)?", exclude=nothing, exclude_dir=nothing, exclude_key = ("", "HISTORY"))
fitscollection(dir;
recursive=true,
abspath=true,
keepext=true,
ext=r"fits(\.tar\.gz)?",
exclude=nothing,
exclude_dir=nothing,
exclude_key=("", "HISTORY"))

Walk through `dir`, collecting FITS files and scanning their headers, and gather the results into a `DataFrame` that can be used with the generators for iterating over many files and processing them. If `recursive` is false, no subdirectories will be walked through.

Expand Down Expand Up @@ -60,7 +117,7 @@ function fitscollection(basedir::String;
exclude = nothing,
exclude_dir = nothing,
exclude_key = ("", "HISTORY"))
df = DataFrame()
collection = DataFrame()

for (root, dirs, files) in walkdir(basedir)
# recursive searching functionality
Expand Down Expand Up @@ -88,25 +145,25 @@ function fitscollection(basedir::String;
# filtering out comment columns
_keys = filter(k -> k ∉ exclude_key, keys(header_data))
_values = (header_data[k] for k in _keys)
push!(df, (path = path, name = name, hdu = index, zip(Symbol.(_keys), _values)...); cols = :union)
push!(collection, (path = path, name = name, hdu = index, zip(Symbol.(_keys), _values)...); cols = :union)
end
close(fits_data)
end
end
return df
return collection
end


"""
arrays(df::DataFrame)
arrays(collection::DataFrame)

Generator for arrays of images of entries in data frame.
siddharthlal25 marked this conversation as resolved.
Show resolved Hide resolved
"""
function arrays end

# generator for image arrays specified by data frames (i.e. path of file, hdu etc.)
@resumable function arrays(df::DataFrame)
for row in eachrow(df)
@resumable function arrays(collection::DataFrame)
for row in eachrow(collection)
fh = FITS(row.path)
@yield getdata(fh[row.hdu])
close(fh)
Expand All @@ -115,30 +172,107 @@ end


"""
filenames(df::DataFrame)
filenames(collection::DataFrame)

Generator for filenames of entries in data frame.
"""
function filenames end

# generator for filenames specified by data frame (i.e. path of file, hdu etc.)
@resumable function filenames(df::DataFrame)
for row in eachrow(df)
@resumable function filenames(collection::DataFrame)
for row in eachrow(collection)
@yield row.path
end
end


"""
images(df::DataFrame)
images(collection::DataFrame)

Generator for `ImageHDU`s of entries in data frame.
"""
function images end

# generator for ImageHDU specified by data frame (i.e. path of file, hdu etc.)
@resumable function images(df::DataFrame)
for row in eachrow(df)
@resumable function images(collection::DataFrame)
for row in eachrow(collection)
@yield FITS(row.path)[row.hdu]
end
end


"""
    images(f, collection::DataFrame;
           save = false,
           path = nothing,
           save_prefix = nothing,
           save_suffix = nothing,
           save_delim = "_",
           ext = r"fits(\\.tar\\.gz)?"i,
           kwargs...)

Map `f` over the `ImageHDU`s yielded by `images(collection; kwargs...)` and return the array of results.

When `save = true`, every result is also written with `write_data` to a file inside `path`. The file name is built by `generate_filename` from the matching entry of `collection.path`: `save_prefix` and `save_suffix` are attached to the name with `save_delim` as delimiter, and `ext` (by default `r"fits(\\.tar\\.gz)?"i`) is the pattern used to locate the extension in the original file name. When `save = false`, nothing is written.
"""
function images(f, collection::DataFrame;
                save = false,
                path = nothing,
                save_prefix = nothing,
                save_suffix = nothing,
                save_delim = "_",
                ext = r"fits(\.tar\.gz)?"i,
                kwargs...)
    hdus = images(collection; kwargs...)
    sources = collection.path

    # apply `f` to one HDU and, when requested, write the result next to the
    # name derived from the file the HDU came from
    function apply_and_save((source, hdu))
        result = f(hdu)
        if save
            destination = generate_filename(source, path, save_prefix, save_suffix, save_delim, ext)
            write_data(destination, result)
        end
        return result
    end

    return map(apply_and_save, zip(sources, hdus))
end

"""
    filenames(f, collection::DataFrame;
              save = false,
              path = nothing,
              save_prefix = nothing,
              save_suffix = nothing,
              save_delim = "_",
              ext = r"fits(\\.tar\\.gz)?"i,
              kwargs...)

Map `f` over the file paths yielded by `filenames(collection; kwargs...)` and return the array of results.

When `save = true`, every result is also written with `write_data` to a file inside `path`. The file name is built by `generate_filename` from the matching entry of `collection.path`: `save_prefix` and `save_suffix` are attached to the name with `save_delim` as delimiter, and `ext` (by default `r"fits(\\.tar\\.gz)?"i`) is the pattern used to locate the extension in the original file name. When `save = false`, nothing is written.
"""
function filenames(f, collection::DataFrame;
                   save = false,
                   path = nothing,
                   save_prefix = nothing,
                   save_suffix = nothing,
                   save_delim = "_",
                   ext = r"fits(\.tar\.gz)?"i,
                   kwargs...)
    paths = filenames(collection; kwargs...)
    sources = collection.path

    # apply `f` to one file path and, when requested, write the result under
    # the name derived from that entry of the collection
    function apply_and_save((source, file_path))
        result = f(file_path)
        if save
            destination = generate_filename(source, path, save_prefix, save_suffix, save_delim, ext)
            write_data(destination, result)
        end
        return result
    end

    return map(apply_and_save, zip(sources, paths))
end


"""
    arrays(f, collection::DataFrame;
           save = false,
           path = nothing,
           save_prefix = nothing,
           save_suffix = nothing,
           save_delim = "_",
           ext = r"fits(\\.tar\\.gz)?"i,
           kwargs...)

Map `f` over the image arrays yielded by `arrays(collection; kwargs...)` and return the array of results.

When `save = true`, every result is also written with `write_data` to a file inside `path`. The file name is built by `generate_filename` from the matching entry of `collection.path`: `save_prefix` and `save_suffix` are attached to the name with `save_delim` as delimiter, and `ext` (by default `r"fits(\\.tar\\.gz)?"i`) is the pattern used to locate the extension in the original file name. When `save = false`, nothing is written.
"""
function arrays(f, collection::DataFrame;
                save = false,
                path = nothing,
                save_prefix = nothing,
                save_suffix = nothing,
                save_delim = "_",
                ext = r"fits(\.tar\.gz)?"i,
                kwargs...)
    image_arrays = arrays(collection; kwargs...)
    sources = collection.path

    # apply `f` to one image array and, when requested, write the result under
    # the name derived from the file the array was read from
    function apply_and_save((source, image_array))
        result = f(image_array)
        if save
            destination = generate_filename(source, path, save_prefix, save_suffix, save_delim, ext)
            write_data(destination, result)
        end
        return result
    end

    return map(apply_and_save, zip(sources, image_arrays))
end
Loading