JuliaML
diff --git a/Diff for: ‎docs/src/api.md
+1 b/Diff for: ‎docs/src/api.md
+1
diff --git a/Diff for: ‎src/MLUtils.jl
+6-3 b/Diff for: ‎src/MLUtils.jl
+6-3
diff --git a/Diff for: ‎src/batch.jl
+155 b/Diff for: ‎src/batch.jl
+155
diff --git a/Diff for: ‎src/folds.jl
+19-20 b/Diff for: ‎src/folds.jl
+19-20
diff --git a/Diff for: ‎src/utils.jl
+1-110 b/Diff for: ‎src/utils.jl
+1-110
@@ -27,6 +27,7 @@ shuffleobs
 
 ```@docs
 batch
+batch_sequence
 batchsize
 batchseq
 BatchView
 
@@ -63,10 +63,14 @@ export slidingwindow
 include("splitobs.jl")
 export splitobs
 
-include("utils.jl")
+include("batch.jl")
 export batch,
        batchseq,
-       chunk,
+       batch_sequence,
+       unbatch
+
+include("utils.jl")
+export chunk,
        falses_like,
        fill_like,
        flatten,
@@ -79,7 +83,6 @@ export batch,
        rpad_constant,
        stack, # in Base since julia v1.9
        trues_like,
-       unbatch,
        unsqueeze,
        unstack,
        zeros_like
 
@@ -0,0 +1,155 @@
+
+"""
+    batch(xs)
+
+Batch the arrays in `xs` into a single array with 
+an extra dimension.
+
+If the elements of `xs` are tuples, named tuples, or dicts, 
+the output will be of the same type. 
+
+See also [`unbatch`](@ref) and [`batch_sequence`](@ref).
+
+# Examples
+
+```jldoctest
+julia> batch([[1,2,3], 
+              [4,5,6]])
+3×2 Matrix{Int64}:
+ 1  4
+ 2  5
+ 3  6
+
+julia> batch([(a=[1,2], b=[3,4])
+               (a=[5,6], b=[7,8])]) 
+(a = [1 5; 2 6], b = [3 7; 4 8])
+```
+"""
+function batch(xs)
+# Fallback for generic iterables
+    @assert length(xs) > 0 "Input should be non-empty" 
+    data = first(xs) isa AbstractArray ?
+        similar(first(xs), size(first(xs))..., length(xs)) :
+        Vector{eltype(xs)}(undef, length(xs))
+    for (i, x) in enumerate(xs)
+        data[batchindex(data, i)...] = x
+    end
+    return data
+end
+
+batchindex(xs, i) = (reverse(Base.tail(reverse(axes(xs))))..., i)
+
+batch(xs::AbstractArray{<:AbstractArray}) = stack(xs)
+
+function batch(xs::Vector{<:Tuple})
+    @assert length(xs) > 0 "Input should be non-empty"
+    n = length(first(xs))
+    @assert all(length.(xs) .== n) "Cannot batch tuples with different lengths"
+    return ntuple(i -> batch([x[i] for x in xs]), n)
+end
+
+function batch(xs::Vector{<:NamedTuple})
+    @assert length(xs) > 0 "Input should be non-empty"
+    all_keys = [sort(collect(keys(x))) for x in xs]
+    ks = all_keys[1]
+    @assert all(==(ks), all_keys) "Cannot batch named tuples with different keys"
+    return NamedTuple(k => batch([x[k] for x in xs]) for k in ks)
+end
+
+function batch(xs::Vector{<:Dict})
+    @assert length(xs) > 0 "Input should be non-empty"
+    all_keys = [sort(collect(keys(x))) for x in xs]
+    ks = all_keys[1]
+    @assert all(==(ks), all_keys) "cannot batch dicts with different keys"
+    return Dict(k => batch([x[k] for x in xs]) for k in ks)
+end
+
+"""
+    unbatch(x)
+
+Reverse of the [`batch`](@ref) operation,
+unstacking the last dimension of the array `x`.
+
+See also [`unstack`](@ref) and [`chunk`](@ref).
+
+# Examples
+
+```jldoctest
+julia> unbatch([1 3 5 7;
+                2 4 6 8])
+4-element Vector{Vector{Int64}}:
+ [1, 2]
+ [3, 4]
+ [5, 6]
+ [7, 8]
+```                                                                                          
+"""
+unbatch(x::AbstractArray) = [getobs(x, i) for i in 1:numobs(x)]
+unbatch(x::AbstractVector) = x
+
+"""
+    batchseq(seqs, val = 0)
+
+Take a list of `N` sequences, and turn them into a single sequence where each
+item is a batch of `N`. Short sequences will be padded by `val`.
+
+# Examples
+
+```jldoctest
+julia> batchseq([[1, 2, 3], [4, 5]], 0)
+3-element Vector{Vector{Int64}}:
+ [1, 4]
+ [2, 5]
+ [3, 0]
+```
+"""
+function batchseq(xs, val = 0)
+    n = maximum(numobs, xs)
+    xs_ = [rpad_constant(x, n, val; dims=ndims(x)) for x in xs]
+    return [batch([getobs(xs_[j], i) for j = 1:length(xs_)]) for i = 1:n]
+end
+
+"""
+    batch_sequence(seqs; pad = 0)
+
+Take a list of `N` sequences `seqs`, 
+where the `i`-th sequence is an array with last dimension `Li`,
+and turn them into a single array with size `(..., Lmax, N)`.
+
+The sequences need to have the same size, except for the last dimension.
+
+Short sequences will be padded by `pad`.
+
+See also [`batch`](@ref).
+
+# Examples
+
+```jldoctest
+julia> batch_sequence([[1, 2, 3], [10, 20]])
+3×2 Matrix{Int64}:
+ 1  10
+ 2  20
+ 3   0
+
+julia> seqs = (ones(2, 3), fill(2.0, (2, 5)))
+([1.0 1.0 1.0; 1.0 1.0 1.0], [2.0 2.0 … 2.0 2.0; 2.0 2.0 … 2.0 2.0])
+
+julia> batch_sequence(seqs, pad=-1)
+2×5×2 Array{Float64, 3}:
+[:, :, 1] =
+ 1.0  1.0  1.0  -1.0  -1.0
+ 1.0  1.0  1.0  -1.0  -1.0
+
+[:, :, 2] =
+ 2.0  2.0  2.0  2.0  2.0
+ 2.0  2.0  2.0  2.0  2.0
+```
+"""
+function batch_sequence(xs; pad = 0)
+    sz = size(xs[1])[1:end-1]
+    @assert all(x -> size(x)[1:end-1] == sz, xs) "Array dimensions do not match."
+    n = ndims(xs[1])
+    Lmax = maximum(numobs, xs)
+    padded_seqs = [rpad_constant(x, Lmax, pad, dims=n) for x in xs]
+    return batch(padded_seqs)
+end
@@ -7,30 +7,29 @@ Compute the train/validation assignments for `k` repartitions of
 first vector contains the index-vectors for the training subsets,
 and the second vector the index-vectors for the validation subsets
 respectively. A general rule of thumb is to use either `k = 5` or
-`k = 10`. The following code snippet generates the indices
-assignments for `k = 5`
-
-```julia
-julia> train_idx, val_idx = kfolds(10, 5);
-```
+`k = 10`. 
 
 Each observation is assigned to the validation subset once (and
 only once). Thus, a union over all validation index-vectors
 reproduces the full range `1:n`. Note that there is no random
 assignment of observations to subsets, which means that adjacent
 observations are likely to be part of the same validation subset.
 
-```julia
+# Examples
+
+```jldoctest
+julia> train_idx, val_idx = kfolds(10, 5);
+
 julia> train_idx
-5-element Array{Array{Int64,1},1}:
- [3,4,5,6,7,8,9,10]
- [1,2,5,6,7,8,9,10]
- [1,2,3,4,7,8,9,10]
- [1,2,3,4,5,6,9,10]
- [1,2,3,4,5,6,7,8]
+5-element Vector{Vector{Int64}}:
+ [3, 4, 5, 6, 7, 8, 9, 10]
+ [1, 2, 5, 6, 7, 8, 9, 10]
+ [1, 2, 3, 4, 7, 8, 9, 10]
+ [1, 2, 3, 4, 5, 6, 9, 10]
+ [1, 2, 3, 4, 5, 6, 7, 8]
 
 julia> val_idx
-5-element Array{UnitRange{Int64},1}:
+5-element Vector{UnitRange{Int64}}:
  1:2
  3:4
  5:6
@@ -42,7 +41,7 @@ function kfolds(n::Integer, k::Integer = 5)
     2 <= k <= n || throw(ArgumentError("n must be positive and k must to be within 2:$(max(2,n))"))
     # Compute the size of each fold. This is important because
     # in general the number of total observations might not be
-    # divideable by k. In such cases it is custom that the remaining
+    # divisible by k. In such cases it is custom that the remaining
     # observations are divided among the folds. Thus some folds
     # have one more observation than others.
     sizes = fill(floor(Int, n/k), k)
@@ -52,15 +51,15 @@ function kfolds(n::Integer, k::Integer = 5)
     # Compute start offset for each fold
     offsets = cumsum(sizes) .- sizes .+ 1
     # Compute the validation indices using the offsets and sizes
-    val_indices = map((o,s)->(o:o+s-1), offsets, sizes)
+    val_indices = map((o,s) -> (o:o+s-1), offsets, sizes)
     # The train indices are then the indicies not in validation
-    train_indices = map(idx->setdiff(1:n,idx), val_indices)
+    train_indices = map(idx -> setdiff(1:n, idx), val_indices)
     # We return a tuple of arrays
-    train_indices, val_indices
+    return train_indices, val_indices
 end
 
 """
-    kfolds(data, [k = 5])
+    kfolds(data, k = 5)
 
 Repartition a `data` container `k` times using a `k` folds
 strategy and return the sequence of folds as a lazy iterator. 
@@ -96,7 +95,7 @@ By default the folds are created using static splits. Use
 folds.
 
 ```julia
-for (x_train, x_val) in kfolds(shuffleobs(X), k = 10)
+for (x_train, x_val) in kfolds(shuffleobs(X), k=10)
     # ...
 end
 ```
 
@@ -326,116 +326,6 @@ function group_indices(classes::T) where T<:AbstractVector
 end
 
 
-"""
-    batch(xs)
-
-Batch the arrays in `xs` into a single array with 
-an extra dimension.
-
-If the elements of `xs` are tuples, named tuples, or dicts, 
-the output will be of the same type. 
-
-See also [`unbatch`](@ref).
-
-# Examples
-
-```jldoctest
-julia> batch([[1,2,3], 
-              [4,5,6]])
-3×2 Matrix{Int64}:
- 1  4
- 2  5
- 3  6
-
-julia> batch([(a=[1,2], b=[3,4])
-               (a=[5,6], b=[7,8])]) 
-(a = [1 5; 2 6], b = [3 7; 4 8])
-```
-"""
-function batch(xs)
-# Fallback for generric iterables
-    @assert length(xs) > 0 "Input should be non-empty" 
-    data = first(xs) isa AbstractArray ?
-        similar(first(xs), size(first(xs))..., length(xs)) :
-        Vector{eltype(xs)}(undef, length(xs))
-    for (i, x) in enumerate(xs)
-        data[batchindex(data, i)...] = x
-    end
-    return data
-end
-
-batchindex(xs, i) = (reverse(Base.tail(reverse(axes(xs))))..., i)
-
-batch(xs::AbstractArray{<:AbstractArray}) = stack(xs)
-
-function batch(xs::Vector{<:Tuple})
-    @assert length(xs) > 0 "Input should be non-empty"
-    n = length(first(xs))
-    @assert all(length.(xs) .== n) "Cannot batch tuples with different lengths"
-    return ntuple(i -> batch([x[i] for x in xs]), n)
-end
-
-function batch(xs::Vector{<:NamedTuple})
-    @assert length(xs) > 0 "Input should be non-empty"
-    all_keys = [sort(collect(keys(x))) for x in xs]
-    ks = all_keys[1]
-    @assert all(==(ks), all_keys) "Cannot batch named tuples with different keys"
-    return NamedTuple(k => batch([x[k] for x in xs]) for k in ks)
-end
-
-function batch(xs::Vector{<:Dict})
-    @assert length(xs) > 0 "Input should be non-empty"
-    all_keys = [sort(collect(keys(x))) for x in xs]
-    ks = all_keys[1]
-    @assert all(==(ks), all_keys) "cannot batch dicts with different keys"
-    return Dict(k => batch([x[k] for x in xs]) for k in ks)
-end
-
-"""
-    unbatch(x)
-
-Reverse of the [`batch`](@ref) operation,
-unstacking the last dimension of the array `x`.
-
-See also [`unstack`](@ref) and [`chunk`](@ref).
-
-# Examples
-
-```jldoctest
-julia> unbatch([1 3 5 7;
-                2 4 6 8])
-4-element Vector{Vector{Int64}}:
- [1, 2]
- [3, 4]
- [5, 6]
- [7, 8]
-```                                                                                          
-"""
-unbatch(x::AbstractArray) = [getobs(x, i) for i in 1:numobs(x)]
-unbatch(x::AbstractVector) = x
-
-"""
-    batchseq(seqs, val = 0)
-
-Take a list of `N` sequences, and turn them into a single sequence where each
-item is a batch of `N`. Short sequences will be padded by `val`.
-
-# Examples
-
-```jldoctest
-julia> batchseq([[1, 2, 3], [4, 5]], 0)
-3-element Vector{Vector{Int64}}:
- [1, 4]
- [2, 5]
- [3, 0]
-```
-"""
-function batchseq(xs, val = 0)
-    n = maximum(numobs, xs)
-    xs_ = [rpad_constant(x, n, val; dims=ndims(x)) for x in xs]
-    return [batch([getobs(xs_[j], i) for j = 1:length(xs_)]) for i = 1:n]
-end
-
 """
     rpad_constant(v::AbstractArray, n::Union{Integer, Tuple}, val = 0; dims=:)
 
@@ -765,3 +655,4 @@ function rrule(::typeof(fill_like), x::AbstractArray, val, T::Type, sz)
     end
     return fill_like(x, val, T, sz), fill_like_pullback
 end
+