
Commit 33c5257: Merge pull request #200 from theabhirath/midlevel
(2 parents: f001221 + ab96700)


48 files changed (+1182 -851 lines)

Project.toml (+2 -2)

@@ -19,10 +19,10 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
 BSON = "0.3.2"
-Flux = "0.13"
-Functors = "0.2, 0.3"
 CUDA = "3"
 ChainRulesCore = "1"
+Flux = "0.13"
+Functors = "0.2, 0.3"
 MLUtils = "0.2.10"
 NNlib = "0.8"
 NNlibCUDA = "0.2"
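The change is purely cosmetic: the `[compat]` entries are alphabetized, with the version bounds themselves unchanged. As an aside, compat bounds can also be managed programmatically; a minimal sketch using the Pkg API (available on Julia 1.8 and later):

```julia
using Pkg

# Writes (or updates) the [compat] entry for Flux in the active
# project's Project.toml, equivalent to editing the file by hand.
Pkg.compat("Flux", "0.13")
```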

docs/dev-guide/contributing.md (+1 -1)

@@ -16,7 +16,7 @@ To add a new model architecture to Metalhead.jl, you can [open a PR](https://git
 
 - reuse layers from Flux as much as possible (e.g. use `Parallel` before defining a `Bottleneck` struct)
 - adhere as closely as possible to a reference such as a published paper (i.e. the structure of your model should follow intuitively from the paper)
-- use generic functional builders (e.g. [`resnet`](#) is the core function that builds "ResNet-like" models)
+- use generic functional builders (e.g. [`Metalhead.resnet`](@ref) is the core function that builds "ResNet-like" models)
 - use multiple dispatch to add convenience constructors that wrap your functional builder
 
 When in doubt, just open a PR! We are more than happy to help review your code to help it align with the rest of the library. After adding a model, you might consider adding some pre-trained weights (see below).
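The last two guidelines are the pattern this PR applies across the library: a generic functional builder that owns all structural decisions, wrapped by a struct whose convenience constructors are added via multiple dispatch. A minimal sketch of the idea (`tinynet` and `TinyNet` are hypothetical names, not part of Metalhead.jl):

```julia
using Flux

# Generic functional builder: structure is driven by the arguments,
# and the return value is a plain Flux model.
function tinynet(depths::AbstractVector{<:Integer}; inchannels::Integer = 3,
                 nclasses::Integer = 1000)
    layers = Any[Conv((3, 3), inchannels => depths[1], relu; pad = 1)]
    for (i, o) in zip(depths[1:(end - 1)], depths[2:end])
        push!(layers, Conv((3, 3), i => o, relu; pad = 1, stride = 2))
    end
    classifier = Chain(GlobalMeanPool(), Flux.flatten, Dense(depths[end], nclasses))
    return Chain(Chain(layers...), classifier)
end

# Wrapper struct; multiple dispatch adds user-friendly constructors.
struct TinyNet
    layers::Any
end
function TinyNet(config::Symbol; kwargs...)
    depths = config === :small ? [16, 32] : [32, 64, 128]
    return TinyNet(tinynet(depths; kwargs...))
end
(m::TinyNet)(x) = m.layers(x)
```

The builder owns the structure; the wrapper only adds ergonomics, so new configurations are one dispatch away.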

docs/tutorials/quickstart.md (+1 -1)

@@ -5,7 +5,7 @@
 using Flux, Metalhead
 ```
 
-Using a model from Metalhead is as simple as selecting a model from the table of [available models](#). For example, below we use the pre-trained ResNet-18 model.
+Using a model from Metalhead is as simple as selecting a model from the table of [available models](@ref). For example, below we use the pre-trained ResNet-18 model.
 {cell=quickstart}
 ```julia
 using Flux, Metalhead
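Spelled out, the quickstart's first step looks something like this (a sketch assuming the `ResNet(depth; pretrain)` constructor; the pretrained weights are downloaded as an artifact on first use):

```julia
using Flux, Metalhead

# Load the pre-trained ResNet-18.
model = ResNet(18; pretrain = true)

# Run inference on a dummy 224×224 RGB image in Flux's WHCN layout.
x = rand(Float32, 224, 224, 3, 1)
probs = softmax(model(x))
```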

src/Metalhead.jl (+8 -7)

@@ -20,26 +20,27 @@ using .Layers
 
 # CNN models
 ## Builders
-include("convnets/builders/core.jl")
+include("convnets/builders/invresmodel.jl")
 include("convnets/builders/mbconv.jl")
 include("convnets/builders/resblocks.jl")
+include("convnets/builders/resnet.jl")
+include("convnets/builders/stages.jl")
 ## AlexNet and VGG
 include("convnets/alexnet.jl")
 include("convnets/vgg.jl")
 ## ResNets
 include("convnets/resnets/core.jl")
+include("convnets/resnets/res2net.jl")
 include("convnets/resnets/resnet.jl")
 include("convnets/resnets/resnext.jl")
 include("convnets/resnets/seresnet.jl")
-include("convnets/resnets/res2net.jl")
 ## Inceptions
 include("convnets/inceptions/googlenet.jl")
 include("convnets/inceptions/inceptionv3.jl")
 include("convnets/inceptions/inceptionv4.jl")
 include("convnets/inceptions/inceptionresnetv2.jl")
 include("convnets/inceptions/xception.jl")
 ## EfficientNets
-include("convnets/efficientnets/core.jl")
 include("convnets/efficientnets/efficientnet.jl")
 include("convnets/efficientnets/efficientnetv2.jl")
 ## MobileNets

@@ -71,16 +72,16 @@ export AlexNet, VGG, VGG11, VGG13, VGG16, VGG19,
        DenseNet, DenseNet121, DenseNet161, DenseNet169, DenseNet201,
        GoogLeNet, Inception3, Inceptionv3, Inceptionv4, InceptionResNetv2, Xception,
        SqueezeNet, MobileNetv1, MobileNetv2, MobileNetv3, MNASNet,
-       EfficientNet, EfficientNetv2,
-       MLPMixer, ResMLP, gMLP, ViT, ConvMixer, ConvNeXt
+       EfficientNet, EfficientNetv2, ConvMixer, ConvNeXt,
+       MLPMixer, ResMLP, gMLP, ViT
 
 # use Flux._big_show to pretty print large models
 for T in (:AlexNet, :VGG, :SqueezeNet, :ResNet, :WideResNet, :ResNeXt,
           :SEResNet, :SEResNeXt, :Res2Net, :Res2NeXt, :GoogLeNet, :DenseNet,
          :Inceptionv3, :Inceptionv4, :InceptionResNetv2, :Xception,
          :MobileNetv1, :MobileNetv2, :MobileNetv3, :MNASNet,
-          :EfficientNet, :EfficientNetv2,
-          :MLPMixer, :ResMLP, :gMLP, :ViT, :ConvMixer, :ConvNeXt)
+          :EfficientNet, :EfficientNetv2, :ConvMixer, :ConvNeXt,
+          :MLPMixer, :ResMLP, :gMLP, :ViT)
     @eval Base.show(io::IO, ::MIME"text/plain", model::$T) = _maybe_big_show(io, model)
 end
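The `for`/`@eval` loop is a standard metaprogramming idiom: it stamps out one `Base.show` method per exported model type so that large models get Flux's expanded, layer-by-layer display. A sketch of the same hook for a single hypothetical wrapper type (`_maybe_big_show` is internal to Metalhead; the sketch calls `Flux._big_show`, the internal printer named in the source comment, directly):

```julia
using Flux

# Hypothetical wrapper standing in for any Metalhead model struct.
struct MyModel
    layers::Any
end
(m::MyModel)(x) = m.layers(x)

# The @eval loop above generates one method like this per model type.
Base.show(io::IO, ::MIME"text/plain", m::MyModel) = Flux._big_show(io, m)
```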

src/convnets/alexnet.jl (+6 -5)

@@ -1,15 +1,16 @@
 """
-    alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000)
+    alexnet(; dropout_prob = 0.5, inchannels::Integer = 3, nclasses::Integer = 1000)
 
 Create an AlexNet model
 ([reference](https://papers.nips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf)).
 
 # Arguments
 
+  - `dropout_prob`: dropout probability for the classifier
   - `inchannels`: The number of input channels.
   - `nclasses`: the number of output classes
 """
-function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000)
+function alexnet(; dropout_prob = 0.5, inchannels::Integer = 3, nclasses::Integer = 1000)
     backbone = Chain(Conv((11, 11), inchannels => 64, relu; stride = 4, pad = 2),
                      MaxPool((3, 3); stride = 2),
                      Conv((5, 5), 64 => 192, relu; pad = 2),

@@ -19,9 +20,9 @@ function alexnet(; inchannels::Integer = 3, nclasses::Integer = 1000)
                      Conv((3, 3), 256 => 256, relu; pad = 1),
                      MaxPool((3, 3); stride = 2))
     classifier = Chain(AdaptiveMeanPool((6, 6)), MLUtils.flatten,
-                       Dropout(0.5),
+                       Dropout(dropout_prob),
                        Dense(256 * 6 * 6, 4096, relu),
-                       Dropout(0.5),
+                       Dropout(dropout_prob),
                        Dense(4096, 4096, relu),
                        Dense(4096, nclasses))
     return Chain(backbone, classifier)

@@ -44,7 +45,7 @@ Create a `AlexNet`.
 
 `AlexNet` does not currently support pretrained weights.
 
-See also [`alexnet`](#).
+See also [`Metalhead.alexnet`](@ref).
 """
 struct AlexNet
     layers::Any
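The net effect of this change is that the classifier's dropout probability becomes a user-facing knob rather than a hard-coded 0.5. A usage sketch (keyword values are illustrative):

```julia
using Metalhead

# Functional builder with a custom dropout probability for the classifier.
m = Metalhead.alexnet(; dropout_prob = 0.3, nclasses = 10)

# The exported wrapper keeps the default probability of 0.5.
model = AlexNet(; nclasses = 10)
```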

src/convnets/builders/core.jl (-19)

This file was deleted.

src/convnets/builders/invresmodel.jl (+44, new file)

@@ -0,0 +1,44 @@
+function build_invresmodel(scalings::NTuple{2, Real},
+                           block_configs::AbstractVector{<:Tuple};
+                           inplanes::Integer = 32, connection = +, activation = relu,
+                           norm_layer = BatchNorm, divisor::Integer = 8,
+                           tail_conv::Bool = true, expanded_classifier::Bool = false,
+                           stochastic_depth_prob = nothing, headplanes::Integer,
+                           dropout_prob = nothing, inchannels::Integer = 3,
+                           nclasses::Integer = 1000, kwargs...)
+    width_mult, _ = scalings
+    # building first layer
+    inplanes = _round_channels(inplanes * width_mult, divisor)
+    layers = []
+    append!(layers,
+            conv_norm((3, 3), inchannels, inplanes, activation; stride = 2, pad = 1,
+                      norm_layer))
+    # building inverted residual blocks
+    get_layers, block_repeats = mbconv_stage_builder(block_configs, inplanes, scalings;
+                                                     stochastic_depth_prob, norm_layer,
+                                                     divisor, kwargs...)
+    append!(layers, cnn_stages(get_layers, block_repeats, connection))
+    # building last layers
+    outplanes = _round_channels(block_configs[end][3] * width_mult, divisor)
+    if tail_conv
+        # special case, supported fully only for MobileNetv3
+        if expanded_classifier
+            midplanes = _round_channels(outplanes * block_configs[end][4], divisor)
+            append!(layers,
+                    conv_norm((1, 1), outplanes, midplanes, activation; norm_layer))
+            classifier = create_classifier(midplanes, headplanes, nclasses,
+                                           (hardswish, identity); dropout_prob)
+        else
+            append!(layers,
+                    conv_norm((1, 1), outplanes, headplanes, activation; norm_layer))
+            classifier = create_classifier(headplanes, nclasses; dropout_prob)
+        end
+    else
+        classifier = create_classifier(outplanes, nclasses; dropout_prob)
+    end
+    return Chain(Chain(layers...), classifier)
+end
+function build_invresmodel(width_mult::Real, block_configs::AbstractVector{<:Tuple};
+                           kwargs...)
+    return build_invresmodel((width_mult, 1), block_configs; kwargs...)
+end
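`build_invresmodel` becomes the single trunk shared by the inverted-residual families (MobileNet, MNASNet, EfficientNet). A hedged sketch of a direct call, using an illustrative three-stage config rather than one of the library's presets; for `mbconv` entries, each tuple reads (block, kernel size, output channels, expansion, stride, repeats, SE reduction, activation), matching how the builders in mbconv.jl destructure it:

```julia
using Flux, Metalhead
using Metalhead: mbconv, build_invresmodel  # unexported internals

block_configs = [
    (mbconv, 3, 16, 1, 1, 1, 4, relu),  # stage 1: no expansion
    (mbconv, 3, 24, 6, 2, 2, 4, relu),  # stage 2: stride-2 downsampling
    (mbconv, 5, 40, 6, 2, 2, 4, relu),  # stage 3: larger kernel
]

# (width, depth) scalings of (1, 1) leave the config as written;
# `headplanes` has no default and must always be supplied.
model = build_invresmodel((1, 1), block_configs; headplanes = 1280)
```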

src/convnets/builders/mbconv.jl (+56 -66)

@@ -1,9 +1,24 @@
-function dwsepconv_builder(block_configs, inplanes::Integer, stage_idx::Integer,
-                           width_mult::Real; norm_layer = BatchNorm, kwargs...)
+"""
+    invresbuilder(::typeof(irblockfn), block_configs::AbstractVector{<:Tuple},
+                  inplanes::Integer, stage_idx::Integer, scalings::NTuple{2, Real};
+                  stochastic_depth_prob = nothing, norm_layer = BatchNorm,
+                  divisor::Integer = 8, kwargs...)
+
+Creates a block builder for `irblockfn` within a given stage.
+Note that this function is not intended to be called directly, but instead passed to
+[`mbconv_stage_builder`](@ref) which will return a builder over all stages.
+Users wanting to provide a custom inverted residual block type can extend this
+function by defining `invresbuilder(::typeof(my_block), ...)`.
+"""
+function invresbuilder(::typeof(dwsep_conv_norm), block_configs::AbstractVector{<:Tuple},
+                       inplanes::Integer, stage_idx::Integer, scalings::NTuple{2, Real};
+                       stochastic_depth_prob = nothing, norm_layer = BatchNorm,
+                       divisor::Integer = 8, kwargs...)
+    width_mult, depth_mult = scalings
     block_fn, k, outplanes, stride, nrepeats, activation = block_configs[stage_idx]
-    outplanes = _round_channels(outplanes * width_mult)
+    outplanes = _round_channels(outplanes * width_mult, divisor)
     if stage_idx != 1
-        inplanes = _round_channels(block_configs[stage_idx - 1][3] * width_mult)
+        inplanes = _round_channels(block_configs[stage_idx - 1][3] * width_mult, divisor)
     end
     function get_layers(block_idx::Integer)
         inplanes = block_idx == 1 ? inplanes : outplanes

@@ -12,15 +27,17 @@ function dwsepconv_builder(block_configs, inplanes::Integer, stage_idx::Integer,
                            stride, pad = SamePad(), norm_layer, kwargs...)...)
         return (block,)
     end
-    return get_layers, nrepeats
+    return get_layers, ceil(Int, nrepeats * depth_mult)
 end
 
-function mbconv_builder(block_configs, inplanes::Integer, stage_idx::Integer,
-                        scalings::NTuple{2, Real}; norm_layer = BatchNorm,
-                        divisor::Integer = 8, se_from_explanes::Bool = false,
-                        kwargs...)
+function invresbuilder(::typeof(mbconv), block_configs::AbstractVector{<:Tuple},
+                       inplanes::Integer, stage_idx::Integer, scalings::NTuple{2, Real};
+                       stochastic_depth_prob = nothing, norm_layer = BatchNorm,
+                       divisor::Integer = 8, se_from_explanes::Bool = false, kwargs...)
     width_mult, depth_mult = scalings
-    block_fn, k, outplanes, expansion, stride, nrepeats, reduction, activation = block_configs[stage_idx]
+    block_repeats = [ceil(Int, block_configs[idx][end - 2] * depth_mult)
+                     for idx in eachindex(block_configs)]
+    block_fn, k, outplanes, expansion, stride, _, reduction, activation = block_configs[stage_idx]
     # calculate number of reduced channels for squeeze-excite layer from explanes instead of inplanes
     if !isnothing(reduction)
         reduction = !se_from_explanes ? reduction * expansion : reduction

@@ -29,79 +46,52 @@ function mbconv_builder(block_configs, inplanes::Integer, stage_idx::Integer,
         inplanes = _round_channels(block_configs[stage_idx - 1][3] * width_mult, divisor)
     end
     outplanes = _round_channels(outplanes * width_mult, divisor)
+    sdschedule = linear_scheduler(stochastic_depth_prob; depth = sum(block_repeats))
     function get_layers(block_idx::Integer)
         inplanes = block_idx == 1 ? inplanes : outplanes
         explanes = _round_channels(inplanes * expansion, divisor)
         stride = block_idx == 1 ? stride : 1
         block = block_fn((k, k), inplanes, explanes, outplanes, activation; norm_layer,
                          stride, reduction, kwargs...)
-        return stride == 1 && inplanes == outplanes ? (identity, block) : (block,)
+        use_skip = stride == 1 && inplanes == outplanes
+        if use_skip
+            schedule_idx = sum(block_repeats[1:(stage_idx - 1)]) + block_idx
+            drop_path = StochasticDepth(sdschedule[schedule_idx])
+            return (drop_path, block)
+        else
+            return (block,)
+        end
     end
-    return get_layers, ceil(Int, nrepeats * depth_mult)
-end
-
-function mbconv_builder(block_configs, inplanes::Integer, stage_idx::Integer,
-                        width_mult::Real; norm_layer = BatchNorm, kwargs...)
-    return mbconv_builder(block_configs, inplanes, stage_idx, (width_mult, 1);
-                          norm_layer, kwargs...)
+    return get_layers, block_repeats[stage_idx]
 end
 
-function fused_mbconv_builder(block_configs, inplanes::Integer, stage_idx::Integer;
-                              norm_layer = BatchNorm, kwargs...)
-    block_fn, k, outplanes, expansion, stride, nrepeats, activation = block_configs[stage_idx]
+function invresbuilder(::typeof(fused_mbconv), block_configs::AbstractVector{<:Tuple},
+                       inplanes::Integer, stage_idx::Integer, scalings::NTuple{2, Real};
+                       stochastic_depth_prob = nothing, norm_layer = BatchNorm,
+                       divisor::Integer = 8, kwargs...)
+    width_mult, depth_mult = scalings
+    block_repeats = [ceil(Int, block_configs[idx][end - 1] * depth_mult)
+                     for idx in eachindex(block_configs)]
+    block_fn, k, outplanes, expansion, stride, _, activation = block_configs[stage_idx]
     inplanes = stage_idx == 1 ? inplanes : block_configs[stage_idx - 1][3]
+    outplanes = _round_channels(outplanes * width_mult, divisor)
+    sdschedule = linear_scheduler(stochastic_depth_prob; depth = sum(block_repeats))
     function get_layers(block_idx::Integer)
         inplanes = block_idx == 1 ? inplanes : outplanes
-        explanes = _round_channels(inplanes * expansion, 8)
+        explanes = _round_channels(inplanes * expansion, divisor)
         stride = block_idx == 1 ? stride : 1
         block = block_fn((k, k), inplanes, explanes, outplanes, activation;
                          norm_layer, stride, kwargs...)
-        return stride == 1 && inplanes == outplanes ? (identity, block) : (block,)
+        schedule_idx = sum(block_repeats[1:(stage_idx - 1)]) + block_idx
+        drop_path = StochasticDepth(sdschedule[schedule_idx])
+        return stride == 1 && inplanes == outplanes ? (drop_path, block) : (block,)
     end
-    return get_layers, nrepeats
-end
-
-# TODO - these builders need to be more flexible to potentially specify stuff like
-# activation functions and reductions that don't change
-function _get_builder(::typeof(dwsep_conv_bn), block_configs::AbstractVector{<:Tuple},
-                      inplanes::Integer, stage_idx::Integer;
-                      scalings::Union{Nothing, NTuple{2, Real}} = nothing,
-                      width_mult::Union{Nothing, Number} = nothing, norm_layer, kwargs...)
-    @assert isnothing(scalings) "dwsep_conv_bn does not support the `scalings` argument"
-    return dwsepconv_builder(block_configs, inplanes, stage_idx, width_mult; norm_layer,
-                             kwargs...)
-end
-
-function _get_builder(::typeof(mbconv), block_configs::AbstractVector{<:Tuple},
-                      inplanes::Integer, stage_idx::Integer;
-                      scalings::Union{Nothing, NTuple{2, Real}} = nothing,
-                      width_mult::Union{Nothing, Number} = nothing, norm_layer, kwargs...)
-    if isnothing(scalings)
-        return mbconv_builder(block_configs, inplanes, stage_idx, width_mult; norm_layer,
-                              kwargs...)
-    elseif isnothing(width_mult)
-        return mbconv_builder(block_configs, inplanes, stage_idx, scalings; norm_layer,
-                              kwargs...)
-    else
-        throw(ArgumentError("Only one of `scalings` and `width_mult` can be specified"))
-    end
-end
-
-function _get_builder(::typeof(fused_mbconv), block_configs::AbstractVector{<:Tuple},
-                      inplanes::Integer, stage_idx::Integer;
-                      scalings::Union{Nothing, NTuple{2, Real}} = nothing,
-                      width_mult::Union{Nothing, Number} = nothing, norm_layer)
-    @assert isnothing(width_mult) "fused_mbconv does not support the `width_mult` argument."
-    @assert isnothing(scalings)||scalings == (1, 1) "fused_mbconv does not support the `scalings` argument"
-    return fused_mbconv_builder(block_configs, inplanes, stage_idx; norm_layer)
+    return get_layers, block_repeats[stage_idx]
 end
 
-function mbconv_stack_builder(block_configs::AbstractVector{<:Tuple}, inplanes::Integer;
-                              scalings::Union{Nothing, NTuple{2, Real}} = nothing,
-                              width_mult::Union{Nothing, Number} = nothing,
-                              norm_layer = BatchNorm, kwargs...)
-    bxs = [_get_builder(block_configs[idx][1], block_configs, inplanes, idx; scalings,
-                        width_mult, norm_layer, kwargs...)
-           for idx in eachindex(block_configs)]
+function mbconv_stage_builder(block_configs::AbstractVector{<:Tuple}, inplanes::Integer,
+                              scalings::NTuple{2, Real}; kwargs...)
+    bxs = [invresbuilder(block_configs[idx][1], block_configs, inplanes, idx, scalings;
+                         kwargs...) for idx in eachindex(block_configs)]
     return (stage_idx, block_idx) -> first.(bxs)[stage_idx](block_idx), last.(bxs)
 end
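As the new docstring invites, a custom inverted-residual block can be hooked into this machinery by adding a method to `invresbuilder`. A hedged sketch under that assumption: `my_block` is hypothetical, its config layout mirrors the `dwsep_conv_norm` method (block, kernel, outplanes, stride, repeats, activation), and unused keywords such as `stochastic_depth_prob` are simply absorbed:

```julia
using Flux, Metalhead

# Hypothetical block: a plain convolution followed by normalization.
function my_block(kernel, inplanes, outplanes, activation; norm_layer = BatchNorm,
                  stride = 1, kwargs...)
    return Chain(Conv(kernel, inplanes => outplanes; stride, pad = SamePad()),
                 norm_layer(outplanes, activation))
end

function Metalhead.invresbuilder(::typeof(my_block), block_configs::AbstractVector{<:Tuple},
                                 inplanes::Integer, stage_idx::Integer,
                                 scalings::NTuple{2, Real}; norm_layer = BatchNorm,
                                 kwargs...)
    _, depth_mult = scalings
    _, k, outplanes, stride, nrepeats, activation = block_configs[stage_idx]
    inplanes = stage_idx == 1 ? inplanes : block_configs[stage_idx - 1][3]
    function get_layers(block_idx::Integer)
        planes_in = block_idx == 1 ? inplanes : outplanes
        s = block_idx == 1 ? stride : 1
        return (my_block((k, k), planes_in, outplanes, activation; norm_layer,
                         stride = s),)
    end
    return get_layers, ceil(Int, nrepeats * depth_mult)
end
```

With such a method in place, `(my_block, 3, 64, 2, 2, relu)` entries could sit alongside `mbconv` stages in the same `block_configs` vector.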
