- # Classification of MNIST dataset using a convnet, a variant of the original LeNet
+ # Classification of MNIST dataset using a convolutional network,
+ # which is a variant of the original LeNet from 1998.

- using MLDatasets, Flux, CUDA, BSON  # this will install everything if necessary
+ using MLDatasets, Flux, BSON, CUDA  # this will install everything if necessary

#===== DATA =====#

@@ -13,7 +14,7 @@ test_data = MLDatasets.MNIST(split=:test)
# train_data.features is a 28×28×60000 Array{Float32, 3} of the images.
# Flux needs a 4D array, with the 3rd dim for channels -- here trivial, grayscale.
- # Combine the reshape needed other pre-processing:
+ # Combine the reshape needed with other pre-processing:

function loader(data::MNIST=train_data; batchsize::Int=64)
    x, y = data[:]  # this is a NamedTuple of (features, targets)
@@ -26,7 +27,8 @@ loader() # returns a DataLoader, with first element a tuple like this:

x1, y1 = first(loader());  # (28×28×1×64 Array{Float32, 4}, 10×64 OneHotMatrix(::Vector{UInt32}))
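
# The pre-processing elided by this hunk presumably amounts to something like the
# sketch below (not the file's exact code): add a trivial channel dim, one-hot the
# labels, and wrap both in a DataLoader:
#
#     xr = reshape(x, 28, 28, 1, :)        # 28×28×1×60000
#     yhot = Flux.onehotbatch(y, 0:9)      # 10×60000 OneHotMatrix
#     Flux.DataLoader((xr, yhot) |> gpu; batchsize, shuffle=true)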

- # If you are using a GPU, these should be CuArray{Float32, 3} etc.
+ # If you are using a GPU, these should be CuArray{Float32, 4} etc.
+ # If not, the `gpu` function does nothing (except complain the first time).
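
# To check which case applies (a sketch; CUDA.functional() is CUDA.jl's standard
# test for a usable device):
#
#     CUDA.functional()  # true if a working GPU was found
#     typeof(x1)         # CuArray{Float32, 4, ...} on GPU, plain Array{Float32, 4} on CPU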

#===== MODEL =====#

@@ -44,6 +46,8 @@ lenet = Chain(
    Dense(84 => 10),
) |> gpu

+ # Notice that most of the parameters are in the final Dense layers.
+
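# To see that concretely, count parameters -- a sketch, assuming (as the size trace
# at the end suggests) that the Dense layers sit at positions 6-8 of the Chain:
#
#     sum(length, Flux.params(lenet))       # 44426 parameters in total
#     sum(length, Flux.params(lenet[6:8]))  # 41854 of them in the Dense layers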

y1hat = lenet(x1)  # try it out

softmax(y1hat)
@@ -63,7 +67,7 @@ hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))
using Statistics: mean  # standard library

function loss_and_accuracy(model, data::MNIST=test_data)
-     (x,y) = only(loader(data; batchsize=0))  # batchsize=0 means one big batch
+     (x,y) = only(loader(data; batchsize=length(data)))  # make one big batch
    ŷ = model(x)
    loss = Flux.logitcrossentropy(ŷ, y)  # did not include softmax in the model
    acc = round(100 * mean(Flux.onecold(ŷ) .== Flux.onecold(y)); digits=2)
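
# That comment can be checked directly; the two loss forms agree up to rounding:
#
#     Flux.logitcrossentropy(y1hat, y1) ≈ Flux.crossentropy(softmax(y1hat), y1)  # true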
@@ -91,6 +95,7 @@ opt_rule = OptimiserChain(WeightDecay(settings.lambda), Adam(settings.eta))
opt_state = Flux.setup(opt_rule, lenet);

for epoch in 1:settings.epochs
+     # @time will show a much longer time for the first epoch, due to compilation
    @time for (x,y) in loader(batchsize=settings.batchsize)
        grads = Flux.gradient(m -> Flux.logitcrossentropy(m(x), y), lenet)
        Flux.update!(opt_state, lenet, grads[1])
@@ -101,7 +106,7 @@ for epoch in 1:settings.epochs
        loss, acc, _ = loss_and_accuracy(lenet)
        test_loss, test_acc, _ = loss_and_accuracy(lenet, test_data)
        @info "logging:" epoch acc test_acc
-         nt = (; epoch, loss, acc, test_loss, test_acc)
+         nt = (; epoch, loss, acc, test_loss, test_acc)  # make a NamedTuple, i.e. (epoch = epoch, ...)
        push!(train_log, nt)
    end
    if epoch % 5 == 0
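        # The elided body presumably writes a checkpoint every 5 epochs; a sketch of
        # the usual BSON pattern (not the file's exact code):
        #
        #     lenet_cpu = cpu(lenet)             # move parameters off the GPU first
        #     BSON.@save "lenet.bson" lenet_cpu  # reload later with BSON.@load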
@@ -118,16 +123,16 @@ hcat(Flux.onecold(y1hat, 0:9), Flux.onecold(y1, 0:9))

#===== INSPECTION =====#

- using ImageInTerminal, ImageCore
+ using ImageCore, ImageInTerminal

- xtest, ytest = only(loader(test_data, batchsize=0))
+ xtest, ytest = only(loader(test_data, batchsize=length(test_data)));

# There are many ways to look at images; you won't need ImageInTerminal if working in a notebook.
# ImageCore.Gray is a special type, which interprets numbers between 0.0 and 1.0 as shades:

- xtest[:,:,1,5] .|> Gray |> transpose  # should display a 4
+ xtest[:,:,1,5] .|> Gray |> transpose |> cpu

- Flux.onecold(ytest, 0:9)[5]  # it's coded as being a 4
+ Flux.onecold(ytest, 0:9)[5]  # true label, should match!
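
# onecold is the inverse of one-hot encoding; a tiny standalone example of the pair:
#
#     Flux.onehotbatch([0, 9], 0:9)                     # 10×2 OneHotMatrix
#     Flux.onecold(Flux.onehotbatch([0, 9], 0:9), 0:9)  # recovers [0, 9]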

# Let's look for the image whose classification is least certain.
# First, in each column of probabilities, ask for the largest one.
@@ -137,33 +142,18 @@ ptest = softmax(lenet(xtest))
max_p = maximum(ptest; dims=1)
_, i = findmin(vec(max_p))

- xtest[:,:,1,i] .|> Gray |> transpose
+ xtest[:,:,1,i] .|> Gray |> transpose |> cpu

Flux.onecold(ytest, 0:9)[i]  # true classification
+ ptest[:,i]  # probabilities of all outcomes
Flux.onecold(ptest[:,i], 0:9)  # uncertain prediction
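
# Since ptest comes from softmax, each column is a probability distribution:
#
#     sum(ptest[:, i]) ≈ 1  # true by construction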

- # Next, let's look for the most confident, yet wrong, prediction.
- # Often this will look quite ambiguous to you too.
-
- iwrong = findall(Flux.onecold(lenet(xtest)) .!= Flux.onecold(ytest))
-
- max_p = maximum(ptest[:,iwrong]; dims=1)
- _, k = findmax(vec(max_p))  # now max not min
- i = iwrong[k]
-
- xtest[:,:,1,i] .|> Gray |> transpose
-
- Flux.onecold(ytest, 0:9)[i]  # true classification
- Flux.onecold(ptest[:,i], 0:9)  # prediction
-
#===== SIZES =====#

- # Maybe... at first I had this above, but it makes things long.
-
# A layer like Conv((5, 5), 1=>6) takes 5x5 patches of an image, and matches them to each
# of 6 different 5x5 filters, placed at every possible position. These filters are here:

- Conv((5, 5), 1 => 6).weights |> summary  # 5×5×1×6 Array{Float32, 4}
+ Conv((5, 5), 1 => 6).weight |> summary  # 5×5×1×6 Array{Float32, 4}
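
# The size rule behind this summary, and the trace below: with Flux's defaults
# (stride 1, no padding) a Conv gives out = in - filter + 1, so 28 - 5 + 1 = 24,
# and each MaxPool((2, 2)) halves the spatial dims: 24 ÷ 2 = 12, and so on.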

# This layer can accept any size of image; let's trace the sizes with the actual input:

@@ -172,19 +162,19 @@ Conv((5, 5), 1=>6).weights |> summary # 5×5×1×6 Array{Float32, 4}
julia> x1 |> size
(28, 28, 1, 64)

- julia> conv_layers[1](x1) |> size
+ julia> lenet[1](x1) |> size  # after Conv((5, 5), 1=>6, relu)
(24, 24, 6, 64)

- julia> conv_layers[1:2](x1) |> size
+ julia> lenet[1:2](x1) |> size  # after MaxPool((2, 2))
(12, 12, 6, 64)

- julia> conv_layers[1:3](x1) |> size
+ julia> lenet[1:3](x1) |> size  # after Conv((5, 5), 6 => 16, relu)
(8, 8, 16, 64)

- julia> conv_layers(x1) |> size
+ julia> lenet[1:4](x1) |> size  # after MaxPool((2, 2))
(4, 4, 16, 64)

- julia> conv_layers(x1) |> Flux.flatten |> size
+ julia> lenet[1:5](x1) |> size  # after Flux.flatten
(256, 64)

=#
@@ -193,4 +183,3 @@ julia> conv_layers(x1) |> Flux.flatten |> size

# This 256 must match the Dense(256 => 120). (See Flux.outputsize for ways to automate this.)
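
# For example, a sketch of that automation (shape in, shape out, no real data needed):
#
#     Flux.outputsize(lenet[1:5], (28, 28, 1, 64))  # (256, 64)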

#===== THE END =====#
-