# video_classification.py
# Forked from philipperemy/keras-tcn.
import numpy as np
import tensorflow.keras.backend as K
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Lambda
from tensorflow.keras.layers import MaxPool2D
from tcn import TCN
# Size of the synthetic dataset (number of videos).
num_samples = 1000
# Frames per video: 10 seconds of video at 24 frames per second.
num_frames = 240
# Frame height, width and channel count: tiny 32x32 color frames, far from HD.
h = 32
w = 32
c = 3
def data():
    """Build a tiny synthetic video dataset for binary classification.

    This is a deliberately trivial example; its purpose is to show how to use
    a RNN/TCN in the context of video processing.

    Class 0 videos contain only zeros. In a class 1 video every frame is
    independently filled with ones when a uniform draw exceeds 0.90 and is
    left at zero otherwise.

    Returns:
        A tuple ``(inputs, targets)`` of NumPy arrays. ``inputs`` has shape
        ``(num_samples, num_frames, h, w, c)`` and ``targets`` has shape
        ``(num_samples, 1)``, holding 1 for class 1 videos and 0 otherwise.
    """
    inputs = np.zeros(shape=(num_samples, num_frames, h, w, c))
    targets = np.zeros(shape=(num_samples, 1))
    for i in range(num_samples):
        # A single draw decides the class; videos at or below 0.50 stay
        # all-zero and keep the label 0.
        if np.random.uniform(low=0, high=1) <= 0.50:
            continue
        # One draw per frame, taken in a single call rather than a Python
        # loop over num_frames scalar draws.
        frame_is_lit = np.random.uniform(low=0, high=1, size=num_frames) > 0.90
        # Broadcast each per-frame flag over that frame's (h, w, c) values.
        inputs[i] = frame_is_lit[:, None, None, None]
        targets[i] = 1
    return inputs, targets
def train():
    """Build a small CNN + TCN video classifier and fit it on ``data()``.

    The frames of every video are first pushed into the batch dimension so
    the convolution and pooling layers see individual images. The resulting
    feature maps are then flattened and regrouped per video, fed to a TCN
    along the frame dimension, and mapped to a single sigmoid output.

    The model summary and a 'Train...' banner are printed before fitting for
    5 epochs with 20% of the samples held out for validation.
    """
    # Good exercise: https://www.crcv.ucf.edu/data/UCF101.php
    # Replace data() with this dataset.
    # Useful links:
    # - https://www.pyimagesearch.com/2019/07/15/video-classification-with-keras-and-deep-learning/
    # - https://github.com/sujiongming/UCF-101_video_classification
    videos, labels = data()

    video_input = Input(shape=(num_frames, h, w, c))

    # Merge the frame axis into the batch axis so that every frame is
    # processed independently of its position in the video (CNN features).
    frames = Lambda(lambda t: K.reshape(t, (-1, h, w, c)))(video_input)

    # Convolution followed by pooling, applied to each image of each video.
    frame_maps = Conv2D(filters=16, kernel_size=5)(frames)
    frame_maps = MaxPool2D()(frame_maps)

    # Rebuild the videos: flatten each frame's feature map and restore the
    # 3D layout (batch, timesteps, input_dim) expected by the TCN.
    num_features_cnn = np.prod(K.int_shape(frame_maps)[1:])
    sequences = Lambda(
        lambda t: K.reshape(t, (-1, num_frames, num_features_cnn))
    )(frame_maps)

    # Temporal model over the time dimension (the num_frames axis).
    encoded = TCN(16)(sequences)
    probability = Dense(units=1, activation='sigmoid')(encoded)

    model = Model(inputs=[video_input], outputs=[probability])
    model.summary()
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    print('Train...')
    model.fit(videos, labels, validation_split=0.2, epochs=5)
# Run the training demo only when this file is executed as a script.
if __name__ == "__main__":
    train()