-
Notifications
You must be signed in to change notification settings - Fork 1
/
references.bib
315 lines (308 loc) · 12.2 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
%% Datasets
% ActivityNet benchmark paper. Authors are separated with " and " (BibTeX does
% not treat commas as name separators) and given in "Last, First" form so the
% compound surname "Caba Heilbron" is parsed correctly.
@inproceedings{caba2015activitynet,
  author    = {Caba Heilbron, Fabian and Escorcia, Victor and Ghanem, Bernard and Niebles, Juan Carlos},
  title     = {{ActivityNet}: A Large-Scale Video Benchmark for Human Activity Understanding},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {961--970},
  year      = {2015}
}
% THUMOS challenge website reference (2015).
@misc{THUMOS15,
  author       = {Gorban, A. and Idrees, H. and Jiang, Y.-G. and Roshan Zamir, A. and Laptev, I. and Shah, M. and Sukthankar, R.},
  title        = {{THUMOS} Challenge: Action Recognition with a Large Number of Classes},
  howpublished = {\url{http://www.thumos.info/}},
  year         = {2015}
}
% Large-scale video classification with CNNs (CVPR, 2014).
@inproceedings{KarpathyCVPR14,
  author    = {Karpathy, Andrej and Toderici, George and Shetty, Sanketh and Leung, Thomas and Sukthankar, Rahul and Fei-Fei, Li},
  title     = {Large-scale Video Classification with Convolutional Neural Networks},
  booktitle = {CVPR},
  year      = {2014}
}
%% State of the Art
% Sequential deep learning for action recognition (book chapter, 2011).
@incollection{baccouche2011sequential,
  author    = {Baccouche, Moez and Mamalet, Franck and Wolf, Christian and Garcia, Christophe and Baskurt, Atilla},
  title     = {Sequential deep learning for human action recognition},
  booktitle = {Human Behavior Understanding},
  publisher = {Springer},
  pages     = {29--39},
  year      = {2011}
}
% Two-stream convolutional networks for action recognition (2014).
@inproceedings{simonyan2014two,
  author    = {Simonyan, Karen and Zisserman, Andrew},
  title     = {Two-stream convolutional networks for action recognition in videos},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {568--576},
  year      = {2014}
}
% Voxel-to-voxel prediction preprint (arXiv 1511.06681).
@article{tran2015deep,
  author  = {Tran, Du and Bourdev, Lubomir and Fergus, Rob and Torresani, Lorenzo and Paluri, Manohar},
  title   = {Deep End2End Voxel2Voxel Prediction},
  journal = {arXiv preprint arXiv:1511.06681},
  year    = {2015}
}
% Dense action labeling preprint (arXiv 1507.05738).
@article{yeung2015every,
  author  = {Yeung, Serena and Russakovsky, Olga and Jin, Ning and Andriluka, Mykhaylo and Mori, Greg and Fei-Fei, Li},
  title   = {Every Moment Counts: Dense Detailed Labeling of Actions in Complex Videos},
  journal = {arXiv preprint arXiv:1507.05738},
  year    = {2015}
}
% R*CNN contextual action recognition paper. The method name is braced so
% sentence-casing styles cannot lowercase it to "r*cnn".
@inproceedings{gkioxari2015contextual,
  author    = {Gkioxari, Georgia and Girshick, Ross and Malik, Jitendra},
  title     = {Contextual Action Recognition with {R*CNN}},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {1080--1088},
  year      = {2015}
}
% Action detection from frame glimpses preprint (arXiv 1511.06984).
@article{yeung2015end,
  author  = {Yeung, Serena and Russakovsky, Olga and Mori, Greg and Fei-Fei, Li},
  title   = {End-to-end Learning of Action Detection from Frame Glimpses in Videos},
  journal = {arXiv preprint arXiv:1511.06984},
  year    = {2015}
}
% 3D convolutional network features preprint (arXiv 1412.0767). "3D" is
% braced so it keeps its capital letter under sentence-casing styles.
@article{tran2014learning,
  author  = {Tran, Du and Bourdev, Lubomir and Fergus, Rob and Torresani, Lorenzo and Paluri, Manohar},
  title   = {Learning spatiotemporal features with {3D} convolutional networks},
  journal = {arXiv preprint arXiv:1412.0767},
  year    = {2014}
}
% "Beyond Short Snippets" (CVPR 2015). The first author's surname is "Ng";
% the exported form "Yue-Hei Ng, Joe" made BibTeX treat "Yue-Hei Ng" as the
% surname. The month uses the standard three-letter macro.
@inproceedings{Ng_2015_CVPR,
  author    = {Ng, Joe Yue-Hei and Hausknecht, Matthew and Vijayanarasimhan, Sudheendra and Vinyals, Oriol and Monga, Rajat and Toderici, George},
  title     = {Beyond Short Snippets: Deep Networks for Video Classification},
  booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2015}
}
% Long-term recurrent convolutional networks (CVPR, 2015).
% NOTE(review): appears to describe the same paper as the donahue2015long
% entry in this file; both keys are kept because either may be cited.
@inproceedings{lrcn2014,
  author    = {Donahue, Jeff and Hendricks, Lisa Anne and Guadarrama, Sergio and Rohrbach, Marcus and Venugopalan, Subhashini and Saenko, Kate and Darrell, Trevor},
  title     = {Long-term Recurrent Convolutional Networks for Visual Recognition and Description},
  booktitle = {CVPR},
  year      = {2015}
}
% Convolutional networks for video representations preprint (arXiv 1511.06432).
@article{ballas2015delving,
  author  = {Ballas, Nicolas and Yao, Li and Pal, Chris and Courville, Aaron},
  title   = {Delving Deeper into Convolutional Networks for Learning Video Representations},
  journal = {arXiv preprint arXiv:1511.06432},
  year    = {2015}
}
% Video description exploiting temporal structure (ICCV proceedings, 2015).
@inproceedings{yao2015describing,
  author    = {Yao, Li and Torabi, Atousa and Cho, Kyunghyun and Ballas, Nicolas and Pal, Christopher and Larochelle, Hugo and Courville, Aaron},
  title     = {Describing videos by exploiting temporal structure},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {4507--4515},
  year      = {2015}
}
% DFT-based temporal modelling preprint (arXiv 1603.06182).
% The second author's surname was recorded as "Xua", which looks like "Xu"
% fused with an affiliation superscript. NOTE(review): confirm against the
% arXiv listing. "Fourier" is braced as a proper noun.
@article{zhang2016modelling,
  author  = {Zhang, Haimin and Xu, Min and Xu, Changsheng and Jain, Ramesh},
  title   = {Modelling Temporal Information Using Discrete {Fourier} Transform for Video Classification},
  journal = {arXiv preprint arXiv:1603.06182},
  year    = {2016}
}
% Difficulty of training recurrent networks preprint (arXiv 1211.5063).
@article{pascanu2012difficulty,
  author  = {Pascanu, Razvan and Mikolov, Tomas and Bengio, Yoshua},
  title   = {On the difficulty of training recurrent neural networks},
  journal = {arXiv preprint arXiv:1211.5063},
  year    = {2012}
}
% Multi-stage CNNs for temporal action localization (CVPR, 2016).
@inproceedings{scnn_shou_wang_chang_cvpr16,
  author    = {Shou, Zheng and Wang, Dongang and Chang, Shih-Fu},
  title     = {Temporal Action Localization in Untrimmed Videos via Multi-stage CNNs},
  booktitle = {CVPR},
  year      = {2016}
}
% Very deep convolutional networks for image recognition (CoRR abs/1409.1556).
@article{Simonyan14c,
  author  = {Simonyan, K. and Zisserman, A.},
  title   = {Very Deep Convolutional Networks for Large-Scale Image Recognition},
  journal = {CoRR},
  volume  = {abs/1409.1556},
  year    = {2014}
}
% Empirical evaluation of gated recurrent networks preprint (arXiv 1412.3555).
@article{chung2014empirical,
  author  = {Chung, Junyoung and Gulcehre, Caglar and Cho, KyungHyun and Bengio, Yoshua},
  title   = {Empirical evaluation of gated recurrent neural networks on sequence modeling},
  journal = {arXiv preprint arXiv:1412.3555},
  year    = {2014}
}
% RNN encoder-decoder preprint (arXiv 1406.1078). The acronym "RNN" is braced
% so sentence-casing styles do not lowercase it. The particle "van" is
% lowercase so BibTeX parses it as the von part of the name.
@article{cho2014learning,
  author  = {Cho, Kyunghyun and van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title   = {Learning phrase representations using {RNN} encoder-decoder for statistical machine translation},
  journal = {arXiv preprint arXiv:1406.1078},
  year    = {2014}
}
% Long short-term memory (Neural computation 9(8), 1997).
@article{hochreiter1997long,
  author    = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title     = {Long short-term memory},
  journal   = {Neural computation},
  volume    = {9},
  number    = {8},
  pages     = {1735--1780},
  publisher = {MIT Press},
  year      = {1997}
}
% Good practices for very deep two-stream ConvNets preprint.
% Carries the same arXiv identifier (1507.02159) as the 2015arXiv150702159W
% entry in this file.
@article{wang2015towards,
  author  = {Wang, Limin and Xiong, Yuanjun and Wang, Zhe and Qiao, Yu},
  title   = {Towards good practices for very deep two-stream convnets},
  journal = {arXiv preprint arXiv:1507.02159},
  year    = {2015}
}
% Rich feature hierarchies for detection and segmentation (CVPR proceedings, 2014).
@inproceedings{girshick2014rich,
  author    = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
  title     = {Rich feature hierarchies for accurate object detection and semantic segmentation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {580--587},
  year      = {2014}
}
% Long-term recurrent convolutional networks (CVPR proceedings, 2015).
% The second author was exported as "Anne Hendricks, Lisa", which makes
% "Anne Hendricks" the surname; the surname is "Hendricks" (this matches the
% lrcn2014 entry in this file, which parses as given name "Lisa Anne").
@inproceedings{donahue2015long,
  author    = {Donahue, Jeffrey and Hendricks, Lisa Anne and Guadarrama, Sergio and Rohrbach, Marcus and Venugopalan, Subhashini and Saenko, Kate and Darrell, Trevor},
  title     = {Long-term recurrent convolutional networks for visual recognition and description},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {2625--2634},
  year      = {2015}
}
% Action recognition with visual attention preprint (arXiv 1511.04119).
@article{sharma2015action,
  author  = {Sharma, Shikhar and Kiros, Ryan and Salakhutdinov, Ruslan},
  title   = {Action recognition using visual attention},
  journal = {arXiv preprint arXiv:1511.04119},
  year    = {2015}
}
% Temporal attention filters preprint (arXiv 1605.08140).
@article{piergiovanni2016temporal,
  author  = {Piergiovanni, AJ and Fan, Chenyou and Ryoo, Michael S},
  title   = {Temporal attention filters for human activity recognition in videos},
  journal = {arXiv preprint arXiv:1605.08140},
  year    = {2016}
}
% Multi-stream bi-directional RNN for fine-grained action detection.
% The original @article had no journal or year (both required) and listed only
% two authors. The citation key is unchanged so existing \cite commands work.
% NOTE(review): venue, year and the full author list are filled in from the
% published CVPR 2016 paper -- confirm against the proceedings and add pages.
@inproceedings{singhmulti,
  author    = {Singh, Bharat and Marks, Tim K. and Jones, Michael and Tuzel, Oncel and Shao, Ming},
  title     = {A Multi-Stream Bi-Directional Recurrent Neural Network for Fine-Grained Action Detection},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2016}
}
% Semi-supervised encoder-decoder RNNs preprint (arXiv 1511.06653).
@article{harvey2015semi,
  author  = {Harvey, F{\'e}lix G and Pal, Christopher},
  title   = {Semi-supervised Learning with Encoder-Decoder Recurrent Neural Networks: Experiments with Motion Capture Sequences},
  journal = {arXiv preprint arXiv:1511.06653},
  year    = {2015}
}
% Scheduled sampling for sequence prediction (2015).
@inproceedings{bengio2015scheduled,
  author    = {Bengio, Samy and Vinyals, Oriol and Jaitly, Navdeep and Shazeer, Noam},
  title     = {Scheduled sampling for sequence prediction with recurrent neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1171--1179},
  year      = {2015}
}
% Trajectory-pooled deep-convolutional descriptors (CVPR, 2015).
% The page range uses a double hyphen so it is typeset as an en dash.
@inproceedings{WangQT15action,
  author    = {Limin Wang and Yu Qiao and Xiaoou Tang},
  title     = {Action Recognition With Trajectory-Pooled Deep-Convolutional Descriptors},
  booktitle = {CVPR},
  year      = {2015},
  pages     = {4305--4314},
}
% Good practices for very deep two-stream ConvNets (arXiv e-print).
% Carries the same arXiv identifier (1507.02159) as the wang2015towards entry
% in this file.
@article{2015arXiv150702159W,
  author        = {{Wang}, L. and {Xiong}, Y. and {Wang}, Z. and {Qiao}, Y.},
  title         = {{Towards Good Practices for Very Deep Two-Stream ConvNets}},
  journal       = {ArXiv e-prints},
  archivePrefix = {arXiv},
  eprint        = {1507.02159},
  primaryClass  = {cs.CV},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition},
  year          = 2015,
  month         = jul,
}
%% Methodology
% RMSprop
% The optimizer name is braced so sentence-casing styles keep "RMSProp"
% instead of rendering it as "Rmsprop".
@article{dauphin2015rmsprop,
  author  = {Dauphin, Yann N and de Vries, Harm and Chung, Junyoung and Bengio, Yoshua},
  title   = {{RMSProp} and equilibrated adaptive learning rates for non-convex optimization},
  journal = {arXiv preprint arXiv:1502.04390},
  year    = {2015}
}
% Dropout
% Journal version of the dropout paper (JMLR 15(1), 2014).
% The publisher was exported as "JMLR. org"; the stray space is removed.
@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title     = {Dropout: A simple way to prevent neural networks from overfitting},
  journal   = {The Journal of Machine Learning Research},
  volume    = {15},
  number    = {1},
  pages     = {1929--1958},
  year      = {2014},
  publisher = {JMLR.org}
}
% Batch Normalization
% Batch normalization preprint (arXiv 1502.03167).
@article{ioffe2015batch,
  author  = {Ioffe, Sergey and Szegedy, Christian},
  title   = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  journal = {arXiv preprint arXiv:1502.03167},
  year    = {2015}
}
%% Software
% Caffe framework preprint (arXiv 1408.5093).
@article{jia2014caffe,
  author  = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
  title   = {Caffe: Convolutional Architecture for Fast Feature Embedding},
  journal = {arXiv preprint arXiv:1408.5093},
  year    = {2014}
}
% Theano framework e-print (arXiv abs/1605.02688); corporate author kept as a
% single braced name.
@article{theano2016theano,
  author       = {{Theano Development Team}},
  title        = {{Theano: A {Python} framework for fast computation of mathematical expressions}},
  journal      = {arXiv e-prints},
  volume       = {abs/1605.02688},
  primaryClass = {cs.SC},
  keywords     = {Computer Science - Symbolic Computation, Computer Science - Learning, Computer Science - Mathematical Software},
  year         = 2016,
  url          = {http://arxiv.org/abs/1605.02688},
}
% TensorFlow white paper preprint (arXiv 1603.04467).
% The first author's given name had a bare dotless i ({\i}); the acute accent
% is restored as {\'\i}. The product name is braced in its CamelCase form.
@article{abadi2016tensorflow,
  author  = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and others},
  title   = {{TensorFlow}: Large-scale machine learning on heterogeneous distributed systems},
  journal = {arXiv preprint arXiv:1603.04467},
  year    = {2016}
}
% OpenCV library reference (Dr. Dobb's Journal of Software Tools, 2000).
% The citeulike-*, keywords, posted-at and priority fields are import
% metadata that standard styles ignore.
@article{opencv_library,
  author               = {Bradski, G.},
  title                = {OpenCV},
  journal              = {Dr. Dobb's Journal of Software Tools},
  year                 = {2000},
  keywords             = {bibtex-import},
  citeulike-article-id = {2236121},
  posted-at            = {2008-01-15 19:21:54},
  priority             = {4}
}
% Introduction
% Multimodal deep learning (ICML-11 proceedings, 2011).
@inproceedings{ngiam2011multimodal,
  author    = {Ngiam, Jiquan and Khosla, Aditya and Kim, Mingyu and Nam, Juhan and Lee, Honglak and Ng, Andrew Y},
  title     = {Multimodal deep learning},
  booktitle = {Proceedings of the 28th international conference on machine learning (ICML-11)},
  pages     = {689--696},
  year      = {2011}
}
%% Audio Features
% Spro speech signal processing toolkit (2010).
@misc{gravier2010spro,
  author = {Gravier, G},
  title  = {Spro: a free speech signal processing toolkit},
  year   = {2010}
}
% Essentia audio analysis library (ISMIR, 2013).
% The trailing period is dropped from the title: bibliography styles add
% their own terminal punctuation, so it would otherwise be doubled.
% NOTE(review): organization = {Citeseer} looks like an export artifact rather
% than the conference sponsor -- confirm before relying on it.
@inproceedings{bogdanov2013essentia,
  author       = {Bogdanov, Dmitry and Wack, Nicolas and G{\'o}mez, Emilia and Gulati, Sankalp and Herrera, Perfecto and Mayor, Oscar and Roma, Gerard and Salamon, Justin and Zapata, Jos{\'e} R and Serra, Xavier},
  title        = {Essentia: An Audio Analysis Library for Music Information Retrieval},
  booktitle    = {ISMIR},
  pages        = {493--498},
  year         = {2013},
  organization = {Citeseer}
}
% Context-dependent sound event detection (EURASIP journal, 2013).
@article{heittola2013context,
  author    = {Heittola, Toni and Mesaros, Annamaria and Eronen, Antti and Virtanen, Tuomas},
  title     = {Context-dependent sound event detection},
  journal   = {EURASIP Journal on Audio, Speech, and Music Processing},
  volume    = {2013},
  number    = {1},
  pages     = {1--13},
  publisher = {Springer},
  year      = {2013}
}
% from thumos challenge
% THUMOS 2015 challenge submission. The acronyms in the title were lowercased
% on export ("Uts-cmu", "thumos"); they are restored and braced so styles
% cannot recase them.
@article{xu2015uts,
  author  = {Xu, Zhongwen and Zhu, Linchao and Yang, Yi and Hauptmann, Alexander G},
  title   = {{UTS-CMU} at {THUMOS} 2015},
  journal = {THUMOS challenge},
  year    = {2015}
}