references.bib

%% Datasets
% ActivityNet dataset paper (CVPR 2015).
% Authors are separated with " and " and given in "Last, First" form so the
% two-word surname "Caba Heilbron" is parsed correctly.
@inproceedings{caba2015activitynet,
  author    = {Caba Heilbron, Fabian and Escorcia, Victor and Ghanem, Bernard and Niebles, Juan Carlos},
  title     = {{ActivityNet}: A Large-Scale Video Benchmark for Human Activity Understanding},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {961--970},
  year      = {2015}
}
% THUMOS challenge reference, cited via its website.
@misc{THUMOS15,
  author       = {Gorban, A. and Idrees, H. and Jiang, Y.-G. and Roshan Zamir, A.
                  and Laptev, I. and Shah, M. and Sukthankar, R.},
  title        = {{THUMOS} Challenge: Action Recognition with a Large Number of Classes},
  howpublished = {\url{http://www.thumos.info/}},
  year         = {2015}
}
% Large-scale video classification with CNNs (CVPR 2014).
% The venue is spelled out to match the other CVPR entries in this file.
@inproceedings{KarpathyCVPR14,
  author    = {Karpathy, Andrej and Toderici, George and Shetty, Sanketh and Leung, Thomas and Sukthankar, Rahul and Fei-Fei, Li},
  title     = {Large-scale Video Classification with Convolutional Neural Networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2014}
}

%% State of the Art
% Book chapter: sequential deep learning for human action recognition (Springer, 2011).
@incollection{baccouche2011sequential,
  author    = {Baccouche, Moez and Mamalet, Franck and Wolf, Christian and Garcia, Christophe and Baskurt, Atilla},
  title     = {Sequential deep learning for human action recognition},
  booktitle = {Human Behavior Understanding},
  publisher = {Springer},
  pages     = {29--39},
  year      = {2011}
}
% Two-stream convolutional networks for action recognition (NIPS 2014).
@inproceedings{simonyan2014two,
  author    = {Simonyan, Karen and Zisserman, Andrew},
  title     = {Two-stream convolutional networks for action recognition in videos},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {568--576},
  year      = {2014}
}
% Voxel-to-voxel prediction preprint (arXiv 2015).
% The camel-case words are braced so sentence-casing styles do not lowercase them.
@article{tran2015deep,
  author  = {Tran, Du and Bourdev, Lubomir and Fergus, Rob and Torresani, Lorenzo and Paluri, Manohar},
  title   = {Deep {End2End} {Voxel2Voxel} Prediction},
  journal = {arXiv preprint arXiv:1511.06681},
  year    = {2015}
}
% Dense, detailed action labeling in complex videos (arXiv 2015).
@article{yeung2015every,
  author  = {Yeung, Serena and Russakovsky, Olga and Jin, Ning and Andriluka, Mykhaylo and Mori, Greg and Fei-Fei, Li},
  title   = {Every Moment Counts: Dense Detailed Labeling of Actions in Complex Videos},
  journal = {arXiv preprint arXiv:1507.05738},
  year    = {2015}
}
% Contextual action recognition with R*CNN (ICCV 2015).
% The model name is restored to "R*CNN" and braced to survive sentence-casing.
@inproceedings{gkioxari2015contextual,
  author    = {Gkioxari, Georgia and Girshick, Ross and Malik, Jitendra},
  title     = {Contextual action recognition with {R*CNN}},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {1080--1088},
  year      = {2015}
}
% End-to-end action detection from frame glimpses (arXiv 2015).
@article{yeung2015end,
  author  = {Yeung, Serena and Russakovsky, Olga and Mori, Greg and Fei-Fei, Li},
  title   = {End-to-end Learning of Action Detection from Frame Glimpses in Videos},
  journal = {arXiv preprint arXiv:1511.06984},
  year    = {2015}
}
% Spatiotemporal features with 3D convolutional networks (arXiv 2014).
% "3D" is capitalised and braced so styles cannot lowercase it.
@article{tran2014learning,
  author  = {Tran, Du and Bourdev, Lubomir and Fergus, Rob and Torresani, Lorenzo and Paluri, Manohar},
  title   = {Learning spatiotemporal features with {3D} convolutional networks},
  journal = {arXiv preprint arXiv:1412.0767},
  year    = {2014}
}
% Deep networks for video classification beyond short snippets (CVPR 2015).
% The first author's surname is "Ng"; the exported form "Yue-Hei Ng, Joe" made
% "Yue-Hei Ng" the surname. The month uses the standard BibTeX macro.
@inproceedings{Ng_2015_CVPR,
  author    = {Ng, Joe Yue-Hei and Hausknecht, Matthew and Vijayanarasimhan, Sudheendra and Vinyals, Oriol and Monga, Rajat and Toderici, George},
  title     = {Beyond Short Snippets: Deep Networks for Video Classification},
  booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2015}
}
% Long-term recurrent convolutional networks, LRCN (CVPR 2015).
% NOTE(review): this is the same work as the donahue2015long entry; both keys
% are kept because either may be cited elsewhere.
@inproceedings{lrcn2014,
  author    = {Donahue, Jeff and Hendricks, Lisa Anne and Guadarrama, Sergio and Rohrbach, Marcus and Venugopalan, Subhashini and Saenko, Kate and Darrell, Trevor},
  title     = {Long-term Recurrent Convolutional Networks for Visual Recognition and Description},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2015}
}
% Convolutional networks for learning video representations (arXiv 2015).
@article{ballas2015delving,
  author  = {Ballas, Nicolas and Yao, Li and Pal, Chris and Courville, Aaron},
  title   = {Delving Deeper into Convolutional Networks for Learning Video Representations},
  journal = {arXiv preprint arXiv:1511.06432},
  year    = {2015}
}
% Video description exploiting temporal structure (ICCV 2015).
@inproceedings{yao2015describing,
  author    = {Yao, Li and Torabi, Atousa and Cho, Kyunghyun and Ballas, Nicolas and Pal, Christopher and Larochelle, Hugo and Courville, Aaron},
  title     = {Describing videos by exploiting temporal structure},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
  pages     = {4507--4515},
  year      = {2015}
}
% Temporal modelling with the discrete Fourier transform for video classification (arXiv 2016).
% NOTE(review): the second author was exported as "Xua", which looks like an
% affiliation-marker artefact; corrected to "Xu" -- verify against the paper.
@article{zhang2016modelling,
  author  = {Zhang, Haimin and Xu, Min and Xu, Changsheng and Jain, Ramesh},
  title   = {Modelling Temporal Information Using Discrete {Fourier} Transform for Video Classification},
  journal = {arXiv preprint arXiv:1603.06182},
  year    = {2016}
}
% Difficulty of training recurrent neural networks (arXiv 2012).
@article{pascanu2012difficulty,
  author  = {Pascanu, Razvan and Mikolov, Tomas and Bengio, Yoshua},
  title   = {On the difficulty of training recurrent neural networks},
  journal = {arXiv preprint arXiv:1211.5063},
  year    = {2012}
}
% Temporal action localization with multi-stage CNNs (CVPR 2016).
% The venue is spelled out to match the other CVPR entries; the acronym is braced.
@inproceedings{scnn_shou_wang_chang_cvpr16,
  author    = {Shou, Zheng and Wang, Dongang and Chang, Shih-Fu},
  title     = {Temporal Action Localization in Untrimmed Videos via Multi-stage {CNNs}},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2016}
}
% Very deep convolutional networks for image recognition (CoRR 2014).
@article{Simonyan14c,
  author  = {Simonyan, K. and Zisserman, A.},
  title   = {Very Deep Convolutional Networks for Large-Scale Image Recognition},
  journal = {CoRR},
  volume  = {abs/1409.1556},
  year    = {2014}
}
% Empirical evaluation of gated recurrent networks (arXiv 2014).
% "Kyunghyun" is spelled as in the other entries that list the same author.
@article{chung2014empirical,
  author  = {Chung, Junyoung and Gulcehre, Caglar and Cho, Kyunghyun and Bengio, Yoshua},
  title   = {Empirical evaluation of gated recurrent neural networks on sequence modeling},
  journal = {arXiv preprint arXiv:1412.3555},
  year    = {2014}
}
% RNN encoder-decoder for statistical machine translation (arXiv 2014).
% "RNN" is braced against sentence-casing. The particle "van" is lowercase so
% BibTeX parses it as the von part of the name rather than part of the surname.
@article{cho2014learning,
  author  = {Cho, Kyunghyun and van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title   = {Learning phrase representations using {RNN} encoder-decoder for statistical machine translation},
  journal = {arXiv preprint arXiv:1406.1078},
  year    = {2014}
}
% Long short-term memory (LSTM) journal article, 1997.
% The journal name is capitalised as a proper title; styles do not recase this field.
@article{hochreiter1997long,
  author    = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title     = {Long short-term memory},
  journal   = {Neural Computation},
  volume    = {9},
  number    = {8},
  pages     = {1735--1780},
  year      = {1997},
  publisher = {MIT Press}
}
% Good practices for very deep two-stream ConvNets (arXiv 2015).
% NOTE(review): same work as the 2015arXiv150702159W entry; the title casing now
% matches that entry and "ConvNets" is braced. Both keys are kept for existing citations.
@article{wang2015towards,
  author  = {Wang, Limin and Xiong, Yuanjun and Wang, Zhe and Qiao, Yu},
  title   = {Towards Good Practices for Very Deep Two-Stream {ConvNets}},
  journal = {arXiv preprint arXiv:1507.02159},
  year    = {2015}
}
% Rich feature hierarchies for detection and segmentation (CVPR 2014).
% The proceedings title is capitalised like the other CVPR entries in this file.
@inproceedings{girshick2014rich,
  author    = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
  title     = {Rich feature hierarchies for accurate object detection and semantic segmentation},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {580--587},
  year      = {2014}
}
% Long-term recurrent convolutional networks (CVPR 2015).
% The second author's surname is "Hendricks" and "Lisa Anne" is the given name;
% the exported form "Anne Hendricks, Lisa" split the name in the wrong place.
% NOTE(review): same work as the lrcn2014 entry; both keys are kept.
@inproceedings{donahue2015long,
  author    = {Donahue, Jeffrey and Hendricks, Lisa Anne and Guadarrama, Sergio and Rohrbach, Marcus and Venugopalan, Subhashini and Saenko, Kate and Darrell, Trevor},
  title     = {Long-term recurrent convolutional networks for visual recognition and description},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {2625--2634},
  year      = {2015}
}
% Action recognition using visual attention (arXiv 2015).
@article{sharma2015action,
  author  = {Sharma, Shikhar and Kiros, Ryan and Salakhutdinov, Ruslan},
  title   = {Action recognition using visual attention},
  journal = {arXiv preprint arXiv:1511.04119},
  year    = {2015}
}
% Temporal attention filters for activity recognition (arXiv 2016).
@article{piergiovanni2016temporal,
  author  = {Piergiovanni, AJ and Fan, Chenyou and Ryoo, Michael S},
  title   = {Temporal attention filters for human activity recognition in videos},
  journal = {arXiv preprint arXiv:1605.08140},
  year    = {2016}
}
% Multi-stream bi-directional RNN for fine-grained action detection.
% An article entry without journal or year is missing required fields, so this is
% recorded as a conference paper with venue and year.
% NOTE(review): author list, venue and year were completed from the published
% CVPR 2016 version -- verify against the proceedings before submission.
@inproceedings{singhmulti,
  author    = {Singh, Bharat and Marks, Tim K. and Jones, Michael and Tuzel, Oncel and Shao, Ming},
  title     = {A Multi-Stream Bi-Directional Recurrent Neural Network for Fine-Grained Action Detection},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2016}
}
% Semi-supervised learning with encoder-decoder RNNs on motion capture data (arXiv 2015).
@article{harvey2015semi,
  author  = {Harvey, F{\'e}lix G and Pal, Christopher},
  title   = {Semi-supervised Learning with Encoder-Decoder Recurrent Neural Networks: Experiments with Motion Capture Sequences},
  journal = {arXiv preprint arXiv:1511.06653},
  year    = {2015}
}
% Scheduled sampling for sequence prediction with RNNs (NIPS 2015).
@inproceedings{bengio2015scheduled,
  author    = {Bengio, Samy and Vinyals, Oriol and Jaitly, Navdeep and Shazeer, Noam},
  title     = {Scheduled sampling for sequence prediction with recurrent neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1171--1179},
  year      = {2015}
}
% Trajectory-pooled deep-convolutional descriptors for action recognition (CVPR 2015).
% The page range uses an en-dash ("--"), and the venue is spelled out to match
% the other CVPR entries in this file.
@inproceedings{WangQT15action,
  author    = {Wang, Limin and Qiao, Yu and Tang, Xiaoou},
  title     = {Action Recognition With Trajectory-Pooled Deep-Convolutional Descriptors},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {4305--4314},
  year      = {2015}
}
% Good practices for very deep two-stream ConvNets (ADS-style arXiv export).
% The whole-title brace wrapper is removed so styles can apply their own casing;
% only "ConvNets" stays protected.
% NOTE(review): same work as the wang2015towards entry; both keys are kept.
@article{2015arXiv150702159W,
  author        = {Wang, L. and Xiong, Y. and Wang, Z. and Qiao, Y.},
  title         = {Towards Good Practices for Very Deep Two-Stream {ConvNets}},
  journal       = {ArXiv e-prints},
  archivePrefix = {arXiv},
  eprint        = {1507.02159},
  primaryClass  = {cs.CV},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition},
  year          = {2015},
  month         = jul
}


%% Methodology
% RMSprop
% RMSProp and equilibrated adaptive learning rates (arXiv 2015).
% The optimizer name is braced; sentence-casing would otherwise render it "Rmsprop".
@article{dauphin2015rmsprop,
  author  = {Dauphin, Yann N and de Vries, Harm and Chung, Junyoung and Bengio, Yoshua},
  title   = {{RMSProp} and equilibrated adaptive learning rates for non-convex optimization},
  journal = {arXiv preprint arXiv:1502.04390},
  year    = {2015}
}
% Dropout
% Dropout journal article (JMLR 2014).
% The publisher is written "JMLR.org" without the stray space from the export.
@article{srivastava2014dropout,
  author    = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title     = {Dropout: A simple way to prevent neural networks from overfitting},
  journal   = {The Journal of Machine Learning Research},
  volume    = {15},
  number    = {1},
  pages     = {1929--1958},
  year      = {2014},
  publisher = {JMLR.org}
}
% Batch Normalization
% Batch normalization preprint (arXiv 2015).
@article{ioffe2015batch,
  author  = {Ioffe, Sergey and Szegedy, Christian},
  title   = {Batch normalization: Accelerating deep network training by reducing internal covariate shift},
  journal = {arXiv preprint arXiv:1502.03167},
  year    = {2015}
}

%% Software
% Caffe framework preprint (arXiv 2014).
@article{jia2014caffe,
  author  = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
  title   = {Caffe: Convolutional Architecture for Fast Feature Embedding},
  journal = {arXiv preprint arXiv:1408.5093},
  year    = {2014}
}
% Theano framework preprint (arXiv 2016).
% The author is a corporate name, double-braced so it is treated as one unit.
% The title is no longer wrapped whole in braces; only the proper nouns are protected.
@article{theano2016theano,
  author       = {{Theano Development Team}},
  title        = {{Theano}: A {Python} framework for fast computation of mathematical expressions},
  journal      = {arXiv e-prints},
  volume       = {abs/1605.02688},
  primaryClass = {cs.SC},
  keywords     = {Computer Science - Symbolic Computation, Computer Science - Learning, Computer Science - Mathematical Software},
  year         = {2016},
  url          = {http://arxiv.org/abs/1605.02688}
}
% TensorFlow white paper (arXiv 2016).
% The first author's given name carries an acute accent on a dotless i; the
% export had dropped the accent. The product name is cased and braced.
@article{abadi2016tensorflow,
  author  = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and others},
  title   = {{TensorFlow}: Large-scale machine learning on heterogeneous distributed systems},
  journal = {arXiv preprint arXiv:1603.04467},
  year    = {2016}
}
% OpenCV library reference (Dr. Dobb's Journal, 2000).
% The title is braced; sentence-casing would otherwise render it "Opencv".
% The citeulike-article-id, keywords, posted-at and priority fields are import
% metadata that standard styles ignore.
@article{opencv_library,
  author               = {Bradski, G.},
  title                = {{OpenCV}},
  journal              = {Dr. Dobb's Journal of Software Tools},
  year                 = {2000},
  citeulike-article-id = {2236121},
  keywords             = {bibtex-import},
  posted-at            = {2008-01-15 19:21:54},
  priority             = {4}
}

%% Introduction
% Multimodal deep learning (ICML 2011).
@inproceedings{ngiam2011multimodal,
  author    = {Ngiam, Jiquan and Khosla, Aditya and Kim, Mingyu and Nam, Juhan and Lee, Honglak and Ng, Andrew Y},
  title     = {Multimodal deep learning},
  booktitle = {Proceedings of the 28th international conference on machine learning (ICML-11)},
  pages     = {689--696},
  year      = {2011}
}

%% Audio Features
% Spro speech signal processing toolkit (2010).
@misc{gravier2010spro,
  author = {Gravier, G},
  title  = {Spro: a free speech signal processing toolkit},
  year   = {2010}
}
% Essentia audio analysis library (ISMIR 2013).
% The trailing period is removed from the title; the bibliography style supplies
% the closing punctuation.
@inproceedings{bogdanov2013essentia,
  author       = {Bogdanov, Dmitry and Wack, Nicolas and G{\'o}mez, Emilia and Gulati, Sankalp and Herrera, Perfecto and Mayor, Oscar and Roma, Gerard and Salamon, Justin and Zapata, Jos{\'e} R and Serra, Xavier},
  title        = {Essentia: An Audio Analysis Library for Music Information Retrieval},
  booktitle    = {ISMIR},
  pages        = {493--498},
  year         = {2013},
  organization = {Citeseer}
}
% Context-dependent sound event detection (EURASIP journal, 2013).
@article{heittola2013context,
  author    = {Heittola, Toni and Mesaros, Annamaria and Eronen, Antti and Virtanen, Tuomas},
  title     = {Context-dependent sound event detection},
  journal   = {EURASIP Journal on Audio, Speech, and Music Processing},
  volume    = {2013},
  number    = {1},
  pages     = {1--13},
  year      = {2013},
  publisher = {Springer}
}
% from thumos challenge
% UTS-CMU submission to the THUMOS 2015 challenge.
% The acronyms are restored to upper case and braced so styles cannot lowercase them.
@article{xu2015uts,
  author  = {Xu, Zhongwen and Zhu, Linchao and Yang, Yi and Hauptmann, Alexander G},
  title   = {{UTS-CMU} at {THUMOS} 2015},
  journal = {THUMOS challenge},
  year    = {2015}
}