@article{alphafold,
title={Highly accurate protein structure prediction with AlphaFold},
author={Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and Zidek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A. A. and Ballard, Andrew K. and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W. and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis},
journal={Nature},
volume={596},
number={7873},
pages={583--589},
year={2021},
doi = {10.1038/s41586-021-03819-2},
}
@article{pdbbind2016,
author = {Liu, Zhihai and Su, Minyi and Han, Li and Liu, Jie and Yang, Qifan and Li, Yan and Wang, Renxiao},
title = {Forging the Basis for Developing Protein–Ligand Interaction Scoring Functions},
journal = {Accounts of Chemical Research},
volume = {50},
number = {2},
pages = {302--309},
year = {2017},
doi = {10.1021/acs.accounts.6b00491},
note = {PMID: 28182403},
url = {https://doi.org/10.1021/acs.accounts.6b00491}
}
@article{crossdocked2020,
title={Three-Dimensional Convolutional Neural Networks and a Cross-Docked Data Set for Structure-Based Drug Design},
author={Francoeur, Paul G and Masuda, Tomohide and Sunseri, Jocelyn and Jia, Andrew and Iovanisci, Richard B and Snyder, Ian and Koes, David R},
journal={Journal of Chemical Information and Modeling},
volume={60},
number={9},
pages={4200--4215},
year={2020},
publisher={ACS Publications}
}
@article{sunseri2019libmolgrid,
author = {Sunseri, Jocelyn and Koes, David R.},
title = {libmolgrid: Graphics Processing Unit Accelerated Molecular Gridding for Deep Learning Applications},
journal = {Journal of Chemical Information and Modeling},
volume = {60},
number = {3},
pages = {1079--1084},
year = {2020},
doi = {10.1021/acs.jcim.9b01145},
note = {PMID: 32049525},
url = {https://doi.org/10.1021/acs.jcim.9b01145}
}
@article{ProBiS,
title={ProBiS algorithm for detection of structurally similar protein binding sites by local structural alignment},
author={Konc, Janez and Jane{\v{z}}i{\v{c}}, Du{\v{s}}anka},
journal={Bioinformatics},
volume={26},
number={9},
pages={1160--1168},
year={2010},
publisher={Oxford University Press}
}
@article{jia2014caffe,
author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
title = {Caffe: Convolutional Architecture for Fast Feature Embedding},
journal = {arXiv preprint arXiv:1408.5093},
year = {2014}
}
@article{pocketome,
author={Irina Kufareva and Andrey V. Ilatovskiy and Ruben Abagyan},
title={Pocketome: an encyclopedia of small-molecule binding sites in 4D},
journal={Nucleic Acids Research},
volume={40},
number={D1},
pages={D535--D540},
doi={10.1093/nar/gkr825},
url={https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3245087/},
year={2012}
}
@article{ragoza2017protein,
title={Protein--ligand scoring with convolutional neural networks},
author={Ragoza, Matthew and Hochuli, Joshua and Idrobo, Elisa and Sunseri, Jocelyn and Koes, David Ryan},
journal={Journal of Chemical Information and Modeling},
volume={57},
number={4},
pages={942--957},
year={2017},
publisher={ACS Publications}
}
@article{mcnutt2021gnina,
title={GNINA 1.0: molecular docking with deep learning},
author={McNutt, Andrew T and Francoeur, Paul and Aggarwal, Rishal and Masuda, Tomohide and Meli, Rocco and Ragoza, Matthew and Sunseri, Jocelyn and Koes, David Ryan},
journal={Journal of Cheminformatics},
volume={13},
number={1},
pages={1--20},
year={2021},
publisher={BioMed Central}
}
@article{surveyReview1,
author = {Emmanuel, Tlamelo and Maupong, Thabiso and Mpoeleng, Dimane and Semong, Thabo and Mphago, Banyatsang and Tabona, Oteng},
title = {A survey on missing data in machine learning},
journal = {Journal of Big Data},
volume = {8},
doi = {10.1186/s40537-021-00516-9},
year = {2021}
}
@Book{rev1support,
author = {Little, Roderick J. A. and Rubin, Donald B.},
title = {Statistical Analysis with Missing Data},
edition = {2},
series = {Wiley Series in Probability and Statistics},
publisher = {Wiley},
address = {Chichester},
isbn = {0-471-18386-5},
year = {2002}
}
@article{review2,
title = {Missing value imputation affects the performance of machine learning: A review and analysis of the literature (2010–2021)},
journal = {Informatics in Medicine Unlocked},
volume = {27},
pages = {100799},
year = {2021},
issn = {2352-9148},
doi = {10.1016/j.imu.2021.100799},
url = {https://www.sciencedirect.com/science/article/pii/S2352914821002653},
author = {Md. Kamrul Hasan and Md. Ashraful Alam and Shidhartho Roy and Aishwariya Dutta and Md. Tasnim Jawad and Sunanda Das},
keywords = {Incomplete datasets, Imputation methods and evaluations, Machine learning classifiers and evaluations, PRISMA technique},
abstract = {Recently, numerous studies have been conducted on Missing Value Imputation (MVI), intending the primary solution scheme for the datasets containing one or more missing attribute’s values. The incorporation of MVI reinforces the Machine Learning (ML) models’ performance and necessitates a systematic review of MVI methodologies employed for different tasks and datasets. It will aid beginners as guidance towards composing an effective ML-based decision-making system in various fields of applications. This article aims to conduct a rigorous review and analysis of the state-of-the-art MVI methods in the literature published in the last decade. Altogether, 191 articles, published from 2010 to August 2021, are selected for review using the well-known Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) technique. We summarize those articles with relevant definitions, theories, and analyses to provide essential information for building a precise decision-making framework. In addition, the evaluation metrics employed for MVI methods and ML-based classification models are also discussed and explored. Remarkably, the trends for the MVI method and its evaluation are also scrutinized from the last twelve years’ data. To come up with the conclusion, several ML-based pipelines, where the MVI schemes are incorporated for performance enhancement, are investigated and reviewed for many different datasets. In the end, informative observations and recommendations are addressed for future research directions and trends in related fields of interest.}
}
@article{ClimateImp,
author = {Afrifa-Yamoah, E and Mueller, U. A. and Taylor, S. M. and Fisher, A. J.},
title = {Missing data imputation of high-resolution temporal climate time series data},
journal = {Meteorological Applications},
volume = {27},
number = {1},
pages = {e1873},
keywords = {high-resolution climate time series data, imputation, missing observations, short cycle duration, state-space modelling},
doi = {10.1002/met.1873},
url = {https://rmets.onlinelibrary.wiley.com/doi/abs/10.1002/met.1873},
eprint = {https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/met.1873},
abstract = {Abstract Analysis of high-resolution data offers greater opportunity to understand the nature of data variability, behaviours, trends and to detect small changes. Climate studies often require complete time series data which, in the presence of missing data, means imputation must be undertaken. Research on the imputation of high-resolution temporal climate time series data is still at an early phase. In this study, multiple approaches to the imputation of missing values were evaluated, including a structural time series model with Kalman smoothing, an autoregressive integrated moving average (ARIMA) model with Kalman smoothing and multiple linear regression. The methods were applied to complete subsets of data from 12 month time series of hourly temperature, humidity and wind speed data from four locations along the coast of Western Australia. Assuming that observations were missing at random, artificial gaps of missing observations were studied using a five-fold cross-validation methodology with the proportion of missing data set to 10\%. The techniques were compared using the pooled mean absolute error, root mean square error and symmetric mean absolute percentage error. The multiple linear regression model was generally the best model based on the pooled performance indicators, followed by the ARIMA with Kalman smoothing. However, the low error values obtained from each of the approaches suggested that the models competed closely and imputed highly plausible values. To some extent, the performance of the models varied among locations. It can be concluded that the modelling approaches studied have demonstrated suitability in imputing missing data in hourly temperature, humidity and wind speed data and are therefore recommended for application in other fields where high-resolution data with missing values are common.},
year = {2020}
}
@article{peptideMHCimp,
author = {Alex Rubinsteyn and Timothy O{\textquoteright}Donnell and Nandita Damaraju and Jeff Hammerbacher},
title = {Predicting Peptide-MHC Binding Affinities with Imputed Training Data},
elocation-id = {054775},
year = {2016},
doi = {10.1101/054775},
publisher = {Cold Spring Harbor Laboratory},
abstract = {Predicting the binding affinity between MHC proteins and their peptide ligands is a key problem in computational immunology. State of the art performance is currently achieved by the allele-specific predictor NetMHC and the pan-allele predictor NetMHCpan, both of which are ensembles of shallow neural networks. We explore an intermediate between allele-specific and pan-allele prediction: training allele-specific predictors with synthetic samples generated by imputation of the peptide-MHC affinity matrix. We find that the imputation strategy is useful on alleles with very little training data. We have implemented our predictor as an open-source software package called MHCflurry and show that MHCflurry achieves competitive performance to NetMHC and NetMHCpan.},
URL = {https://www.biorxiv.org/content/early/2016/06/07/054775},
eprint = {https://www.biorxiv.org/content/early/2016/06/07/054775.full.pdf},
journal = {bioRxiv}
}
@article{MLimpMedsurvey,
author = {McClellan, Chandler and Mitchell, Emily and Anderson, Jerrod and Zuvekas, Samuel},
title = {Using machine-learning algorithms to improve imputation in the medical expenditure panel survey},
journal = {Health Services Research},
volume = {58},
number = {2},
pages = {423-432},
keywords = {imputation, machine learning, medical expenditures, MEPS, predictive mean matching},
doi = {10.1111/1475-6773.14115},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/1475-6773.14115},
eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1111/1475-6773.14115},
abstract = {Abstract Objective To assess the feasibility of applying machine learning (ML) methods to imputation in the Medical Expenditure Panel Survey (MEPS). Data Sources All data come from the 2016–2017 MEPS. Study Design Currently, expenditures for medical encounters in the MEPS are imputed with a predictive mean matching (PMM) algorithm in which a linear regression model is used to predict expenditures for events with (donors) and without (recipients) data. Recipient events and donor events are then matched based on the smallest distance between predicted expenditures, and the donor event's expenditures are used as the recipient event's imputation. We replace linear regression algorithm in the PMM framework with ML methods to predict expenditures. We examine five alternatives to linear regression: Gradient Boosting, Random Forests, Extreme Random Forests, Deep Neural Networks, and a Stacked Ensemble approach. Additionally, we introduce an alternative matching scheme, which matches on a vector of predicted expenditures by sources of payment instead of a single total expenditure prediction to generate potentially superior matches. Data Collection Study data is derived from a large federal survey. Principal Findings ML algorithms perform better at both prediction and matching imputation than Ordinary Least Squares (OLS), the most common prediction algorithm used in PMM. On average, the Stacked Ensemble approach that combines all the ML algorithms performs best, improving expenditure prediction R2 by 108\% (0.156 points) and final imputation R2 by 227\% (0.397 points). Matching on a prediction vector also improves alignment of sources of payments between donor and recipient events. Conclusions ML algorithms and an alternative matching scheme improve the overall quality of expenditure PMM imputation in the MEPS. These methods may have additional value in other national surveys that currently rely on PMM or similar methods for imputation.},
year = {2023}
}
@article{MLclinicDecision,
author = {Wang, Huimin and Tang, Jianxiang and Wu, Mengyao and Wang, Xiaoyu and Zhang, Tao},
title = {Application of machine learning missing data imputation techniques in clinical decision making: taking the discharge assessment of patients with spontaneous supratentorial intracerebral hemorrhage as an example},
journal = {BMC Medical Informatics and Decision Making},
volume = {22},
number = {1},
doi = {10.1186/s12911-022-01752-6},
url = {https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-022-01752-6#citeas},
year = {2022}
}
@article{SICE,
author = {Khan, Shahidul Islam and Hoque, Abu Sayed Md Latiful},
title = {SICE: an improved missing data imputation technique},
journal = {Journal of Big Data},
volume = {7},
number = {1},
doi = {10.1186/s40537-020-00313-w},
url = {https://doi.org/10.1186/s40537-020-00313-w},
year = {2020}
}
@article{graphEditDist,
author = {Garcia-Hernandez, Carlos and Fernández, Alberto and Serratosa, Francesc},
title = {Ligand-Based Virtual Screening Using Graph Edit Distance as Molecular Similarity Measure},
journal = {Journal of Chemical Information and Modeling},
volume = {59},
number = {4},
pages = {1410-1421},
year = {2019},
doi = {10.1021/acs.jcim.8b00820},
note = {PMID: 30920214},
url = {https://doi.org/10.1021/acs.jcim.8b00820}
}
@article{Chembl,
author = {Mendez, David and Gaulton, Anna and Bento, A Patrícia and Chambers, Jon and De Veij, Marleen and Félix, Eloy and Magariños, María Paula and Mosquera, Juan F and Mutowo, Prudence and Nowotka, Michał and Gordillo-Marañón, María and Hunter, Fiona and Junco, Laura and Mugumbate, Grace and Rodriguez-Lopez, Milagros and Atkinson, Francis and Bosc, Nicolas and Radoux, Chris J and Segura-Cabrera, Aldo and Hersey, Anne and Leach, Andrew R},
title = "{ChEMBL: towards direct deposition of bioassay data}",
journal = {Nucleic Acids Research},
volume = {47},
number = {D1},
pages = {D930-D940},
year = {2018},
month = {11},
abstract = "{ChEMBL is a large, open-access bioactivity database (https://www.ebi.ac.uk/chembl), previously described in the 2012, 2014 and 2017 Nucleic Acids Research Database Issues. In the last two years, several important improvements have been made to the database and are described here. These include more robust capture and representation of assay details; a new data deposition system, allowing updating of data sets and deposition of supplementary data; and a completely redesigned web interface, with enhanced search and filtering capabilities.}",
issn = {0305-1048},
doi = {10.1093/nar/gky1075},
url = {https://doi.org/10.1093/nar/gky1075},
eprint = {https://academic.oup.com/nar/article-pdf/47/D1/D930/27437436/gky1075.pdf},
}
@article{tosstorff2022high,
title={A high quality, industrial data set for binding affinity prediction: performance comparison in different early drug discovery scenarios},
author={Tosstorff, Andreas and Rudolph, Markus G and Cole, Jason C and Reutlinger, Michael and Kramer, Christian and Schaffhauser, Herv{\'e} and Nilly, Agn{\`e}s and Flohr, Alexander and Kuhn, Bernd},
journal={Journal of Computer-Aided Molecular Design},
volume={36},
number={10},
pages={753--765},
year={2022},
publisher={Springer}
}
@article{pafnuncy,
author={Marta M Stepniewska-Dziubinska and Piotr Zielenkiewicz and Pawel Siedlecki},
title={Development and evaluation of a deep learning model for protein-ligand binding affinity prediction},
journal={Bioinformatics},
volume={34},
number={21},
pages={3666--3674},
doi={10.1093/bioinformatics/bty374},
url={https://academic.oup.com/bioinformatics/article/34/21/3666/4994792},
year={2018}
}
@article{kdeep,
title={KDEEP: Protein-Ligand Absolute Binding Affinity Prediction via 3D-Convolutional Neural Networks},
author={Jim{\'e}nez Luna, Jos{\'e} and Skalic, Miha and Martinez-Rosell, Gerard and De Fabritiis, Gianni},
journal={Journal of Chemical Information and Modeling},
year={2018},
publisher={ACS Publications}
}
@article{onionnet,
author = {Zheng, Liangzhen and Fan, Jingrong and Mu, Yuguang},
title = {OnionNet: a Multiple-Layer Intermolecular-Contact-Based Convolutional Neural Network for Protein–Ligand Binding Affinity Prediction},
journal = {ACS Omega},
volume = {4},
number = {14},
pages = {15956-15965},
year = {2019},
doi = {10.1021/acsomega.9b01997},
note = {PMID: 31592466},
url = {https://doi.org/10.1021/acsomega.9b01997}
}
@article{bapa,
author = {Seo, Sangmin and Choi, Jonghwan and Park, Sanghyun and Ahn, Jaegyoon},
title = {Binding affinity prediction for protein–ligand complex using deep attention mechanism based on intermolecular interactions},
journal = {BMC Bioinformatics},
year = {2021},
volume = {22},
number = {1},
doi = {10.1186/s12859-021-04466-0},
URL = {https://doi.org/10.1186/s12859-021-04466-0}
}
@article{fusionAff,
author = {Jones, Derek and Kim, Hyojin and Zhang, Xiaohua and Zemla, Adam and Stevenson, Garrett and Bennett, W. F. Drew and Kirshner, Daniel and Wong, Sergio E. and Lightstone, Felice C. and Allen, Jonathan E.},
title = {Improved Protein–Ligand Binding Affinity Prediction with Structure-Based Deep Fusion Inference},
journal = {Journal of Chemical Information and Modeling},
volume = {61},
number = {4},
pages = {1583-1592},
year = {2021},
doi = {10.1021/acs.jcim.0c01306},
note = {PMID: 33754707},
url = {https://doi.org/10.1021/acs.jcim.0c01306}
}
@article{roccoBAreview,
title={Scoring functions for protein-ligand binding affinity prediction using structure-based deep learning: A review},
author={Meli, Rocco and Morris, Garrett M and Biggin, Philip C},
journal={Frontiers in Bioinformatics},
volume={2},
pages={57},
year={2022},
publisher={Frontiers}
}
@article{gu2023protein,
title={Protein--ligand binding affinity prediction with edge awareness and supervised attention},
author={Gu, Yuliang and Zhang, Xiangzhou and Xu, Anqi and Chen, Weiqi and Liu, Kang and Wu, Lijuan and Mo, Shenglong and Hu, Yong and Liu, Mei and Luo, Qichao},
journal={iScience},
volume={26},
number={1},
year={2023},
publisher={Elsevier}
}
@article{zhao2022brief,
title={A brief review of protein--ligand interaction prediction},
author={Zhao, Lingling and Zhu, Yan and Wang, Junjie and Wen, Naifeng and Wang, Chunyu and Cheng, Liang},
journal={Computational and Structural Biotechnology Journal},
volume={20},
pages={2831--2838},
year={2022},
publisher={Elsevier}
}
@article{cang2018representability,
title={Representability of algebraic topology for biomolecules in machine learning based scoring and virtual screening},
author={Cang, Zixuan and Mu, Lin and Wei, Guo-Wei},
journal={PLoS computational biology},
volume={14},
number={1},
pages={e1005929},
year={2018},
publisher={Public Library of Science}
}
@article{NNscore,
author={Jacob D. Durrant and J. Andrew McCammon},
title={NNScore 2.0: A Neural-Network Receptor-Ligand Scoring Function},
journal={Journal of Chemical Information and Modeling},
volume={51},
number={11},
pages={2897--2903},
doi={10.1021/ci2003889},
url={https://pubs.acs.org/doi/10.1021/ci2003889},
year={2011}
}
@article{RFscore,
author={Pedro J. Ballester and John B. O. Mitchell},
title={A machine learning approach to predicting protein-ligand binding affinity with applications to molecular docking},
journal={Bioinformatics},
volume={26},
number={9},
pages={1169--1175},
doi={10.1093/bioinformatics/btq112},
url={https://academic.oup.com/bioinformatics/article/26/9/1169/199938},
year={2010}
}
@article{DLscore,
author={Md Mahmudulla Hassan and Daniel Castaneda Mogollon and Olac Fuentes and Suman Sirimulla},
title={DLSCORE: A Deep Learning Model for Predicting Protein-Ligand Binding Affinities},
journal={ChemRxiv},
doi={10.26434/chemrxiv.6159143.v1},
url={https://doi.org/10.26434/chemrxiv.6159143.v1},
year={2018}
}