Skip to content

Commit c876632

Browse files
committed
Merge branch 'develop' into feature/disable-filtering
2 parents f92d3d7 + 8c54827 commit c876632

File tree

4 files changed

+107
-2
lines changed

4 files changed

+107
-2
lines changed

api/src/controllers/mpa/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ pub fn default_tryptic() -> bool { false }
1111
/// Default for the `report_taxa` option: disabled.
pub fn default_report_taxa() -> bool { false }
14+
15+
/// Default for the `blacklist_crap` request parameter (referenced via
/// `#[serde(default = "default_blacklist_crap")]`): blacklisting is disabled
/// unless the caller asks for it.
pub fn default_blacklist_crap() -> bool { false }

api/src/controllers/mpa/pept2data.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ use axum::{extract::State, Json};
33
use serde::{Deserialize, Serialize};
44
use index::{ProteinInfo, SearchResult};
55
use crate::{
6-
controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, api::default_cutoff, api::default_validate_taxa},
6+
controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, mpa::default_blacklist_crap, api::default_cutoff, api::default_validate_taxa},
77
helpers::{
88
fa_helper::{calculate_fa, FunctionalAggregation},
99
lca_helper::calculate_lca,
1010
lineage_helper::{get_lineage_array, LineageVersion}
1111
},
1212
AppState
1313
};
14+
use crate::helpers::filters::crap_filter::CrapFilter;
1415
use crate::helpers::filters::empty_filter::EmptyFilter;
1516
use crate::helpers::filters::protein_filter::ProteinFilter;
1617
use crate::helpers::filters::proteome_filter::ProteomeFilter;
@@ -32,6 +33,8 @@ pub struct Parameters {
3233
report_taxa: bool,
3334
#[serde(default = "default_validate_taxa")]
3435
validate_taxa: bool,
36+
#[serde(default = "default_blacklist_crap")]
37+
blacklist_crap: bool,
3538
filter: Option<Filter>,
3639
}
3740

@@ -62,7 +65,7 @@ pub struct Data {
6265

6366
async fn handler(
6467
State(AppState { index, datastore, .. }): State<AppState>,
65-
Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, validate_taxa, filter }: Parameters
68+
Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, validate_taxa, blacklist_crap, filter }: Parameters
6669
) -> Result<Data, ()> {
6770
if peptides.is_empty() {
6871
return Ok(Data { peptides: Vec::new() });
@@ -95,6 +98,12 @@ async fn handler(
9598
None => Box::new(EmptyFilter::new())
9699
};
97100

101+
let crap_blacklist = if blacklist_crap {
102+
Some(CrapFilter::new())
103+
} else {
104+
None
105+
};
106+
98107
Ok(Data {
99108
peptides: result
100109
.into_iter()
@@ -108,6 +117,13 @@ async fn handler(
108117
return None;
109118
}
110119

120+
// Remove all peptide results when any protein is in the crap blacklist
121+
if let Some(ref filter) = crap_blacklist {
122+
if filtered_proteins.iter().any(|p| filter.filter(p)) {
123+
return None;
124+
}
125+
}
126+
111127
let taxa: Vec<u32> = filtered_proteins.iter().map(|protein| protein.taxon).collect();
112128

113129
let lca = calculate_lca(
api/src/helpers/filters/crap_filter.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
use std::collections::HashSet;
2+
use index::ProteinInfo;
3+
use crate::helpers::filters::protein_filter::ProteinFilter;
4+
use crate::helpers::filters::UniprotFilter;
5+
6+
pub struct CrapFilter {
7+
protein_filter: ProteinFilter,
8+
}
9+
10+
impl UniprotFilter for CrapFilter {
11+
fn filter(&self, protein: &ProteinInfo) -> bool {
12+
self.protein_filter.filter(protein)
13+
}
14+
}
15+
16+
impl Default for CrapFilter {
17+
fn default() -> Self {
18+
Self::new()
19+
}
20+
}
21+
22+
impl CrapFilter {
23+
pub fn new() -> Self {
24+
CrapFilter {
25+
protein_filter: ProteinFilter::new(Self::get_crap_proteins()),
26+
}
27+
}
28+
29+
fn get_crap_proteins() -> HashSet<String> {
30+
let crap_accessions = vec![
31+
"P02769", "P0DTE7", "P0DTE8", "P0DUB6", "P02662", "P02663",
32+
"P02666", "P02668", "P00766", "P00767", "P13645", "O77727",
33+
"P35527", "Q15323", "Q14532", "O76011", "Q92764", "O76013",
34+
"O76014", "O76015", "O76009", "P01920", "P02534", "P02539",
35+
"P35908", "P04264", "P15241", "P25691", "P02444", "P81054",
36+
"P02445", "P02443", "P02441", "Q02958", "P02438", "P02439",
37+
"P02440", "P08131", "Q14533", "Q9NSB4", "P78385", "Q9NSB2",
38+
"P78386", "O43790", "P26372", "P00711", "Q7M135", "P00792",
39+
"P00791", "Q10735", "P30879", "P0C1U8", "P00760", "Q29463",
40+
"A0A8K0BFD9", "P02768", "P01008", "D6RCN3", "P61769", "P55957",
41+
"P00915", "P00918", "P04040", "P07339", "P08311", "P01031",
42+
"P02741", "P00167", "P99999", "P01133", "P05413", "P06396",
43+
"Q9BX51", "A0A2R8Y5E5", "P69905", "P68871", "P01344", "P10145",
44+
"P06732", "P00709", "P80384", "P61626", "P02144", "Q15843",
45+
"P15559", "U3KQG7", "P01127", "P62937", "A0A0A0MRQ5", "P01112",
46+
"P02753", "P00441", "B8ZZN6", "P12081", "P10636", "P10599",
47+
"P01375", "P02787", "P02788", "P51965", "O00762", "A8MUA9",
48+
"P62979", "P32503", "P00004", "P00921", "P00330", "P00883",
49+
"P00698", "P68082", "P01012", "P00722", "P00366", "A0A5J6CYK8",
50+
"A0A182BM84", "P15252"
51+
];
52+
53+
crap_accessions.into_iter().map(String::from).collect()
54+
}
55+
}
56+
57+
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a protein record with the given accession; the taxon and the
    /// functional annotations are fixed dummy values.
    fn protein_with_accession(accession: &str) -> ProteinInfo {
        ProteinInfo {
            taxon: 1,
            uniprot_accession: accession.to_string(),
            functional_annotations: "GO:0001234;GO:0005678".to_string(),
        }
    }

    /// An accession that is on the blacklist must be reported by the filter.
    #[test]
    fn test_protein_in_crap_filter() {
        let listed = protein_with_accession("P68082");

        assert!(CrapFilter::new().filter(&listed));
    }

    /// An accession that is not on the blacklist must not be reported.
    #[test]
    fn test_protein_not_in_crap_filter() {
        let unlisted = protein_with_accession("PXXXXX");

        assert!(!CrapFilter::new().filter(&unlisted));
    }
}

api/src/helpers/filters/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ pub mod taxa_filter;
44
pub mod proteome_filter;
55
pub mod protein_filter;
66
pub mod empty_filter;
7+
pub mod crap_filter;
78

89
pub trait UniprotFilter {
910
fn filter(&self, protein: &ProteinInfo) -> bool;

0 commit comments

Comments
 (0)