Skip to content

Commit 2eaa6ca

Browse files
committed
Filter crap proteins after analyzing the peptides
1 parent 8ce06ac commit 2eaa6ca

File tree

4 files changed

+62
-2
lines changed

4 files changed

+62
-2
lines changed

api/src/controllers/mpa/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,7 @@ pub fn default_tryptic() -> bool { false }
1111
/// Serde default for the `report_taxa` request parameter: disabled unless
/// the client explicitly asks for it.
pub fn default_report_taxa() -> bool { false }
14+
15+
/// Serde default for the `blacklist_crap` request parameter: the crap
/// blacklist is not applied unless the client explicitly enables it.
pub fn default_blacklist_crap() -> bool { false }

api/src/controllers/mpa/pept2data.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ use axum::{extract::State, Json};
33
use serde::{Deserialize, Serialize};
44
use index::{ProteinInfo, SearchResult};
55
use crate::{
6-
controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, api::default_cutoff},
6+
controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, mpa::default_blacklist_crap, api::default_cutoff},
77
helpers::{
88
fa_helper::{calculate_fa, FunctionalAggregation},
99
lca_helper::calculate_lca,
1010
lineage_helper::{get_lineage_array, LineageVersion}
1111
},
1212
AppState
1313
};
14+
use crate::helpers::filters::crap_filter::CrapFilter;
1415
use crate::helpers::filters::empty_filter::EmptyFilter;
1516
use crate::helpers::filters::protein_filter::ProteinFilter;
1617
use crate::helpers::filters::proteome_filter::ProteomeFilter;
@@ -30,6 +31,8 @@ pub struct Parameters {
3031
cutoff: usize,
3132
#[serde(default = "default_report_taxa")]
3233
report_taxa: bool,
34+
#[serde(default = "default_blacklist_crap")]
35+
blacklist_crap: bool,
3336
filter: Option<Filter>,
3437
}
3538

@@ -60,7 +63,7 @@ pub struct Data {
6063

6164
async fn handler(
6265
State(AppState { index, datastore, .. }): State<AppState>,
63-
Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, filter }: Parameters
66+
Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, blacklist_crap, filter }: Parameters
6467
) -> Result<Data, ()> {
6568
if peptides.is_empty() {
6669
return Ok(Data { peptides: Vec::new() });
@@ -93,13 +96,16 @@ async fn handler(
9396
None => Box::new(EmptyFilter::new())
9497
};
9598

99+
let crap_blacklist = CrapFilter::new();
100+
96101
Ok(Data {
97102
peptides: result
98103
.into_iter()
99104
.filter_map(|SearchResult { proteins, sequence, .. }| {
100105
let filtered_proteins: Vec<ProteinInfo> = proteins
101106
.into_iter()
102107
.filter(|protein| filter_proteins.filter(protein))
108+
.filter(|protein| !(blacklist_crap && crap_blacklist.filter(protein)))
103109
.collect();
104110

105111
if filtered_proteins.is_empty() {
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
use std::collections::HashSet;
2+
use index::ProteinInfo;
3+
use crate::helpers::filters::protein_filter::ProteinFilter;
4+
use crate::helpers::filters::UniprotFilter;
5+
6+
pub struct CrapFilter {
7+
protein_filter: ProteinFilter,
8+
}
9+
10+
impl UniprotFilter for CrapFilter {
11+
fn filter(&self, protein: &ProteinInfo) -> bool {
12+
self.protein_filter.filter(protein)
13+
}
14+
}
15+
16+
impl CrapFilter {
17+
pub fn new() -> Self {
18+
CrapFilter {
19+
protein_filter: ProteinFilter::new(Self::get_crap_proteins()),
20+
}
21+
}
22+
23+
fn get_crap_proteins() -> HashSet<String> {
24+
let crap_accessions = vec![
25+
"P02769", "P0DTE7", "P0DTE8", "P0DUB6", "P02662", "P02663",
26+
"P02666", "P02668", "P00766", "P00767", "P13645", "O77727",
27+
"P35527", "Q15323", "Q14532", "O76011", "Q92764", "O76013",
28+
"O76014", "O76015", "O76009", "P01920", "P02534", "P02539",
29+
"P35908", "P04264", "P02539", "P15241", "P25691", "P02444",
30+
"P02445", "P02443", "P02441", "Q02958", "P02438", "P02439",
31+
"P02440", "P08131", "Q14533", "Q9NSB4", "P78385", "Q9NSB2",
32+
"P78386", "O43790", "P26372", "P00711", "Q7M135", "P00792",
33+
"P00791", "Q10735", "P30879", "P0C1U8", "P00760", "Q29463",
34+
"A0A8K0BFD9", "P02768", "P01008", "D6RCN3", "P61769", "P55957",
35+
"P00915", "P00918", "P04040", "P07339", "P08311", "P01031",
36+
"P02741", "P00167", "P99999", "P01133", "P05413", "P06396",
37+
"Q9BX51", "A0A2R8Y5E5", "P69905", "P68871", "P01344", "P10145",
38+
"P06732", "P00709", "P80384", "P61626", "P02144", "Q15843",
39+
"P15559", "U3KQG7", "P01127", "P62937", "A0A0A0MRQ5", "P01112",
40+
"P02753", "P00441", "B8ZZN6", "P12081", "P10636", "P10599",
41+
"P01375", "P02787", "P02788", "P51965", "O00762", "A8MUA9",
42+
"P62979", "P32503", "P00004", "P00921", "P00330", "P00883",
43+
"P00698", "P68082", "P01012", "P00722", "P00366", "A0A5J6CYK8",
44+
"A0A182BM84", "P15252", "P81054"
45+
];
46+
47+
crap_accessions.into_iter().map(String::from).collect()
48+
}
49+
}

api/src/helpers/filters/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ pub mod taxa_filter;
44
pub mod proteome_filter;
55
pub mod protein_filter;
66
pub mod empty_filter;
7+
pub mod crap_filter;
78

89
pub trait UniprotFilter {
910
fn filter(&self, protein: &ProteinInfo) -> bool;

0 commit comments

Comments
 (0)