Skip to content

Commit 1cb2ff7

Browse files
authored
Merge pull request #80 from unipept/feature/disable-filtering
Add option to disable taxon filter (hidden, API-only option)
2 parents 8c54827 + c876632 commit 1cb2ff7

File tree

9 files changed: 10049 additions and 10021 deletions.

api/benches/helpers/lca_helper.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ pub fn lca_benchmark(c: &mut criterion::Criterion) {
3232
generate_arguments,
3333
|arguments| {
3434
let (taxa, version, taxon_store, lineage_store) = arguments;
35-
black_box(calculate_lca(taxa, version, &taxon_store, &lineage_store))
35+
black_box(calculate_lca(taxa, version, &taxon_store, &lineage_store, true))
3636
},
3737
criterion::BatchSize::SmallInput
3838
)

api/src/controllers/api/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,7 @@ pub fn default_compact() -> bool {
6666
pub fn default_cutoff() -> usize {
6767
10000
6868
}
69+
70+
pub fn default_validate_taxa() -> bool {
71+
true
72+
}

api/src/controllers/api/pept2lca.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
33

44
use crate::{
55
controllers::{
6-
api::{default_equate_il, default_extra, default_names},
6+
api::{default_equate_il, default_extra, default_names, default_validate_taxa},
77
generate_handlers
88
},
99
helpers::{
@@ -26,7 +26,9 @@ pub struct Parameters {
2626
#[serde(default = "default_extra")]
2727
extra: bool,
2828
#[serde(default = "default_names")]
29-
names: bool
29+
names: bool,
30+
#[serde(default = "default_validate_taxa")]
31+
validate_taxa: bool
3032
}
3133

3234
#[derive(Serialize)]
@@ -47,7 +49,7 @@ pub struct Taxon {
4749

4850
async fn handler(
4951
State(AppState { index, datastore, .. }): State<AppState>,
50-
Parameters { input, equate_il, extra, names }: Parameters,
52+
Parameters { input, equate_il, extra, names, validate_taxa }: Parameters,
5153
version: LineageVersion
5254
) -> Result<Vec<LcaInformation>, ()> {
5355
let input = sanitize_peptides(input);
@@ -63,7 +65,8 @@ async fn handler(
6365
item.proteins.iter().map(|protein| protein.taxon).collect(),
6466
version,
6567
taxon_store,
66-
lineage_store
68+
lineage_store,
69+
validate_taxa
6770
);
6871

6972
let (name, rank, _) = taxon_store.get(lca as u32)?;

api/src/controllers/api/peptinfo.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
33

44
use crate::{
55
controllers::{
6-
api::{default_domains, default_equate_il, default_extra, default_names},
6+
api::{default_domains, default_equate_il, default_extra, default_names, default_validate_taxa},
77
generate_handlers
88
},
99
helpers::{
@@ -32,7 +32,9 @@ pub struct Parameters {
3232
#[serde(default = "default_domains")]
3333
domains: bool,
3434
#[serde(default = "default_names")]
35-
names: bool
35+
names: bool,
36+
#[serde(default = "default_validate_taxa")]
37+
validate_taxa: bool
3638
}
3739

3840
#[derive(Serialize)]
@@ -57,7 +59,7 @@ pub struct Taxon {
5759

5860
async fn handler(
5961
State(AppState { index, datastore, .. }): State<AppState>,
60-
Parameters { input, equate_il, extra, domains, names }: Parameters,
62+
Parameters { input, equate_il, extra, domains, names, validate_taxa }: Parameters,
6163
version: LineageVersion
6264
) -> Result<Vec<PeptInformation>, ()> {
6365
let input = sanitize_peptides(input);
@@ -83,7 +85,8 @@ async fn handler(
8385
item.proteins.iter().map(|protein| protein.taxon).collect(),
8486
version,
8587
taxon_store,
86-
lineage_store
88+
lineage_store,
89+
validate_taxa
8790
);
8891
let (name, rank, _) = taxon_store.get(lca as u32)?;
8992
let lineage = match (extra, names) {

api/src/controllers/api/taxa2lca.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize};
33

44
use crate::{
55
controllers::{
6-
api::{default_extra, default_names},
6+
api::{default_extra, default_names, default_validate_taxa},
77
generate_handlers
88
},
99
helpers::{
@@ -24,7 +24,9 @@ pub struct Parameters {
2424
#[serde(default = "default_extra")]
2525
extra: bool,
2626
#[serde(default = "default_names")]
27-
names: bool
27+
names: bool,
28+
#[serde(default = "default_validate_taxa")]
29+
validate_taxa: bool
2830
}
2931

3032
#[derive(Serialize)]
@@ -44,7 +46,7 @@ pub struct Taxon {
4446

4547
async fn handler(
4648
State(AppState { datastore, .. }): State<AppState>,
47-
Parameters { input, extra, names }: Parameters,
49+
Parameters { input, extra, names, validate_taxa }: Parameters,
4850
version: LineageVersion
4951
) -> Result<LcaInformation, ()> {
5052
let taxon_store = datastore.taxon_store();
@@ -56,7 +58,7 @@ async fn handler(
5658
.collect();
5759

5860
// Calculate the LCA of all taxa
59-
let lca: i32 = calculate_lca(casted_input, version, taxon_store, lineage_store);
61+
let lca: i32 = calculate_lca(casted_input, version, taxon_store, lineage_store, validate_taxa);
6062

6163
if let Some((taxon_name, taxon_rank, _)) = taxon_store.get(lca as u32) {
6264
// Calculate the lineage of the LCA

api/src/controllers/mpa/pept2data.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use axum::{extract::State, Json};
33
use serde::{Deserialize, Serialize};
44
use index::{ProteinInfo, SearchResult};
55
use crate::{
6-
controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, mpa::default_blacklist_crap, api::default_cutoff},
6+
controllers::{generate_handlers, mpa::default_equate_il, mpa::default_tryptic, mpa::default_report_taxa, mpa::default_blacklist_crap, api::default_cutoff, api::default_validate_taxa},
77
helpers::{
88
fa_helper::{calculate_fa, FunctionalAggregation},
99
lca_helper::calculate_lca,
@@ -31,6 +31,8 @@ pub struct Parameters {
3131
cutoff: usize,
3232
#[serde(default = "default_report_taxa")]
3333
report_taxa: bool,
34+
#[serde(default = "default_validate_taxa")]
35+
validate_taxa: bool,
3436
#[serde(default = "default_blacklist_crap")]
3537
blacklist_crap: bool,
3638
filter: Option<Filter>,
@@ -63,7 +65,7 @@ pub struct Data {
6365

6466
async fn handler(
6567
State(AppState { index, datastore, .. }): State<AppState>,
66-
Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, blacklist_crap, filter }: Parameters
68+
Parameters { mut peptides, equate_il, tryptic, cutoff, report_taxa, validate_taxa, blacklist_crap, filter }: Parameters
6769
) -> Result<Data, ()> {
6870
if peptides.is_empty() {
6971
return Ok(Data { peptides: Vec::new() });
@@ -128,7 +130,8 @@ async fn handler(
128130
taxa.clone(),
129131
LineageVersion::V2,
130132
taxon_store,
131-
lineage_store
133+
lineage_store,
134+
validate_taxa
132135
);
133136
let lineage = get_lineage_array(lca as u32, LineageVersion::V2, lineage_store);
134137

api/src/helpers/lca_helper.rs

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,13 @@ pub fn calculate_lca(
88
taxa: Vec<u32>,
99
version: LineageVersion,
1010
taxon_store: &TaxonStore,
11-
lineage_store: &LineageStore
11+
lineage_store: &LineageStore,
12+
only_valid_taxa: bool
1213
) -> i32 {
13-
let cleaned_taxa = taxa.into_iter().filter(|&taxon_id| taxon_store.is_valid(taxon_id));
14-
14+
let cleaned_taxa = taxa
15+
.into_iter()
16+
.filter(|&taxon_id| !only_valid_taxa || taxon_store.is_valid(taxon_id));
17+
1518
let lineages: Vec<Vec<i32>> = cleaned_taxa
1619
.into_iter()
1720
.map(|taxon_id| get_lineage_array_numeric(taxon_id, version, lineage_store))
@@ -69,7 +72,7 @@ mod tests {
6972
let lineage_store: LineageStore = LineageStore::try_from_file("../data/lineages_subset_10000.tsv").expect("Reading the file failed");
7073

7174
assert_eq!(calculate_lca(
72-
taxa, version, &taxon_store, &lineage_store), 8287);
75+
taxa, version, &taxon_store, &lineage_store, true), 8287);
7376
}
7477

7578

@@ -81,6 +84,16 @@ mod tests {
8184
let lineage_store: LineageStore = LineageStore::try_from_file("../data/lineages_subset_10000.tsv").expect("Reading the file failed");
8285

8386
assert_eq!(calculate_lca(
84-
taxa, version, &taxon_store, &lineage_store), 1);
87+
taxa, version, &taxon_store, &lineage_store, true), 1);
88+
}
89+
90+
#[test]
91+
fn test_calculate_lca_validate() {
92+
let version: LineageVersion = LineageVersion::V2;
93+
let taxon_store: TaxonStore = TaxonStore::try_from_file("../data/taxons_subset_10000.tsv").expect("Reading the file failed");
94+
let lineage_store: LineageStore = LineageStore::try_from_file("../data/lineages_subset_10000.tsv").expect("Reading the file failed");
95+
96+
assert_eq!(calculate_lca(vec![27], version, &taxon_store, &lineage_store, true), 1);
97+
assert_eq!(calculate_lca(vec![27], version, &taxon_store, &lineage_store, false), 27);
8598
}
8699
}

0 commit comments

Comments
 (0)