@@ -6,7 +6,7 @@ use text_compression::ProteinTextSlice;
66use crate :: {
77 Nullable , SuffixArray ,
88 sa_searcher:: BoundSearch :: { Maximum , Minimum } ,
9- suffix_to_protein_index:: { DenseSuffixToProtein , SparseSuffixToProtein , SuffixToProteinIndex }
9+ suffix_to_protein_index:: { DenseSuffixToProtein , SparseSuffixToProtein , BitVecSuffixToProtein , SuffixToProteinIndex }
1010} ;
1111
1212/// Enum indicating if we are searching for the minimum, or maximum bound in the suffix array
@@ -90,6 +90,24 @@ impl Deref for SparseSearcher {
9090 }
9191}
9292
93+ pub struct BitVecSearcher ( Searcher ) ; // Tuple-struct wrapper around a `Searcher`; `new` below wires it up with a `BitVecSuffixToProtein` mapping.
94+
95+ impl BitVecSearcher {
96+ pub fn new ( sa : SuffixArray , proteins : Proteins ) -> Self { // Consumes `sa` and `proteins` and returns the wrapper around the resulting `Searcher`.
97+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ; // Built from a borrow of `proteins.text`, so this must happen before `proteins` is moved on the next line.
98+ let searcher = Searcher :: new ( sa, proteins, Box :: new ( suffix_index_to_protein) ) ; // The mapping is handed over boxed; `sa` and `proteins` are moved into the searcher.
99+ Self ( searcher)
100+ }
101+ }
102+
103+ impl Deref for BitVecSearcher { // Exposes the wrapped `Searcher` so its methods can be called directly on a `BitVecSearcher` through auto-deref.
104+ type Target = Searcher ;
105+
106+ fn deref ( & self ) -> & Self :: Target { // Returns a shared reference to the inner `Searcher` (tuple field 0).
107+ & self . 0
108+ }
109+ }
110+
93111pub struct DenseSearcher ( Searcher ) ;
94112
95113impl DenseSearcher {
@@ -495,9 +513,7 @@ mod tests {
495513 use text_compression:: ProteinText ;
496514
497515 use crate :: {
498- SuffixArray ,
499- sa_searcher:: { BoundSearchResult , SearchAllSuffixesResult , Searcher } ,
500- suffix_to_protein_index:: SparseSuffixToProtein
516+ sa_searcher:: { BoundSearchResult , SearchAllSuffixesResult , Searcher } , suffix_to_protein_index:: { BitVecSuffixToProtein , DenseSuffixToProtein , SparseSuffixToProtein } , SuffixArray
501517 } ;
502518
503519 #[ test]
@@ -556,7 +572,7 @@ mod tests {
556572 let proteins = get_example_proteins ( ) ;
557573 let sa = SuffixArray :: Original ( vec ! [ 19 , 10 , 2 , 13 , 9 , 8 , 11 , 5 , 0 , 3 , 12 , 15 , 6 , 1 , 4 , 17 , 14 , 16 , 7 , 18 ] , 1 ) ;
558574
559- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
575+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
560576 let searcher = Searcher :: new ( sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
561577
562578 // search bounds 'A'
@@ -589,12 +605,29 @@ mod tests {
589605 assert_eq ! ( found_suffixes, SearchAllSuffixesResult :: SearchResult ( vec![ 5 , 11 ] ) ) ;
590606 }
591607
608+ #[ test]
609+ fn test_search_dense ( ) { // Exercises `search_matching_suffixes` on a `Searcher` backed by `DenseSuffixToProtein`.
610+ let proteins = get_example_proteins ( ) ;
611+ let sa = SuffixArray :: Original ( vec ! [ 9 , 0 , 3 , 12 , 15 , 6 , 18 ] , 3 ) ; // NOTE(review): the second argument is presumably the sparseness factor (every listed position is a multiple of 3) — confirm.
612+
613+ let suffix_index_to_protein = DenseSuffixToProtein :: new ( & proteins. text ) ; // Borrow of `proteins.text` must precede the move of `proteins` into `Searcher::new`.
614+ let searcher = Searcher :: new ( sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
615+
616+ // search suffix 'VAA'
617+ let found_suffixes = searcher. search_matching_suffixes ( b"VAA" , usize:: MAX , false , false ) ; // NOTE(review): meaning of `usize::MAX, false, false` is not visible here — presumably a match limit and two search flags; confirm against the method signature.
618+ assert_eq ! ( found_suffixes, SearchAllSuffixesResult :: SearchResult ( vec![ 7 ] ) ) ; // Expects exactly one hit, reported as 7.
619+
620+ // search suffix 'AC'
621+ let found_suffixes = searcher. search_matching_suffixes ( b"AC" , usize:: MAX , false , false ) ;
622+ assert_eq ! ( found_suffixes, SearchAllSuffixesResult :: SearchResult ( vec![ 5 , 11 ] ) ) ; // Expects two hits, reported as 5 and 11 in that order.
623+ }
624+
592625 #[ test]
593626 fn test_il_equality ( ) {
594627 let proteins = get_example_proteins ( ) ;
595628 let sa = SuffixArray :: Original ( vec ! [ 19 , 10 , 2 , 13 , 9 , 8 , 11 , 5 , 0 , 3 , 12 , 15 , 6 , 1 , 4 , 17 , 14 , 16 , 7 , 18 ] , 1 ) ;
596629
597- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
630+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
598631 let searcher = Searcher :: new ( sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
599632
600633 let bounds_res = searcher. search_bounds ( b"I" ) ;
@@ -638,7 +671,7 @@ mod tests {
638671 } ;
639672
640673 let sparse_sa = SuffixArray :: Original ( vec ! [ 0 , 2 , 4 ] , 2 ) ;
641- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
674+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
642675 let searcher = Searcher :: new ( sparse_sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
643676
644677 // search bounds 'IM' with equal I and L
@@ -661,7 +694,7 @@ mod tests {
661694 } ;
662695
663696 let sparse_sa = SuffixArray :: Original ( vec ! [ 6 , 0 , 1 , 5 , 4 , 3 , 2 ] , 1 ) ;
664- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
697+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
665698 let searcher = Searcher :: new ( sparse_sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
666699
667700 let found_suffixes = searcher. search_matching_suffixes ( b"I" , usize:: MAX , true , false ) ;
@@ -683,7 +716,7 @@ mod tests {
683716 } ;
684717
685718 let sparse_sa = SuffixArray :: Original ( vec ! [ 6 , 5 , 4 , 3 , 2 , 1 , 0 ] , 1 ) ;
686- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
719+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
687720 let searcher = Searcher :: new ( sparse_sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
688721
689722 let found_suffixes = searcher. search_matching_suffixes ( b"II" , usize:: MAX , true , false ) ;
@@ -705,7 +738,7 @@ mod tests {
705738 } ;
706739
707740 let sparse_sa = SuffixArray :: Original ( vec ! [ 6 , 4 , 2 , 0 ] , 2 ) ;
708- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
741+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
709742 let searcher = Searcher :: new ( sparse_sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
710743
711744 // search all places where II is in the string IIIILL, but with a sparse SA
@@ -729,7 +762,7 @@ mod tests {
729762 } ;
730763
731764 let sparse_sa = SuffixArray :: Original ( vec ! [ 6 , 5 , 4 , 3 , 2 , 1 , 0 ] , 1 ) ;
732- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
765+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
733766 let searcher = Searcher :: new ( sparse_sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
734767
735768 // search bounds 'IM' with equal I and L
@@ -752,7 +785,7 @@ mod tests {
752785 } ;
753786
754787 let sparse_sa = SuffixArray :: Original ( vec ! [ 13 , 3 , 12 , 11 , 1 , 4 , 2 , 5 , 9 , 8 , 6 , 10 , 0 , 7 ] , 1 ) ;
755- let suffix_index_to_protein = SparseSuffixToProtein :: new ( & proteins. text ) ;
788+ let suffix_index_to_protein = BitVecSuffixToProtein :: new ( & proteins. text ) ;
756789 let searcher = Searcher :: new ( sparse_sa, proteins, Box :: new ( suffix_index_to_protein) ) ;
757790
758791 let found_suffixes_1 = searcher. search_matching_suffixes ( b"PAA" , usize:: MAX , false , true ) ;
0 commit comments