Skip to content

Commit b8d8eb0

Browse files
Add iscoding and decision rule functions to criteria module
1 parent b883807 commit b8d8eb0

File tree

1 file changed

+126
-0
lines changed

1 file changed

+126
-0
lines changed

src/criteria.jl

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
export iscoding,
2+
log_odds_ratio_decision_rule, lordr,
3+
ribsome_binding_site_decision_rule, rbsdr
4+
5+
6+
@doc raw"""
    iscoding(orf::ORFI{N,F}; criteria::Function = lordr, kwargs...) -> Bool

Decide whether `orf` is likely to be coding by delegating to a decision-rule function.

The ORF is handed to `criteria` together with any extra keyword arguments, and whatever
`criteria` returns is returned unchanged.

## Available decision rules
- `lordr`: Log-Odds Ratio Decision Rule (the default)
- `rbsdr`: Ribosome Binding Site Decision Rule

## Keyword Arguments
- `criteria::Function = lordr`: the decision rule, invoked as `criteria(orf; kwargs...)`.
- `kwargs...`: forwarded verbatim to `criteria`; the chosen rule must accept them.

## Example
```julia
phi = dna"GTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAAT...AGTGTTTCCAGTCCGTTCAGTTAATAGTCAGGTTAAAGATAAAAGATTGA"
orfs = findorfs(phi)
orf = orfs[2]

iscoding(orf)                    # verdict of the default rule, `lordr`
iscoding(orf; criteria = rbsdr)  # verdict of the ribosome binding site rule
```
"""
function iscoding(
    orf::ORFI{N,F}; criteria::Function=lordr, kwargs...
) where {N,F<:GeneFinderMethod}
    # All of the decision logic lives in the selected rule; this is only the entry point.
    verdict = criteria(orf; kwargs...)
    return verdict
end
32+
33+
### Actual criteria functions ###
34+
35+
## Log-Odds Ratio Decision Rule

@doc raw"""
    lordr(
        orf::ORFI{N,F};
        modela::BioMarkovChain = ECOLICDS,
        modelb::BioMarkovChain = ECOLINOCDS,
        b::Number = 2,
        η::Real = 5e-3
    ) -> Bool

Decide whether an ORF is likely to be coding by scoring its sequence under two Markov models.

Also available under the name `log_odds_ratio_decision_rule`.

The idea behind the rule is the log-odds decision rule, which compares the probability of
the sequence under a coding model ``P_C`` with its probability under a non-coding model
``P_N`` and thresholds the result at ``\eta``:

```math
S(X) = \log \left( \frac{P_C(X_1=i_1, \ldots, X_T=i_T)}{P_N(X_1=i_1, \ldots, X_T=i_T)} \right) \begin{cases} > \eta & \Rightarrow \text{coding} \\ < \eta & \Rightarrow \text{noncoding} \end{cases}
```

As implemented, the ORF sequence is scored twice with `log_odds_ratio_score`: once with
`modela` and once with `modelb`, each passed through that function's `modela` keyword.
The quotient of the two scores is then compared against `η`.

# Arguments
- `orf::ORFI{N,F}`: the ORF to evaluate; its sequence is obtained with `sequence(orf)`.

## Keyword Arguments
- `modela::BioMarkovChain = ECOLICDS`: model behind the first score (the numerator);
  intended to be the coding-region model.
- `modelb::BioMarkovChain = ECOLINOCDS`: model behind the second score (the denominator);
  intended to be the non-coding-region model.
- `b::Number = 2`: forwarded as `b` to `log_odds_ratio_score` (the base of the logarithm).
- `η::Real = 5e-3`: decision threshold (eta). Any real number is accepted.

# Returns
- `true` if the score quotient is strictly greater than `η`.
- `false` otherwise.

# Example
```julia
orf = findorfs(phi)[2]   # `phi` is a DNA sequence

lordr(orf)                                # default models and threshold
lordr(orf; η = 1)                         # stricter, integer threshold
iscoding(orf; criteria = lordr, b = 10)   # same rule through `iscoding`
```
"""
function lordr( #log_odds_ratio_decision, also lordr/cudr/kfdr/aadr
    orf::ORFI{N,F};
    modela::BioMarkovChain = ECOLICDS,
    modelb::BioMarkovChain = ECOLINOCDS,
    b::Number = 2,
    η::Real = 5e-3
) where {N,F<:GeneFinderMethod}
    orfseq = sequence(orf)

    # Each model is scored separately; note that both calls feed their model through the
    # `modela` keyword of `log_odds_ratio_score`.
    scorea = log_odds_ratio_score(orfseq; modela=modela, b=b)
    scoreb = log_odds_ratio_score(orfseq; modela=modelb, b=b)

    # NOTE(review): this is a quotient of two scores, whereas the documented rule is the
    # log of a probability ratio (a difference of log-probabilities). Confirm against the
    # definition of `log_odds_ratio_score` that the quotient is what is intended.
    logodds = scorea / scoreb

    return logodds > η
end
96+
97+
# Descriptive long-form name bound to the same function object as `lordr`, so both names
# can be passed as the `criteria` of `iscoding`.
const log_odds_ratio_decision_rule = lordr # criteria
98+
99+
## Ribosome Binding Site Decision Rule

"""
    ribsome_binding_site_decision_rule(orf::ORFI{N,F}) where {N,F<:GeneFinderMethod} -> Bool

Decide whether an Open Reading Frame (ORF) carries a significant ribosome binding site (RBS).

The ORF is scored with `orf_rbs_score`, and the rule is satisfied only when that score is
strictly greater than 9, which is taken to mean that at least one RBS is present.

Also available under the short name `rbsdr`.

# Arguments
- `orf::ORFI{N,F}`: an Open Reading Frame Interface (ORFI) object parameterized by N and F,
    where F is a subtype of GeneFinderMethod

# Returns
- `Bool`: `true` when the RBS score exceeds 9, `false` for any score of 9 or below

"""
function rbsdr(orf::ORFI{N,F}) where {N,F<:GeneFinderMethod}
    # A score above 9 is read as "at least one RBS".
    return orf_rbs_score(orf) > 9
end
125+
126+
# Descriptive long-form name bound to the same function object as `rbsdr`.
# NOTE(review): "ribsome" looks like a misspelling of "ribosome". The name is exported, so
# correcting it would need a deprecation path rather than an in-place rename.
const ribsome_binding_site_decision_rule = rbsdr

0 commit comments

Comments
 (0)