P2GX
diff --git a/‎docs/conf.py
Lines changed: 1 addition & 1 deletion b/‎docs/conf.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/tutorial.rst
Lines changed: 8 additions & 27 deletions b/‎docs/tutorial.rst
Lines changed: 8 additions & 27 deletions
diff --git a/‎docs/user-guide/analyses/phenotype-groups.rst
Lines changed: 53 additions & 28 deletions b/‎docs/user-guide/analyses/phenotype-groups.rst
Lines changed: 53 additions & 28 deletions
diff --git a/‎docs/user-guide/mtc.rst
Lines changed: 9 additions & 19 deletions b/‎docs/user-guide/mtc.rst
Lines changed: 9 additions & 19 deletions
diff --git a/‎docs/user-guide/predicates/devries.rst
Lines changed: 29 additions & 13 deletions b/‎docs/user-guide/predicates/devries.rst
Lines changed: 29 additions & 13 deletions
diff --git a/‎src/gpsea/__init__.py
Lines changed: 1 addition & 1 deletion b/‎src/gpsea/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/gpsea/analysis/_base.py
Lines changed: 43 additions & 0 deletions b/‎src/gpsea/analysis/_base.py
Lines changed: 43 additions & 0 deletions
@@ -62,7 +62,7 @@
 # The short X.Y version.
 version = u'0.7'
 # The full version, including alpha/beta/rc tags.
-release = u'0.7.0'
+release = u'0.7.1'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 
@@ -237,39 +237,20 @@ Testing multiple hypothesis on the same dataset increases the chance of receivin
 However, GPSEA simplifies the application of an appropriate multiple testing correction.
 
 For general use, we recommend using a combination
-of a *Phenotype MTC filter* (:class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`) with a *multiple testing correction*.
-Phenotype MTC filter chooses the HPO terms to test according to several heuristics, which
+of a *phenotype MT filter* (:class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`) with a *multiple testing correction*.
+Phenotype MT filter chooses the HPO terms to test according to several heuristics, which
 reduce the multiple testing burden and focus the analysis
-on the most interesting terms (see :ref:`HPO MTC filter <hpo-mtc-filter-strategy>` for more info).
+on the most interesting terms (see :ref:`HPO MT filter <hpo-mtc-filter-strategy>` for more info).
 Then the multiple testing correction, such as Bonferroni or Benjamini-Hochberg,
 is used to control the family-wise error rate or the false discovery rate.
 See :ref:`mtc` for more information.
 
-In this example, we will use a combination of the HPO MTC filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`)
-with Benjamini-Hochberg procedure (``mtc_correction='fdr_bh'``)
-with a false discovery control level at (``mtc_alpha=0.05``):
+>>> from gpsea.analysis.pcats import configure_hpo_term_analysis
+>>> analysis = configure_hpo_term_analysis(hpo)
 
->>> from gpsea.analysis.mtc_filter import HpoMtcFilter
->>> mtc_filter = HpoMtcFilter.default_filter(hpo)
->>> mtc_correction = 'fdr_bh'
->>> mtc_alpha = 0.05
-
-Choosing the statistical procedure for assessment of association between genotype and phenotype
-groups is the last missing piece of the analysis. We will use Fisher Exact Test:
-
->>> from gpsea.analysis.pcats.stats import FisherExactTest
->>> count_statistic = FisherExactTest()
-
-and we finalize the analysis setup by putting all components together
-into :class:`~gpsea.analysis.pcats.HpoTermAnalysis`:
-
->>> from gpsea.analysis.pcats import HpoTermAnalysis
->>> analysis = HpoTermAnalysis(
-...     count_statistic=count_statistic,
-...     mtc_filter=mtc_filter,
-...     mtc_correction=mtc_correction,
-...     mtc_alpha=mtc_alpha,
-... )
+:func:`~gpsea.analysis.pcats.configure_hpo_term_analysis` configures the analysis
+that uses HPO MTC filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`) for selecting HPO terms of interest,
+Fisher Exact test for computing nominal p values, and Benjamini-Hochberg for multiple testing correction.
 
 Now we can perform the analysis and investigate the results.
 
 
@@ -167,59 +167,84 @@ The function finds 369 HPO terms that annotate at least one individual,
 including the *indirect* annotations whose presence is implied by the :ref:`true-path-rule`.
 
 
-Statistical test
-----------------
+Statistical analysis
+--------------------
 
 We will use :ref:`fisher-exact-test` to test the association
 between genotype and phenotype groups, as described previously.
 
->>> from gpsea.analysis.pcats.stats import FisherExactTest
->>> count_statistic = FisherExactTest()
+In the case of this cohort, we can test association between having a frameshift variant and one of 369 HPO terms.
+However, testing multiple hypotheses on the same dataset increases the risk of finding
+a significant association solely by chance.
+GPSEA uses a two-pronged strategy to reduce the number of tests and, therefore, mitigate this risk:
+a phenotype multiple testing (MT) filter and multiple testing correction (MTC).
 
-FET will compute a p value for each genotype phenotype group.
+Phenotype MT filter selects a (sub)set of HPO terms for testing,
+for instance only the user-selected terms (see :class:`~gpsea.analysis.mtc_filter.SpecifyTermsStrategy`)
+or the terms selected by :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`.
 
+Multiple testing correction then adjusts the nominal p values for the increased risk
+of false positive G/P associations.
+The available MTC procedures are listed in the :ref:`mtc-correction-procedures` section.
 
-Multiple testing correction
----------------------------
+We must pick one of these to perform genotype-phenotype analysis.
 
-In the case of this cohort, we could test association between having a frameshift variant and one of 369 HPO terms.
-However, testing multiple hypotheses on the same dataset increases the risk of finding a significant association
-by chance.
-GPSEA uses a two-pronged strategy to mitigate this risk - a phenotype MTC filter and multiple testing correction.
 
-.. note::
+Default analysis
+^^^^^^^^^^^^^^^^
+
+We recommend using HPO MT filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`) as a phenotype MT filter
+and Benjamini-Hochberg for multiple testing correction.
+The default analysis can be configured with :func:`~gpsea.analysis.pcats.configure_hpo_term_analysis` convenience method.
+
+>>> from gpsea.analysis.pcats import configure_hpo_term_analysis
+>>> analysis = configure_hpo_term_analysis(hpo)
 
-   See the :ref:`mtc` section for more info on multiple testing procedures.
 
-Here we will use a combination of the HPO MTC filter (:class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`)
-with Benjamini-Hochberg procedure (``mtc_correction='fdr_bh'``)
-with a false discovery control level set to `0.05` (``mtc_alpha=0.05``):
+Custom analysis
+^^^^^^^^^^^^^^^
+
+If the defaults do not work, we can configure the analysis manually.
+First, we choose a phenotype MT filter (e.g. :class:`~gpsea.analysis.mtc_filter.HpoMtcFilter`):
 
 >>> from gpsea.analysis.mtc_filter import HpoMtcFilter
->>> mtc_filter = HpoMtcFilter.default_filter(hpo, term_frequency_threshold=0.2)
->>> mtc_correction = 'fdr_bh'
->>> mtc_alpha = 0.05
+>>> mtc_filter = HpoMtcFilter.default_filter(hpo, term_frequency_threshold=.2)
+
+.. note::
 
+   See the :ref:`mtc-filters` section for more info on the available MT filters.
 
-Final analysis
---------------
+then a statistical test (e.g. Fisher Exact test):
+
+>>> from gpsea.analysis.pcats.stats import FisherExactTest
+>>> count_statistic = FisherExactTest()
+
+.. note::
+
+   See the :mod:`gpsea.analysis.pcats.stats` module for the available multiple testing procedures
+   (TL;DR, just Fisher Exact test at this time).
+
+and we finalize the setup by choosing a MTC procedure
+(e.g. `fdr_bh` for Benjamini-Hochberg) along with the MTC alpha:
+
+>>> mtc_correction = 'fdr_bh'
+>>> mtc_alpha = 0.05
 
-We finalize the analysis setup by putting all components together
-into :class:`~gpsea.analysis.pcats.HpoTermAnalysis`:
+The final :class:`~gpsea.analysis.pcats.HpoTermAnalysis` is created as:
 
 >>> from gpsea.analysis.pcats import HpoTermAnalysis
 >>> analysis = HpoTermAnalysis(
 ...     count_statistic=count_statistic,
 ...     mtc_filter=mtc_filter,
-...     mtc_correction=mtc_correction,
-...     mtc_alpha=mtc_alpha,
+...     mtc_correction='fdr_bh',
+...     mtc_alpha=0.05,
 ... )
 
 
 Analysis
 ========
 
-We can now execute the analysis:
+We can now test associations between the genotype groups and the HPO terms:
 
 >>> result = analysis.compare_genotype_vs_phenotypes(
 ...     cohort=cohort,
@@ -232,8 +257,8 @@ We can now execute the analysis:
 24
 
 
-Thanks to phenotype MTC filter, we only tested 24 out of 369 terms.
-We can learn more by showing the MTC filter report:
+Thanks to phenotype MT filter, we only tested 24 out of 369 terms.
+We can learn more by showing the MT filter report:
 
 >>> from gpsea.view import MtcStatsViewer
 >>> mtc_viewer = MtcStatsViewer()
 
@@ -36,8 +36,9 @@ Therefore, unless we take into account the fact that multiple statistical tests
 it is likely that we will obtain one or more false-positive results.
 
 GPSEA offers two approaches to mitigate this problem: multiple-testing correction (MTC) procedures
-and MTC filters to choose the terms to be tested.
+and MT filters to choose the terms to be tested.
 
+.. _mtc-correction-procedures:
 
 Multiple-testing correction procedures
 ======================================
@@ -106,7 +107,7 @@ when creating an instance of :class:`~gpsea.analysis.pcats.HpoTermAnalysis`:
 
 .. _mtc-filters:
 
-MTC filters: Choosing which terms to test
+MT filters: Choosing which terms to test
 =========================================
 
 We can reduce the overall MTC burden by choosing which terms to test. 
@@ -117,28 +118,17 @@ may "survive" the multiple-testing correction.
 
 In the context of GPSEA, we represent the concept of phenotype filtering 
 by :class:`~gpsea.analysis.mtc_filter.PhenotypeMtcFilter`.
-The filter must be chosen before the :class:`~gpsea.analysis.pcats.MultiPhenotypeAnalysis`,
-such as :class:`~gpsea.analysis.pcats.HpoTermAnalysis`, is run:
-
->>> from gpsea.analysis.pcats import HpoTermAnalysis
->>> analysis = HpoTermAnalysis()  # doctest: +ELLIPSIS
-Traceback (most recent call last):
-  ...
-TypeError: HpoTermAnalysis.__init__() missing 2 required positional arguments: 'count_statistic' and 'mtc_filter'
-
-Note the missing `mtc_filter` option.
-
-We describe the three filtering strategies in the following sections.
+We provide three filtering strategies.
 
 
 .. _use-all-terms-strategy:
 
 Test all terms
 --------------
 
-The first MTC filtering strategy is the simplest - do not apply any filtering at all.
-This will result in testing all terms. We do not recommend this strategy, 
-but it can be useful to disable MTC filtering.
+The first MT filtering strategy is the simplest - do not apply any filtering at all.
+This will result in testing all terms and we do not recommend this strategy, 
+but it can be used to disable MT filtering.
 
 The strategy is implemented in :class:`~gpsea.analysis.mtc_filter.UseAllTermsMtcFilter`.
 
@@ -171,10 +161,10 @@ we pass an iterable (e.g. a tuple) with these two terms as an argument:
 
 .. _hpo-mtc-filter-strategy:
 
-HPO MTC filter strategy
+HPO MT filter strategy
 -----------------------
 
-Last, the HPO MTC strategy involves making several domain judgments to take advantage of the HPO structure.
+The HPO MT strategy involves making several domain judgments and takes advantage of the HPO structure.
 The strategy needs access to HPO:
 
 >>> import hpotk
 
@@ -15,7 +15,7 @@ Statistical significance of a difference in the De Vries score between groups ca
 determined using the Mann-Whitney-U test.
 
 We refer to `Feenstra et al. (2011) <https://pubmed.ncbi.nlm.nih.gov/21712853/>`_ for
-the original description of the adjusted De Vries score. Here we offer a version of the
+the original description of the adjusted De Vries score. Here we offer an adapted version of the
 score that leverages the structure of the Human Phenotype Ontology to assess the phenotype.
 
 
@@ -113,38 +113,54 @@ is 2 because the same individual cannot have both tall and short stature or both
 Facial dysmorphic features
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-This section assigns two points if two or more anomalies are identified in the following
-categories: hypertelorism, nasal anomalies and ear anomalies. Our implementation of this feature counts the total
-number of terms or descendents of the following HPO terms.
+This section assigns two points if two or more facial dysmorphisms are identified. In contrast to the list of anomalies described
+in the original 2011 publication of the DeVries score, we leverage the structure of the HPO to include many more HPO terms that 
+denote various kinds of facial dysmorphism (e.g., `Abnormality of globe location <https://hpo.jax.org/browse/term/HP:0100886>`_ instead of just
+`Hypertelorism (HP:0000316) <https://hpo.jax.org/browse/term/HP:0000316>`_).
+
+Our implementation of this feature counts the total number of terms or descendents of the following HPO terms. Up to one point is given
+for each of the categories.
 
 +----------------------------------------------------------------------------------------------------------+-----------+
 | HPO term                                                                                                 | Score     |
 +==========================================================================================================+===========+
-| `Hypertelorism (HP:0000316) <https://hpo.jax.org/browse/term/HP:0000316>`_                               | 1         |
+| `Abnormality of globe location (HP:0000316) <https://hpo.jax.org/browse/term/HP:0100886>`_               | 0 or 1    |
++----------------------------------------------------------------------------------------------------------+-----------+
+| `Abnormal lip morphology (HP:0000159) <https://hpo.jax.org/browse/term/HP:0000159>`_                     |  0 or 1   |
++----------------------------------------------------------------------------------------------------------+-----------+
+| `Abnormal facial shape (HP:0001999) <https://hpo.jax.org/browse/term/HP:0001999>`_                       |  0 or 1   |
++----------------------------------------------------------------------------------------------------------+-----------+
+| `Abnormal midface morphology (HP:0000309) <https://hpo.jax.org/browse/term/HP:0000309>`_                 |  0 or 1   |
 +----------------------------------------------------------------------------------------------------------+-----------+
-| `Abnormal external nose morphology (HP:0010938) <https://hpo.jax.org/browse/term/HP:0010938>`_           | 1 each    |
+| `Abnormal forehead morphology (HP:0000290) <https://hpo.jax.org/browse/term/HP:0000290>`_                | 0 or 1    |
 +----------------------------------------------------------------------------------------------------------+-----------+
-| `Abnormal pinna morphology (HP:0000377)  <https://hpo.jax.org/browse/term/HP:0000377>`_                  | 1 each    |
+| `Abnormal chin morphology (HP:0000306) <https://hpo.jax.org/browse/term/HP:0000306>`_                    |  0 or 1   |
++----------------------------------------------------------------------------------------------------------+-----------+
+| `Abnormal external nose morphology (HP:0010938) <https://hpo.jax.org/browse/term/HP:0010938>`_           |  0 or 1   |
++----------------------------------------------------------------------------------------------------------+-----------+
+| `Abnormal pinna morphology (HP:0000377)  <https://hpo.jax.org/browse/term/HP:0000377>`_                  |  0 or 1   |
 +----------------------------------------------------------------------------------------------------------+-----------+
 
-If two or more terms are found, the score is 2, otherwise a score of zero is assigned.
+If items from two or more categories are found, the score is 2, otherwise a score of zero is assigned.
 
 
 Non-facial dysmorphism and congenital abnormalities
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-One point is assigned for either the
-corresponding HPO terms or any of their descendents up to a maximum of two points.
+One point is assigned for either the corresponding HPO terms or any of their descendents up to a maximum of two points.
+A maximum of one point is assigned for each of the following categories.
 
 +----------------------------------------------------------------------------------------------------------+-----------+
 | HPO term                                                                                                 | Score     |
 +==========================================================================================================+===========+
-| `Abnormal hand morphology (HP:0005922) <https://hpo.jax.org/browse/term/HP:0005922>`_                    | 1 each    |
+| `Abnormal hand morphology (HP:0005922) <https://hpo.jax.org/browse/term/HP:0005922>`_                    | 0 or 1    |
 +----------------------------------------------------------------------------------------------------------+-----------+
-| `Abnormal heart morphology (HP:0001627) <https://hpo.jax.org/browse/term/HP:0001627>`_                   | 1 each    |
+| `Abnormal heart morphology (HP:0001627) <https://hpo.jax.org/browse/term/HP:0001627>`_                   |  0 or 1   |
 +----------------------------------------------------------------------------------------------------------+-----------+
-| `Hypospadias (HP:0000047) <https://hpo.jax.org/browse/term/HP:0000047>`_                                 | 1         |
+| `Abnormal external genitalia morphology (HP:0000811) <https://hpo.jax.org/browse/term/HP:0000811>`_      |  0 or 1   |
 +----------------------------------------------------------------------------------------------------------+-----------+
 
+The score for this section can thus be 0, 1, or 2.
+
 
 Final score
 ~~~~~~~~~~~
 
@@ -2,4 +2,4 @@
 GPSEA is a library for analyzing genotype-phenotype correlations in cohorts of rare disease patients.
 """
 
-__version__ = "0.7.0"
+__version__ = "0.7.1"
@@ -3,6 +3,7 @@
 import os
 import typing
 
+import numpy as np
 import pandas as pd
 
 from .predicate.phenotype import PhenotypePolyPredicate, P
@@ -205,6 +206,48 @@ def corrected_pvals(self) -> typing.Optional[typing.Sequence[float]]:
         The sequence includes a `NaN` value for each phenotype that was *not* tested.
         """
         return self._corrected_pvals
+    
+    def n_significant_for_alpha(
+        self,
+        alpha: float = .05,
+    ) -> typing.Optional[int]:
+        """
+        Get the count of the corrected p values with the value being less than or equal to `alpha`.
+
+        :param alpha: a `float` with significance level.
+        """
+        if self.corrected_pvals is None:
+            return None
+        else:
+            return sum(p_val <= alpha for p_val in self.corrected_pvals)
+        
+    def significant_phenotype_indices(
+        self,
+        alpha: float = .05,
+        pval_kind: typing.Literal["corrected", "nominal"] = "corrected",
+    ) -> typing.Optional[typing.Sequence[int]]:
+        """
+        Get the indices of phenotypes that attain significance for provided `alpha`.
+        """
+        if pval_kind == "corrected":
+            if self.corrected_pvals is None:
+                vals = None
+            else:
+                vals = np.array(self.corrected_pvals)
+        elif pval_kind == "nominal":
+            vals = np.array(self.pvals)
+        else:
+            raise ValueError(f"Unsupported `pval_kind` value {pval_kind}")
+        
+        if vals is None:
+            return None
+        
+        not_na = ~np.isnan(vals)
+        significant = vals <= alpha
+        selected = not_na & significant
+
+        return tuple(int(idx) for idx in np.argsort(vals) if selected[idx])
+
 
     @property
     def total_tests(self) -> int: