[pre-commit.ci] auto fixes from pre-commit.com hooks

pre-commit-ci[bot] · pre-commit-ci[bot] · commit 3bac9e31412c · 2024-08-26T23:10:31.000Z
for more information, see https://pre-commit.ci
diff --git a/docs/conf.py b/docs/conf.py
@@ -120,8 +120,8 @@
 # -- Options for linkcheck --------------------------------------------------
 linkcheck_ignore = [
     r"^https://doi.org/+",  # Always redirects
-    r'http://regulondb.ccg.unam.mx/', # As of 8/22/24, regulonDB works, but
-                                      # long loading times cause check issues
+    r"http://regulondb.ccg.unam.mx/",  # As of 8/22/24, regulonDB works, but
+    # long loading times cause check issues
 ]
 
 # -- NBSphinx -----------------------------------------------------------------
diff --git a/src/pymodulon/core.py b/src/pymodulon/core.py
@@ -44,7 +44,7 @@ def __init__(
         imodulondb_table=None,
         gene_links=None,
         tf_links=None,
-        chrom=None
+        chrom=None,
     ):
         """
         Initialize IcaData object
@@ -96,7 +96,7 @@ def __init__(
             Dictionary of TFs (from the TRN) to links in a database (default:
             None)
         eukaryotic: flag marking if the organism is eukaryotic or not for use in several functions
-        chrom: lengths of the chromosomes of the organism for plotting purposes, should be a 
+        chrom: lengths of the chromosomes of the organism for plotting purposes, should be a
             dataframe or dictionary with indices of the chromosomes and then the lengths of each
             chromosome as well as the sum of the start location of each chromosome (cumulative sum of
             lengths in the order of desired plotting of the chromosomes)
@@ -261,7 +261,7 @@ def __init__(
                         "goldenrod",
                         "chocolate",
                         "orange",
-                        'navy',
+                        "navy",
                     ],
                 )
             )
@@ -284,8 +284,7 @@ def __init__(
             self._chrom = None
         else:
             self.chrom = chrom
-    
-    
+
     @property
     def M(self):
         """Get M matrix"""
@@ -455,7 +454,6 @@ def trn(self, new_trn):
 
         # mark that our cutoffs are no longer optimized since the TRN
         self._cutoff_optimized = False
-        
 
     # Motif information
     @property
@@ -478,32 +476,33 @@ def chrom(self, chrom):
             self._chrom = None
         else:
             chrom = _check_table(chrom, "chrom")
-    
-            if type(self.gene_table) == type(None) or 'chr' not in self.gene_table.columns:
+
+            if (
+                type(self.gene_table) == type(None)
+                or "chr" not in self.gene_table.columns
+            ):
                 logging.warning(
-                    'Chromosome data not being set as the gene table is '
-                    'not present or does not have a chr column'    
+                    "Chromosome data not being set as the gene table is "
+                    "not present or does not have a chr column"
                 )
             else:
                 index = set(self.gene_table.chr)
                 if set(chrom.index) != set(self.gene_table.chr.values):
                     logging.warning(
-                        'Chromosome data not being set as the gene table does '
-                        'not have the same values in the chr column as the chrom '
-                        'table has as indices.'
+                        "Chromosome data not being set as the gene table does "
+                        "not have the same values in the chr column as the chrom "
+                        "table has as indices."
                     )
                 else:
-                    if 'location' not in chrom.columns:
-                         logging.warning(
-                            'Chromosome data not being set as the chrom table does '
-                            'not have the location data of the chromosomes. This should '
-                            'be a cumulative sum of the chromosome sizes in order for your '
-                             'organism'
-                        ) 
+                    if "location" not in chrom.columns:
+                        logging.warning(
+                            "Chromosome data not being set as the chrom table does "
+                            "not have the location data of the chromosomes. This should "
+                            "be a cumulative sum of the chromosome sizes in order for your "
+                            "organism"
+                        )
                     else:
                         self._chrom = chrom
-        
-        
 
     def _update_imodulon_names(self, new_names):
         """
@@ -662,7 +661,8 @@ def _update_imodulon_table(self, enrichment):
             ~self.imodulon_table.index.isin(enrichment.index)
         ]
         keep_cols = self.imodulon_table.loc[
-            enrichment.index, list(set(self.imodulon_table.columns) - set(enrichment.columns))
+            enrichment.index,
+            list(set(self.imodulon_table.columns) - set(enrichment.columns)),
         ]
         df_top_enrich = pd.concat([enrichment, keep_cols], axis=1)
         new_table = pd.concat([keep_rows, df_top_enrich], sort=False)
@@ -742,7 +742,6 @@ def compute_trn_enrichment(
         force=False,
         evidence=None,
     ) -> pd.DataFrame:
-
         """
         Compare iModulons against all regulons in the TRN
 
diff --git a/src/pymodulon/gene_util.py b/src/pymodulon/gene_util.py
@@ -264,6 +264,7 @@ def uniprot_id_mapping(
     mapping = mapping.sort_values(output_name).drop_duplicates(input_name)
     return mapping
 
+
 def gff2pandas_yeast(gff_file, feature="CDS", index=None):
     """
     Converts GFF file(s) to a Pandas DataFrame
@@ -287,7 +288,7 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
         gff_file = [gff_file]
 
     if isinstance(feature, str):
-        feature = [feature]    
+        feature = [feature]
 
     result = []
 
@@ -296,7 +297,9 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
             lines = f.readlines()
 
         # Get lines to skip
-        skiprow = list(np.where(np.array([line.startswith("#") for line in lines])==True)[0])
+        skiprow = list(
+            np.where(np.array([line.startswith("#") for line in lines]) == True)[0]
+        )
 
         # Read GFF
         names = [
@@ -311,7 +314,7 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
             "attributes",
         ]
         DF_gff = pd.read_csv(gff, sep="\t", skiprows=skiprow, names=names, header=None)
-        
+
         # Filter for CDSs
         DF_cds = DF_gff[DF_gff.feature.isin(feature)]
 
@@ -351,37 +354,38 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
 
     DF_gff = pd.concat(result)
     DF_gff = DF_gff.sort_values(["locus_tag", "feature"])
-    
-    #combine gene product from CDS to mRNA feature line
+
+    # combine gene product from CDS to mRNA feature line
     for locus in DF_gff.locus_tag.unique():
-        subset = DF_gff[DF_gff.locus_tag==locus]
-        if len(subset.feature.unique())>1:
+        subset = DF_gff[DF_gff.locus_tag == locus]
+        if len(subset.feature.unique()) > 1:
             ls = list(subset.gene_product.unique())
             ls.remove(None)
             if subset.iloc[-1].gene_product == None:
                 loc = subset.iloc[-1].name
                 DF_gff.loc[loc, "gene_product"] = ls[0]
-                
 
     if index:
         if DF_gff[index].duplicated().any():
             logging.warning("Duplicate {} detected. Dropping duplicates.".format(index))
-            DF_gff = DF_gff.drop_duplicates(index, keep='last')
+            DF_gff = DF_gff.drop_duplicates(index, keep="last")
         DF_gff.set_index("locus_tag", drop=True, inplace=True)
-        
+
     df_chromosomes = _get_chr(gff_file)
     DF_gff["chr"] = df_chromosomes.chr
 
     return DF_gff
 
-            
+
 def _get_chr(gff_file):
     for gff in gff_file:
         with open(gff, "r") as f:
             lines = f.readlines()
 
         # Get lines to skip
-        skiprow = list(np.where(np.array([line.startswith("#") for line in lines])==True)[0])
+        skiprow = list(
+            np.where(np.array([line.startswith("#") for line in lines]) == True)[0]
+        )
 
         # Read GFF
         names = [
@@ -396,26 +400,35 @@ def _get_chr(gff_file):
             "attributes",
         ]
         DF_gff = pd.read_csv(gff, sep="\t", skiprows=skiprow, names=names, header=None)
-        
+
         current_chrom = 0
         for line in DF_gff.index:
             if "chromosome=" in DF_gff.loc[line].attributes:
                 current_chrom = _get_attr(DF_gff.loc[line].attributes, "chromosome")
             elif "genome=mitochondrion" in DF_gff.loc[line].attributes:
                 current_chrom = "MT"
             elif "locus_tag" in DF_gff.loc[line].attributes:
-                DF_gff.loc[line, "locus_tag"] = _get_attr(DF_gff.loc[line].attributes, "locus_tag")
+                DF_gff.loc[line, "locus_tag"] = _get_attr(
+                    DF_gff.loc[line].attributes, "locus_tag"
+                )
                 DF_gff.loc[line, "chr"] = current_chrom
-        return DF_gff[["locus_tag", "chr"]].drop_duplicates("locus_tag").dropna().set_index("locus_tag")
+        return (
+            DF_gff[["locus_tag", "chr"]]
+            .drop_duplicates("locus_tag")
+            .dropna()
+            .set_index("locus_tag")
+        )
+
 
-    
 def get_chrom_sizes(gff_file):
     for gff in gff_file:
         with open(gff, "r") as f:
             lines = f.readlines()
 
         # Get lines to skip
-        skiprow = list(np.where(np.array([line.startswith("#") for line in lines])==True)[0])
+        skiprow = list(
+            np.where(np.array([line.startswith("#") for line in lines]) == True)[0]
+        )
 
         # Read GFF
         names = [
@@ -430,9 +443,9 @@ def get_chrom_sizes(gff_file):
             "attributes",
         ]
         DF_gff = pd.read_csv(gff, sep="\t", skiprows=skiprow, names=names, header=None)
-        
+
         DF_chrom_sizes = pd.DataFrame()
-        
+
         current_chrom = 0
         for line in DF_gff.index:
             if "chromosome=" in DF_gff.loc[line].attributes:
@@ -442,8 +455,10 @@ def get_chrom_sizes(gff_file):
             elif "genome=mitochondrion" in DF_gff.loc[line].attributes:
                 current_chrom = "MT"
                 DF_chrom_sizes.loc[current_chrom, "length"] = DF_gff.loc[line].end
-        
+
         DF_chrom_sizes["location"] = DF_chrom_sizes.length.cumsum()
-        DF_chrom_sizes["location"] = DF_chrom_sizes.apply(lambda x: x.location-x.length, axis=1)
-        
+        DF_chrom_sizes["location"] = DF_chrom_sizes.apply(
+            lambda x: x.location - x.length, axis=1
+        )
+
         return DF_chrom_sizes
diff --git a/src/pymodulon/imodulondb.py b/src/pymodulon/imodulondb.py
@@ -299,9 +299,9 @@ def imodulondb_compatibility(model, inplace=False, tfcomplex_to_gene=None):
                     .replace("+", " and ")
                 )
             except AttributeError:
-                model.imodulon_table.loc[
-                    idx, "regulator_readable"
-                ] = model.imodulon_table.regulator[idx]
+                model.imodulon_table.loc[idx, "regulator_readable"] = (
+                    model.imodulon_table.regulator[idx]
+                )
 
     # check the TRN
     cols = ["in_trn", "has_link", "has_gene"]
@@ -337,7 +337,6 @@ def imodulondb_export(
     skip_iMs=False,
     skip_genes=False,
 ):
-
     """
     Generates the iModulonDB page for the data and exports to the path.
     If certain columns are unavailable but can be filled in automatically,
diff --git a/src/pymodulon/plotting.py b/src/pymodulon/plotting.py
@@ -1,6 +1,7 @@
 """
 Plotting functions for iModulons
 """
+
 import logging
 import warnings
 from collections import Counter
@@ -778,7 +779,7 @@ def plot_gene_weights(ica_data, imodulon, by="start", xaxis=None, xname="", **kw
     # call chromosomally aware plotting function
     if type(ica_data.chrom) != type(None):
         return plot_gene_weights_chrom(ica_data, imodulon, by, xaxis, xname, **kwargs)
-    
+
     # Check that iModulon exists
     if imodulon in ica_data.M.columns:
         y = ica_data.M[imodulon]
@@ -1346,6 +1347,7 @@ def plot_dima(
 # Other plots #
 ###############
 
+
 # TODO: Add kind=bar to plot top explained variance
 def plot_explained_variance(ica_data, pc=True, ax=None):
     """
@@ -2415,7 +2417,9 @@ def _set_xaxis(xaxis, y):
     return x
 
 
-def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname="", **kwargs):
+def plot_gene_weights_chrom(
+    ica_data, imodulon, by="start", xaxis=None, xname="", **kwargs
+):
     """
     Plot gene weights on a scatter plot.
 
@@ -2439,14 +2443,14 @@ def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname=""
         :class:`~matplotlib.axes.Axes` containing the scatterplot
     """
     chr_info = ica_data.chrom
-    
+
     # Check that iModulon exists
     if imodulon in ica_data.M.columns:
         y = ica_data.M[imodulon]
         ylabel = f"{imodulon} Gene Weight"
     else:
         raise ValueError(f"iModulon does not exist: {imodulon}")
-    
+
     # Get genes in the iModulon
     bin_M = ica_data.M_binarized
     component_genes = list(bin_M[imodulon].loc[bin_M[imodulon] == 1].index)
@@ -2471,7 +2475,7 @@ def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname=""
         elif by == "start":
             if type(chr_info) != type(None):
                 x = ica_data.gene_table[["start", "chr"]]
-                x = x.apply(lambda x: x.start+chr_info.loc[x.chr, "location"], axis=1)
+                x = x.apply(lambda x: x.start + chr_info.loc[x.chr, "location"], axis=1)
             else:
                 x = ica_data.gene_table.start
             xlabel = "Gene Start"
@@ -2593,7 +2597,14 @@ def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname=""
 
     if type(chr_info) != type(None):
         for chromosome in chr_info.index:
-            ax.axvline(x = chr_info.loc[chromosome, "location"], color = 'k', linestyle='-',alpha=.1)
-        ax.set_xticks(chr_info.location, labels=chr_info.index, fontsize=8, rotation=315)
-        
+            ax.axvline(
+                x=chr_info.loc[chromosome, "location"],
+                color="k",
+                linestyle="-",
+                alpha=0.1,
+            )
+        ax.set_xticks(
+            chr_info.location, labels=chr_info.index, fontsize=8, rotation=315
+        )
+
     return ax
diff --git a/src/pymodulon/util.py b/src/pymodulon/util.py
@@ -1,6 +1,7 @@
 """
 General utility functions for the pymodulon package
 """
+
 import json
 import logging
 import re