Skip to content

Commit 3bac9e3

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 776cae9 commit 3bac9e3

File tree

6 files changed

+85
-60
lines changed

6 files changed

+85
-60
lines changed

docs/conf.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,8 +120,8 @@
120120
# -- Options for linkcheck --------------------------------------------------
121121
linkcheck_ignore = [
122122
r"^https://doi.org/+", # Always redirects
123-
r'http://regulondb.ccg.unam.mx/', # As of 8/22/24, regulonDB works, but
124-
# long loading times cause check issues
123+
r"http://regulondb.ccg.unam.mx/", # As of 8/22/24, regulonDB works, but
124+
# long loading times cause check issues
125125
]
126126

127127
# -- NBSphinx -----------------------------------------------------------------

src/pymodulon/core.py

Lines changed: 23 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ def __init__(
4444
imodulondb_table=None,
4545
gene_links=None,
4646
tf_links=None,
47-
chrom=None
47+
chrom=None,
4848
):
4949
"""
5050
Initialize IcaData object
@@ -96,7 +96,7 @@ def __init__(
9696
Dictionary of TFs (from the TRN) to links in a database (default:
9797
None)
9898
eukaryotic: flag marking if the organism is eukaryotic or not for use in several functions
99-
chrom: lengths of the chromosomes of the organism for plotting purposes, should be a
99+
chrom: lengths of the chromosomes of the organism for plotting purposes, should be a
100100
dataframe or dictionary with indices of the chromosomes and then the lengths of each
101101
chromosome as well as the sum of the start location of each chromosome (cumulative sum of
102102
lengths in the order of desired plotting of the chromosomes)
@@ -261,7 +261,7 @@ def __init__(
261261
"goldenrod",
262262
"chocolate",
263263
"orange",
264-
'navy',
264+
"navy",
265265
],
266266
)
267267
)
@@ -284,8 +284,7 @@ def __init__(
284284
self._chrom = None
285285
else:
286286
self.chrom = chrom
287-
288-
287+
289288
@property
290289
def M(self):
291290
"""Get M matrix"""
@@ -455,7 +454,6 @@ def trn(self, new_trn):
455454

456455
# mark that our cutoffs are no longer optimized since the TRN
457456
self._cutoff_optimized = False
458-
459457

460458
# Motif information
461459
@property
@@ -478,32 +476,33 @@ def chrom(self, chrom):
478476
self._chrom = None
479477
else:
480478
chrom = _check_table(chrom, "chrom")
481-
482-
if type(self.gene_table) == type(None) or 'chr' not in self.gene_table.columns:
479+
480+
if (
481+
type(self.gene_table) == type(None)
482+
or "chr" not in self.gene_table.columns
483+
):
483484
logging.warning(
484-
'Chromosome data not being set as the gene table is '
485-
'not present or does not have a chr column'
485+
"Chromosome data not being set as the gene table is "
486+
"not present or does not have a chr column"
486487
)
487488
else:
488489
index = set(self.gene_table.chr)
489490
if set(chrom.index) != set(self.gene_table.chr.values):
490491
logging.warning(
491-
'Chromosome data not being set as the gene table does '
492-
'not have the same values in the chr column as the chrom '
493-
'table has as indices.'
492+
"Chromosome data not being set as the gene table does "
493+
"not have the same values in the chr column as the chrom "
494+
"table has as indices."
494495
)
495496
else:
496-
if 'location' not in chrom.columns:
497-
logging.warning(
498-
'Chromosome data not being set as the chrom table does '
499-
'not have the location data of the chromosomes. This should '
500-
'be a cumulative sum of the chromosome sizes in order for your '
501-
'organism'
502-
)
497+
if "location" not in chrom.columns:
498+
logging.warning(
499+
"Chromosome data not being set as the chrom table does "
500+
"not have the location data of the chromosomes. This should "
501+
"be a cumulative sum of the chromosome sizes in order for your "
502+
"organism"
503+
)
503504
else:
504505
self._chrom = chrom
505-
506-
507506

508507
def _update_imodulon_names(self, new_names):
509508
"""
@@ -662,7 +661,8 @@ def _update_imodulon_table(self, enrichment):
662661
~self.imodulon_table.index.isin(enrichment.index)
663662
]
664663
keep_cols = self.imodulon_table.loc[
665-
enrichment.index, list(set(self.imodulon_table.columns) - set(enrichment.columns))
664+
enrichment.index,
665+
list(set(self.imodulon_table.columns) - set(enrichment.columns)),
666666
]
667667
df_top_enrich = pd.concat([enrichment, keep_cols], axis=1)
668668
new_table = pd.concat([keep_rows, df_top_enrich], sort=False)
@@ -742,7 +742,6 @@ def compute_trn_enrichment(
742742
force=False,
743743
evidence=None,
744744
) -> pd.DataFrame:
745-
746745
"""
747746
Compare iModulons against all regulons in the TRN
748747

src/pymodulon/gene_util.py

Lines changed: 37 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -264,6 +264,7 @@ def uniprot_id_mapping(
264264
mapping = mapping.sort_values(output_name).drop_duplicates(input_name)
265265
return mapping
266266

267+
267268
def gff2pandas_yeast(gff_file, feature="CDS", index=None):
268269
"""
269270
Converts GFF file(s) to a Pandas DataFrame
@@ -287,7 +288,7 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
287288
gff_file = [gff_file]
288289

289290
if isinstance(feature, str):
290-
feature = [feature]
291+
feature = [feature]
291292

292293
result = []
293294

@@ -296,7 +297,9 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
296297
lines = f.readlines()
297298

298299
# Get lines to skip
299-
skiprow = list(np.where(np.array([line.startswith("#") for line in lines])==True)[0])
300+
skiprow = list(
301+
np.where(np.array([line.startswith("#") for line in lines]) == True)[0]
302+
)
300303

301304
# Read GFF
302305
names = [
@@ -311,7 +314,7 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
311314
"attributes",
312315
]
313316
DF_gff = pd.read_csv(gff, sep="\t", skiprows=skiprow, names=names, header=None)
314-
317+
315318
# Filter for CDSs
316319
DF_cds = DF_gff[DF_gff.feature.isin(feature)]
317320

@@ -351,37 +354,38 @@ def gff2pandas_yeast(gff_file, feature="CDS", index=None):
351354

352355
DF_gff = pd.concat(result)
353356
DF_gff = DF_gff.sort_values(["locus_tag", "feature"])
354-
355-
#combine gene product from CDS to mRNA feature line
357+
358+
# combine gene product from CDS to mRNA feature line
356359
for locus in DF_gff.locus_tag.unique():
357-
subset = DF_gff[DF_gff.locus_tag==locus]
358-
if len(subset.feature.unique())>1:
360+
subset = DF_gff[DF_gff.locus_tag == locus]
361+
if len(subset.feature.unique()) > 1:
359362
ls = list(subset.gene_product.unique())
360363
ls.remove(None)
361364
if subset.iloc[-1].gene_product == None:
362365
loc = subset.iloc[-1].name
363366
DF_gff.loc[loc, "gene_product"] = ls[0]
364-
365367

366368
if index:
367369
if DF_gff[index].duplicated().any():
368370
logging.warning("Duplicate {} detected. Dropping duplicates.".format(index))
369-
DF_gff = DF_gff.drop_duplicates(index, keep='last')
371+
DF_gff = DF_gff.drop_duplicates(index, keep="last")
370372
DF_gff.set_index("locus_tag", drop=True, inplace=True)
371-
373+
372374
df_chromosomes = _get_chr(gff_file)
373375
DF_gff["chr"] = df_chromosomes.chr
374376

375377
return DF_gff
376378

377-
379+
378380
def _get_chr(gff_file):
379381
for gff in gff_file:
380382
with open(gff, "r") as f:
381383
lines = f.readlines()
382384

383385
# Get lines to skip
384-
skiprow = list(np.where(np.array([line.startswith("#") for line in lines])==True)[0])
386+
skiprow = list(
387+
np.where(np.array([line.startswith("#") for line in lines]) == True)[0]
388+
)
385389

386390
# Read GFF
387391
names = [
@@ -396,26 +400,35 @@ def _get_chr(gff_file):
396400
"attributes",
397401
]
398402
DF_gff = pd.read_csv(gff, sep="\t", skiprows=skiprow, names=names, header=None)
399-
403+
400404
current_chrom = 0
401405
for line in DF_gff.index:
402406
if "chromosome=" in DF_gff.loc[line].attributes:
403407
current_chrom = _get_attr(DF_gff.loc[line].attributes, "chromosome")
404408
elif "genome=mitochondrion" in DF_gff.loc[line].attributes:
405409
current_chrom = "MT"
406410
elif "locus_tag" in DF_gff.loc[line].attributes:
407-
DF_gff.loc[line, "locus_tag"] = _get_attr(DF_gff.loc[line].attributes, "locus_tag")
411+
DF_gff.loc[line, "locus_tag"] = _get_attr(
412+
DF_gff.loc[line].attributes, "locus_tag"
413+
)
408414
DF_gff.loc[line, "chr"] = current_chrom
409-
return DF_gff[["locus_tag", "chr"]].drop_duplicates("locus_tag").dropna().set_index("locus_tag")
415+
return (
416+
DF_gff[["locus_tag", "chr"]]
417+
.drop_duplicates("locus_tag")
418+
.dropna()
419+
.set_index("locus_tag")
420+
)
421+
410422

411-
412423
def get_chrom_sizes(gff_file):
413424
for gff in gff_file:
414425
with open(gff, "r") as f:
415426
lines = f.readlines()
416427

417428
# Get lines to skip
418-
skiprow = list(np.where(np.array([line.startswith("#") for line in lines])==True)[0])
429+
skiprow = list(
430+
np.where(np.array([line.startswith("#") for line in lines]) == True)[0]
431+
)
419432

420433
# Read GFF
421434
names = [
@@ -430,9 +443,9 @@ def get_chrom_sizes(gff_file):
430443
"attributes",
431444
]
432445
DF_gff = pd.read_csv(gff, sep="\t", skiprows=skiprow, names=names, header=None)
433-
446+
434447
DF_chrom_sizes = pd.DataFrame()
435-
448+
436449
current_chrom = 0
437450
for line in DF_gff.index:
438451
if "chromosome=" in DF_gff.loc[line].attributes:
@@ -442,8 +455,10 @@ def get_chrom_sizes(gff_file):
442455
elif "genome=mitochondrion" in DF_gff.loc[line].attributes:
443456
current_chrom = "MT"
444457
DF_chrom_sizes.loc[current_chrom, "length"] = DF_gff.loc[line].end
445-
458+
446459
DF_chrom_sizes["location"] = DF_chrom_sizes.length.cumsum()
447-
DF_chrom_sizes["location"] = DF_chrom_sizes.apply(lambda x: x.location-x.length, axis=1)
448-
460+
DF_chrom_sizes["location"] = DF_chrom_sizes.apply(
461+
lambda x: x.location - x.length, axis=1
462+
)
463+
449464
return DF_chrom_sizes

src/pymodulon/imodulondb.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -299,9 +299,9 @@ def imodulondb_compatibility(model, inplace=False, tfcomplex_to_gene=None):
299299
.replace("+", " and ")
300300
)
301301
except AttributeError:
302-
model.imodulon_table.loc[
303-
idx, "regulator_readable"
304-
] = model.imodulon_table.regulator[idx]
302+
model.imodulon_table.loc[idx, "regulator_readable"] = (
303+
model.imodulon_table.regulator[idx]
304+
)
305305

306306
# check the TRN
307307
cols = ["in_trn", "has_link", "has_gene"]
@@ -337,7 +337,6 @@ def imodulondb_export(
337337
skip_iMs=False,
338338
skip_genes=False,
339339
):
340-
341340
"""
342341
Generates the iModulonDB page for the data and exports to the path.
343342
If certain columns are unavailable but can be filled in automatically,

src/pymodulon/plotting.py

Lines changed: 19 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""
22
Plotting functions for iModulons
33
"""
4+
45
import logging
56
import warnings
67
from collections import Counter
@@ -778,7 +779,7 @@ def plot_gene_weights(ica_data, imodulon, by="start", xaxis=None, xname="", **kw
778779
# call chromosomally aware plotting function
779780
if type(ica_data.chrom) != type(None):
780781
return plot_gene_weights_chrom(ica_data, imodulon, by, xaxis, xname, **kwargs)
781-
782+
782783
# Check that iModulon exists
783784
if imodulon in ica_data.M.columns:
784785
y = ica_data.M[imodulon]
@@ -1346,6 +1347,7 @@ def plot_dima(
13461347
# Other plots #
13471348
###############
13481349

1350+
13491351
# TODO: Add kind=bar to plot top explained variance
13501352
def plot_explained_variance(ica_data, pc=True, ax=None):
13511353
"""
@@ -2415,7 +2417,9 @@ def _set_xaxis(xaxis, y):
24152417
return x
24162418

24172419

2418-
def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname="", **kwargs):
2420+
def plot_gene_weights_chrom(
2421+
ica_data, imodulon, by="start", xaxis=None, xname="", **kwargs
2422+
):
24192423
"""
24202424
Plot gene weights on a scatter plot.
24212425
@@ -2439,14 +2443,14 @@ def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname=""
24392443
:class:`~matplotlib.axes.Axes` containing the scatterplot
24402444
"""
24412445
chr_info = ica_data.chrom
2442-
2446+
24432447
# Check that iModulon exists
24442448
if imodulon in ica_data.M.columns:
24452449
y = ica_data.M[imodulon]
24462450
ylabel = f"{imodulon} Gene Weight"
24472451
else:
24482452
raise ValueError(f"iModulon does not exist: {imodulon}")
2449-
2453+
24502454
# Get genes in the iModulon
24512455
bin_M = ica_data.M_binarized
24522456
component_genes = list(bin_M[imodulon].loc[bin_M[imodulon] == 1].index)
@@ -2471,7 +2475,7 @@ def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname=""
24712475
elif by == "start":
24722476
if type(chr_info) != type(None):
24732477
x = ica_data.gene_table[["start", "chr"]]
2474-
x = x.apply(lambda x: x.start+chr_info.loc[x.chr, "location"], axis=1)
2478+
x = x.apply(lambda x: x.start + chr_info.loc[x.chr, "location"], axis=1)
24752479
else:
24762480
x = ica_data.gene_table.start
24772481
xlabel = "Gene Start"
@@ -2593,7 +2597,14 @@ def plot_gene_weights_chrom(ica_data, imodulon, by="start", xaxis=None, xname=""
25932597

25942598
if type(chr_info) != type(None):
25952599
for chromosome in chr_info.index:
2596-
ax.axvline(x = chr_info.loc[chromosome, "location"], color = 'k', linestyle='-',alpha=.1)
2597-
ax.set_xticks(chr_info.location, labels=chr_info.index, fontsize=8, rotation=315)
2598-
2600+
ax.axvline(
2601+
x=chr_info.loc[chromosome, "location"],
2602+
color="k",
2603+
linestyle="-",
2604+
alpha=0.1,
2605+
)
2606+
ax.set_xticks(
2607+
chr_info.location, labels=chr_info.index, fontsize=8, rotation=315
2608+
)
2609+
25992610
return ax

src/pymodulon/util.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""
22
General utility functions for the pymodulon package
33
"""
4+
45
import json
56
import logging
67
import re

0 commit comments

Comments
 (0)