Skip to content

Commit 62034be

Browse files
committed
Minor release. Reformatted doc strings, fixed broad try-except clause
All the "save_figure" options were changed to "False". One of the functions takes a list, and the default was wrongly set to a tuple. Minor release.
1 parent 51d0102 commit 62034be

File tree

5 files changed

+103
-73
lines changed

5 files changed

+103
-73
lines changed

HISTORY.rst

+5
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,8 @@ History
1111
------------------
1212

1313
* Second release on PyPI.
14+
15+
0.2.4 (2020-08-22)
16+
------------------
17+
18+
* Minor release on PyPI.

docs/conf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
# The short X.Y version.
5757
version = '0.2'
5858
# The full version, including alpha/beta/rc tags.
59-
release = '0.2.3'
59+
release = '0.2.4'
6060

6161
# The language for content autogenerated by Sphinx. Refer to documentation
6262
# for a list of supported languages.

flowsym/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
__author__ = """Luis Perez Morales, Michael M. Shavlik"""
44
__email__ = '[email protected]'
5-
__version__ = '0.2.3'
5+
__version__ = '0.2.4'
66

77
# Import the main module in this package
88
from flowsym.flowsym import *

flowsym/flowsym.py

+95-70
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,12 @@
1111
from scipy.stats import ks_2samp
1212
from sklearn.mixture import GaussianMixture
1313

14-
# To make Travis happy. Attempt absolute path first and then from raw Github file
15-
try:
16-
spectrum_data = pd.read_csv('flowsym/data/FPbase_Spectra_updated.csv').fillna(value=0)
17-
except:
18-
spectrum_data = pd.read_csv('https://raw.githubusercontent.com/harmslab/flowsym/master/flowsym/data/FPbase_Spectra_updated.csv').fillna(value=0)
14+
# Import from raw Github file
15+
spectrum_data = pd.read_csv('https://raw.githubusercontent.com/harmslab/flowsym/master/flowsym/data'
16+
'/FPbase_Spectra_updated.csv').fillna(value=0)
1917

2018

21-
def create_controls(size, colors=('blue', 'cyan', 'green', 'yellow', 'orange', 'red', 'far_red', 'nir', 'ir')):
19+
def create_controls(size, colors=['blue', 'cyan', 'green', 'yellow', 'orange', 'red', 'far_red', 'nir', 'ir']):
2220
"""
2321
This is a function that takes a DataFrame size (i.e. number of controls) and
2422
a list of colors the user wants to run controls for.
@@ -134,9 +132,10 @@ def create_controls(size, colors=('blue', 'cyan', 'green', 'yellow', 'orange', '
134132
def create_sample(size, colors=['blue', 'cyan', 'green', 'yellow', 'orange', 'red', 'far_red', 'nir', 'ir'],
135133
weights=[]):
136134
"""
137-
This is a function that takes a defined dataframe length for number of samples (int)
138-
and excitation and emission wavelengths (list,list). Assumes equal probability of each
139-
color unless specified by the user.
135+
This is a function that takes a defined dataframe length for
136+
number of samples (int) and excitation and emission wavelengths
137+
(list,list). Assumes equal probability of each color unless
138+
specified by the user.
140139
141140
Parameters
142141
----------
@@ -239,22 +238,25 @@ def create_sample(size, colors=['blue', 'cyan', 'green', 'yellow', 'orange', 're
239238

240239
# Bandwidth on lasers is +-5 nm. channels are [450+-25, 525+-25, 600+-30, 665+-15, 720+-30, 785+-30] for filter set 2
241240
def measure(dataframe, lasers=[405, 488, 561, 638], channels=[1, 2, 3, 4, 5, 6],
242-
create_fcs=True, outfile_name='data/sample_output.fcs'):
241+
create_fcs=False, outfile_name='sample_output.fcs'):
243242
"""
244-
This is a function that will measure fluorescence intensity for any given sample
245-
DataFrame and laser/channel parameters. Output will be an fcs file (default) that is
246-
the same size as the sample you ran in the function. Alternatively, you can return
247-
just a pandas DataFrame object by setting return_fcs=False. The user can set the output
248-
file name manually to simulate creating multiple samples and measurements.
243+
This is a function that will measure fluorescence
244+
intensity for any given sample DataFrame and laser/channel
245+
parameters. Output will be just a pandas DataFrame object
246+
because create_fcs=False by default.
247+
248+
Alternatively, you can create an fcs file by setting create_fcs=True.
249+
The user can set the output file name manually to simulate
250+
creating multiple samples and measurements.
249251
250252
Parameters
251253
----------
252-
dataframe : the Dataframe of sample data that will be used to generate the simulated
253-
fluorescence intensity
254+
dataframe : the Dataframe of sample data that will be used
255+
to generate the simulated fluorescence intensity
254256
lasers : laser channel parameters, default are [405, 488, 561, 638] nm
255257
channels: return output for select channels, options are [1,2,3,4,5,6]
256-
create_fcs : create a .fcs file from generated Pandas Dataframe using 'fcsy' module.
257-
Default = True.
258+
create_fcs : create a .fcs file from generated Pandas Dataframe
259+
using 'fcsy' module. Default = False.
258260
outfile_name : name of the .fcs file created
259261
260262
Returns
@@ -359,36 +361,49 @@ def measure(dataframe, lasers=[405, 488, 561, 638], channels=[1, 2, 3, 4, 5, 6],
359361
return output
360362

361363

362-
def cluster(measured_data, min_cluster_size=50, savefig=True):
364+
def cluster(measured_data, min_cluster_size=50, savefig=False):
363365
"""
364-
This is a function to cluster flow cytometry data that has been measured in fluorescence channels using
365-
density-based spatial clustering of applications with noise (DBSCAN), which clusters based on density of points
366-
in an unsupervised method. The number of clusters does not need to be explicitly stated by the users. The only
367-
parameter that needs to be optimized is min_cluster_size, which is set to 50 here. But I recommend 1% of the len(
368-
data) Resulting plots are a bar chart showing the number of cells in each cluster and a heatmap of the median
369-
fluorescence intensity in each channel for each cluster.
370-
371-
Note: clusters that are labeled '0' are cells that the DBSCAN could not cluster.
372-
373-
Returns a tuple of two dictionaries. The first dictionary is the median fluorescence represented in the heatmap
374-
while the second dictionary holds all the fluorescence vectors for each cluster. Both of these are needed
375-
for a dip test and re-clustering.
366+
This is a function to cluster flow cytometry data that
367+
has been measured in fluorescence channels using density-based
368+
spatial clustering of applications with noise (DBSCAN), which
369+
clusters based on density of points in an unsupervised method.
370+
The number of clusters does not need to be explicitly stated by
371+
the users. The only parameter that needs to be optimized is
372+
min_cluster_size, which is set to 50 here. But I recommend 1% of
373+
the len(data). Resulting plots are a bar chart showing the number
374+
of cells in each cluster and a heatmap of the median fluorescence
375+
intensity in each channel for each cluster.
376+
377+
Note: clusters that are labeled '0' are cells that the
378+
DBSCAN could not cluster.
379+
380+
Returns a tuple of two dictionaries. The first dictionary is the
381+
median fluorescence represented in the heatmap while the second
382+
dictionary holds all the fluorescence vectors for each cluster.
383+
Both of these are needed for a dip test and re-clustering.
376384
377385
Parameters
378386
----------
379-
measured_data : simulated or experimental flow cytometry data that has been measured in
380-
fluorescence channels
381-
min_cluster_size : default = 50, needs to be optimized by user. Typically needs to be
382-
1% of len(data).
383-
savefig: Save generated bar chart showing the number of cells in each cluster and a heat map
384-
of the median fluorescence intensity in each channel for each cluster.
387+
measured_data : simulated or experimental flow cytometry data
388+
that has been measured in fluorescence channels.
389+
min_cluster_size : default = 50, needs to be optimized by user.
390+
Typically needs to be 1% of len(data).
391+
savefig: Save generated bar chart showing the number of cells in
392+
each cluster and a heat map of the median fluorescence
393+
intensity in each channel for each cluster.
385394
Figure is saved using 'matplotlib' module.
386395
387396
Returns
388397
-------
389-
output : a tuple of two dictionaries. The first dictionary is the median fluorescence represented
390-
in the heatmap while the second dictionary holds all the fluorescence vectors for each
391-
cluster. Both of these are needed for a dip test and re-clustering.
398+
(final_dictionary, cluster_dict) : a tuple of two dictionaries.
399+
The first dictionary is the
400+
median fluorescence represented
401+
in the heatmap while the second
402+
dictionary holds all the
403+
fluorescence vectors for each
404+
cluster.
405+
Both of these are needed for a
406+
dip test and re-clustering.
392407
393408
See Also
394409
--------
@@ -473,32 +488,41 @@ def cluster(measured_data, min_cluster_size=50, savefig=True):
473488
return (final_dictionary, cluster_dict)
474489

475490

476-
def dip_test(median_FL_data, total_data, alpha=0.05, save_figure=True):
491+
def dip_test(median_FL_data, total_data, alpha=0.05, save_figure=False):
477492
"""
478-
Perform a Hartigan's dip test to check for unimodality in clusters and splits clusters if bimodality is found.
479-
This function will take the highest intensity channel for each cluster and
480-
check for bimodality to correct for errors in clustering similar fluorescence profiles.
481-
Changing alpha will alter how stringent the dip test is. A higher alpha will result in higher detection
482-
of bimodality, but runs a greater risk of false identification. It is important to note
483-
that this dip test is relatively coarse grained and will not identify very slight populations
484-
of mixed cells (e.g. 10 orange cells clustered with 1000 red cells)
485-
486-
Returns an updated clustering of the primary clustering after performing a dip test
493+
Perform a Hartigan's dip test to check for unimodality
494+
in clusters and splits clusters if bimodality is found.
495+
This function will take the highest intensity channel
496+
for each cluster and check for bimodality to correct for
497+
errors in clustering similar fluorescence profiles.
498+
499+
Changing alpha will alter how stringent the dip test is.
500+
A higher alpha will result in higher detection of bimodality,
501+
but runs a greater risk of false identification. It is
502+
important to note that this dip test is relatively coarse
503+
grained and will not identify very slight populations of mixed
504+
cells (e.g. 10 orange cells clustered with 1000 red cells).
505+
506+
Returns an updated clustering of the primary clustering
507+
after performing a dip test.
487508
488509
Parameters
489510
----------
490-
median_FL_data : dict, clustering data generated by 'flowsym.cluster' function
491-
total_data : other fluorescence profiles for which errors will be corrected
492-
alpha: how stringent the dip test is
493-
save_figure : Save generated bar chart showing the number of cells in each cluster and a heat map
494-
of the median fluorescence intensity in each channel for each cluster.
495-
Figure is saved using 'matplotlib' module.
511+
median_FL_data : dict, clustering data generated by
512+
'flowsym.cluster' function
513+
total_data : other fluorescence profiles for which errors
514+
will be corrected
515+
alpha : how stringent the dip test is
516+
save_figure : Save generated bar chart showing the number of
517+
cells in each cluster and a heat map of the median
518+
fluorescence intensity in each channel for each
519+
cluster. Figure is saved using 'matplotlib' module.
496520
497521
Returns
498522
-------
499-
output : a tuple of two dictionaries. The first dictionary is the median fluorescence represented
500-
in the heatmap while the second dictionary holds all the fluorescence vectors for each
501-
cluster. Both of these are needed for a dip test and re-clustering.
523+
change_dict : a dictionary containing the correction that must be
524+
applied to similar fluorescence profiles if
525+
bimodality is found.
502526
503527
See Also
504528
--------
@@ -627,7 +651,7 @@ def dip_test(median_FL_data, total_data, alpha=0.05, save_figure=True):
627651
return change_dict
628652

629653

630-
def gaus_recluster(median_FL_data, total_data, tolerance=.25, savefig=True):
654+
def gaus_recluster(median_FL_data, total_data, tolerance=.25, save_figure=False):
631655
"""
632656
Applies a gaussian mixture model with n_components=2
633657
to try and separate rare populations of cells from
@@ -649,11 +673,11 @@ def gaus_recluster(median_FL_data, total_data, tolerance=.25, savefig=True):
649673
----------
650674
median_FL_data : data with median FL for each cluster
651675
total_data : data with all measured FL for each cluster
652-
tolerance : how different do the sizes of clusters have to be before they
653-
are considered actually distinct spectrally?
676+
tolerance : how different do the sizes of clusters have
677+
to be before they are considered actually distinct?
654678
Increase this to be more stringent in splitting clusters.
655-
Decrease the value to allow more re-clustering at the cost of
656-
false positives.
679+
Decrease the value to allow more re-clustering at
680+
the cost of false positives.
657681
save_figure : Save figure using 'matplotlib' module.
658682
659683
Returns
@@ -712,11 +736,12 @@ def gaus_recluster(median_FL_data, total_data, tolerance=.25, savefig=True):
712736
# Do a ks 2 test to see if clusters are different
713737
result = ks_2samp(clust1[max_channel], clust2[max_channel])
714738

715-
# Test how different our cluster populations are. If the difference between the sizes is more than <tolerance>, of the
716-
# total, then we'll say we actually found a bimodal population to split
739+
# Test how different our cluster populations are. If the difference between the sizes is more than
740+
# <tolerance>, of the total, then we'll say we actually found a bimodal population to split
717741
clust_split = abs(len(clust1) - len(clust2)) / (len(clust1) + len(clust2))
718742

719-
# Keep the split clusters if they meet our splitting criteria, otherwise retain original clusters from DB scan
743+
# Keep the split clusters if they meet our splitting criteria, otherwise retain original clusters from DB
744+
# scan
720745
if clust_split > tolerance:
721746
if result[1] < 1e-10:
722747
new_val = clust1.values.tolist()
@@ -736,7 +761,7 @@ def gaus_recluster(median_FL_data, total_data, tolerance=.25, savefig=True):
736761

737762
plt.tight_layout()
738763

739-
if savefig:
764+
if save_figure:
740765
plt.savefig('gaus_mix_cluster_split')
741766

742767
final_reclustered = {}
@@ -788,7 +813,7 @@ def gaus_recluster(median_FL_data, total_data, tolerance=.25, savefig=True):
788813
plt.yticks(rotation=0)
789814
plt.tight_layout()
790815

791-
if savefig:
816+
if save_figure:
792817
plt.savefig('reclustered_after_gaus_mix_ks2')
793818

794819
return reclustered

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,6 @@
5050
test_suite='tests',
5151
tests_require=test_requirements,
5252
url='https://github.com/harmslab/flowsym',
53-
version='0.2.3',
53+
version='0.2.4',
5454
zip_safe=False,
5555
)

0 commit comments

Comments
 (0)