Fixing so you don't need cell types
simonwarchol committed Oct 1, 2021
1 parent 9015f75 commit 3e85ab7
Showing 2 changed files with 183 additions and 67 deletions.
247 changes: 181 additions & 66 deletions minerva_analysis/server/models/data_model.py
@@ -9,6 +9,8 @@
from ome_types import from_xml
from minerva_analysis import config_json_path, data_path, cwd_path
from minerva_analysis.server.utils import pyramid_assemble
from minerva_analysis.server.utils import smallestenclosingcircle
import matplotlib.path as mpltPath
from minerva_analysis.server.models import database_model
import dateutil.parser
import time
@@ -47,6 +49,14 @@ def load_datasource(datasource_name, reload=False):
csvPath = Path(config[datasource_name]['featureData'][0]['src'])
print("Loading csv data.. (this can take some time)")
datasource = pd.read_csv(csvPath)
if 'cellType' not in datasource.columns:
embedding_data_path = Path(config[datasource_name]['featureData'][0]['embeddingData'])
scatter_df = pd.read_csv(embedding_data_path)
scatter_np = scatter_df.to_numpy()
if scatter_np.shape[1] > 3:
datasource['cellType'] = scatter_np[:, 3].astype('int').tolist()
else:
datasource['cellType'] = 0
datasource['id'] = datasource.index
datasource = datasource.replace(-np.Inf, 0)
source = datasource_name
@@ -110,11 +120,11 @@ def load_ball_tree(datasource_name_name, reload=False):
# Path(
# os.path.join(os.getcwd())) / data_path / datasource_name_name / "ball_tree.pickle")

#using pathlib now:
# using pathlib now:
pickled_kd_tree_path = str(
PurePath(cwd_path, data_path, datasource_name_name, "ball_tree.pickle"))

#old os.path way: if os.path.isfile(pickled_kd_tree_path) and reload is False:
# old os.path way: if os.path.isfile(pickled_kd_tree_path) and reload is False:
if Path(pickled_kd_tree_path).is_file() and reload is False:

print("Pickled KD Tree Exists, Loading")
@@ -198,12 +208,21 @@ def get_channel_cells(datasource_name, channels):
return query


def get_celltype_column_name(datasource):
try:
return config[datasource]['featureData'][0]['celltype']
except KeyError:
return 'cellType'
except TypeError:
return 'cellType'


def get_phenotype_description(datasource):
try:
data = ''
csvPath = config[datasource]['featureData'][0]['celltypeData']
if Path(csvPath).is_file():
#old os.path usage: if os.path.isfile(csvPath):
# old os.path usage: if os.path.isfile(csvPath):
data = pd.read_csv(csvPath)
data = data.to_numpy().tolist()
# data = data.to_json(orient='records', lines=True)
@@ -223,6 +242,48 @@ def get_phenotype_column_name(datasource):
return ''


def get_cell_groups(datasource_name):
global datasource
global source
global config
try:
if 'celltypeData' in config[datasource_name]['featureData'][0]:
celltype_data = Path(config[datasource_name]['featureData'][0]['celltypeData'])
celltype_df = pd.read_csv(celltype_data)
obj = celltype_df.to_numpy()[:, 1].tolist()
else:

celltype_data = sorted(datasource['cellType'].unique())
obj = [str(i) for i in celltype_data]
# Test
return obj
except:
return [0]


def get_cells_by_cell_group(datasource_name, cell_group):
global datasource
global source
global config
try:
if 'celltypeData' in config[datasource_name]['featureData'][0]:
celltype_data = Path(config[datasource_name]['featureData'][0]['celltypeData'])
celltype_df = pd.read_csv(celltype_data)
group_id = celltype_df[celltype_df.name == cell_group].values[0][0]
else:
group_id = int(cell_group)
fields = [config[datasource_name]['featureData'][0]['xCoordinate'],
config[datasource_name]['featureData'][0]['yCoordinate'],
config[datasource_name]['featureData'][0]['celltype'], 'id',
config[datasource_name]['featureData'][0]['idField']]
obj = datasource[
datasource[config[datasource_name]['featureData'][0]['celltype']] == group_id][fields].to_dict(
orient='records')
return obj
except:
return []


def get_cells_phenotype(datasource_name):
global datasource
global source
@@ -299,63 +360,53 @@ def get_number_of_cells_in_circle(x, y, datasource_name, r):
return 0


def get_color_scheme(datasource_name, refresh, label_field='celltype'):

# old os.path way:
# color_scheme_path = str(
# Path(os.path.join(os.getcwd())) / data_path / datasource_name / str(
# label_field + "_color_scheme.pickle"))

color_scheme_path = str(PurePath(cwd_path, data_path, datasource_name, str(
label_field + "_color_scheme.pickle")) )

if refresh == False:
#old os.path way: if os.path.isfile(color_scheme_path):
if Path(color_scheme_path).is_file():
print("Color Scheme Exists, Loading")
color_scheme = pickle.load(open(color_scheme_path, "rb"))
return color_scheme
if label_field == 'celltype':
labels = get_phenotypes(datasource_name)
print(labels)
labels.append('SelectedCluster')
def get_color_scheme(datasource_name):
labels = get_cell_groups(datasource_name)
color_scheme = {}
colors = ["#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#a65628", "#f781bf", "#808080", "#7A4900",
"#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87", "#5A0007", "#809693", "#FEFFE6",
"#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80", "#61615A", "#BA0900", "#6B7900", "#00C2A0",
"#FFAA92", "#FF90C9", "#B903AA", "#D16100", "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018",
"#0AA6D8", "#013349", "#00846F", "#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2",
"#C2FF99", "#001E09", "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1",
"#788D66", "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
"#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81", "#575329",
"#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00", "#7900D7", "#A77500",
"#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700", "#549E79", "#FFF69F", "#201625",
"#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329", "#5B4534", "#FDE8DC", "#404E55", "#0089A3",
"#CB7E98", "#A4E804", "#324E72", "#6A3A4C", "#83AB58", "#001C1E", "#D1F7CE", "#004B28", "#C8D0F6",
"#A3A489", "#806C66", "#222800", "#BF5650", "#E83000", "#66796D", "#DA007C", "#FF1A59", "#8ADBB4",
"#1E0200", "#5B4E51", "#C895C5", "#320033", "#FF6832", "#66E1D3", "#CFCDAC", "#D0AC94", "#7ED379",
"#012C58", "#7A7BFF", "#D68E01", "#353339", "#78AFA1", "#FEB2C6", "#75797C", "#837393", "#943A4D",
"#B5F4FF", "#D2DCD5", "#9556BD", "#6A714A", "#001325", "#02525F", "#0AA3F7", "#E98176", "#DBD5DD",
"#5EBCD1", "#3D4F44", "#7E6405", "#02684E", "#962B75", "#8D8546", "#9695C5", "#E773CE", "#D86A78",
"#3E89BE", "#CA834E", "#518A87", "#5B113C", "#55813B", "#E704C4", "#00005F", "#A97399", "#4B8160",
"#59738A", "#FF5DA7", "#F7C9BF", "#643127", "#513A01", "#6B94AA", "#51A058", "#A45B02", "#1D1702",
"#E20027", "#E7AB63", "#4C6001", "#9C6966", "#64547B", "#97979E", "#006A66", "#391406", "#F4D749",
"#0045D2", "#006C31", "#DDB6D0", "#7C6571", "#9FB2A4", "#00D891", "#15A08A", "#BC65E9", "#FFFFFE",
"#C6DC99", "#203B3C", "#671190", "#6B3A64", "#F5E1FF", "#FFA0F2", "#CCAA35", "#374527", "#8BB400",
"#797868", "#C6005A", "#3B000A", "#C86240", "#29607C", "#402334", "#7D5A44", "#CCB87C", "#B88183",
"#AA5199", "#B5D6C3", "#A38469", "#9F94F0", "#A74571", "#B894A6", "#71BB8C", "#00B433", "#789EC9",
"#6D80BA", "#953F00", "#5EFF03", "#E4FFFC", "#1BE177", "#BCB1E5", "#76912F", "#003109", "#0060CD",
"#D20096", "#895563", "#29201D", "#5B3213", "#A76F42", "#89412E", "#1A3A2A", "#494B5A", "#A88C85",
"#F4ABAA", "#A3F3AB", "#00C6C8", "#EA8B66", "#958A9F", "#BDC9D2", "#9FA064", "#BE4700", "#658188",
"#83A485", "#453C23", "#47675D", "#3A3F00", "#061203", "#DFFB71", "#868E7E", "#98D058", "#6C8F7D",
"#D7BFC2", "#3C3E6E", "#D83D66", "#2F5D9B", "#6C5E46", "#D25B88", "#5B656C", "#00B57F", "#545C46",
"#866097", "#365D25", "#252F99", "#00CCFF", "#674E60", "#FC009C", "#92896B"]
# http://godsnotwheregodsnot.blogspot.com/2013/11/kmeans-color-quantization-seeding.html

colors = ["#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059",
"#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
"#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
"#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100",
"#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F",
"#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
"#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
"#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
"#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81",
"#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
"#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700",
"#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329",
"#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C",
"#83AB58", "#001C1E", "#D1F7CE", "#004B28", "#C8D0F6", "#A3A489", "#806C66", "#222800",
"#BF5650", "#E83000", "#66796D", "#DA007C", "#FF1A59", "#8ADBB4", "#1E0200", "#5B4E51",
"#C895C5", "#320033", "#FF6832", "#66E1D3", "#CFCDAC", "#D0AC94", "#7ED379", "#012C58",
"#7A7BFF", "#D68E01", "#353339", "#78AFA1", "#FEB2C6", "#75797C", "#837393", "#943A4D",
"#B5F4FF", "#D2DCD5", "#9556BD", "#6A714A", "#001325", "#02525F", "#0AA3F7", "#E98176",
"#DBD5DD", "#5EBCD1", "#3D4F44", "#7E6405", "#02684E", "#962B75", "#8D8546", "#9695C5",
"#E773CE", "#D86A78", "#3E89BE", "#CA834E", "#518A87", "#5B113C", "#55813B", "#E704C4",
"#00005F", "#A97399", "#4B8160", "#59738A", "#FF5DA7", "#F7C9BF", "#643127", "#513A01",
"#6B94AA", "#51A058", "#A45B02", "#1D1702", "#E20027", "#E7AB63", "#4C6001", "#9C6966",
"#64547B", "#97979E", "#006A66", "#391406", "#F4D749", "#0045D2", "#006C31", "#DDB6D0",
"#7C6571", "#9FB2A4", "#00D891", "#15A08A", "#BC65E9", "#FFFFFE", "#C6DC99", "#203B3C",

"#671190", "#6B3A64", "#F5E1FF", "#FFA0F2", "#CCAA35", "#374527", "#8BB400", "#797868",
"#C6005A", "#3B000A", "#C86240", "#29607C", "#402334", "#7D5A44", "#CCB87C", "#B88183",
"#AA5199", "#B5D6C3", "#A38469", "#9F94F0", "#A74571", "#B894A6", "#71BB8C", "#00B433",
"#789EC9", "#6D80BA", "#953F00", "#5EFF03", "#E4FFFC", "#1BE177", "#BCB1E5", "#76912F",
"#003109", "#0060CD", "#D20096", "#895563", "#29201D", "#5B3213", "#A76F42", "#89412E",
"#1A3A2A", "#494B5A", "#A88C85", "#F4ABAA", "#A3F3AB", "#00C6C8", "#EA8B66", "#958A9F",
"#BDC9D2", "#9FA064", "#BE4700", "#658188", "#83A485", "#453C23", "#47675D", "#3A3F00",
"#061203", "#DFFB71", "#868E7E", "#98D058", "#6C8F7D", "#D7BFC2", "#3C3E6E", "#D83D66",
"#2F5D9B", "#6C5E46", "#D25B88", "#5B656C", "#00B57F", "#545C46", "#866097", "#365D25",
"#252F99", "#00CCFF", "#674E60", "#FC009C", "#92896B"]
for i in range(len(labels)):
color_scheme[str(labels[i])] = {}
color_scheme[str(labels[i])]['rgb'] = list(ImageColor.getcolor(colors[i], "RGB"))
color_scheme[str(labels[i])]['hex'] = colors[i]

pickle.dump(color_scheme, open(color_scheme_path, 'wb'))
color_scheme[str(i)] = {}
color_scheme[str(i)]['rgb'] = list(ImageColor.getcolor(colors[i], "RGB"))
color_scheme[str(i)]['hex'] = colors[i]
return color_scheme


@@ -488,9 +539,31 @@ def get_datasource_description(datasource_name):
return description


def get_scatterplot_data(datasource_name):
global config
global datasource

def spatial_corr (adata, raw=False, log=False, threshold=None, x_coordinate='X_centroid',y_coordinate='Y_centroid',
marker=None, k=500, label='spatial_corr'):
embedding_data_path = Path(config[datasource_name]['featureData'][0]['embeddingData'])
scatter_df = pd.read_csv(embedding_data_path)
scatter_np = scatter_df.to_numpy()
# scatter_np[:, 1:3] = datasource[['X_centroid', 'Y_centroid']].to_numpy()
scatter_np[:, 1:3] = (scatter_np[:, 1:3] - np.min(scatter_np[:, 1:3])) / (
np.max(scatter_np[:, 1:3]) - np.min(scatter_np[:, 1:3])) * 2 - 1
try:
clusters = datasource[get_celltype_column_name(datasource_name)].astype('uint32').values.tolist()
except:
clusters = np.zeros((datasource.shape[0],), dtype='int').tolist()
scatter_np = np.append(scatter_np, np.expand_dims(clusters, 1), 1)
list_of_obs = [[elem[1], elem[2], int(elem[0]), int(elem[3])] for elem in scatter_np]
visData = {
'data': list_of_obs,
'clusters': clusters
}
return visData


def spatial_corr(adata, raw=False, log=False, threshold=None, x_coordinate='X_centroid', y_coordinate='Y_centroid',
marker=None, k=500, label='spatial_corr'):
"""
Parameters
----------
@@ -526,9 +599,9 @@ def spatial_corr (adata, raw=False, log=False, threshold=None, x_coordinate='X_c
data = pd.DataFrame({'x': bdata.obs[x_coordinate], 'y': bdata.obs[y_coordinate]})
# user defined expression matrix
if raw is True:
exp = pd.DataFrame(bdata.raw.X, index= bdata.obs.index, columns=bdata.var.index)
exp = pd.DataFrame(bdata.raw.X, index=bdata.obs.index, columns=bdata.var.index)
else:
exp = pd.DataFrame(bdata.X, index= bdata.obs.index, columns=bdata.var.index)
exp = pd.DataFrame(bdata.X, index=bdata.obs.index, columns=bdata.var.index)
# log the data if needed
if log is True:
exp = np.log1p(exp)
@@ -542,19 +615,20 @@ def spatial_corr (adata, raw=False, log=False, threshold=None, x_coordinate='X_c
marker = [marker]
exp = exp[marker]
# find the nearest neighbours
tree = BallTree(data, leaf_size= 2)
dist, ind = tree.query(data, k=k, return_distance= True)
neighbours = pd.DataFrame(ind, index = bdata.obs.index)
tree = BallTree(data, leaf_size=2)
dist, ind = tree.query(data, k=k, return_distance=True)
neighbours = pd.DataFrame(ind, index=bdata.obs.index)
# find the mean dist
rad_approx = np.mean(dist, axis=0)
# Calculate the correlation
mean = np.mean(exp).values
std = np.std(exp).values
A = (exp - mean) / std
def corrfunc (marker, A, neighbours, ind):

def corrfunc(marker, A, neighbours, ind):
print('Processing ' + str(marker))
# Map phenotype
ind_values = dict(zip(list(range(len(ind))), A[marker])) # Used for mapping
ind_values = dict(zip(list(range(len(ind))), A[marker])) # Used for mapping
# Loop through (all functionized methods were very slow)
neigh = neighbours.copy()
for i in neigh.columns:
@@ -564,9 +638,10 @@ def corrfunc (marker, A, neighbours, ind):
corrfunc = np.mean(Y, axis=1)
# return
return corrfunc

# apply function to all markers # Create lamda function
r_corrfunc = lambda x: corrfunc(marker=x,A=A, neighbours=neighbours, ind=ind)
all_data = list(map(r_corrfunc, exp.columns)) # Apply function
r_corrfunc = lambda x: corrfunc(marker=x, A=A, neighbours=neighbours, ind=ind)
all_data = list(map(r_corrfunc, exp.columns)) # Apply function
# Merge all the results into a single dataframe
df = pd.concat(all_data, axis=1)
df.columns = exp.columns
@@ -660,6 +735,46 @@ def convertOmeTiff(filePath, channelFilePath=None, dataDirectory=None, isLabelIm
return {'segmentation': str(directory)}


def get_cells_in_polygon(datasource_name, points, similar_neighborhood=False):
global config
global ball_tree
point_tuples = [(e['imagePoints']['x'], e['imagePoints']['y']) for e in points]
(x, y, r) = smallestenclosingcircle.make_circle(point_tuples)
fields = [config[datasource_name]['featureData'][0]['xCoordinate'],
config[datasource_name]['featureData'][0]['yCoordinate'],
config[datasource_name]['featureData'][0]['celltype'], 'id',
config[datasource_name]['featureData'][0]['idField']]
index = ball_tree.query_radius([[x, y]], r=r)
cells = index[0]
circle_cells = datasource.iloc[cells][fields].values
path = mpltPath.Path(point_tuples)
inside = path.contains_points(circle_cells[:, [0, 1]].astype('float'))
neighbor_ids = circle_cells[np.where(inside == True), 3].flatten().tolist()
# obj = get_neighborhood_stats(datasource_name, neighbor_ids, fields=fields)
# try:
if fields and len(fields) > 0:
if len(fields) > 1:
poly_cells = datasource.iloc[neighbor_ids][fields].to_dict(orient='records')
else:
poly_cells = datasource.iloc[neighbor_ids][fields].to_dict()
else:
poly_cells = datasource.iloc[neighbor_ids].to_dict(orient='records')
return poly_cells


def get_cells(elem, datasource_name):
global datasource
global source
global config
fields = [config[datasource_name]['featureData'][0]['xCoordinate'],
config[datasource_name]['featureData'][0]['yCoordinate'],
config[datasource_name]['featureData'][0]['celltype'], 'id',
config[datasource_name]['featureData'][0]['idField']]
ids = np.array(elem['ids'])
obj = datasource.iloc[ids][fields].to_dict(orient='records')
return obj


def save_dot(datasource_name, dot):
database_model.create_or_update(database_model.Dot, id=dot['id'], datasource=datasource_name, group=dot['group'],
name=dot['name'], description=dot['description'], shape_type=dot['shape_type'],
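Taken together, the data_model.py changes make an explicit cell-type column optional: load_datasource now synthesizes a 'cellType' column from the fourth column of the embedding CSV (or all zeros), and the new get_celltype_column_name / get_cell_groups helpers fall back to that synthesized column when the config has no 'celltype' entry. The snippet below is a minimal standalone sketch of that fallback, not code from the commit; the file paths and column layout are assumptions for illustration.

import pandas as pd

def with_default_cell_type(feature_df, embedding_csv_path):
    # Sketch of the load_datasource fallback: if the feature table has no
    # 'cellType' column, take group labels from the embedding file's 4th
    # column when one exists, otherwise put every cell in group 0.
    if 'cellType' in feature_df.columns:
        return feature_df
    scatter_np = pd.read_csv(embedding_csv_path).to_numpy()
    if scatter_np.shape[1] > 3:
        feature_df['cellType'] = scatter_np[:, 3].astype('int').tolist()
    else:
        feature_df['cellType'] = 0
    return feature_df

# Hypothetical paths, for illustration only:
feature_df = with_default_cell_type(pd.read_csv('cells.csv'), 'embedding.csv')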
3 changes: 2 additions & 1 deletion minerva_analysis/server/routes/import_routes.py
@@ -405,7 +405,8 @@ def save_config():
if 'celltypeData' in originalData:
configData[datasetName]['featureData'][0]['celltypeData'] = str(data_path / datasetName / celltypeName)
configData[datasetName]['featureData'][0]['celltype'] = headerList[3][1]['value']

else:
configData[datasetName]['featureData'][0]['celltype'] = 'cellType'
if 'embeddingData' in originalData:
configData[datasetName]['featureData'][0]['embeddingData'] = str(
data_path / datasetName / embeddingName)
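On the import side, the one-line change above pairs with the new model helpers: when an upload has no cell-type CSV, save_config now writes the literal column name 'cellType' into the config, which is exactly the column load_datasource synthesizes. A tiny illustrative sketch of that lookup (the config dicts here are hypothetical, not the real saved config):

def celltype_column(feature_data):
    # Mirrors get_celltype_column_name: prefer the configured 'celltype'
    # entry, otherwise fall back to the synthesized 'cellType' column.
    return feature_data.get('celltype', 'cellType')

print(celltype_column({'xCoordinate': 'X_centroid', 'yCoordinate': 'Y_centroid'}))  # -> cellType
print(celltype_column({'celltype': 'phenotype'}))  # -> phenotype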
