
Commit

Merge pull request #517 from AllenInstitute/analysis
Running bootstraps on hpc
alexpiet authored Nov 16, 2022
2 parents 2299e01 + dce8276 commit 1f97929
Showing 5 changed files with 282 additions and 27 deletions.
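The change set adds a SLURM deployment script (scripts/deploy_running_bootstraps.py plus a small wrapper shell script), a per-bin worker script (scripts/running_bootstrap.py), and caching logic in visual_behavior_glm/PSTH.py so that each running-speed bin's bootstrap is computed and pickled separately and reloaded by compute_running_bootstrap instead of being recomputed.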
98 changes: 98 additions & 0 deletions scripts/deploy_running_bootstraps.py
@@ -0,0 +1,98 @@
import os
import argparse
import time
import pandas as pd

from simple_slurm import Slurm
import visual_behavior_glm.PSTH as psth

parser = argparse.ArgumentParser(description='deploy running bootstrap jobs to the cluster')
parser.add_argument('--env-path', type=str, default='visual_behavior',
    metavar='path', help='path to conda environment to use')


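# A job is considered already fit if its bootstrap output file exists on disk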
def already_fit(row):
    filename = psth.get_hierarchy_filename(
        row.cell_type,
        row.response,
        row['data'],
        'all',
        row.nboots,
        ['visual_strategy_session'],
        'running_{}'.format(row.bin_num))
    return os.path.exists(filename)

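# Build one job per cell type / response pair and running-speed bin (bins -5 through 20)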
def get_bootstrap_jobs():
    nboots = 10000
    base_jobs = [
        {'cell_type':'exc','response':'image','data':'events','nboots':nboots},
        {'cell_type':'sst','response':'image','data':'events','nboots':nboots},
        {'cell_type':'vip','response':'image','data':'events','nboots':nboots},
        {'cell_type':'exc','response':'omission','data':'events','nboots':nboots},
        {'cell_type':'sst','response':'omission','data':'events','nboots':nboots},
        {'cell_type':'vip','response':'omission','data':'events','nboots':nboots}
        ]
    jobs = []
    for b in range(-5,21):
        for j in base_jobs:
            temp = j.copy()
            temp['bin_num'] = b
            jobs.append(temp)
    jobs = pd.DataFrame(jobs)
    return jobs

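# Format the command-line arguments passed to running_bootstrap.py for a single job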
def make_job_string(row):
    arg_string = ''
    arg_string += ' --cell_type {}'.format(row.cell_type)
    arg_string += ' --response {}'.format(row.response)
    arg_string += ' --data {}'.format(row['data'])
    arg_string += ' --bin_num {}'.format(row.bin_num)
    arg_string += ' --nboots {}'.format(row.nboots)
    return arg_string

if __name__ == "__main__":
    args = parser.parse_args()
    python_executable = "{}/bin/python".format(args.env_path)
    print('python executable = {}'.format(python_executable))
    python_file = "/home/alex.piet/codebase/GLM/visual_behavior_glm/scripts/running_bootstrap.py"
    stdout_basedir = "/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/ophys_glm"
    stdout_location = os.path.join(stdout_basedir, 'job_records_running_bootstraps')
    if not os.path.exists(stdout_location):
        print('making folder {}'.format(stdout_location))
        os.mkdir(stdout_location)
    print('stdout files will be at {}'.format(stdout_location))

    job_count = 0
    jobs = get_bootstrap_jobs()

    for index, row in jobs.iterrows():
        if not already_fit(row):
            job_count += 1
            args_string = make_job_string(row)
            print('starting cluster job. job count = {}'.format(job_count))
            print(' ' + args_string)
            job_title = 'bootstraps'
            walltime = '24:00:00'
            mem = '100gb'
            job_id = Slurm.JOB_ARRAY_ID
            job_array_id = Slurm.JOB_ARRAY_MASTER_ID
            output = stdout_location+"/"+str(job_array_id)+"_"+str(job_id)+"_bootstrap.out"

            # instantiate a SLURM object
            slurm = Slurm(
                cpus_per_task=4,
                job_name=job_title,
                time=walltime,
                mem=mem,
                output=output,
                partition="braintv"
            )

            slurm.sbatch('{} {} {}'.format(
                python_executable,
                python_file,
                args_string,
                )
            )
            time.sleep(0.001)


5 changes: 5 additions & 0 deletions scripts/deploy_running_bootstraps.sh
@@ -0,0 +1,5 @@
#!/bin/bash
# Make sure you run `conda activate <env>` first so that this script
# runs from an environment where the AllenSDK is installed
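# For example, with the environment used below:
#   conda activate visbeh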

python deploy_running_bootstraps.py --env-path /home/alex.piet/codebase/miniconda3/envs/visbeh
64 changes: 64 additions & 0 deletions scripts/running_bootstrap.py
@@ -0,0 +1,64 @@
import visual_behavior_glm.PSTH as psth
import psy_output_tools as po
import argparse

parser = argparse.ArgumentParser(description='compute hierarchy bootstraps')
parser.add_argument(
    '--cell_type',
    type=str,
    default='',
    metavar='cell',
    help='cell_type'
)
parser.add_argument(
    '--response',
    type=str,
    default='',
    metavar='response',
    help='response'
)

parser.add_argument(
    '--data',
    type=str,
    default='',
    metavar='',
    help='data'
)

parser.add_argument(
    '--nboots',
    type=int,
    default=0,
    metavar='',
    help='number of bootstrap samples'
)

parser.add_argument(
    '--bin_num',
    type=int,
    default=0,
    metavar='',
    help='running speed bin number'
)


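# Load the ophys summary table and compute the bootstrap for the requested running bin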
if __name__ == '__main__':
    args = parser.parse_args()
    print('Starting bootstrap with the following inputs')
    print('cell_type {}'.format(args.cell_type))
    print('response {}'.format(args.response))
    print('data {}'.format(args.data))
    print('nboots {}'.format(args.nboots))
    print('bin_num {}'.format(args.bin_num))
    print('')
    summary_df = po.get_ophys_summary_table(21)
    psth.load_df_and_compute_running(
        summary_df,
        args.cell_type,
        args.response,
        args.data,
        args.nboots,
        args.bin_num
    )
    print('finished')
138 changes: 111 additions & 27 deletions visual_behavior_glm/PSTH.py
@@ -1,4 +1,5 @@
import os
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
@@ -557,6 +558,63 @@ def plot_strategy_histogram(full_df,cre,condition,experience_level,savefig=False

    return ax

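# Compute the hierarchical bootstrap for a single running-speed bin and pickle the result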
def compute_running_bootstrap_bin(df, condition, cell_type, bin_num, nboots=10000,
    data='events'):

    filename = get_hierarchy_filename(cell_type,condition,data,'all',nboots,
        ['visual_strategy_session'],'running_{}'.format(bin_num))
    if os.path.isfile(filename):
        print('Already computed {}'.format(bin_num))
        return

    # Figure out bins
    if condition =='omission':
        bin_width=5
    elif condition =='image':
        bin_width=5#2
    df['running_bins'] = np.floor(df['running_speed']/bin_width)
    bins = np.sort(df['running_bins'].unique())

    # Check if any data
    if bin_num not in bins:
        print('No data for this running bin')
        return

    # Compute bootstrap for this bin
    temp = df.query('running_bins == @bin_num')[['visual_strategy_session',
        'ophys_experiment_id','cell_specimen_id','response']]
    means = hb.bootstrap(temp, levels=['visual_strategy_session',
        'ophys_experiment_id','cell_specimen_id'],nboots=nboots)
    if (True in means) & (False in means):
        diff = np.array(means[True]) - np.array(means[False])
        pboot = np.sum(diff<0)/len(diff)
        visual = np.std(means[True])
        timing = np.std(means[False])
    else:
        pboot = 1
        if (True in means):
            visual = np.std(means[True])
        else:
            visual = 0
        if (False in means):
            timing = np.std(means[False])
        else:
            timing = 0
    bootstrap = {
        'running_bin':bin_num,
        'p_boot':pboot,
        'visual_sem':visual,
        'timing_sem':timing,
        'n_boots':nboots,
        }

    # Save to file
    filename = get_hierarchy_filename(cell_type,condition,data,'all',nboots,
        ['visual_strategy_session'],'running_{}'.format(bin_num))
    with open(filename,'wb') as handle:
        pickle.dump(bootstrap, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print('bin saved to {}'.format(filename))

def compute_running_bootstrap(df,condition,cell_type,nboots=10000,data='events'):
    if condition =='omission':
        bin_width=5
@@ -565,34 +623,45 @@ def compute_running_bootstrap(df,condition,cell_type,nboots=10000,data='events')
     df['running_bins'] = np.floor(df['running_speed']/bin_width)
 
     bootstraps = []
-    bins = df['running_bins'].unique()
+    bins = np.sort(df['running_bins'].unique())
     for b in bins:
-        temp = df.query('running_bins == @b')[['visual_strategy_session',
-            'ophys_experiment_id','cell_specimen_id','response']]
-        means = hb.bootstrap(temp, levels=['visual_strategy_session',
-            'ophys_experiment_id','cell_specimen_id'],nboots=nboots)
-        if (True in means) & (False in means):
-            diff = np.array(means[True]) - np.array(means[False])
-            pboot = np.sum(diff<0)/len(diff)
-            visual = np.std(means[True])
-            timing = np.std(means[False])
+        # First check if this running bin has already been computed
+        filename = get_hierarchy_filename(cell_type,condition,data,'all',nboots,
+            ['visual_strategy_session'],'running_{}'.format(int(b)))
+        if os.path.isfile(filename):
+            # Load this bin
+            with open(filename,'rb') as handle:
+                this_boot = pickle.load(handle)
+            print('loading this boot from file: {}'.format(b))
+            bootstraps.append(this_boot)
         else:
-            pboot = 1
-            if (True in means):
-                visual = np.std(means[True])
-            else:
-                visual = 0
-            if (False in means):
-                timing = np.std(means[False])
-            else:
-                timing = 0
-        bootstraps.append({
-            'running_bin':b,
-            'p_boot':pboot,
-            'visual_sem':visual,
-            'timing_sem':timing,
-            'n_boots':nboots,
-            })
+            print('Need to compute this bin: {}'.format(b))
+            temp = df.query('running_bins == @b')[['visual_strategy_session',
+                'ophys_experiment_id','cell_specimen_id','response']]
+            means = hb.bootstrap(temp, levels=['visual_strategy_session',
+                'ophys_experiment_id','cell_specimen_id'],nboots=nboots)
+            if (True in means) & (False in means):
+                diff = np.array(means[True]) - np.array(means[False])
+                pboot = np.sum(diff<0)/len(diff)
+                visual = np.std(means[True])
+                timing = np.std(means[False])
+            else:
+                pboot = 1
+                if (True in means):
+                    visual = np.std(means[True])
+                else:
+                    visual = 0
+                if (False in means):
+                    timing = np.std(means[False])
+                else:
+                    timing = 0
+            bootstraps.append({
+                'running_bin':b,
+                'p_boot':pboot,
+                'visual_sem':visual,
+                'timing_sem':timing,
+                'n_boots':nboots,
+            })
 
     # Convert to dataframe, do multiple comparisons corrections
     bootstraps = pd.DataFrame(bootstraps)
Expand All @@ -603,21 +672,23 @@ def compute_running_bootstrap(df,condition,cell_type,nboots=10000,data='events')
     bootstraps = bootstraps.drop(columns=['location'])
 
     # Save file
-    filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots,['visual_strategy_session'],'running')
+    filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots,
+        ['visual_strategy_session'],'running')
     print('saving bootstraps to: '+filepath)
     bootstraps.to_feather(filepath)
     return bootstraps

 def get_running_bootstraps(cell_type, condition,data,nboots):
-    filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots,['visual_strategy_session'],'running')
+    filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots,
+        ['visual_strategy_session'],'running')
     if os.path.isfile(filepath):
         bootstraps = pd.read_feather(filepath)
         return bootstraps
     else:
         print('file not found, compute the running bootstraps first')

-def running_responses(df,condition, cre='vip', bootstraps=None,savefig=False,data='events',
-        split='visual_strategy_session'):
+def running_responses(df, condition, cre='vip', bootstraps=None, savefig=False,
+    data='events', split='visual_strategy_session'):
     if condition =='omission':
         bin_width=5
     elif condition =='image':
@@ -698,6 +769,19 @@ def running_responses(df,condition, cre='vip', bootstraps=None,savefig=False,dat
        print('Figure saved to {}'.format(filename))
        plt.savefig(filename)

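# Map the short cell-type names used by the cluster scripts to Cre lines,
# then compute the bootstrap for a single running-speed bin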
def load_df_and_compute_running(summary_df, cell_type, response, data, nboots, bin_num):
    mapper = {
        'exc':'Slc17a7-IRES2-Cre',
        'sst':'Sst-IRES-Cre',
        'vip':'Vip-IRES-Cre'
        }
    if response == 'image':
        df = load_image_df(summary_df, mapper[cell_type], data)
    elif response == 'omission':
        df = load_omission_df(summary_df, mapper[cell_type], data)
    compute_running_bootstrap_bin(df,response, cell_type, bin_num, nboots=nboots)


def load_df_and_compute_hierarchy(summary_df, cell_type, response, data, depth, nboots,
    splits, query='', extra=''):
    mapper = {
4 changes: 4 additions & 0 deletions visual_behavior_glm/neuro_dev.py
@@ -108,6 +108,10 @@
vip_omission = psth.load_omission_df(summary_df, cre='Vip-IRES-Cre',data='events')
vip_image = psth.load_image_df(summary_df, cre='Vip-IRES-Cre',data='events')

# To make things work on the HPC, you can compute just one running bin
psth.compute_running_bootstrap_bin(vip_omission,'omission','vip',bin_num,data='events',
    nboots=nboots)
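# (bin_num and nboots are assumed to be defined earlier in this scratch file,
#  e.g. bin_num = 5 and nboots = 10000)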

# Generate bootstrapped errorbars:
bootstraps_omission = psth.compute_running_bootstrap(vip_omission,'omission','vip',
    data='events',nboots=nboots)
