diff --git a/scripts/deploy_running_bootstraps.py b/scripts/deploy_running_bootstraps.py new file mode 100644 index 00000000..f68a4102 --- /dev/null +++ b/scripts/deploy_running_bootstraps.py @@ -0,0 +1,98 @@ +import os +import argparse +import time +import pandas as pd + +from simple_slurm import Slurm +import visual_behavior_glm.PSTH as psth + +parser = argparse.ArgumentParser(description='deploy glm fits to cluster') +parser.add_argument('--env-path', type=str, default='visual_behavior', metavar='path to conda environment to use') + + +def already_fit(row): + filename = psth.get_hierarchy_filename( + row.cell_type, + row.response, + row['data'], + 'all', + row.nboots, + ['visual_strategy_session'], + 'running_{}'.format(row.bin_num)) + return os.path.exists(filename) + +def get_bootstrap_jobs(): + nboots=10000 + base_jobs = [ + {'cell_type':'exc','response':'image','data':'events','nboots':nboots}, + {'cell_type':'sst','response':'image','data':'events','nboots':nboots}, + {'cell_type':'vip','response':'image','data':'events','nboots':nboots}, + {'cell_type':'exc','response':'omission','data':'events','nboots':nboots}, + {'cell_type':'sst','response':'omission','data':'events','nboots':nboots}, + {'cell_type':'vip','response':'omission','data':'events','nboots':nboots} + ] + jobs = [] + for b in range(-5,21): + for j in base_jobs: + temp = j.copy() + temp['bin_num'] = b + jobs.append(temp) + jobs = pd.DataFrame(jobs) + return jobs + +def make_job_string(row): + arg_string = '' + arg_string += ' --cell_type {}'.format(row.cell_type) + arg_string += ' --response {}'.format(row.response) + arg_string += ' --data {}'.format(row['data']) + arg_string += ' --bin_num {}'.format(row.bin_num) + arg_string += ' --nboots {}'.format(row.nboots) + return arg_string + +if __name__ == "__main__": + args = parser.parse_args() + python_executable = "{}/bin/python".format(args.env_path) + print('python executable = {}'.format(python_executable)) + python_file = "/home/alex.piet/codebase/GLM/visual_behavior_glm/scripts/running_bootstrap.py" + stdout_basedir = "/allen/programs/braintv/workgroups/nc-ophys/visual_behavior/ophys_glm" + stdout_location = os.path.join(stdout_basedir, 'job_records_running_bootstraps') + if not os.path.exists(stdout_location): + print('making folder {}'.format(stdout_location)) + os.mkdir(stdout_location) + print('stdout files will be at {}'.format(stdout_location)) + + job_count = 0 + jobs = get_bootstrap_jobs() + + for index, row in jobs.iterrows(): + if not already_fit(row): + job_count += 1 + args_string = make_job_string(row) + print('starting cluster job. job count = {}'.format(job_count)) + print(' ' + args_string) + job_title = 'bootstraps' + walltime = '24:00:00' + mem = '100gb' + job_id = Slurm.JOB_ARRAY_ID + job_array_id = Slurm.JOB_ARRAY_MASTER_ID + output = stdout_location+"/"+str(job_array_id)+"_"+str(job_id)+"_bootstrap.out" + + # instantiate a SLURM object + slurm = Slurm( + cpus_per_task=4, + job_name=job_title, + time=walltime, + mem=mem, + output= output, + partition="braintv" + ) + + slurm.sbatch('{} {} {}'.format( + python_executable, + python_file, + args_string, + ) + ) + time.sleep(0.001) + + diff --git a/scripts/deploy_running_bootstraps.sh b/scripts/deploy_running_bootstraps.sh new file mode 100755 index 00000000..8aa7bd2f --- /dev/null +++ b/scripts/deploy_running_bootstraps.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Make sure you run conda activate first +# to run this from an environment where the allenSDK is installed + +python deploy_running_bootstraps.py --env-path /home/alex.piet/codebase/miniconda3/envs/visbeh diff --git a/scripts/running_bootstrap.py b/scripts/running_bootstrap.py new file mode 100644 index 00000000..584f0f57 --- /dev/null +++ b/scripts/running_bootstrap.py @@ -0,0 +1,64 @@ +import visual_behavior_glm.PSTH as psth +import psy_output_tools as po +import argparse + +parser = argparse.ArgumentParser(description='compute hierarchy bootstraps') +parser.add_argument( + '--cell_type', + type=str, + default='', + metavar='cell', + help='cell_type' +) +parser.add_argument( + '--response', + type=str, + default='', + metavar='response', + help='response' +) + +parser.add_argument( + '--data', + type=str, + default='', + metavar='', + help='data' +) + +parser.add_argument( + '--nboots', + type=int, + default=0, + metavar='', + help='data' +) + +parser.add_argument( + '--bin_num', + type=int, + default=0, + metavar='', + help='data' +) + + +if __name__ == '__main__': + args = parser.parse_args() + print('Starting bootstrap with the following inputs') + print('cell_type {}'.format(args.cell_type)) + print('response {}'.format(args.response)) + print('data {}'.format(args.data)) + print('nboots {}'.format(args.nboots)) + print('bin_num {}'.format(args.bin_num)) + print('') + summary_df = po.get_ophys_summary_table(21) + psth.load_df_and_compute_running( + summary_df, + args.cell_type, + args.response, + args.data, + args.nboots, + args.bin_num + ) + print('finished') diff --git a/visual_behavior_glm/PSTH.py b/visual_behavior_glm/PSTH.py index 33d36e76..387ab44f 100644 --- a/visual_behavior_glm/PSTH.py +++ b/visual_behavior_glm/PSTH.py @@ -1,4 +1,5 @@ import os +import pickle import numpy as np import pandas as pd import seaborn as sns @@ -557,6 +558,63 @@ def plot_strategy_histogram(full_df,cre,condition,experience_level,savefig=False return ax +def compute_running_bootstrap_bin(df, condition, cell_type, bin_num, nboots=10000, + data='events'): + + filename = get_hierarchy_filename(cell_type,condition,data,'all',nboots, + ['visual_strategy_session'],'running_{}'.format(bin_num)) + if os.path.isfile(filename): + print('Already computed {}'.format(bin_num)) + return + + # Figure out bins + if condition =='omission': + bin_width=5 + elif condition =='image': + bin_width=5#2 + df['running_bins'] = np.floor(df['running_speed']/bin_width) + bins = np.sort(df['running_bins'].unique()) + + # Check if any data + if bin_num not in bins: + print('No data for this running bin') + return + + # Compute bootstrap for this bin + temp = df.query('running_bins == @bin_num')[['visual_strategy_session', + 'ophys_experiment_id','cell_specimen_id','response']] + means = hb.bootstrap(temp, levels=['visual_strategy_session', + 'ophys_experiment_id','cell_specimen_id'],nboots=nboots) + if (True in means) & (False in means): + diff = np.array(means[True]) - np.array(means[False]) + pboot = np.sum(diff<0)/len(diff) + visual = np.std(means[True]) + timing = np.std(means[False]) + else: + pboot = 1 + if (True in means): + visual = np.std(means[True]) + else: + visual = 0 + if (False in means): + timing = np.std(means[False]) + else: + timing = 0 + bootstrap = { + 'running_bin':bin_num, + 'p_boot':pboot, + 'visual_sem':visual, + 'timing_sem':timing, + 'n_boots':nboots, + } + + # Save to file + filename = get_hierarchy_filename(cell_type,condition,data,'all',nboots, + ['visual_strategy_session'],'running_{}'.format(bin_num)) + with open(filename,'wb') as handle: + pickle.dump(bootstrap, handle, protocol=pickle.HIGHEST_PROTOCOL) + print('bin saved to {}'.format(filename)) + def compute_running_bootstrap(df,condition,cell_type,nboots=10000,data='events'): if condition =='omission': bin_width=5 @@ -565,34 +623,45 @@ def compute_running_bootstrap(df,condition,cell_type,nboots=10000,data='events') df['running_bins'] = np.floor(df['running_speed']/bin_width) bootstraps = [] - bins = df['running_bins'].unique() + bins = np.sort(df['running_bins'].unique()) for b in bins: - temp = df.query('running_bins == @b')[['visual_strategy_session', - 'ophys_experiment_id','cell_specimen_id','response']] - means = hb.bootstrap(temp, levels=['visual_strategy_session', - 'ophys_experiment_id','cell_specimen_id'],nboots=nboots) - if (True in means) & (False in means): - diff = np.array(means[True]) - np.array(means[False]) - pboot = np.sum(diff<0)/len(diff) - visual = np.std(means[True]) - timing = np.std(means[False]) + # First check if this running bin has already been computed + filename = get_hierarchy_filename(cell_type,condition,data,'all',nboots, + ['visual_strategy_session'],'running_{}'.format(int(b))) + if os.path.isfile(filename): + # Load this bin + with open(filename,'rb') as handle: + this_boot = pickle.load(handle) + print('loading this boot from file: {}'.format(b)) + bootstraps.append(this_boot) else: - pboot = 1 - if (True in means): + print('Need to compute this bin: {}'.format(b)) + temp = df.query('running_bins == @b')[['visual_strategy_session', + 'ophys_experiment_id','cell_specimen_id','response']] + means = hb.bootstrap(temp, levels=['visual_strategy_session', + 'ophys_experiment_id','cell_specimen_id'],nboots=nboots) + if (True in means) & (False in means): + diff = np.array(means[True]) - np.array(means[False]) + pboot = np.sum(diff<0)/len(diff) visual = np.std(means[True]) - else: - visual = 0 - if (False in means): timing = np.std(means[False]) else: - timing = 0 - bootstraps.append({ - 'running_bin':b, - 'p_boot':pboot, - 'visual_sem':visual, - 'timing_sem':timing, - 'n_boots':nboots, - }) + pboot = 1 + if (True in means): + visual = np.std(means[True]) + else: + visual = 0 + if (False in means): + timing = np.std(means[False]) + else: + timing = 0 + bootstraps.append({ + 'running_bin':b, + 'p_boot':pboot, + 'visual_sem':visual, + 'timing_sem':timing, + 'n_boots':nboots, + }) # Convert to dataframe, do multiple comparisons corrections bootstraps = pd.DataFrame(bootstraps) @@ -603,21 +672,23 @@ def compute_running_bootstrap(df,condition,cell_type,nboots=10000,data='events') bootstraps = bootstraps.drop(columns=['location']) # Save file - filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots,['visual_strategy_session'],'running') + filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots, + ['visual_strategy_session'],'running') print('saving bootstraps to: '+filepath) bootstraps.to_feather(filepath) return bootstraps def get_running_bootstraps(cell_type, condition,data,nboots): - filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots,['visual_strategy_session'],'running') + filepath = get_hierarchy_filename(cell_type,condition,data,'all',nboots, + ['visual_strategy_session'],'running') if os.path.isfile(filepath): bootstraps = pd.read_feather(filepath) return bootstraps else: print('file not found, compute the running bootstraps first') -def running_responses(df,condition, cre='vip', bootstraps=None,savefig=False,data='events', - split='visual_strategy_session'): +def running_responses(df, condition, cre='vip', bootstraps=None, savefig=False, + data='events', split='visual_strategy_session'): if condition =='omission': bin_width=5 elif condition =='image': @@ -698,6 +769,19 @@ def running_responses(df,condition, cre='vip', bootstraps=None,savefig=False,dat print('Figure saved to {}'.format(filename)) plt.savefig(filename) +def load_df_and_compute_running(summary_df, cell_type, response, data, nboots, bin_num): + mapper = { + 'exc':'Slc17a7-IRES2-Cre', + 'sst':'Sst-IRES-Cre', + 'vip':'Vip-IRES-Cre' + } + if response == 'image': + df = load_image_df(summary_df, mapper[cell_type], data) + elif response == 'omission': + df = load_omission_df(summary_df, mapper[cell_type], data) + compute_running_bootstrap_bin(df,response, cell_type, bin_num, nboots=nboots) + + def load_df_and_compute_hierarchy(summary_df, cell_type, response, data, depth, nboots, splits, query='', extra=''): mapper = { diff --git a/visual_behavior_glm/neuro_dev.py b/visual_behavior_glm/neuro_dev.py index a6a30b94..406fb3ec 100644 --- a/visual_behavior_glm/neuro_dev.py +++ b/visual_behavior_glm/neuro_dev.py @@ -108,6 +108,10 @@ vip_omission = psth.load_omission_df(summary_df, cre='Vip-IRES-Cre',data='events') vip_image = psth.load_image_df(summary_df, cre='Vip-IRES-Cre',data='events') +# To make things work on the HPC, you can compute just one running bin +psth.compute_running_bootstrap_bin(vip_omission,'omission','vip',bin_num,data='events', + nboots=nboots) + # Generate bootstrapped errorbars: bootstraps_omission = psth.compute_running_bootstrap(vip_omission,'omission','vip', data='events',nboots=nboots)