
Commit 3a9994a

Merge pull request #518 from AllenInstitute/fix/linting
This addresses issue #513 and makes PR #514 unnecessary. For future reference, here is the explanation of the #513 fix: this PR changes all of the calls to pandas.read_hdf() so that they no longer pass 'format' as a kwarg. We used to do this, and the passed kwarg was silently ignored by HDFStore.open because it wasn't a defined keyword argument for that method, so no alarms were raised. See: pandas-dev/pandas#13291

In newer versions of pandas, an exception is raised if you try to pass 'format' to read_hdf(). Removing this kwarg:
- won't change the output of read_hdf, since the kwarg was being ignored before, and
- allows Marina's old notebooks with VBA analysis code to run against pandas==0.24.2.
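For context, a minimal sketch of the pattern the fix relies on (the file name and DataFrame contents below are hypothetical, not taken from the repository). The 'format' option belongs to the write side (DataFrame.to_hdf / HDFStore); read_hdf reads whatever format is already stored in the file, so dropping the kwarg from the read calls leaves the loaded result unchanged.

    import pandas as pd

    # hypothetical example data
    df = pd.DataFrame({'a': [1, 2, 3]})

    # 'format' is a valid option when writing: 'fixed' (the default) or 'table'
    df.to_hdf('metadata.h5', key='df', format='fixed')

    # old call in this repo: older pandas silently ignored the extra kwarg,
    # newer pandas raises an error instead
    # loaded = pd.read_hdf('metadata.h5', key='df', format='fixed')

    # new call: same output, works on both old and new pandas
    loaded = pd.read_hdf('metadata.h5', key='df')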
2 parents: a887ead + c5d7574 · commit 3a9994a

File tree: 6 files changed, +279 −271 lines


visual_behavior/ophys/dataset/cell_matching_dataset.py (+4 −4; whitespace-only changes to continuation-line indentation)

@@ -85,10 +85,10 @@ def get_analysis_folder_name(self):
         lims_data = self.lims_data
         date = str(lims_data.experiment_date)[:10].split('-')
         analysis_folder_name = str(lims_data.lims_id) + '_' + \
-            str(lims_data.mouse_id) + '_' + date[0][2:] + date[1] + date[2] + '_' + \
-            lims_data.structure + '_' + str(lims_data.depth) + '_' + \
-            lims_data.specimen_driver_line.split('-')[0] + '_' + lims_data.rig[3:5] + \
-            lims_data.rig[6] + '_' + lims_data.session_type
+            str(lims_data.mouse_id) + '_' + date[0][2:] + date[1] + date[2] + '_' + \
+            lims_data.structure + '_' + str(lims_data.depth) + '_' + \
+            lims_data.specimen_driver_line.split('-')[0] + '_' + lims_data.rig[3:5] + \
+            lims_data.rig[6] + '_' + lims_data.session_type
         self.analysis_folder_name = analysis_folder_name
         return self.analysis_folder_name

visual_behavior/ophys/dataset/visual_behavior_ophys_dataset.py (+14 −15)

@@ -96,13 +96,13 @@ def get_analysis_dir(self):
     analysis_dir = LazyLoadable('_analysis_dir', get_analysis_dir)

     def get_metadata(self):
-        self._metadata = pd.read_hdf(os.path.join(self.analysis_dir, 'metadata.h5'), key='df', format='fixed')
+        self._metadata = pd.read_hdf(os.path.join(self.analysis_dir, 'metadata.h5'), key='df')
         return self._metadata

     metadata = LazyLoadable('_metadata', get_metadata)

     def get_timestamps(self):
-        self._timestamps = pd.read_hdf(os.path.join(self.analysis_dir, 'timestamps.h5'), key='df', format='fixed')
+        self._timestamps = pd.read_hdf(os.path.join(self.analysis_dir, 'timestamps.h5'), key='df')
         return self._timestamps

     timestamps = LazyLoadable('_timestamps', get_timestamps)

@@ -122,7 +122,7 @@ def get_timestamps_ophys(self):
     def get_stimulus_table(self):
         self._stimulus_table = pd.read_hdf(
             os.path.join(self.analysis_dir, 'stimulus_table.h5'),
-            key='df', format='fixed'
+            key='df'
         )
         self._stimulus_table = self._stimulus_table.reset_index()
         # self._stimulus_table = self._stimulus_table.drop(

@@ -146,42 +146,42 @@ def get_stimulus_template(self):
     def get_stimulus_metadata(self):
         self._stimulus_metadata = pd.read_hdf(
             os.path.join(self.analysis_dir, 'stimulus_metadata.h5'),
-            key='df', format='fixed'
+            key='df'
         )
         self._stimulus_metadata = self._stimulus_metadata.drop(columns='image_category')
         return self._stimulus_metadata

     stimulus_metadata = LazyLoadable('_stimulus_metadata', get_stimulus_metadata)

     def get_running_speed(self):
-        self._running_speed = pd.read_hdf(os.path.join(self.analysis_dir, 'running_speed.h5'), key='df', format='fixed')
+        self._running_speed = pd.read_hdf(os.path.join(self.analysis_dir, 'running_speed.h5'), key='df')
         return self._running_speed

     running_speed = LazyLoadable('_running_speed', get_running_speed)

     def get_licks(self):
-        self._licks = pd.read_hdf(os.path.join(self.analysis_dir, 'licks.h5'), key='df', format='fixed')
+        self._licks = pd.read_hdf(os.path.join(self.analysis_dir, 'licks.h5'), key='df')
         return self._licks

     licks = LazyLoadable('_licks', get_licks)

     def get_rewards(self):
-        self._rewards = pd.read_hdf(os.path.join(self.analysis_dir, 'rewards.h5'), key='df', format='fixed')
+        self._rewards = pd.read_hdf(os.path.join(self.analysis_dir, 'rewards.h5'), key='df')
         return self._rewards

     rewards = LazyLoadable('_rewards', get_rewards)

     def get_task_parameters(self):
         self._task_parameters = pd.read_hdf(
             os.path.join(self.analysis_dir, 'task_parameters.h5'),
-            key='df', format='fixed'
+            key='df'
         )
         return self._task_parameters

     task_parameters = LazyLoadable('_task_parameters', get_task_parameters)

     def get_all_trials(self):
-        self._all_trials = pd.read_hdf(os.path.join(self.analysis_dir, 'trials.h5'), key='df', format='fixed')
+        self._all_trials = pd.read_hdf(os.path.join(self.analysis_dir, 'trials.h5'), key='df')
         return self._all_trials

     all_trials = LazyLoadable('_all_trials', get_all_trials)

@@ -255,13 +255,12 @@ def get_events(self):
     events = LazyLoadable('_events', get_events)

     def get_roi_metrics(self):
-        self._roi_metrics = pd.read_hdf(os.path.join(self.analysis_dir, 'roi_metrics.h5'), key='df', format='fixed')
+        self._roi_metrics = pd.read_hdf(os.path.join(self.analysis_dir, 'roi_metrics.h5'), key='df')
         return self._roi_metrics

     roi_metrics = LazyLoadable('_roi_metrics', get_roi_metrics)

     def get_roi_mask_dict(self):
-        cell_specimen_ids = self.get_cell_specimen_ids()
         f = h5py.File(os.path.join(self.analysis_dir, 'roi_masks.h5'), 'r')
         roi_mask_dict = {}
         for key in f.keys():

@@ -300,15 +299,15 @@ def get_average_image(self):
     def get_motion_correction(self):
         self._motion_correction = pd.read_hdf(
             os.path.join(self.analysis_dir, 'motion_correction.h5'),
-            key='df', format='fixed'
+            key='df'
         )
         return self._motion_correction

     motion_correction = LazyLoadable('_motion_correction', get_motion_correction)

     def get_cell_specimen_ids(self):
         roi_metrics = self.roi_metrics
-        self._cell_specimen_ids = np.asarray([roi_metrics[roi_metrics.roi_id==roi_id].id.values[0]
+        self._cell_specimen_ids = np.asarray([roi_metrics[roi_metrics.roi_id == roi_id].id.values[0]
                                               for roi_id in np.sort(self.roi_metrics.roi_id.values)])
         return self._cell_specimen_ids

@@ -322,12 +321,12 @@ def get_cell_indices(self):

     def get_cell_specimen_id_for_cell_index(self, cell_index):
         roi_metrics = self.roi_metrics
-        cell_specimen_id = roi_metrics[roi_metrics.cell_index==cell_index].id.values[0]
+        cell_specimen_id = roi_metrics[roi_metrics.cell_index == cell_index].id.values[0]
         return cell_specimen_id

     def get_cell_index_for_cell_specimen_id(self, cell_specimen_id):
         roi_metrics = self.roi_metrics
-        cell_index = roi_metrics[roi_metrics.id==cell_specimen_id].cell_index.values[0]
+        cell_index = roi_metrics[roi_metrics.id == cell_specimen_id].cell_index.values[0]
         return cell_index

     @classmethod
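The repeated LazyLoadable('_metadata', get_metadata) pattern in the diff above is a cached-attribute descriptor. As an illustration only (a hedged sketch, not the repository's actual LazyLoadable implementation), such a descriptor can look roughly like this: the first attribute access runs the getter, which stores its result under the private name, and later accesses return the cached value.

    class LazyLoadable(object):
        # Illustrative sketch of a lazy, cached attribute descriptor; the names
        # mirror the usage in the diff above, the details are assumptions.
        def __init__(self, name, calculate):
            self.name = name            # cache attribute, e.g. '_metadata'
            self.calculate = calculate  # getter, e.g. get_metadata

        def __get__(self, obj, objtype=None):
            if obj is None:
                return self
            if not hasattr(obj, self.name):
                # the getter assigns obj._metadata (or similar) and returns it
                self.calculate(obj)
            return getattr(obj, self.name)

With a descriptor like this, dataset.metadata would trigger get_metadata (and its pd.read_hdf call) only once per instance; subsequent reads come from the cached private attribute.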

visual_behavior/ophys/response_analysis/response_analysis.py (+30 −26)

@@ -42,10 +42,10 @@ def __init__(self, dataset, overwrite_analysis_files=False, use_events=False):
        self.trial_window = [-4, 8]  # time, in seconds, around change time to extract portion of cell trace
        self.flash_window = [-0.5,
                             0.75]  # time, in seconds, around stimulus flash onset time to extract portion of cell trace
-       self.omitted_flash_window = [-3,3]
+       self.omitted_flash_window = [-3, 3]
        self.response_window_duration = 0.5  # window, in seconds, over which to take the mean for a given trial or flash
        self.response_window = [np.abs(self.trial_window[0]), np.abs(self.trial_window[
-           0]) + self.response_window_duration]  # time, in seconds, around change time to take the mean response
+           0]) + self.response_window_duration]  # time, in seconds, around change time to take the mean response
        self.baseline_window = np.asarray(
            self.response_window) - self.response_window_duration  # time, in seconds, relative to change time to take baseline mean response
        self.stimulus_duration = 0.25  # self.dataset.task_parameters['stimulus_duration'].values[0]

@@ -98,14 +98,16 @@ def generate_trial_response_df(self):
                mean_running_speed = ut.get_mean_in_window(running_speed_trace, self.response_window,
                                                           self.stimulus_frame_rate)
                df_list.append(
-                   [trial, int(cell_index), int(cell_specimen_id), trace, timestamps, mean_response, baseline_response, n_events,
-                    p_value, sd_over_baseline, mean_running_speed, cell_matching_has_been_run, self.dataset.experiment_id])
-                   #running_speed_trace, running_speed_timestamps,
-
+                   [trial, int(cell_index), int(cell_specimen_id), trace, timestamps, mean_response, baseline_response,
+                    n_events,
+                    p_value, sd_over_baseline, mean_running_speed, cell_matching_has_been_run,
+                    self.dataset.experiment_id])
+               # running_speed_trace, running_speed_timestamps,

        columns = ['trial', 'cell', 'cell_specimen_id', 'trace', 'timestamps', 'mean_response', 'baseline_response',
-                  'n_events', 'p_value', 'sd_over_baseline', 'mean_running_speed', 'cell_matching_has_been_run', 'experiment_id']
-       #'running_speed_trace', 'running_speed_timestamps',
+                  'n_events', 'p_value', 'sd_over_baseline', 'mean_running_speed', 'cell_matching_has_been_run',
+                  'experiment_id']
+       # 'running_speed_trace', 'running_speed_timestamps',
        trial_response_df = pd.DataFrame(df_list, columns=columns)
        trial_metadata = self.dataset.trials
        trial_metadata = trial_metadata.rename(columns={'response': 'behavioral_response'})

@@ -123,7 +125,6 @@ def save_trial_response_df(self, trial_response_df):
    def get_trial_response_df(self):
        if self.overwrite_analysis_files:
            print('overwriting analysis files')
-           import h5py
            file_path = self.get_trial_response_df_path()
            if os.path.exists(file_path):
                os.remove(file_path)

@@ -132,7 +133,7 @@ def get_trial_response_df(self):
        else:
            if os.path.exists(self.get_trial_response_df_path()):
                print('loading trial response dataframe')
-               self.trial_response_df = pd.read_hdf(self.get_trial_response_df_path(), key='df', format='fixed')
+               self.trial_response_df = pd.read_hdf(self.get_trial_response_df_path(), key='df')
                tdf = self.trial_response_df
                tdf.cell = [int(cell) for cell in tdf.cell.values]
                tdf.cell_specimen_id = [int(cell_specimen_id) for cell_specimen_id in tdf.cell_specimen_id.values]

@@ -195,15 +196,19 @@ def generate_flash_response_df(self):
                                                           self.stimulus_frame_rate)
                reward_rate = flash_data.reward_rate.values[0]

-               row.append([int(cell), int(cell_specimen_id), int(flash), omitted, flash_time, image_name, image_category,
-                           trace, timestamps, mean_response, baseline_response, n_events, p_value_baseline, sd_over_baseline,
-                           reward_rate, mean_running_speed, cell_matching_has_been_run, int(self.dataset.experiment_id)])
+               row.append(
+                   [int(cell), int(cell_specimen_id), int(flash), omitted, flash_time, image_name, image_category,
+                    trace, timestamps, mean_response, baseline_response, n_events, p_value_baseline, sd_over_baseline,
+                    reward_rate, mean_running_speed, cell_matching_has_been_run, int(self.dataset.experiment_id)])

        flash_response_df = pd.DataFrame(data=row,
                                         columns=['cell', 'cell_specimen_id', 'flash_number', 'omitted', 'start_time',
-                                                 'image_name', 'image_category', 'trace', 'timestamps', 'mean_response',
-                                                 'baseline_response', 'n_events', 'p_value_baseline', 'sd_over_baseline',
-                                                 'reward_rate', 'mean_running_speed', 'cell_matching_has_been_run', 'experiment_id'])
+                                                 'image_name', 'image_category', 'trace', 'timestamps',
+                                                 'mean_response',
+                                                 'baseline_response', 'n_events', 'p_value_baseline',
+                                                 'sd_over_baseline',
+                                                 'reward_rate', 'mean_running_speed', 'cell_matching_has_been_run',
+                                                 'experiment_id'])
        flash_response_df = ut.annotate_flash_response_df_with_pref_stim(flash_response_df)
        flash_response_df = ut.add_repeat_number_to_flash_response_df(flash_response_df, stimulus_table)
        flash_response_df = ut.add_image_block_to_flash_response_df(flash_response_df, stimulus_table)

@@ -238,7 +243,6 @@ def save_flash_response_df(self, flash_response_df):
    def get_flash_response_df(self):
        if self.overwrite_analysis_files:
            # delete old file or else it will keep growing in size
-           import h5py
            file_path = self.get_flash_response_df_path()
            if os.path.exists(file_path):
                os.remove(file_path)

@@ -247,7 +251,7 @@ def get_flash_response_df(self):
        else:
            if os.path.exists(self.get_flash_response_df_path()):
                print('loading flash response dataframe')
-               self.flash_response_df = pd.read_hdf(self.get_flash_response_df_path(), key='df', format='fixed')
+               self.flash_response_df = pd.read_hdf(self.get_flash_response_df_path(), key='df')
                fdf = self.flash_response_df
                fdf.cell = [int(cell) for cell in fdf.cell.values]
                fdf.cell_specimen_id = [int(cell_specimen_id) for cell_specimen_id in fdf.cell_specimen_id.values]

@@ -268,7 +272,7 @@ def get_omitted_flash_response_df_path(self):
    def generate_omitted_flash_response_df(self):
        print('generating omitted flash response df')
        stimulus_table = ut.annotate_flashes_with_reward_rate(self.dataset)
-       stimulus_table = stimulus_table[stimulus_table.omitted==True]
+       stimulus_table = stimulus_table[stimulus_table.omitted == True]
        row = []
        for cell in self.dataset.cell_indices:
            cell = int(cell)

@@ -304,13 +308,15 @@ def generate_omitted_flash_response_df(self):
                n_events = ut.get_n_nonzero_in_window(trace, response_window, self.ophys_frame_rate)
                reward_rate = flash_data.reward_rate.values[0]

-               row.append([int(cell), int(cell_specimen_id), int(flash), omitted, flash_time, image_name, image_category,
-                           trace, timestamps, mean_response, baseline_response, n_events, p_value, sd_over_baseline,
-                           reward_rate, cell_matching_has_been_run, int(self.dataset.experiment_id)])
+               row.append(
+                   [int(cell), int(cell_specimen_id), int(flash), omitted, flash_time, image_name, image_category,
+                    trace, timestamps, mean_response, baseline_response, n_events, p_value, sd_over_baseline,
+                    reward_rate, cell_matching_has_been_run, int(self.dataset.experiment_id)])

        flash_response_df = pd.DataFrame(data=row,
                                         columns=['cell', 'cell_specimen_id', 'flash_number', 'omitted', 'start_time',
-                                                 'image_name', 'image_category', 'trace', 'timestamps', 'mean_response',
+                                                 'image_name', 'image_category', 'trace', 'timestamps',
+                                                 'mean_response',
                                                  'baseline_response', 'n_events', 'p_value', 'sd_over_baseline',
                                                  'reward_rate', 'cell_matching_has_been_run', 'experiment_id'])
        # flash_response_df = ut.annotate_flash_response_df_with_pref_stim(flash_response_df)

@@ -325,7 +331,6 @@ def save_omitted_flash_response_df(self, omitted_flash_response_df):
    def get_omitted_flash_response_df(self):
        if self.overwrite_analysis_files:
            # delete old file or else it will keep growing in size
-           import h5py
            file_path = self.get_omitted_flash_response_df_path()
            if os.path.exists(file_path):
                os.remove(file_path)

@@ -334,7 +339,7 @@ def get_omitted_flash_response_df(self):
        else:
            if os.path.exists(self.get_omitted_flash_response_df_path()):
                print('loading omitted flash response dataframe')
-               self.omitted_flash_response_df = pd.read_hdf(self.get_omitted_flash_response_df_path(), key='df', format='fixed')
+               self.omitted_flash_response_df = pd.read_hdf(self.get_omitted_flash_response_df_path(), key='df')
                fdf = self.omitted_flash_response_df
                fdf.cell = [int(cell) for cell in fdf.cell.values]
                fdf.cell_specimen_id = [int(cell_specimen_id) for cell_specimen_id in fdf.cell_specimen_id.values]

@@ -344,7 +349,6 @@ def get_omitted_flash_response_df(self):
            self.save_omitted_flash_response_df(self.omitted_flash_response_df)
        return self.omitted_flash_response_df

-
    def compute_pairwise_correlations(self):
        fdf = self.flash_response_df.copy()
        if 'omitted' in fdf.keys():
