-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_pitching_dataframes.py
248 lines (167 loc) · 8.18 KB
/
create_pitching_dataframes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
'''
This Script is used after piching pre process and is used to take the data from the csv files and create them in a big
pandas data frame (might also make it a JSON style data frame later on)
'''
# initial importing of libraries needed
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import bs4
import requests_html
import json
import cv2
import csv
import time
import concurrent.futures
import os
#this is a function to reshape images:
def unflatten_images(array_of_images,img_width, img_height):
total_img_size = img_height*img_width
#make sure image size is correct
if array_of_images.shape[1] != total_img_size:
print(f'Error The shape of {img_width} x {img_height} is not the size of the images in the array of '
+ f'{array_of_images.shape[1]}')
raise ValueError
reshaped_img_array = array_of_images.reshape(array_of_images.shape[0],img_width,img_height)
return reshaped_img_array
def normalize_data_globaly(matrix,method = 'min-max'):
'''
:param matrix: (ndarray)
:param method: (string) wither mean or min-max to chose the type of global normalization
:return normalized matrix: (ndarray) the normalized matrix
'''
if method == 'mean':
normalized_matrix = (matrix - matrix.mean())/matrix.std()
elif method == 'min-max':
normalized_matrix = (matrix - matrix.min())/(matrix.max()-matrix.min())
else:
print(f'Error method must be mean or min-max!!\nCurrently it is {method}')
raise ValueError
return normalized_matrix
def crop_frames(frames_matrix,top_left_corner,bottom_right_corner):
#check to make sure frames matrix is 3 dimensions
if frames_matrix.ndim != 3:
print(f'Error frame matrix needs to be 3D!!! (frame,frame width, frame height)')
raise ValueError
#check to make sure size is ok:
cropped_frames_matrix = frames_matrix[:,top_left_corner[1]:bottom_right_corner[1],
top_left_corner[0]:bottom_right_corner[0]]
return cropped_frames_matrix
def display_clip(frames_matrix, img_width, img_height, number_of_frames=(4, 4), crop_the_frames=False,
top_left_corner=(0, 0), bottom_right_corner=(600, 600)):
# unflatten the frames matrix
frames_matrix = unflatten_images(frames_matrix, img_width, img_height)
# crop frames if needed
if crop_the_frames:
frames_matrix = crop_frames(frames_matrix, top_left_corner, bottom_right_corner)
total_num_frames_displayed = number_of_frames[0] * number_of_frames[1]
frames_chosen_to_display = np.random.choice(np.arange(frames_matrix.shape[0]), total_num_frames_displayed,
replace=False)
# create the subplot for the frames
fig, axes = plt.subplots(number_of_frames[0], number_of_frames[1], figsize=(20, 20))
for frame, ax in zip(frames_chosen_to_display, axes.flatten()):
ax.imshow(frames_matrix[frame, :, :], cmap='gray')
plt.show()
def write_csv_clips_to_npy(data_dir_names):
'''
this functions writes this list of clips saved in csv format to npy format
:param list_of_pitch_clips:
:return:
'''
data_collected_dirs = [dirr for dirr in os.listdir('data')]
for dir_name in data_dir_names:
if dir_name not in data_collected_dirs:
print(f'Error {dir_name} has not been collected')
raise ValueError
# loop through each session (dir)
for dir_name in data_dir_names:
print(dir_name)
#make a directory for the numy files
npy_dir_name = f'data/{dir_name}_npy'
try:
os.mkdir(npy_dir_name)
except OSError as error:
print(error)
#get the file path of every file in the original session directory
files_in_dir = [f'data/{dir_name}/{file}' for file in os.listdir(f'data/{dir_name}')]
#loop through every file in the dir
for file_path in files_in_dir:
#load in the csv to a numpy array
csv_array = np.genfromtxt(file_path, delimiter=',')
npy_file_name = file_path.split('/')[-1]
npy_file_name = npy_dir_name + '/' + npy_file_name.split('.')[0] + '.npy'
#save the file to the new directory
np.save(npy_file_name,csv_array)
print(f'Done with write clips to npy method')
def concurrent_load_data(data_path,img_width = 480, img_height = 640,crop_the_frames = True,
top_left_corner=(100,30),bottom_right_corner=(450,420), data_type = 'float32'):
#load in the clip for proper data type
if data_path.split('.')[-1] == 'csv':
frames_matrix = np.genfromtxt(data_path, delimiter=',')
elif data_path.split('.')[-1] == 'npy':
frames_matrix = np.load(data_path)
else:
print(f'Error data file must be a csv or npy file!!!\nCurrently it is {data_path.split(".")[-1]}')
#set data type of frame matrix
frames_matrix = frames_matrix.astype(data_type)
#crop the clip if needed
if crop_the_frames:
#unflatten the frames matrix if needed
if frames_matrix.ndim == 2:
frames_matrix = unflatten_images(frames_matrix,img_width,img_height)
frames_matrix = crop_frames(frames_matrix, top_left_corner=top_left_corner
,bottom_right_corner=bottom_right_corner)
frames_matrix = frames_matrix.reshape(frames_matrix.shape[0],
frames_matrix.shape[1]*frames_matrix.shape[2])
clip_df = pd.DataFrame(frames_matrix)
#set the name of each column to pixels
clip_df.columns = [f'pixel_{col}' for col in clip_df.columns]
# Setting up labeling for each clip in the dataframe
clip_vars = data_path.split('/')[-1]
clip_vars = clip_vars.split('_')[::-1]
clip_vars = clip_vars[:4]
#getting rid of .csv
clip_vars[0] = clip_vars[0].split('.')[0]
pitcher = data_path.split('/')[-2]
pitcher = pitcher.split('_')[:-1]
pitcher = f'{pitcher[0]}_{pitcher[1]}'
clip_df['camera_type'] = [clip_vars[0] for c in range(0,clip_df.shape[0])]
clip_df['pitch_type'] = [clip_vars[2] for c in range(0,clip_df.shape[0])]
clip_df['session_number'] = [clip_vars[3] for c in range(0,clip_df.shape[0])]
clip_df['pitch_number'] = [clip_vars[1] for c in range(0,clip_df.shape[0])]
clip_df['pitcher'] = [pitcher for c in range(0,clip_df.shape[0])]
clip_df['frame_num'] = [c for c in range(0,clip_df.shape[0])]
# print(f'Done with concurrent_load_data()')
return clip_df
def load_clips_to_df(data_dir_names,img_width = 480, img_height = 640,crop_the_frames = True,
top_left_corner=(100,30),bottom_right_corner=(450,420)):
data_collected_dirs = [dirr for dirr in os.listdir('data')]
for dir_name in data_dir_names:
if dir_name not in data_collected_dirs:
print(f'Error {dir_name} has not been collected')
raise ValueError
results_list = [] # a list to hold all the results of the helper function
#loop through each session (dir) and run the function above on the clips)
for dir_name in data_dir_names:
#this is where multithreading will take place
with concurrent.futures.ThreadPoolExecutor() as thread_executer:
clips_list = os.listdir(f'data/{dir_name}/')
clips_list = [f'data/{dir_name}/{clip_name}' for clip_name in clips_list]
#use the helper function with the thread pool executer
results = thread_executer.map(concurrent_load_data, clips_list)
#loop through results and append them to the results list
for results in results:
results_list.append(results)
return results_list
'''
The main function used to test functions from this python script
'''
def main():
# t_load = concurrent_load_data('data/will_t_0_npy/will_t_0_changeup_4_Wc1.npy')
test_clips_df = load_clips_to_df(['will_t_0_npy'])
#write_csv_clips_to_npy(['will_t_1','will_t_2','will_t_3'])
t_c = pd.concat(test_clips_df)
print(f'Done with creating_pitching_dataframes.py')
if __name__ == "__main__":
main()