Skip to content

Commit c0f6bb3

Browse files
authored
Update datasets.py
1 parent 00cc8a8 commit c0f6bb3

File tree

1 file changed

+8
-7
lines changed

1 file changed

+8
-7
lines changed

datasets.py

+8 -7
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ def __init__(self, data_dir, transform = None):
2222

2323
# full dataframe including train_val and test set
2424
self.df = self.get_df()
25-
print('self.df.shape: {}'.format(self.df.shape) )
25+
print('self.df.shape: {}'.format(self.df.shape))
2626

2727
self.make_pkl_dir(config.pkl_dir_path)
2828

@@ -54,12 +54,12 @@ def __init__(self, data_dir, transform = None):
5454
else:
5555
print('\n{}: already exists'.format(config.disease_classes_pkl_path))
5656

57-
self.new_df = self.df.iloc[self.the_chosen, :] # this is the sampled train_val data
57+
self.new_df = self.train_val_df.iloc[self.the_chosen, :] # this is the sampled train_val data
5858
print('\nself.all_classes_dict: {}'.format(self.all_classes_dict))
5959

6060
def resample(self):
6161
self.the_chosen, self.all_classes, self.all_classes_dict = self.choose_the_indices()
62-
self.new_df = self.df.iloc[self.the_chosen, :]
62+
self.new_df = self.train_val_df.iloc[self.the_chosen, :]
6363
print('\nself.all_classes_dict: {}'.format(self.all_classes_dict))
6464

6565
def make_pkl_dir(self, pkl_dir_path):
@@ -79,7 +79,7 @@ def get_train_val_df(self):
7979
if filename in train_val_list:
8080
train_val_df = train_val_df.append(self.df.iloc[i:i+1, :])
8181

82-
print('train_val_df.shape: {}'.format(train_val_df.shape))
82+
# print('train_val_df.shape: {}'.format(train_val_df.shape))
8383

8484
return train_val_df
8585

@@ -119,6 +119,7 @@ def choose_the_indices(self):
119119
all_classes[t] = 1
120120
else:
121121
all_classes[t] += 1
122+
continue
122123

123124
# choose if multiple labels
124125
if len(temp) > 1:
@@ -180,7 +181,7 @@ def get_train_val_list(self):
180181
f = open(os.path.join('data', 'NIH Chest X-rays', 'train_val_list.txt'), 'r')
181182
train_val_list = str.split(f.read(), '\n')
182183
return train_val_list
183-
184+
184185
def __len__(self):
185186
return len(self.new_df)
186187

@@ -194,6 +195,7 @@ def __init__(self, data_dir, transform = None):
194195

195196
# full dataframe including train_val and test set
196197
self.df = self.get_df()
198+
print('\nself.df.shape: {}'.format(self.df.shape))
197199

198200
self.make_pkl_dir(config.pkl_dir_path)
199201

@@ -220,7 +222,7 @@ def __init__(self, data_dir, transform = None):
220222

221223
def __getitem__(self, index):
222224
row = self.test_df.iloc[index, :]
223-
225+
224226
img = cv2.imread(row['image_links'])
225227
labels = str.split(row['Finding Labels'], '|')
226228

@@ -240,7 +242,6 @@ def make_pkl_dir(self, pkl_dir_path):
240242

241243
def get_df(self):
242244
csv_path = os.path.join(self.data_dir, 'Data_Entry_2017.csv')
243-
# print('{} found: {}'.format(csv_path, os.path.exists(csv_path)))
244245

245246
all_xray_df = pd.read_csv(csv_path)
246247

0 commit comments

Comments (0)