Skip to content

Commit 313bdd4

Browse files
committed
Load images from directory and train a model
1 parent b42a392 commit 313bdd4

8 files changed

+97
-5
lines changed

Dockerfile.cpu

+1
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,5 @@ WORKDIR /opt/openautoml/runner
2121
COPY requirements.txt .
2222
RUN pip install -r requirements.txt
2323

24+
COPY data/ data/
2425
COPY src/ src/

Dockerfile.nvidia

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,5 @@ RUN rm -rf pytorch
3838
COPY requirements.txt .
3939
RUN pip install -r requirements.txt
4040

41-
41+
COPY data/ data/
4242
COPY src/ src/

models/mnist_model.h5

486 KB
Binary file not shown.

models/mnist_model.png

52.3 KB
Loading

src/constants.py

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
import os
2+
from pathlib import Path
3+
4+
# Root of the project: the directory one level above the folder that holds
# this module (symlinks are resolved first via os.path.realpath).
_THIS_FILE = Path(os.path.realpath(__file__))
BASE_PATH = _THIS_FILE.parents[1]
5+
6+
# Location of the `data` directory directly under BASE_PATH.
DATA_PATH = BASE_PATH.joinpath('data')

src/hello_flowers.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
from autokeras.image_supervised import ImageClassifier
2+
from datetime import datetime
3+
from autokeras.image_supervised import load_image_dataset
4+
from constants import DATA_PATH
5+
6+
7+
def main():
    """Train an image classifier on the flowers dataset and save the model.

    Loads the train and test splits from ``DATA_PATH / 'flowers_one_dir'``,
    fits an ``ImageClassifier``, prints the evaluation result on the test
    split, writes the best model to ``flower_model.h5`` and prints the total
    run time.
    """
    started = datetime.now()

    flowers_dir = DATA_PATH / 'flowers_one_dir'
    train_dir = flowers_dir / 'train'
    test_dir = flowers_dir / 'test'

    x_train, y_train = load_image_dataset(
        csv_file_path=train_dir / 'labels.csv',
        images_path=train_dir,
    )

    # The test labels must be paired with the images in the *test* directory;
    # pointing images_path at the train directory would look up files that
    # were never copied there.
    x_test, y_test = load_image_dataset(
        csv_file_path=test_dir / 'labels.csv',
        images_path=test_dir,
    )

    # The same trainer settings are used for the search and the final fit.
    trainer_args = {
        'max_iter_num': 10,
    }

    clf = ImageClassifier(verbose=True, searcher_args={
        'trainer_args': dict(trainer_args),
    })
    print(clf)

    # 6 * 60 * 60 is presumably a limit in seconds (six hours) -- confirm
    # against the autokeras version in use.
    clf.fit(x_train, y_train, time_limit=6 * 60 * 60)
    clf.final_fit(x_train, y_train, x_test, y_test,
                  trainer_args=dict(trainer_args))

    y = clf.evaluate(x_test, y_test)
    print(y)

    best_model = clf.load_searcher().load_best_model()
    best_model.produce_keras_model().save('flower_model.h5')

    finished = datetime.now() - started
    print(f'Total training duration: {finished}')


if __name__ == '__main__':
    main()

src/app.py src/hello_mnist.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from autokeras.image_supervised import ImageClassifier
33
from datetime import datetime
44

5+
56
def main():
67
started = datetime.now()
78
(x_train, y_train), (x_test, y_test) = mnist.load_data()
@@ -10,19 +11,19 @@ def main():
1011

1112
clf = ImageClassifier(verbose=True, searcher_args={
1213
'trainer_args': {
13-
'max_iter_num': 3,
14+
'max_iter_num': 10,
1415
}
1516
})
1617
print(clf)
1718
# clf.fit(x_train, y_train, time_limit=12 * 60 * 60)
18-
clf.fit(x_train, y_train, time_limit=10 * 60)
19+
clf.fit(x_train, y_train, time_limit=60 * 60)
1920
# clf.final_fit(x_train, y_train, x_test, y_test, retrain=True)
2021
clf.final_fit(x_train, y_train, x_test, y_test, trainer_args={
21-
'max_iter_num': 3,
22+
'max_iter_num': 10,
2223
})
2324
y = clf.evaluate(x_test, y_test)
2425
print(y)
25-
clf.load_searcher().load_best_model().produce_keras_model().save('my_model.h5')
26+
clf.load_searcher().load_best_model().produce_keras_model().save('mnist_model.h5')
2627
finished = datetime.now() - started
2728
print(f'Total training duration: {finished}')
2829

src/preprocess_flowers.py

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
from constants import DATA_PATH
2+
import pandas as pd
3+
from sklearn.model_selection import train_test_split
4+
import shutil
5+
6+
7+
# TODO: Scrap this preprocessing once https://github.com/jhfjhfj1/autokeras/issues/204 is fixed and use
8+
# TODO: keras.preprocessing.image.ImageDataGenerator.flow_from_directory(dir)
9+
# TODO: https://keras.io/preprocessing/image/
10+
11+
# Column names of the generated labels.csv files.
FILE_NAME_COLUMN = 'File Name'
LABEL_COLUMN = 'Label'

# Fraction of every class that goes into the test split.
TEST_FRACTION = .15

# Input: one sub-directory per class. Output: flat train/ and test/ folders.
SOURCE_DIR = DATA_PATH / 'flowers'
OUTPUT_DIR = DATA_PATH / 'flowers_one_dir'


def _list_class_images(class_dir):
    """Return a DataFrame with one row per ``*.jpg`` file in *class_dir*.

    The flattened file name is ``<class dir name>_<image name>`` and the
    label is the class directory name.
    """
    return pd.DataFrame(
        [
            {
                FILE_NAME_COLUMN: f'{class_dir.name}_{image.name}',
                LABEL_COLUMN: class_dir.name,
            }
            for image in class_dir.glob('*.jpg')
        ],
        columns=[FILE_NAME_COLUMN, LABEL_COLUMN],
    )


def _combine(frames):
    """Concatenate *frames*, or return an empty labelled frame if none."""
    if not frames:
        return pd.DataFrame(columns=[FILE_NAME_COLUMN, LABEL_COLUMN])
    return pd.concat(frames)


def _export_split(frame, target_dir):
    """Create *target_dir*, write its labels.csv and copy its images."""
    target_dir.mkdir(parents=True)
    frame.to_csv(target_dir / 'labels.csv', index=False)

    for file_name, label in zip(frame[FILE_NAME_COLUMN], frame[LABEL_COLUMN]):
        # Strip the "<label>_" prefix instead of splitting on the first
        # underscore, so class names that contain '_' still resolve.
        original_name = file_name[len(label) + 1:]
        shutil.copy(SOURCE_DIR / label / original_name, target_dir / file_name)


train_frames = []
test_frames = []

for dir_path in SOURCE_DIR.glob('*'):
    if not dir_path.is_dir():
        # Stray files next to the class folders are not classes.
        continue

    df = _list_class_images(dir_path)
    if len(df) < 2:
        # train_test_split needs at least one sample on each side.
        print(f'Skipping {dir_path.name}: fewer than 2 images')
        continue

    df_train, df_test = train_test_split(df, test_size=TEST_FRACTION)
    train_frames.append(df_train)
    test_frames.append(df_test)

# DataFrame.append was removed in pandas 2.0; concatenate once instead.
df_all_train = _combine(train_frames)
df_all_test = _combine(test_frames)

# Start from a clean output tree on every run.
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)

train_dir = OUTPUT_DIR / 'train'
test_dir = OUTPUT_DIR / 'test'

_export_split(df_all_train, train_dir)
_export_split(df_all_test, test_dir)
47+
48+
# print(df_all_test)

0 commit comments

Comments
 (0)