-
Notifications
You must be signed in to change notification settings - Fork 0
/
imageToInput.py
103 lines (90 loc) · 3.64 KB
/
imageToInput.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
from PIL import Image
import os
import numpy as np
import pandas as pd
# `dir` is the directory that contains one sub-folder per label; each sub-folder holds the image files for that label.
# Make sure that `csv_name` ends with ".csv".
def imagesToInputMatrixWithLabels(dir, save_as_csv=True, csv_name="matrix.csv"):
    """Build a shuffled pixel matrix with a trailing label column.

    Every sub-folder of ``dir`` is scanned for ``.jpg``/``.png`` files. Each
    image is converted to greyscale (Pillow mode ``'L'``), resized to 28x28,
    saved under ``./generated images/<foldername>/`` and flattened into 784
    pixel values. The name of the sub-folder an image lives in, converted
    with ``int()``, is used as its label in this implementation.

    Args:
        dir: Directory whose sub-folders hold the images. A trailing slash is
            optional. (The name shadows the builtin but is kept so existing
            keyword callers continue to work.)
        save_as_csv: When true, also write the matrix to ``csv_name``.
        csv_name: Destination CSV path; should end with ``.csv``.

    Returns:
        A 2-D array with one row per image: 784 inverted pixel values
        (``255 - pixel``) followed by the label. Rows are shuffled with
        ``np.random.shuffle``. When no image is found the result has shape
        ``(0, 785)``.

    Raises:
        ValueError: If a sub-folder that contains images has a name that
            ``int()`` cannot parse.
    """
    side = 28
    pixel_count = side * side

    vectors = []
    labels = []
    for foldername in os.listdir(dir):
        folder_path = os.path.join(dir, foldername)
        # Stray files sitting next to the label folders are ignored instead
        # of being passed to os.listdir().
        if not os.path.isdir(folder_path):
            continue

        image_names = [
            name for name in os.listdir(folder_path)
            if name.endswith((".jpg", ".png"))
        ]
        if not image_names:
            continue

        # The label is only parsed for folders that actually contribute rows.
        label = int(foldername)

        # Resized copies are written here; create the folder on first use.
        out_dir = "./generated images/" + foldername + "/"
        os.makedirs(out_dir, exist_ok=True)

        for filename in image_names:
            # The context manager closes the source file; convert()/resize()
            # return independent in-memory images.
            with Image.open(os.path.join(folder_path, filename)) as image:
                resized = image.convert(mode='L').resize((side, side))
            resized.save(out_dir + filename)
            vectors.append(np.array(resized).reshape(pixel_count))
            labels.append(label)

    # reshape(-1, ...) keeps the matrix 2-D even when no image was found.
    matrix = np.array(vectors, dtype=np.uint8).reshape(-1, pixel_count)
    # Invert the greyscale values (255 - pixel).
    matrix = 255 - matrix
    label_column = np.array(labels, dtype=int).reshape(-1, 1)
    matrix = np.concatenate((matrix, label_column), axis=1)
    np.random.shuffle(matrix)

    if save_as_csv:
        # Pixel columns are historically named "label_<i>"; the real label is
        # the final "labels" column.
        names = ["label_" + str(i) for i in range(pixel_count)]
        names.append("labels")
        pd.DataFrame(matrix, columns=names).to_csv(csv_name)
    return matrix
def imagesToInputMatrix(dir, save_as_csv=True, csv_name="matrix.csv"):
    """Build a shuffled, unlabelled pixel matrix from the images in ``dir``.

    Every ``.jpg``/``.png`` file directly inside ``dir`` is converted to
    greyscale (Pillow mode ``'L'``), resized to 28x28, saved under
    ``./generated images/`` and flattened into 784 pixel values.

    Args:
        dir: Directory holding the image files. A trailing slash is optional.
            (The name shadows the builtin but is kept so existing keyword
            callers continue to work.)
        save_as_csv: When true, also write the matrix to ``csv_name``.
        csv_name: Destination CSV path; should end with ``.csv``.

    Returns:
        A 2-D ``uint8`` array with one row of 784 inverted pixel values
        (``255 - pixel``) per image. Rows are shuffled with
        ``np.random.shuffle``. When no image is found the result has shape
        ``(0, 784)``.
    """
    side = 28
    pixel_count = side * side
    out_dir = "./generated images/"

    image_names = [
        name for name in os.listdir(dir)
        if name.endswith((".jpg", ".png"))
    ]
    if image_names:
        # Resized copies are written here; create the folder on first use.
        os.makedirs(out_dir, exist_ok=True)

    vectors = []
    for filename in image_names:
        # The context manager closes the source file; convert()/resize()
        # return independent in-memory images.
        with Image.open(os.path.join(dir, filename)) as image:
            resized = image.convert(mode='L').resize((side, side))
        resized.save(out_dir + filename)
        vectors.append(np.array(resized).reshape(pixel_count))

    # reshape(-1, ...) keeps the matrix 2-D even when no image was found.
    matrix = np.array(vectors, dtype=np.uint8).reshape(-1, pixel_count)
    # Invert the greyscale values (255 - pixel).
    matrix = 255 - matrix
    np.random.shuffle(matrix)

    if save_as_csv:
        # Exactly one header per pixel column. This matrix has no label
        # column, so no "labels" header is added (a 785th name would make
        # the DataFrame constructor fail on the 784-column matrix).
        names = ["label_" + str(i) for i in range(pixel_count)]
        pd.DataFrame(matrix, columns=names).to_csv(csv_name)
    return matrix
#Used to transform a single image
def singleImageToVector(filename, save_as_csv=True, csv_name="X.csv"):
    """Convert one image file into a single-row pixel vector.

    The image is converted to greyscale (Pillow mode ``'L'``), resized to
    28x28, saved as ``"./generated images/" + filename`` and flattened.

    Args:
        filename: Path of the image to load. The same string is appended to
            ``./generated images/`` to form the path of the resized copy.
        save_as_csv: When true, also write the vector to ``csv_name``.
        csv_name: Destination CSV path; should end with ``.csv``.

    Returns:
        A ``(1, 784)`` array of inverted pixel values (``255 - pixel``).
    """
    side = 28
    pixel_count = side * side

    # The context manager closes the source file; convert()/resize() return
    # independent in-memory images.
    with Image.open(filename) as image:
        resized = image.convert(mode='L').resize((side, side))

    out_path = "./generated images/" + filename
    # Create the output folder (including any sub-folders that come from
    # `filename`) so the save does not fail when it does not exist yet.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    resized.save(out_path)

    # Single row of 784 pixels.
    vector = np.array(resized).reshape(1, pixel_count)
    # We do this because in our dataset 255 is used for the darkest values
    # and in Pillow it is used for the lightest.
    vector = 255 - vector

    if save_as_csv:
        # Pixel columns are historically named "label_<i>".
        names = ["label_" + str(i) for i in range(pixel_count)]
        pd.DataFrame(vector, columns=names).to_csv(csv_name)
    return vector