forked from Wall-eSociety/CommentVolumeML
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
53 lines (43 loc) · 1.92 KB
/
parser.py
File metadata and controls
53 lines (43 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import pandas
def _list_csv_files(directory):
    """Return the sorted full paths of the ``.csv`` entries inside *directory*."""
    return sorted(
        os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith(".csv")
    )


def obtain_directories(base_dir='.'):
    """Locate the training and testing CSV files of the dataset.

    The files are looked up in ``<base_dir>/Dataset/Training`` and
    ``<base_dir>/Dataset/Testing``; only entries whose name ends with
    ``.csv`` are kept.  Both resolved folder paths are printed, followed by
    a progress message.

    Args:
        base_dir: Directory that contains the ``Dataset`` folder.  Defaults
            to the current working directory, which is what this function
            always used before the parameter existed.

    Returns:
        A ``(list_train, list_test)`` tuple of lists holding the full paths
        of the CSV files, each sorted in ascending order.

    Raises:
        OSError: Propagated from ``os.listdir`` when one of the two folders
            cannot be listed (for example because it does not exist).
    """
    data_dir = os.path.join(os.path.realpath(base_dir), 'Dataset')

    train_dir = os.path.join(data_dir, 'Training')
    print(train_dir)
    test_dir = os.path.join(data_dir, 'Testing')
    print(test_dir)

    # Both folders are scanned the same way, so the scan lives in one helper.
    list_train = _list_csv_files(train_dir)
    list_test = _list_csv_files(test_dir)

    print("Obtaining folders...")
    return list_train, list_test
def obtain_data(train, test):
    """Load one training file and one testing file into DataFrames.

    Both files are read with ``pandas.read_csv`` using the same explicit
    list of 54 column labels, so the first row of each file is treated as
    data rather than as a header.

    Args:
        train: Path (or any other input ``pandas.read_csv`` accepts) of the
            training CSV.
        test: Path (or any other input ``pandas.read_csv`` accepts) of the
            testing CSV.

    Returns:
        A ``(trainData, testData)`` tuple of ``pandas.DataFrame`` objects.
    """
    # pandas.read_csv rejects duplicate entries in ``names``, so the 25
    # derived features need distinct labels.  The ``X``, ``X.1``, ...
    # pattern is the one pandas itself uses for duplicated column labels.
    derived = ["Derived"] + ["Derived.{}".format(i) for i in range(1, 25)]

    weekdays = ["Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
                "Friday", "Saturday"]

    columns = (
        ["Page Popularity/likes", "Page Checkins’s", "Page talking about",
         "Page Category"]
        + derived
        + ["CC1", "CC2", "CC3", "CC4", "CC5", "Base time",
           "Post length", "Post Share Count", "Post Promotion Status",
           "H Local"]
        + ["Post " + day for day in weekdays]
        + ["Base " + day for day in weekdays]
        + ["Target Variable"]
    )

    trainData = pandas.read_csv(train, names=columns)
    testData = pandas.read_csv(test, names=columns)
    print("Obtaining data...")
    return trainData, testData
# Module-level driver: these statements run whenever the module is executed
# or imported.
# Collect the sorted lists of training and testing CSV paths.
trainFolders,testFolders = obtain_directories()
# Load only the first file of each sorted list; indexing with [0] raises
# IndexError when a folder contains no .csv file.
trainData, testData = obtain_data(trainFolders[0],testFolders[0])