-
Notifications
You must be signed in to change notification settings - Fork 4.8k
/
Copy pathfilereader.py
82 lines (70 loc) · 2.53 KB
/
filereader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
from classes import user
from classes import movie
from numpy import random
from util import min_rating, random_vector, num_users
from random import seed
import pandas as pd
import numpy as np
def read_ratings(filename):
    """Load a ratings CSV and split each user's ratings into train/test sets.

    The file is read without a header row as the columns ``user_id``,
    ``movie_id``, ``rating`` and ``timestamp``; only rows whose ``user_id``
    is below ``num_users()`` are kept.

    First pass: every rating that is at least ``min_rating()`` is stored in
    the owning user's ``movies_train`` when a draw from the module-level
    ``random.random()`` is below 0.7, otherwise in ``movies_test``.  A
    ``user`` object is created, and given a ``random_vector()`` factor, the
    first time one of its ratings passes the threshold.

    Second pass: every kept rating, including those below the threshold, is
    stored in ``movies_all`` for the users created during the first pass.

    Side effect: reseeds both the standard-library and the NumPy global
    random generators with 42 so that the split is reproducible.

    Args:
        filename: Path (or any source accepted by ``pandas.read_csv``) of the
            comma-separated ratings file.

    Returns:
        dict mapping each user id (``int``) to its ``user`` object.  Users
        with no rating at or above the threshold are absent.
    """
    # Fixed seeds: the train/test assignment below must be repeatable.
    seed(42)
    np.random.seed(42)

    r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
    ratings = pd.read_csv(filename, sep=',', names=r_cols, encoding='latin-1')
    ratings['user_id'] = ratings['user_id'].astype(int)
    ratings['movie_id'] = ratings['movie_id'].astype(int)
    ratings['rating'] = ratings['rating'].astype(float)
    ratings = ratings[ratings['user_id'] < num_users()]

    # Materialise the three needed columns once as native Python ints/floats.
    # This avoids building a Series per row (iterrows) in both passes below.
    rows = list(zip(
        ratings['user_id'].tolist(),
        ratings['movie_id'].tolist(),
        ratings['rating'].tolist(),
    ))

    # NOTE(review): assumes min_rating() returns the same value for the whole
    # load, so it is evaluated once instead of once per row -- confirm.
    threshold = min_rating()

    users = {}
    for userid, movieid, rating1 in rows:
        # Written as "not >=" so that a NaN rating is skipped, exactly as the
        # plain ">=" test would skip it.
        if not rating1 >= threshold:
            continue

        # Draw the split BEFORE any user creation so the sequence of random
        # numbers consumed stays stable for a given seed.
        goes_to_train = random.random() < 0.7

        user1 = users.get(userid)
        if user1 is None:
            user1 = user(userid)
            user1.factor = random_vector()
            users[userid] = user1

        if goes_to_train:
            user1.movies_train[movieid] = rating1
        else:
            user1.movies_test[movieid] = rating1

    # A separate pass is required: a user may only be created after some of
    # its low ratings were already seen in the first pass.
    for userid, movieid, rating1 in rows:
        user1 = users.get(userid)
        if user1 is not None:
            user1.movies_all[movieid] = rating1

    return users
def read_movies(filename):
    """Build a ``{movie_id: movie}`` catalogue from a comma-separated file.

    The file is read without a header row as the columns ``movie_id``,
    ``title`` and ``genres``.  One ``movie`` object is created per row and
    given a fresh ``random_vector()`` as its ``factor``.  If a ``movie_id``
    occurs more than once, the last row wins.

    Args:
        filename: Path (or any source accepted by ``pandas.read_csv``) of the
            movies file.

    Returns:
        dict mapping each ``movie_id`` value, as read from the file, to its
        ``movie`` object.
    """
    column_names = ['movie_id', 'title', 'genres']
    frame = pd.read_csv(filename, names=column_names, sep=",", encoding='latin-1')

    catalogue = {}
    for _, record in frame.iterrows():
        key = record['movie_id']
        # Second constructor argument is passed as 0 for every movie; its
        # meaning is defined by the `movie` class, not visible here.
        entry = movie(key, 0)
        entry.factor = random_vector()
        catalogue[key] = entry
    return catalogue