Skip to content

Commit 6cce29f

Browse files
authored
Add py files
1 parent ef24180 commit 6cce29f

File tree

5 files changed

+342
-0
lines changed

5 files changed

+342
-0
lines changed

bpr.py

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# In[ ]:
5+
6+
7+
from util import m_normal, learning_rate, get_lambda
8+
from classes import ret
9+
import random as random
10+
import numpy as np
11+
import math
12+
def bpr_update(users, movies):
    """Run one stochastic BPR pass and update latent factors in place.

    For every user with at least one training movie, one positive movie is
    drawn from the user's training set and one candidate negative is drawn
    from the whole catalogue. When the candidate is not in the user's
    training set, the user factor and both item factors are moved by the
    logistic-weighted gradient step below. When the candidate is a training
    movie, the user is skipped for this pass (there is no resampling).

    Args:
        users: mapping of key -> object exposing ``movies_train`` (a mapping
            keyed by movie id) and ``factor``.
        movies: mapping of movie id -> object exposing ``factor``.

    Returns:
        None. The ``factor`` attributes of the touched user and movie
        objects are reassigned.
    """
    lr = learning_rate()
    lam = get_lambda()

    # random.sample() only accepts sequences: passing a dict view is
    # deprecated since Python 3.9 and raises TypeError from 3.11 on, so
    # sample from lists. The catalogue keys never change during the pass,
    # so that list is built once.
    catalogue_ids = list(movies)

    for user_key in users:
        profile = users[user_key]
        Vu = profile.factor
        if not profile.movies_train:
            continue

        rand_pos = random.sample(list(profile.movies_train), 1)[0]
        rand_neg = random.sample(catalogue_ids, 1)[0]
        if rand_neg in profile.movies_train:
            # The sampled "negative" is actually a positive: skip this user.
            continue

        Vi = movies[rand_pos].factor
        Vj = movies[rand_neg].factor
        firstterm = calculate_first_term(Vu, Vi, Vj)

        # NOTE(review): the regulariser below adds the scalar lam * ||V|| to
        # every component with a positive sign. A conventional L2 penalty
        # would contribute -lam * V instead. Kept as written - confirm the
        # intent before changing the training dynamics.

        # User factor.
        Vu = Vu + lr * (firstterm * (Vi - Vj) + lam * np.linalg.norm(Vu))
        users[user_key].factor = Vu

        # Positive item factor (uses the already-updated user factor, as the
        # original statement order did).
        Vi = Vi + lr * (firstterm * Vu + lam * np.linalg.norm(Vi))
        movies[rand_pos].factor = Vi

        # Negative item factor.
        Vj = Vj + lr * (firstterm * (-1 * Vu) + lam * np.linalg.norm(Vj))
        movies[rand_neg].factor = Vj
49+
50+
def calculate_first_term(Vu, Vi, Vj):
    """Return exp(-x) / (1 + exp(-x)) where x = Vu.Vi - Vu.Vj.

    The exponent ``-x`` is capped at 500 before exponentiation so that
    ``math.exp`` is never asked for a value it cannot represent.

    Args:
        Vu: user factor vector.
        Vi: factor vector of the positive item.
        Vj: factor vector of the negative item.

    Returns:
        The logistic weight as a float.
    """
    score_gap = np.dot(Vu, Vi) - np.dot(Vu, Vj)
    exponent = min(score_gap * -1, 500)
    grown = math.exp(exponent)
    return grown / (1 + grown)
60+

classes.py

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# In[ ]:
5+
6+
7+
from util import random_vector
8+
class user:
    """A user record: id, three rating maps and a latent factor."""

    def __init__(self, userid):
        """Create a user with empty rating maps and a fresh random factor.

        Args:
            userid: identifier stored on the instance as ``userid``.
        """
        self.userid = userid
        # Three independent, initially empty maps; callers in this project
        # fill them as movie id -> rating.
        self.movies_train, self.movies_test, self.movies_all = {}, {}, {}
        # Initial latent factor drawn by util.random_vector().
        self.factor = random_vector()
15+
16+
class movie:
    """A movie record: id, metadata and a latent factor."""

    def __init__(self, movieid, rating=0, title=None, genres=None):
        """Store the given metadata and draw a fresh random factor.

        Args:
            movieid: identifier stored on the instance as ``movieid``.
            rating: stored as ``rating``; defaults to 0.
            title: stored as ``title``; defaults to None.
            genres: stored as ``genres``; defaults to None.
        """
        self.movieid, self.rating = movieid, rating
        self.title, self.genres = title, genres
        # Initial latent factor drawn by util.random_vector().
        self.factor = random_vector()
23+
24+
class ret:
    """Plain mutable holder with ``userid``, ``movieid``, ``isuser`` and
    ``retvalue`` fields."""

    def __init__(self):
        """Initialise ids to None, ``isuser`` to True and ``retvalue`` to a
        new empty list."""
        self.userid = self.movieid = None
        self.isuser = True
        # A fresh list per instance, so holders never share one.
        self.retvalue = list()
30+
31+
class usermovie:
    """Plain mutable holder for one (user, movie, rating) triple."""

    def __init__(self):
        """Initialise both ids to None and ``rating`` to 0."""
        self.userid = self.movieid = None
        self.rating = 0
36+

filereader.py

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# In[ ]:
5+
6+
7+
from classes import user
8+
from classes import movie
9+
from numpy import random
10+
from util import min_rating, random_vector, num_users
11+
from random import seed
12+
import pandas as pd
13+
import numpy as np
14+
def read_ratings(filename):
    """Load a ratings CSV and build per-user train/test/all rating maps.

    Both the stdlib and the NumPy random generators are seeded with 42
    first. The file is read with explicit column names (user_id, movie_id,
    rating, timestamp), so its first line is treated as data. Only rows
    whose user id is below ``num_users()`` are kept.

    First pass: each rating that is at least ``min_rating()`` goes into the
    user's ``movies_train`` map when a NumPy uniform draw is below 0.7, and
    into ``movies_test`` otherwise. A user object is created the first time
    one of their ratings qualifies.

    Second pass: every kept rating, whatever its value, is recorded in
    ``movies_all`` for users that were created in the first pass.

    Args:
        filename: path or buffer accepted by ``pandas.read_csv``.

    Returns:
        dict mapping int user id -> ``user`` object.
    """
    seed(42)
    np.random.seed(42)

    columns = ['user_id', 'movie_id', 'rating', 'timestamp']
    ratings = pd.read_csv(filename, sep=',', names=columns, encoding='latin-1')
    for column, caster in (('user_id', int), ('movie_id', int), ('rating', float)):
        ratings[column] = ratings[column].astype(caster)

    ratings = ratings[ratings['user_id'] < num_users()]

    users = {}
    threshold = min_rating()

    for _, record in ratings.iterrows():
        uid = int(record['user_id'])
        mid = int(record['movie_id'])
        score = float(record['rating'])
        if not score >= threshold:
            continue

        # The split draw must come before any factor draw so the sequence of
        # random numbers consumed is unchanged.
        goes_to_train = random.random() < 0.7

        profile = users.get(uid)
        if profile is None:
            profile = user(uid)
            # Redrawn on purpose: the constructor has already drawn one
            # factor, and this second draw keeps the random stream identical.
            profile.factor = random_vector()
            users[uid] = profile

        target = profile.movies_train if goes_to_train else profile.movies_test
        target[mid] = score

    for _, record in ratings.iterrows():
        profile = users.get(int(record['user_id']))
        if profile is not None:
            profile.movies_all[int(record['movie_id'])] = float(record['rating'])

    return users
70+
71+
def read_movies(filename):
    """Load a movies CSV and return a movie-id -> ``movie`` mapping.

    The file is read with explicit column names (movie_id, title, genres),
    so its first line is treated as data. Only the id is carried onto each
    ``movie`` object; its rating is set to 0 and title/genres keep their
    constructor defaults.

    Args:
        filename: path or buffer accepted by ``pandas.read_csv``.

    Returns:
        dict keyed by the ``movie_id`` value exactly as read from each row.
    """
    columns = ['movie_id', 'title', 'genres']
    frame = pd.read_csv(filename, sep=",", encoding='latin-1', names=columns)

    catalogue = {}
    for _, record in frame.iterrows():
        key = record['movie_id']
        entry = movie(key, 0)
        # Redrawn after construction, exactly as before, so the random
        # stream consumed per movie is unchanged.
        entry.factor = random_vector()
        catalogue[key] = entry

    return catalogue
82+

hitrate.py

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# In[ ]:
5+
6+
7+
from classes import usermovie
8+
import numpy as np
9+
from sklearn.metrics import mean_squared_error
10+
from math import sqrt
11+
12+
def _rescale_to_ratings(scores, top=5):
    """Min-max rescale ``scores`` onto the interval [0, top].

    Raises ValueError when ``scores`` is empty (from ``min``).
    """
    lowest = min(scores)
    # Subtract the minimum so the smallest score maps to 0. The previous
    # code added it, which does not produce a [0, top] range.
    shifted = [s - lowest for s in scores]
    # Computed once; it used to be recomputed for every element.
    peak = max(shifted)
    if peak == 0:
        # All scores are equal: there is no spread to scale, so map to 0
        # rather than dividing by zero.
        return [0.0 for _ in shifted]
    return [s / peak * top for s in shifted]


def hit_rate(users, movies):
    """Evaluate the factor model: top-10 hit count and two RMSE figures.

    Every movie is scored for every user as the dot product of the user and
    movie factors. A user counts as a hit when at least one of their 10
    highest-scoring movies is in their ``movies_test`` map. Scores for
    movies present in ``movies_test`` / ``movies_all`` are collected, each
    collection is min-max rescaled onto [0, 5], and compared with the stored
    ratings.

    Args:
        users: mapping of key -> object with ``userid``, ``factor``,
            ``movies_test`` and ``movies_all``.
        movies: mapping of key -> object with ``movieid`` and ``factor``.

    Returns:
        Tuple ``(hits, denom, rms, rmsall)``: number of users with a hit,
        number of users evaluated, RMSE over test ratings, RMSE over all
        ratings.

    Raises:
        ValueError: if no test (or no overall) predictions were collected.
    """
    hits = 0
    denom = 0
    actual, predicted = [], []
    actualall, predictedall = [], []

    for user_key in users:
        u = users[user_key]
        userid = u.userid
        if userid not in users:
            continue
        denom += 1
        owner = users[userid]
        ufactor = owner.factor

        scored = []
        for movie_key in movies:
            m = movies[movie_key]
            mid = m.movieid
            score = float(np.dot(ufactor, m.factor))

            if mid in u.movies_all:
                actualall.append(u.movies_all[mid])
                predictedall.append(score)
            if mid in u.movies_test:
                actual.append(u.movies_test[mid])
                predicted.append(score)

            scored.append((score, mid))

        # Stable descending sort on the score only, so ties keep catalogue
        # order exactly as before.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        if any(mid in owner.movies_test for _, mid in scored[:10]):
            hits += 1

    predicted = _rescale_to_ratings(predicted)
    predictedall = _rescale_to_ratings(predictedall)

    rms = sqrt(mean_squared_error(actual, predicted))
    rmsall = sqrt(mean_squared_error(actualall, predictedall))

    return hits, denom, rms, rmsall
75+
76+
def _test_rating_key(movie_id, ratings):
    """Return the key (str form first, then int form) under which
    ``movie_id`` is present in ``ratings``, or None when neither is."""
    for candidate in (str(movie_id), int(movie_id)):
        if candidate in ratings:
            return candidate
    return None


def hit_rate_SVD(users, movies, svd):
    """Evaluate an SVD predictor: top-10 hit count and two RMSE figures.

    Every movie is scored for every user with
    ``svd.predict(int(userid), int(movieid))[3]``. A user counts as a hit
    when at least one of their 10 highest-scoring movies is in their
    ``movies_test`` map, matched under either the str or the int form of
    the movie id. Predictions are compared with stored ratings without any
    rescaling.

    Args:
        users: mapping of key -> object with ``userid``, ``movies_test``
            and ``movies_all``.
        movies: mapping of key -> object with ``movieid``.
        svd: object whose ``predict(uid, iid)`` result is indexable; element
            3 is used as the estimate (presumably a Surprise-style
            ``Prediction`` - confirm against the caller).

    Returns:
        Tuple ``(hits, denom, rms, rmsall)``: number of users with a hit,
        number of users evaluated, RMSE over test ratings, RMSE over all
        ratings.
    """
    hits = 0
    denom = 0
    actual, predicted = [], []
    actualall, predictedall = [], []

    for user_key in users:
        u = users[user_key]
        userid = u.userid
        if userid not in users:
            continue
        denom += 1
        owner = users[userid]

        scored = []
        for movie_key in movies:
            m = movies[movie_key]
            mid = m.movieid
            estimate = float(svd.predict(int(userid), int(mid))[3])

            if mid in u.movies_all:
                actualall.append(u.movies_all[mid])
                predictedall.append(estimate)

            # Look the rating up under the key that actually matched. The
            # previous code tested str/int forms but indexed with the raw
            # id, raising KeyError when only the converted form was present.
            test_key = _test_rating_key(mid, u.movies_test)
            if test_key is not None:
                actual.append(u.movies_test[test_key])
                predicted.append(estimate)

            scored.append((estimate, mid))

        # Stable descending sort on the estimate only, so ties keep
        # catalogue order exactly as before.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        if any(
            _test_rating_key(mid, owner.movies_test) is not None
            for _, mid in scored[:10]
        ):
            hits += 1

    rms = sqrt(mean_squared_error(actual, predicted))
    rmsall = sqrt(mean_squared_error(actualall, predictedall))

    return hits, denom, rms, rmsall
124+

util.py

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
# In[ ]:
5+
6+
7+
from numpy import random
8+
import numpy as np
9+
10+
def num_users():
    """Return the exclusive upper bound on user ids to load (one million)."""
    user_id_limit = 10 ** 6
    return user_id_limit
12+
13+
def dimension():
    """Return the length of the latent factor vectors (50)."""
    latent_size = 50
    return latent_size
15+
16+
def min_rating():
    """Return the lowest rating (4) that the ratings loader sorts into a
    user's train/test maps."""
    threshold = 4
    return threshold
18+
19+
def learning_rate():
    """Return the step size (1) used by the BPR factor updates."""
    step_size = 1
    return step_size
21+
22+
def get_lambda():
    """Return the regularisation weight (0.1) used by the BPR updates."""
    reg_weight = 0.1
    return reg_weight
24+
25+
def random_vector():
    """Draw one vector of length ``dimension()`` from a zero-mean
    multivariate normal with covariance ``cov_matrix()``."""
    zero_mean = np.zeros(dimension())
    return random.multivariate_normal(zero_mean, cov_matrix())
29+
30+
def cov_matrix():
    """Return a ``dimension()`` x ``dimension()`` float matrix with 0.1 on
    the diagonal and 0 elsewhere."""
    size = dimension()
    cov = np.zeros((size, size), dtype=float)
    np.fill_diagonal(cov, 0.1)
    return cov
36+
37+
def m_normal(mean):
    """Draw one sample from a multivariate normal centred on ``mean`` with
    covariance ``cov_matrix()``.

    Args:
        mean: mean vector passed straight to
            ``numpy.random.multivariate_normal``.
    """
    return random.multivariate_normal(mean=mean, cov=cov_matrix())
40+

0 commit comments

Comments
 (0)