You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
in the tutorial documentation we have a basic implementation that does not use the movie title text features or some other movie feature in listwise ranking., i was trying to implement the same. i tried to use the movie title text as the feature in the listwise ranking model, here is the model definition, here i am also using query features:-
class RankingModel(tfrs.Model):
def __init__(self, loss):
super().__init__()
embedding_dimension = 32
max_tokens = 10_000
# Compute embeddings for users.
self.user_embeddings = tf.keras.Sequential([
tf.keras.layers.StringLookup(
vocabulary=unique_user_ids),
tf.keras.layers.Embedding(len(unique_user_ids) + 2, embedding_dimension)
])
# compute query embeddings
self.query_vectorizer = tf.keras.layers.TextVectorization(
max_tokens=max_tokens)
self.query_text_embedding = tf.keras.Sequential([
self.query_vectorizer,
tf.keras.layers.Embedding(max_tokens, embedding_dimension, mask_zero=True),
tf.keras.layers.GlobalAveragePooling1D(),
])
self.query_vectorizer.adapt(queries)
# Compute embeddings for movies.
self.movie_embeddings = tf.keras.Sequential([
tf.keras.layers.StringLookup(
vocabulary=unique_movie_titles),
tf.keras.layers.Embedding(len(unique_movie_titles) + 2, embedding_dimension)
])
# compute embeddings for movies title text
self.movie_title_text_vectorizer = tf.keras.layers.TextVectorization(
max_tokens=max_tokens)
self.movie_title_text_embedding = tf.keras.Sequential([
#tf.keras.layers.Faltten(),
self.movie_title_text_vectorizer,
tf.keras.layers.Embedding(max_tokens, embedding_dimension, mask_zero=True),
# We average the embedding of individual words to get one embedding vector
# per title.
tf.keras.layers.GlobalAveragePooling1D()
])
self.movie_title_text_vectorizer.adapt(movies)
# Compute predictions.
self.score_model = tf.keras.Sequential([
# Learn multiple dense layers.
tf.keras.layers.Dense(256, activation="relu"),
tf.keras.layers.Dense(64, activation="relu"),
# Make rating predictions in the final layer.
tf.keras.layers.Dense(1)
])
self.task = tfrs.tasks.Ranking(
loss=loss,
metrics=[
tfr.keras.metrics.NDCGMetric(name="ndcg_metric"),
tf.keras.metrics.RootMeanSquaredError()
]
)
def call(self, features):
# We first convert the id features into embeddings.
# User embeddings are a [batch_size, embedding_dim] tensor.
user_embeddings = self.user_embeddings(features["user_id"])
# User embeddings are a [batch_size, embedding_dim] tensor.
query_embeddings = self.query_text_embedding(features["query"])
# Movie embeddings are a [batch_size, num_movies_in_list, embedding_dim]
# tensor.
movie_embeddings = self.movie_embeddings(features["movie_title"])
# movie title text embedding
movie_title_embeddings = self.movie_title_text_embedding(features["movie_title"])
## print the shape of movie embeddings and movie title embedding
print(f"movie embedding shape: {movie_embeddings.shape} & movie title embedding shape: {movie_title_embeddings.shape}")
# We want to concatenate user embeddings with movie emebeddings to pass
# them into the ranking model. To do so, we need to reshape the user
# embeddings to match the shape of movie embeddings.
list_length = features["movie_title"].shape[1]
user_embedding_repeated = tf.repeat(
tf.expand_dims(user_embeddings, 1), [list_length], axis=1)
# reshape the query embedding like the user embedding
query_embedding_repeated = tf.repeat(
tf.expand_dims(query_embeddings, 1), [list_length], axis=1)
## print query repeated and non repeated shape
print(f"user embeddings shape: {user_embeddings.shape} & user embedding repeated shape: {query_embedding_repeated.shape}")
# Once reshaped, we concatenate and pass into the dense layers to generate
# predictions.
concatenated_embeddings = tf.concat(
[user_embedding_repeated, query_embedding_repeated, movie_title_embeddings , movie_embeddings], 2)
return self.score_model(concatenated_embeddings)
def compute_loss(self, features, training=False):
labels = features.pop("user_rating")
scores = self(features)
return self.task(
labels=labels,
predictions=tf.squeeze(scores, axis=-1),
)
here is how my input dataset looks like
{'movie_title': <tf.Tensor: shape=(5,), dtype=string, numpy=
array([b'Monolith by Monoprice THX Certified Satellite Speakers (Pair)',
b'Monolith by Monoprice M-OW1 THX Certified On-Wall Speaker (Pair)',
b'Monolith by Monoprice THX-365C THX Certified Ultra Center Channel Speaker (Each)',
b'Monolith by Monoprice Encore B5 Bookshelf Speakers (Each)',
b'Monolith by Monoprice Encore T6 Tower Speaker (Each)'],
dtype=object)>,
'query': <tf.Tensor: shape=(), dtype=string, numpy=b'Floorstanding and Bookshelf Speakers'>,
'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'uid-1652799522552-93464'>,
'user_rating': <tf.Tensor: shape=(5,), dtype=float64, numpy=array([0.3030303 , 0.15151515, 0.3030303 , 0.15151515, 0.15151515])>}
But once i am trying to train the model, it's says When using TextVectorization to tokenize strings, the input rank must be 1 or the last shape dimension must be 1. Received: inputs.shape=(None, 5) with rank=2. Any help or pointer is much appreciated. thanks.
The text was updated successfully, but these errors were encountered:
in the tutorial documentation we have a basic implementation that does not use the movie title text features or some other movie feature in listwise ranking., i was trying to implement the same. i tried to use the movie title text as the feature in the listwise ranking model, here is the model definition, here i am also using query features:-
here is how my input dataset looks like
But once i am trying to train the model, it's says
When using
TextVectorizationto tokenize strings, the input rank must be 1 or the last shape dimension must be 1. Received: inputs.shape=(None, 5) with rank=2
. Any help or pointer is much appreciated. thanks.The text was updated successfully, but these errors were encountered: