I followed the tutorial on the TensorFlow Recommenders website, and when I adapt it to my own dataset the accuracy suddenly becomes extremely low: roughly 0.27 on their dataset versus 0.0027 on mine.
At some point I had managed to adapt it correctly and was getting similar accuracy on my dataset, but somehow I broke the code and now I can't get it back.
This is the code that adapts the dataset:
<code>import numpy as np
import pandas as pd
import tensorflow as tf

movies_metadata = pd.read_csv('/content/drive/My Drive/movies_metadata.csv')
ratings = pd.read_csv('/content/drive/My Drive/ratings.csv')

# Keep only the needed columns and align the column names between the two files.
ratings = ratings[['userId', 'movieId', 'timestamp']].rename(
    columns={'movieId': 'movie_id', 'userId': 'user_id'})
movies_metadata = movies_metadata[['id', 'title']].rename(
    columns={'id': 'movie_id', 'title': 'movie_title'})

# Cast both id columns to str so the merge key has a consistent dtype.
ratings['movie_id'] = ratings['movie_id'].astype(str)
movies_metadata['movie_id'] = movies_metadata['movie_id'].astype(str)

combined_dataset = pd.merge(ratings, movies_metadata, on='movie_id', how='inner')
combined_dataset = combined_dataset.sample(100_000, random_state=1)

combined_dataset_tf = tf.data.Dataset.from_tensor_slices({
    'user_id': combined_dataset['user_id'].astype(str).values,
    'movie_title': combined_dataset['movie_title'].astype(str).values,
    'timestamp': combined_dataset['timestamp'].values,
})

movies = combined_dataset_tf.map(lambda x: x["movie_title"])

unique_user_ids = np.unique(np.concatenate(list(
    combined_dataset_tf.batch(1_000).map(lambda x: x["user_id"]))))
unique_movie_titles = np.unique(np.concatenate(list(
    movies.batch(1_000).map(lambda x: x))))
</code>
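In case it helps, this is roughly how I sanity-check the adapted data (just an inspection sketch using the variables above, not part of the tutorial; note that <code>movies</code> is built from the rating rows, so it contains one title per rating rather than one per unique movie):
<code># Rough sanity check of the adapted data (assumes the variables defined above).
print("sampled ratings:", len(combined_dataset))
print("unique users:", len(unique_user_ids))
print("unique movie titles:", len(unique_movie_titles))

# `movies` is derived from the rating rows, so titles repeat once per rating.
for example in combined_dataset_tf.take(1):
    print({k: v.numpy() for k, v in example.items()})
</code>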
Next, this is the adapted movie model:
<code>from typing import Dict, Text

class MovieModel(tf.keras.Model):

  def __init__(self):
    super().__init__()

    max_tokens = 10_000

    # Embedding of the whole title as a single categorical feature.
    self.title_embedding = tf.keras.Sequential([
        tf.keras.layers.StringLookup(
            vocabulary=unique_movie_titles, mask_token=None),
        tf.keras.layers.Embedding(len(unique_movie_titles) + 1, 32)
    ])

    # Embedding of the title text, averaged over its tokens.
    self.title_vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=max_tokens)

    self.title_text_embedding = tf.keras.Sequential([
        self.title_vectorizer,
        tf.keras.layers.Embedding(max_tokens, 32, mask_zero=True),
        tf.keras.layers.GlobalAveragePooling1D(),
    ])

    self.title_vectorizer.adapt(movies)

  def call(self, inputs):
    print(inputs.dtype)
    return tf.concat([
        self.title_embedding(inputs),
        self.title_text_embedding(inputs),
    ], axis=1)
</code>
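For context, the movie model is called directly on raw title strings; a minimal usage sketch looks like this (the titles below are just placeholders, and the output should be 64-dimensional since the two 32-dim branches are concatenated):
<code># Minimal usage sketch (assumes unique_movie_titles and movies from above exist).
movie_model = MovieModel()
sample_titles = tf.constant(["Toy Story", "Jumanji"])  # placeholder titles
embeddings = movie_model(sample_titles)
print(embeddings.shape)  # expected: (2, 64)
</code>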
And this is the full retrieval model:
<code>class MovielensModel(tfrs.models.Model):

  def __init__(self, layer_sizes):
    super().__init__()
    self.query_model = QueryModel(layer_sizes)
    self.candidate_model = CandidateModel(layer_sizes)
    self.task = tfrs.tasks.Retrieval(
        metrics=tfrs.metrics.FactorizedTopK(
            candidates=movies.batch(128).map(self.candidate_model),
        ),
    )

  def compute_loss(self, features, training=False):
    # We only pass the user id and timestamp features into the query model. This
    # is to ensure that the training inputs would have the same keys as the
    # query inputs. Otherwise the discrepancy in input structure would cause an
    # error when loading the query model after saving it.
    query_embeddings = self.query_model({
        "user_id": features["user_id"],
        "timestamp": features["timestamp"],
    })
    movie_embeddings = self.candidate_model(features["movie_title"])

    return self.task(
        query_embeddings, movie_embeddings)
</code>
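The training step follows the tutorial; it is roughly like this (a sketch only: QueryModel and CandidateModel are defined as in the tutorial, and the split sizes and hyperparameters here are placeholders, not necessarily the exact values I used):
<code># Training sketch (assumes QueryModel/CandidateModel as in the TFRS tutorial;
# split sizes and hyperparameters below are placeholders).
tf.random.set_seed(42)
shuffled = combined_dataset_tf.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

model = MovielensModel(layer_sizes=[32])
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

model.fit(
    train.batch(2_048).cache(),
    validation_data=test.batch(4_096).cache(),
    epochs=3,
)
</code>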
The attached picture shows the validation results with the low accuracy. Thanks in advance for any responses!