I'm building a recommender system based on a Netflix dataset, and I seem to be having trouble with my recommender function.
It's taking a long time to run, and it might just time out on me.
So far this is the code I have:
<code>from multiprocessing import Pool

import pandas as pd

# Note: svd (a trained model with a .predict(user, item) method, e.g. Surprise's SVD),
# movies_df, and train_df are defined earlier in my notebook.

# Function to get collaborative recommendations for a user
def get_collaborative_recommendations(user_id, svd, movies_df, ratings_df, n=10):
    recommendations = []
    # Movies this user has already rated
    rated_movie_ids = ratings_df[ratings_df['userId'] == user_id]['movieId'].unique()
    # Candidate movies: everything the user has not rated yet
    movie_ids = [m for m in movies_df['movieId'].unique() if m not in rated_movie_ids]
    # Predict a rating for every candidate movie
    for movie_id in movie_ids:
        prediction = svd.predict(user_id, movie_id)
        recommendations.append((movie_id, prediction.est))
    # Sort by predicted rating, highest first, and keep the top n
    recommendations = sorted(recommendations, key=lambda x: x[1], reverse=True)
    top_recommendations = recommendations[:n]
    # Look up the title for each recommended movie
    recommendations_info = [
        (movies_df[movies_df['movieId'] == movie_id]['title'].values[0], est_rating)
        for movie_id, est_rating in top_recommendations
    ]
    return recommendations_info

# Function to generate recommendations for a given user (for parallel processing)
def generate_user_recommendations(user_id):
    recommendations = get_collaborative_recommendations(user_id, svd, movies_df, train_df)
    user_recommendations = []
    for title, predicted_rating in recommendations:
        user_recommendations.append({
            'userId': user_id,
            'Recommended Movie': title,
            'Predicted_Rating': predicted_rating
        })
    return user_recommendations

# Get unique user IDs from the train dataset
user_ids = train_df['userId'].unique()

# Use parallel processing to generate recommendations for all users
with Pool() as pool:
    all_recommendations = pool.map(generate_user_recommendations, user_ids)

# Flatten the list of lists
all_recommendations = [rec for sublist in all_recommendations for rec in sublist]

# Convert the list of recommendations to a DataFrame
recommendations_df = pd.DataFrame(all_recommendations)

# Display the recommendations DataFrame
print(recommendations_df.head())
</code>
The function itself is taking a long time to run, so I'm not sure whether the problem is my code or just the fact that I have about 5 million data points.
I'm still new to this; this is my first real-world project.
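To put a rough number on it, here is a minimal sketch of how the runtime could be measured for a single user and extrapolated to all users (it assumes the code above has already run, so svd, movies_df, train_df, and user_ids all exist):
<code>import time

# Time recommendations for one user, then extrapolate to every user
start = time.time()
get_collaborative_recommendations(user_ids[0], svd, movies_df, train_df)
elapsed = time.time() - start
print(f"One user took {elapsed:.2f}s; all {len(user_ids)} users "
      f"would take roughly {elapsed * len(user_ids) / 3600:.1f}h if run serially")
</code>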