Trying to get lyrics from the Genius API.
Here’s a bit of my code. First I get data on the artists (I need to get their “id”, from what I understood).
<code># Read the CSV file
artists_names <- fread(
  "artistasmayosinespacios.csv",
  sep = ";",
  encoding = "Latin-1"
)
# Drop duplicated rows; names keep their original case (lowercasing is disabled)
artists_names <- unique(artists_names)
#artists_names$name <- tolower(artists_names$name)
# Remove duplicates to minimize API calls
unique_artists <- unique(artists_names$name)

# Search Genius for a single artist.
#
# artist_name: the name to search for (one element of unique_artists).
# Returns the result of search_genius_artist(), or NULL if that call raised
# an error. The error is re-signalled as a warning rather than swallowed, so
# the reason for a failed lookup is not lost.
fetch_artist_data <- function(artist_name) {
  tryCatch(
    search_genius_artist(artist_name),
    error = function(e) {
      warning(
        "Artist search failed for '", artist_name, "': ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Plan for parallel processing
plan(multisession, workers = availableCores())
# Use future_lapply to fetch artist data in parallel
artist_data_list <- future_lapply(unique_artists, fetch_artist_data)
# Keep successful lookups only, so a failure never becomes a placeholder row
artist_data_list <- Filter(Negate(is.null), artist_data_list)
if (length(artist_data_list) == 0L) {
  stop("No artist data could be fetched; check the warnings.", call. = FALSE)
}
# Combine the results into a data.table
combined_artist_data <- rbindlist(
  lapply(artist_data_list, as.data.table),
  fill = TRUE
)
# Merge with original data to get correct artists
merged_data <- merge(
  combined_artist_data,
  artists_names,
  by = "name",
  all.y = TRUE
)
# Select only the required columns
final_data <- unique(merged_data[, .(id, name)])
# Drop rows where id or name is missing (e.g. names with no matching result)
final_data <- final_data[complete.cases(final_data), ]
</code>
<code># Read the CSV file
artists_names <- fread(
  "artistasmayosinespacios.csv",
  sep = ";",
  encoding = "Latin-1"
)
# Drop duplicated rows; names keep their original case (lowercasing is disabled)
artists_names <- unique(artists_names)
#artists_names$name <- tolower(artists_names$name)
# Remove duplicates to minimize API calls
unique_artists <- unique(artists_names$name)

# Search Genius for a single artist.
#
# artist_name: the name to search for (one element of unique_artists).
# Returns the result of search_genius_artist(), or NULL if that call raised
# an error. The error is re-signalled as a warning rather than swallowed, so
# the reason for a failed lookup is not lost.
fetch_artist_data <- function(artist_name) {
  tryCatch(
    search_genius_artist(artist_name),
    error = function(e) {
      warning(
        "Artist search failed for '", artist_name, "': ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Plan for parallel processing
plan(multisession, workers = availableCores())
# Use future_lapply to fetch artist data in parallel
artist_data_list <- future_lapply(unique_artists, fetch_artist_data)
# Keep successful lookups only, so a failure never becomes a placeholder row
artist_data_list <- Filter(Negate(is.null), artist_data_list)
if (length(artist_data_list) == 0L) {
  stop("No artist data could be fetched; check the warnings.", call. = FALSE)
}
# Combine the results into a data.table
combined_artist_data <- rbindlist(
  lapply(artist_data_list, as.data.table),
  fill = TRUE
)
# Merge with original data to get correct artists
merged_data <- merge(
  combined_artist_data,
  artists_names,
  by = "name",
  all.y = TRUE
)
# Select only the required columns
final_data <- unique(merged_data[, .(id, name)])
# Drop rows where id or name is missing (e.g. names with no matching result)
final_data <- final_data[complete.cases(final_data), ]
</code>
# Read the CSV file
artists_names <- fread(
  "artistasmayosinespacios.csv",
  sep = ";",
  encoding = "Latin-1"
)
# Drop duplicated rows; names keep their original case (lowercasing is disabled)
artists_names <- unique(artists_names)
#artists_names$name <- tolower(artists_names$name)
# Remove duplicates to minimize API calls
unique_artists <- unique(artists_names$name)

# Search Genius for a single artist.
#
# artist_name: the name to search for (one element of unique_artists).
# Returns the result of search_genius_artist(), or NULL if that call raised
# an error. The error is re-signalled as a warning rather than swallowed, so
# the reason for a failed lookup is not lost.
fetch_artist_data <- function(artist_name) {
  tryCatch(
    search_genius_artist(artist_name),
    error = function(e) {
      warning(
        "Artist search failed for '", artist_name, "': ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Plan for parallel processing
plan(multisession, workers = availableCores())
# Use future_lapply to fetch artist data in parallel
artist_data_list <- future_lapply(unique_artists, fetch_artist_data)
# Keep successful lookups only, so a failure never becomes a placeholder row
artist_data_list <- Filter(Negate(is.null), artist_data_list)
if (length(artist_data_list) == 0L) {
  stop("No artist data could be fetched; check the warnings.", call. = FALSE)
}
# Combine the results into a data.table
combined_artist_data <- rbindlist(
  lapply(artist_data_list, as.data.table),
  fill = TRUE
)
# Merge with original data to get correct artists
merged_data <- merge(
  combined_artist_data,
  artists_names,
  by = "name",
  all.y = TRUE
)
# Select only the required columns
final_data <- unique(merged_data[, .(id, name)])
# Drop rows where id or name is missing (e.g. names with no matching result)
final_data <- final_data[complete.cases(final_data), ]
The CSV file is just a collection of artists’ names; think of it like:
<code>artists_names <- c("Taylor Swift", "One Republic", "Imagine Dragons")  # example names only; the script reads them from the CSV's `name` column
</code>
<code>artists_names <- c("Taylor Swift", "One Republic", "Imagine Dragons")  # example names only; the script reads them from the CSV's `name` column
</code>
# Example input: a plain character vector holding three artist names
artists_names <- c(
  "Taylor Swift",
  "One Republic",
  "Imagine Dragons"
)
This bit of code works fine, but now I need to get the lyrics of as many songs from each of these artists as possible.
For that, I have
<code># Define a function to fetch song data for each artist
# Fetch the songs of a single artist.
#
# artist_id: one Genius artist id (an element of final_data$id).
# Returns the result of search_genius_songs(), or NULL if that call raised an
# error. The error is re-signalled as a warning rather than swallowed, so the
# reason for a failed fetch is not lost.
fetch_songs <- function(artist_id) {
  tryCatch(
    search_genius_songs(artist_id),
    error = function(e) {
      warning(
        "Song search failed for artist id ", artist_id, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Fetch songs for each artist in parallel
songs_list <- future_lapply(final_data$id, fetch_songs)
# Keep successful fetches only, so a failure never becomes a placeholder row
songs_list <- Filter(Negate(is.null), songs_list)
# Combine the song data into a data.table
combined_songs_data <- rbindlist(lapply(songs_list, as.data.table), fill = TRUE)
# Save songs data
write.csv(combined_songs_data, "songs_data.csv")

# Fetch the lyrics of a single song.
#
# song_id: one Genius song id (an element of the `id` column of the songs
# table). Returns the result of get_genius_lyrics(), or NULL (with a warning)
# if that call raised an error.
fetch_lyrics <- function(song_id) {
  tryCatch(
    get_genius_lyrics(song_id),
    error = function(e) {
      warning(
        "Lyrics fetch failed for song id ", song_id, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Without an `id` column the lyrics step has nothing to iterate over; say so
# explicitly instead of silently producing an empty result.
if (!"id" %in% names(combined_songs_data)) {
  warning("No song ids were retrieved; no lyrics will be fetched.", call. = FALSE)
}
# Fetch lyrics for each song in parallel ([[ ]] avoids partial name matching)
lyrics_list <- future_lapply(combined_songs_data[["id"]], fetch_lyrics)
lyrics_list <- Filter(Negate(is.null), lyrics_list)
# Combine the lyrics data into a data.table
combined_lyrics_data <- rbindlist(
  lapply(lyrics_list, as.data.table),
  fill = TRUE
)
</code>
<code># Define a function to fetch song data for each artist
# Fetch the songs of a single artist.
#
# artist_id: one Genius artist id (an element of final_data$id).
# Returns the result of search_genius_songs(), or NULL if that call raised an
# error. The error is re-signalled as a warning rather than swallowed, so the
# reason for a failed fetch is not lost.
fetch_songs <- function(artist_id) {
  tryCatch(
    search_genius_songs(artist_id),
    error = function(e) {
      warning(
        "Song search failed for artist id ", artist_id, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Fetch songs for each artist in parallel
songs_list <- future_lapply(final_data$id, fetch_songs)
# Keep successful fetches only, so a failure never becomes a placeholder row
songs_list <- Filter(Negate(is.null), songs_list)
# Combine the song data into a data.table
combined_songs_data <- rbindlist(lapply(songs_list, as.data.table), fill = TRUE)
# Save songs data
write.csv(combined_songs_data, "songs_data.csv")

# Fetch the lyrics of a single song.
#
# song_id: one Genius song id (an element of the `id` column of the songs
# table). Returns the result of get_genius_lyrics(), or NULL (with a warning)
# if that call raised an error.
fetch_lyrics <- function(song_id) {
  tryCatch(
    get_genius_lyrics(song_id),
    error = function(e) {
      warning(
        "Lyrics fetch failed for song id ", song_id, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Without an `id` column the lyrics step has nothing to iterate over; say so
# explicitly instead of silently producing an empty result.
if (!"id" %in% names(combined_songs_data)) {
  warning("No song ids were retrieved; no lyrics will be fetched.", call. = FALSE)
}
# Fetch lyrics for each song in parallel ([[ ]] avoids partial name matching)
lyrics_list <- future_lapply(combined_songs_data[["id"]], fetch_lyrics)
lyrics_list <- Filter(Negate(is.null), lyrics_list)
# Combine the lyrics data into a data.table
combined_lyrics_data <- rbindlist(
  lapply(lyrics_list, as.data.table),
  fill = TRUE
)
</code>
# Fetch the songs of a single artist.
#
# artist_id: one Genius artist id (an element of final_data$id).
# Returns the result of search_genius_songs(), or NULL if that call raised an
# error. The error is re-signalled as a warning rather than swallowed, so the
# reason for a failed fetch is not lost.
fetch_songs <- function(artist_id) {
  tryCatch(
    search_genius_songs(artist_id),
    error = function(e) {
      warning(
        "Song search failed for artist id ", artist_id, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Fetch songs for each artist in parallel
songs_list <- future_lapply(final_data$id, fetch_songs)
# Keep successful fetches only, so a failure never becomes a placeholder row
songs_list <- Filter(Negate(is.null), songs_list)
# Combine the song data into a data.table
combined_songs_data <- rbindlist(lapply(songs_list, as.data.table), fill = TRUE)
# Save songs data
write.csv(combined_songs_data, "songs_data.csv")

# Fetch the lyrics of a single song.
#
# song_id: one Genius song id (an element of the `id` column of the songs
# table). Returns the result of get_genius_lyrics(), or NULL (with a warning)
# if that call raised an error.
fetch_lyrics <- function(song_id) {
  tryCatch(
    get_genius_lyrics(song_id),
    error = function(e) {
      warning(
        "Lyrics fetch failed for song id ", song_id, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Without an `id` column the lyrics step has nothing to iterate over; say so
# explicitly instead of silently producing an empty result.
if (!"id" %in% names(combined_songs_data)) {
  warning("No song ids were retrieved; no lyrics will be fetched.", call. = FALSE)
}
# Fetch lyrics for each song in parallel ([[ ]] avoids partial name matching)
lyrics_list <- future_lapply(combined_songs_data[["id"]], fetch_lyrics)
lyrics_list <- Filter(Negate(is.null), lyrics_list)
# Combine the lyrics data into a data.table
combined_lyrics_data <- rbindlist(
  lapply(lyrics_list, as.data.table),
  fill = TRUE
)
But this returns all nulls…
Any ideas are greatly appreciated!