I have a data set with Likert-scale answers. I want to create a new data frame that contains, for each country, the percentage of answers that fall into each Likert category. To do this, I wrote the following code:
# dplyr supplies mutate()/across()/everything() and re-exports tibble()
# and the %>% pipe, so it is the only package this script needs.
library(dplyr)

# Fix the RNG seed so the simulated answers are identical on every run.
set.seed(123)

# Likert labels, ordered from most negative to most positive. The order
# defines the factor levels of every question column below.
likert_levels <- c(
  "Very Dissatisfied",
  "Dissatisfied",
  "Average",
  "Satisfied",
  "Very Satisfied"
)

# Simulated survey of 10 respondents: five question columns (q1..q5) stored
# as factors over `likert_levels`, preceded by a character `Country` column.
# q2 is weighted towards the negative labels, q3/q4 towards the positive
# ones, and q5 may contain missing answers (NA).
df <- tibble(
  q1 = sample(likert_levels, 10, replace = TRUE),
  q2 = sample(likert_levels, 10, replace = TRUE, prob = 5:1),
  q3 = sample(likert_levels, 10, replace = TRUE, prob = 1:5),
  q4 = sample(likert_levels, 10, replace = TRUE, prob = 1:5),
  q5 = sample(c(likert_levels, NA), 10, replace = TRUE)
) %>%
  # Convert the answers first, while every column is still a question.
  mutate(across(everything(), ~ factor(.x, levels = likert_levels))) %>%
  mutate(
    Country = c(
      "USA", "BRAZIL", "BRAZIL", "BRAZIL", "USA",
      "GERMANY", "ITALY", "GERMANY", "BRAZIL", "USA"
    ),
    .before = q1
  )
# Count, for every respondent (row), how many answers fall into each Likert
# category. Adds integer columns O_1..O_5 (one per entry of `likert_levels`,
# in order) plus O_sum, the number of answers that matched any category.
# Missing answers (NA) match no category and are therefore not counted.

# Only the question columns are inspected; `Country` is not an answer.
question_cols <- setdiff(names(df), "Country")

# Convert once to a character matrix so each category can be counted with a
# vectorised comparison instead of a row-by-row apply() over the data frame.
answers <- as.matrix(df[question_cols])

count_cols <- paste0("O_", seq_along(likert_levels))
for (k in seq_along(likert_levels)) {
  df[[count_cols[k]]] <- as.integer(
    rowSums(answers == likert_levels[k], na.rm = TRUE)
  )
}
df$O_sum <- as.integer(rowSums(df[count_cols]))

# Keep only the country and the counts, selected by name rather than by
# position so the result does not depend on how many questions there are.
df <- df[c("Country", count_cols, "O_sum")]
# Per-country percentage of answers in each Likert category.
#
# Input:  `df` with columns Country, O_1..O_5 (per-respondent counts of each
#         category) and O_sum (answers given by that respondent).
# Output: `Likert_df` (and its alias `DF`), a base data.frame with one row
#         per country and one percentage column per category.
#
# NOTE(review): the percentages are computed in two stages - first within
# each (Country, O_sum) subgroup, then averaged with equal weight across the
# subgroups of a country. When respondents of one country answered different
# numbers of questions this differs from the pooled share
# sum(O_k) / sum(O_sum); the two-stage result is kept here so the numbers
# stay the same - confirm which one is intended.
count_cols <- paste0("O_", 1:5)

Likert_df <- df %>%
  # A respondent without any answer would give 0 / 0 = NaN below and turn
  # the whole country row into NaN, so such rows are left out.
  filter(O_sum > 0) %>%
  group_by(Country, O_sum) %>%
  summarise(
    # O_sum is constant inside a group, so n() * O_sum[1] is the number of
    # answers given by the group's respondents.
    across(all_of(count_cols), ~ sum(.x) / (n() * O_sum[1]) * 100),
    .groups = "drop"
  ) %>%
  group_by(Country) %>%
  summarise(across(all_of(count_cols), mean), .groups = "drop") %>%
  as.data.frame()

colnames(Likert_df) <- c(
  "Item", "Strongly Disagree", "Disagree", "So So", "Agree", "Strongly Agree"
)
DF <- Likert_df
which results in:
DF
Item Strongly Disagree Disagree So So Agree Strongly Agree
1 BRAZIL 10 25 25.0 30.0 10
2 GERMANY 30 20 20.0 20.0 10
3 ITALY 20 40 20.0 0.0 20
4 USA 5 10 42.5 17.5 25
My question is: is there a simpler and faster way, using dplyr functions with pipes, to produce the same result?