I have a data frame with 5 indicators per site. I calculated the reciprocal of 2 of the indicators, and then min-max scaled all 5 indicators within each region. Now I would like to restore them to their original values. I managed to get it right, but only for the indicators for which I did not originally calculate reciprocals. Could you help me find where the error is? Here is a reproducible example:
# Load necessary libraries
library(dplyr)
# Step 1: Create the example dataframe
set.seed(1)
df <- data.frame(
Region = rep(c("Region1", "Region2"), each = 5),
Site = rep(1:5, times = 2),
Indicator_A = sample(1:100, 10, replace = TRUE),
Indicator_B = sample(1:100, 10, replace = TRUE),
Indicator_C = sample(0:1000, 10, replace = TRUE),
Indicator_D = sample(5:500, 10, replace = TRUE),
Indicator_E = sample(-10:10, 10, replace = TRUE)
)
# define minmax function
norm_minmax <- function(x, na.rm = FALSE) {
(x - min(x, na.rm = na.rm)) / (max(x, na.rm = na.rm) - min(x, na.rm = na.rm))
}
df_scaled <-
df %>%
# calculate reciprocal of selected indicators
mutate(
across(
c(Indicator_A, Indicator_B),
~ 1 / .)) %>%
# min-max scale variables per region
mutate(
across(
where(is.numeric),
~ norm_minmax(., na.rm = TRUE)),
.by = Region)
# revert transformations
# define undo minmax function
undo_minmax <- function(scaled, unscaled, na.rm = FALSE) {
unscaled_min <- min(unscaled, na.rm = na.rm)
unscaled_range <- max(unscaled, na.rm = na.rm) - unscaled_min
original <- scaled * unscaled_range + unscaled_min
return(original)
}
df_restored <-
df_scaled %>%
filter(Region == "Region1") %>%
# undo minmax scaling
mutate(
across(
c(Indicator_A:Indicator_E),
~ undo_minmax(scaled = .x,
unscaled = df[df$Region == "Region1",][[cur_column()]],
na.rm = TRUE))) %>%
# calculate reciprocal of reciprocals
mutate(across(c(Indicator_A, Indicator_B), ~ 1/.)) %>%
# reclassify inf values as 0 resulting from dividing 1/0
mutate(across(c(Indicator_A, Indicator_B),
~ if_else(is.infinite(.), 0, .)))
all.equal(df_restored, df %>% filter(Region == "Region1"))
print(df_restored)
print(df[df$Region == "Region1", ])