I have these 2 shapefiles from the internet:
library(sf)
library(dplyr)
library(fs)
url_1 <- "https://www12.statcan.gc.ca/census-recensement/2021/geo/sip-pis/boundary-limites/files-fichiers/lfsa000b21a_e.zip"
temp_dir <- tempdir()
zip_file <- file.path(temp_dir, "shapefile.zip")
download.file(url_1, destfile = zip_file)
exdir_ <- tempfile(pattern = "shp_1")
unzip(zip_file, exdir = exdir_, junkpaths = TRUE)
fs::dir_tree(exdir_)
file_1 <- st_read(exdir_)
fsa <- st_transform(file_1, "+proj=longlat +datum=WGS84")
unlink(temp_dir, recursive = TRUE)
url_2 <- "https://www12.statcan.gc.ca/census-recensement/alternative_alternatif.cfm?l=eng&dispext=zip&teng=lada000b21a_e.zip&k=%20%20%20151162&loc=//www12.statcan.gc.ca/census-recensement/2021/geo/sip-pis/boundary-limites/files-fichiers/lada000b21a_e.zip"
download.file(url_2, destfile = "lada000b21a_e.zip")
unzip("lada000b21a_e.zip")
ada <- st_read("lada000b21a_e.shp")
For the same PRUID, I want to make a matrix which contains what % each polygon from shapefile 1 intersects with each polygon from shapefile 2.
library(sf)
library(dplyr)
calculate_intersection_percentages <- function(ada, fsa, pruid) {
ada_filtered <- ada %>% filter(PRUID == pruid)
fsa_filtered <- fsa %>% filter(PRUID == pruid)
ada_filtered <- st_transform(ada_filtered, st_crs(fsa_filtered))
results <- list()
total_fsas <- nrow(fsa_filtered)
completed_fsas <- 0
for (ada_id in ada_filtered$ADAUID) {
ada_single <- ada_filtered %>% filter(ADAUID == ada_id)
ada_area <- st_area(ada_single)
for (fsa_id in fsa_filtered$CFSAUID) {
fsa_single <- fsa_filtered %>% filter(CFSAUID == fsa_id)
intersection <- st_intersection(ada_single, fsa_single)
if (length(intersection) > 0) {
intersection_area <- st_area(intersection)
percentage <- as.numeric(intersection_area / ada_area * 100)
print(sprintf("ADA: %s, FSA: %s, Intersection = %.2f%%", ada_id, fsa_id, percentage))
results[[ada_id]][[fsa_id]] <- percentage
}
completed_fsas <- completed_fsas + 1
progress_percentage <- (completed_fsas / (total_fsas * nrow(ada_filtered))) * 100
print(sprintf("Progress: %.2f%% of FSA calculations completed", progress_percentage))
}
}
result_matrix <- do.call(rbind, lapply(results, function(x) {
unlist(x, use.names = TRUE)
}))
result_matrix[is.na(result_matrix)] <- 0
result_matrix <- result_matrix / rowSums(result_matrix) * 100
return(result_matrix)
}
I launch the function like this:
result <- calculate_intersection_percentages(ada, fsa, pruid = "10")
The code runs, but I am getting some very exact numbers which makes me think the intersection is not happening correctly:
20.00000000 20.00000000 20.00000000 20.00000000 20.00000000
How can I correctly intersect these two shapefiles?
heartofdarkness is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.