I have a script that creates schedules for me from raw data that I got from an SSIM file. I created all of it with ChatGPT. Feel free to tell me how I can improve it, and whether it contains anything unnecessary; I cannot tell, since I am not a coder. Anyway:
The function create_schedule builds the departure legs correctly and identifies breaks in them, such as time changes and missing dates. For each break it creates a separate schedule row, with a new period-from and period-to, for the same flight number. It just does not work correctly on the return flights. For example, flight 207 has 4 rows of data that identify the missing dates perfectly, but the return flight, 208, does not: it only gets 2 rows. It should match 207's schedule.
Please tell me what is wrong with the script. 208 should match 207; 207 is sorted correctly by the script. I'm having this issue on the return legs of all flights.
The data.
library(dplyr)
library(tidyr)
library(readxl)
library(writexl)
library(ssimparser)
library(lubridate)
#' Read an SSIM file into a data frame
#'
#' Delegates to `ssimparser::load_ssim()`, passing `file_path` as its
#' `ssim_file` argument and leaving every other argument at its default.
#'
#' @param file_path Path to the SSIM file.
#' @return Whatever `ssimparser::load_ssim()` returns for that file.
load_ssim_data <- function(file_path) {
  parsed_schedule <- ssimparser::load_ssim(ssim_file = file_path)
  parsed_schedule
}
#' Split the UTC timestamps and normalise the period columns
#'
#' Adds `std_date`/`std_time` and `sta_date`/`sta_time` by splitting
#' `std_utc` and `sta_utc` on a single space (the original columns are kept),
#' then rewrites `period_from` and `period_to` from `%d%b%y` text into
#' upper-case `%d-%b-%y` text.
#'
#' @param ssim_df Data frame with `std_utc`, `sta_utc`, `period_from` and
#'   `period_to` columns.
#' @return `ssim_df` with the four new columns and reformatted period columns.
preprocess_ssim_data <- function(ssim_df) {
  # Parse "%d%b%y" text to a Date, then print it back as upper-case "%d-%b-%y".
  reformat_period <- function(raw_period) {
    parsed <- as.Date(raw_period, format = "%d%b%y")
    toupper(format(parsed, format = "%d-%b-%y"))
  }

  with_departure <- separate(
    ssim_df,
    std_utc,
    into = c("std_date", "std_time"),
    sep = " ",
    remove = FALSE
  )
  with_arrival <- separate(
    with_departure,
    sta_utc,
    into = c("sta_date", "sta_time"),
    sep = " ",
    remove = FALSE
  )

  mutate(
    with_arrival,
    period_from = reformat_period(period_from),
    period_to = reformat_period(period_to)
  )
}
#' Load the airports reference table and keep the columns used for joins
#'
#' Reads a CSV file, drops rows whose `IATA` value is the literal
#' two-character text `\N` (presumably the file's "no value" marker --
#' confirm against the data source), and keeps only the code, coordinates
#' and time-zone name.
#'
#' @param file_path Path to a CSV file with `IATA`, `Latitude`, `Longitude`
#'   and `Tz_database_time_zone` columns.
#' @return A data frame with those four columns.
load_clean_airports <- function(file_path) {
  read.csv(file_path) %>%
    # The backslash must be doubled: "\N" on its own is not a valid escape
    # sequence in an R string and stops the whole script from parsing.
    filter(IATA != "\\N") %>%
    select(IATA, Latitude, Longitude, Tz_database_time_zone)
}
#' Format a time as HH:MM:SS in a given time zone, never failing
#'
#' When `timezone` is `NA` or empty the time is formatted as it is. Otherwise
#' it is shifted with `with_tz()` first. Any error raised along the way is
#' turned into a warning and the unshifted time is formatted instead.
#'
#' @param time A date-time value.
#' @param timezone Time-zone name, or `NA`/`""` when unknown.
#' @return A character string in `%H:%M:%S` form.
safe_convert_time <- function(time, timezone) {
  clock_format <- "%H:%M:%S"
  tryCatch(
    {
      zone_missing <- is.na(timezone) || timezone == ""
      shown_time <- if (zone_missing) time else with_tz(time, timezone)
      format(shown_time, clock_format)
    },
    error = function(e) {
      warning(paste("Error converting time for timezone:", timezone, "-", e$message))
      format(time, clock_format)
    }
  )
}
#' Build a consolidated timetable from preprocessed SSIM records
#'
#' For every flight number the records are sorted by `period_from` and walked
#' in order. A record is folded into the currently open schedule row when it
#' starts no later than one day after that row's `period_to` and has the same
#' `std_time`, `sta_time`, `adep_iata` and `ades_iata`; the row's `period_to`
#' is then extended and the operating days are unioned. Any other record
#' closes the open row and starts a new one. Times are finally converted from
#' UTC to the local zone of the departure / arrival airport.
#'
#' @param ssim_df Data frame as returned by `preprocess_ssim_data()`: needs
#'   `flight_number`, `period_from` and `period_to` (text in `%d-%b-%y`
#'   form), `std_time`, `sta_time`, `adep_iata`, `ades_iata` and
#'   `days_of_operation`.
#' @param airports_clean Data frame with `IATA` and `Tz_database_time_zone`
#'   columns (other columns are ignored).
#' @return An ungrouped data frame with the columns `Sequence`,
#'   `Flight Number`, `From`, `To`, `Pattern`, `STD (Local)`, `STA (Local)`,
#'   `Start Date`, `End Date` and `Is Time Change`, ordered by flight number
#'   and sequence.
create_schedule <- function(ssim_df, airports_clean) {
  # Turn a days-of-operation string such as "1 3 5  " into c(1, 3, 5).
  # The regex backslash must be doubled; "\s" alone is a parse error in R.
  parse_days <- function(days) {
    as.numeric(unlist(strsplit(gsub("\\s", "", days), "")))
  }

  # Start a new schedule row from a single input record.
  open_segment <- function(record) {
    record$days_of_operation <- list(parse_days(record$days_of_operation))
    record
  }

  # TRUE when `candidate` continues `open`: adjacent or overlapping period and
  # identical times and route. isTRUE() makes a missing route code count as
  # "does not continue" instead of aborting the `if` with an NA condition.
  # NOTE(review): records that overlap in dates but differ only in their
  # operating days are merged here and their day patterns unioned. If return
  # legs come out with fewer rows than expected, compare their raw records
  # against this rule first -- not confirmed without the input data.
  continues_segment <- function(open, candidate) {
    isTRUE(
      !is.na(candidate$period_from) &&
        !is.na(open$period_to) &&
        !is.na(candidate$std_time) &&
        !is.na(open$std_time) &&
        !is.na(candidate$sta_time) &&
        !is.na(open$sta_time) &&
        candidate$period_from <= open$period_to + 1 &&
        candidate$std_time == open$std_time &&
        candidate$sta_time == open$sta_time &&
        candidate$adep_iata == open$adep_iata &&
        candidate$ades_iata == open$ades_iata
    )
  }

  segments <- list()
  for (flight in unique(ssim_df$flight_number)) {
    flight_data <- ssim_df %>%
      filter(flight_number == flight) %>%
      mutate(
        period_from = as.Date(period_from, format = "%d-%b-%y"),
        period_to = as.Date(period_to, format = "%d-%b-%y")
      ) %>%
      arrange(period_from)

    open <- NULL
    # seq_len() rather than 1:nrow(): a flight with zero matching rows (for
    # example an NA flight number) must not run the loop at all.
    for (i in seq_len(nrow(flight_data))) {
      candidate <- flight_data[i, ]
      if (is.null(open)) {
        open <- open_segment(candidate)
      } else if (continues_segment(open, candidate)) {
        open$period_to <- max(open$period_to, candidate$period_to, na.rm = TRUE)
        open$days_of_operation <- list(sort(unique(c(
          unlist(open$days_of_operation),
          parse_days(candidate$days_of_operation)
        ))))
      } else {
        segments[[length(segments) + 1L]] <- open
        open <- open_segment(candidate)
      }
    }
    if (!is.null(open)) {
      segments[[length(segments) + 1L]] <- open
    }
  }

  if (length(segments) == 0L) {
    stop("ssim_df contains no rows to build a schedule from", call. = FALSE)
  }

  # Only the time zone is needed from the airports table. Keeping one row per
  # code stops a repeated IATA code from duplicating schedule rows in the join.
  airport_zones <- airports_clean %>%
    select(IATA, Tz_database_time_zone) %>%
    distinct(IATA, .keep_all = TRUE)

  bind_rows(segments) %>%
    group_by(flight_number) %>%
    mutate(flight_sequence = row_number()) %>%
    ungroup() %>%
    left_join(airport_zones, by = c("adep_iata" = "IATA")) %>%
    rename(dep_timezone = Tz_database_time_zone) %>%
    left_join(airport_zones, by = c("ades_iata" = "IATA")) %>%
    rename(arr_timezone = Tz_database_time_zone) %>%
    mutate(
      # NOTE(review): the fixed 2023-01-01 anchor means the UTC offset in
      # force on that date is applied to every period, so local times in
      # zones with daylight saving may be off for summer periods -- confirm
      # whether the period's own date should be used instead.
      std_time_utc = as.POSIXct(
        paste("2023-01-01", std_time),
        format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
      ),
      sta_time_utc = as.POSIXct(
        paste("2023-01-01", sta_time),
        format = "%Y-%m-%d %H:%M:%S", tz = "UTC"
      ),
      std_time_local = mapply(safe_convert_time, std_time_utc, dep_timezone),
      sta_time_local = mapply(safe_convert_time, sta_time_utc, arr_timezone),
      days_of_operation = vapply(
        days_of_operation,
        function(x) paste(sort(x), collapse = ","),
        character(1)
      )
    ) %>%
    select(
      flight_sequence,
      flight_number,
      adep_iata,
      ades_iata,
      days_of_operation,
      std_time_local,
      sta_time_local,
      period_from,
      period_to
    ) %>%
    rename(
      "Sequence" = flight_sequence,
      "Flight Number" = flight_number,
      "From" = adep_iata,
      "To" = ades_iata,
      "Pattern" = days_of_operation,
      "STD (Local)" = std_time_local,
      "STA (Local)" = sta_time_local,
      "Start Date" = period_from,
      "End Date" = period_to
    ) %>%
    # Sort while Sequence is still numeric; it is unique within a flight, so
    # no further tie-breaker is needed.
    arrange(`Flight Number`, Sequence) %>%
    mutate(
      `Start Date` = format(as.Date(`Start Date`), "%d-%b-%y"),
      `End Date` = format(as.Date(`End Date`), "%d-%b-%y"),
      `Is Time Change` = FALSE
    )
}
# Main script ----
# Windows paths: every backslash is doubled, because inside an R string a
# single backslash starts an escape sequence ("\U" in "C:\Users" is read as a
# Unicode escape and the script fails to parse).
ssim_file_path <- "C:\\Users\\This pc\\Desktop\\SSIM input\\SSIMTEST (1).ssim"
airports_file_path <- "C:\\Users\\This pc\\Desktop\\Airports.csv"
output_file_path <- "C:\\Users\\This pc\\Desktop\\SSIM Output\\G9-TimeTable.xlsx"

# Load the raw SSIM records, normalise them, build the timetable and write it
# out as an Excel workbook.
ssim_df <- load_ssim_data(ssim_file_path)
ssim_df <- preprocess_ssim_data(ssim_df)
airports_clean <- load_clean_airports(airports_file_path)
G9Schedule <- create_schedule(ssim_df, airports_clean)
write_xlsx(G9Schedule, output_file_path)
NotACoder is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.