Is it possible to parallelize all the operations with in separate code chunks. For example, I have 3 code chunks that query separate tables in a database. I then perform some data cleaning. Is it possible to have each code chunk be broken into its own process to be ran on a core at the same time as other code chunks? Essentially have process 1+2+3 going at the same time on separate cores instead of sequentially.
Process 1
q_flighthistory <- "SELECT *
FROM CT_FLIGHT_HISTORY;" # WHERE FLIGHT_DATE BETWEEN '2024-07-01' AND '2024-07-02';"
dbGetQuery(q_flighthistory, "view_flighthistory")
# Unique flight history for that date
clean_flighthistory <- view_flighthistory %>%
filter(SEGMENT_STATUS == "A") %>%
select(c(2:10), (12:13), (55:59),EQUIPMENT) %>%
#filter(!duplicated(UNIQUE_ID)) %>%
rename(SCHED_DEPARTURE_TIME = SCHED_DEPARTURE_TIME_RAW)%>%
mutate(updated_dt = paste(UPDATE_DATE, UPDATE_TIME, sep= " ")) %>%
group_by(FLIGHT_NO, FLIGHT_DATE, DEPARTING_CITY, ARRIVAL_CITY, SCHED_DEPARTURE_TIME) %>%
filter(updated_dt == max(updated_dt)) %>%
mutate(temp_id = cur_group_id()) %>%
filter(!duplicated(temp_id)) %>%
select(!c(12:16)) %>%
select(!c(5:7)) %>%
select(!c(10:11))
rm(view_flighthistory)
gc()
Process 2
q_flightleg <- "SELECT *
FROM CT_FLIGHT_LEG;" # WHERE PAIRING_DATE BETWEEN '2024-06-28' AND '2024-07-02';"
dbGetQuery(q_flightleg, "view_flightleg")
flight_leg_join <- view_flightleg %>%
mutate(updated_dt = paste(UPDATE_DATE, UPDATE_TIME, sep= " "),
shed_dept_hour = hour(SCHED_DEPARTURE_TIME),
DEADHEAD = if_else(is.na(DEADHEAD), "C", DEADHEAD)) %>%
relocate(updated_dt, .after=PAIRING_NO) %>%
group_by(PAIRING_POSITION,
DEPARTING_CITY,
ARRIVAL_CITY,
SCHED_DEPARTURE_DATE,
shed_dept_hour,
PAIRING_NO,
PAIRING_DATE) %>%
filter(DEADHEAD == "C",
updated_dt == max(updated_dt)) %>%
mutate(temp_id = cur_group_id(), .after = CREW_INDICATOR) %>%
filter(!duplicated(temp_id)) %>%
ungroup() %>%
select(PAIRING_NO, PAIRING_DATE, FLIGHT_NO, SCHED_DEPARTURE_DATE, SCHED_ARRIVAL_DATE, DEPARTING_CITY, ARRIVAL_CITY,
PAIRING_POSITION)
rm(view_flightleg)
gc()
Process 3
# MasterPairing Query
q_masterpairing <- "SELECT *
FROM CT_MASTER_PAIRING;" # WHERE PAIRING_DATE BETWEEN '2024-06-28' AND '2024-07-02';"
dbGetQuery(q_masterpairing, "view_masterpairing")
join_masterpairing <- view_masterpairing %>%
mutate(updated_dt = paste(UPDATE_DATE, UPDATE_TIME, sep = " ")) %>%
relocate(updated_dt, .after=PAIRING_NO) %>%
group_by(PAIRING_DATE, CREW_ID) %>%
filter(updated_dt == max(updated_dt),
PAIRING_STATUS == "A") %>%
select(PAIRING_POSITION, CREW_ID, PAIRING_NO, PAIRING_DATE, CREW_INDICATOR, updated_dt) %>%
ungroup()
rm(view_masterpairing)
gc()