I have below SQL query that runs as to aggregate data, with intermediary steps. However I run into “Query exhausted resources at this scale factor error” however I change it.
I read the https://docs.aws.amazon.com/athena/latest/ug/performance-tuning.html and tried following the ideas conveyed regarding joins avoiding ordering, only getting required columns etc, but still unable to resolve the issue.
How else can I optimize the query, or otherwise make it work within the given resourses?
WITH turnit_product_activity_views AS (
SELECT DISTINCT
entire_table.basketcode,
entire_table.productcode,
entire_table.transactionstatus,
entire_table.transactioncode,
entire_table.isinboundjourneyway,
entire_table.type,
entire_table.code,
entire_table.operationdatetime,
entire_table.seatclass,
entire_table.journeyoriginstopcode,
entire_table.destinationstopcode,
entire_table.departuredatetime,
entire_table.nblegoutbound,
entire_table.nbleginbound,
entire_table.arrivaldatetime,
entire_table.travelaccountid,
entire_table.journeydestinationstopcode,
entire_table.originstopcode,
entire_table.legorder
FROM "prod-se_delta_glue_database"."turnit_product_activity_view" AS entire_table
WHERE entire_table.type = 'TRANSACTION_TYPE.BUS_TICKET'
AND entire_table.basketcode IN (
SELECT current_table.basketcode
FROM "prod-se_delta_glue_database"."turnit_product_activity_view" AS current_table
WHERE current_table."$path" = 's3://prod-se-delta-lake-bucket/turnit_product_activity_view/part-00000-6ae7752a-1ed4-4505-bce6-11c0d27f853b-c000.snappy.parquet'
)
),
selected_product_codes AS (
SELECT
MIN(productcode) as used_product_code,
basketcode
FROM turnit_product_activity_views as main_table
GROUP BY basketcode
),
nr_tickets AS (
SELECT
t.productcode as used_product_code,
t.basketcode,
COUNT(DISTINCT t.productcode) as Nr_Tickets__c
FROM turnit_product_activity_views t
JOIN selected_product_codes spc ON spc.used_product_code = t.productcode
GROUP BY t.productcode, t.basketcode
),
bus_tickets AS (
SELECT
nr_tickets.used_product_code AS Product_Code__c,
nr_tickets.basketcode AS Basket_Code__c,
main_table.transactionstatus AS Transcation_Status__c,
main_table.transactioncode AS Transcation_Code__c,
main_table.isinboundjourneyway AS Is_Inbound_Journey__c,
main_table.type as Type__c,
main_table.code as Channel__c,
main_table.operationdatetime as Operation_Date_Time__c,
main_table.seatclass as Seat_Class__c,
main_table.journeyoriginstopcode as Journey_Origin_Stop_Code__c,
main_table.destinationstopcode as Destination_Stop_Code__c,
main_table.departuredatetime as Departure_Date_Time__c,
main_table.nblegoutbound as Number_of_leg_for_Outbound__c,
main_table.nbleginbound as Number_of_leg_for_Inbound__c,
main_table.arrivaldatetime as Arrival_Date_Time__c,
main_table.travelaccountid as Traveler_Account_Id__c,
main_table.journeydestinationstopcode as Journey_Destination_Stop_Code__c,
main_table.originstopcode as Origin_Stop_Code__c,
main_table.legorder as Leg_Order__c,
nr_tickets.Nr_Tickets__c AS Nr_Tickets__c
FROM turnit_product_activity_views as main_table
JOIN nr_tickets ON main_table.productcode = nr_tickets.used_product_code
and main_table.basketcode = nr_tickets.basketcode
),
temp_status_groups AS (
SELECT
Product_Code__c,
Transcation_Code__c,
COUNT(DISTINCT Transcation_Status__c) AS status_count,
SUM(CASE WHEN Transcation_Status__c = 'TRANSACTION_STATUS.SOLD' THEN 1 ELSE 0 END) AS sold_count,
SUM(CASE WHEN Transcation_Status__c = 'TRANSACTION_STATUS.RETURNED' THEN 1 ELSE 0 END) AS returned_count
FROM bus_tickets
GROUP BY Product_Code__c, Transcation_Code__c
),
bus_tickets_with_cancel_status AS (
SELECT bt.*,
CASE
WHEN tsg.status_count = 2 AND tsg.sold_count > 0 AND tsg.returned_count > 0 THEN 1
ELSE 0
END AS Cancelled__c
FROM bus_tickets bt
LEFT JOIN temp_status_groups tsg
ON bt.Product_Code__c = tsg.Product_Code__c
AND bt.Transcation_Code__c = tsg.Transcation_Code__c
),
status_info AS (
SELECT
Product_Code__c,
COUNT(*) AS total_tickets,
SUM(CAST(Cancelled__c AS INTEGER)) AS cancelled_tickets
FROM bus_tickets_with_cancel_status
GROUP BY Product_Code__c
),
sold_max_op_time AS (
SELECT
Product_Code__c,
MAX(Operation_Date_Time__c) AS max_op_time
FROM bus_tickets_with_cancel_status
WHERE Transcation_Status__c = 'TRANSACTION_STATUS.SOLD'
GROUP BY Product_Code__c
),
journey_tickets AS (
SELECT
bt.*
FROM bus_tickets_with_cancel_status bt
JOIN status_info si ON bt.Product_Code__c = si.Product_Code__c
LEFT JOIN sold_max_op_time sm ON bt.Product_Code__c = sm.Product_Code__c
WHERE
(si.cancelled_tickets = si.total_tickets
AND bt.Transcation_Status__c = 'TRANSACTION_STATUS.SOLD'
AND bt.Operation_Date_Time__c = sm.max_op_time)
OR
(si.cancelled_tickets < si.total_tickets
AND bt.Cancelled__c = 0)
),
journey_type AS (
SELECT
Product_Code__c,
CASE
WHEN COUNT(*) = 1 THEN 'One Way'
WHEN COUNT(*) = SUM(CASE WHEN Channel__c IN ('JOURNEY_WAY.ONE_WAY', 'AS') THEN 1 ELSE 0 END) THEN 'One Way'
ELSE 'Two Way'
END AS Journey_Type__c
FROM journey_tickets
GROUP BY Product_Code__c
),
journey_tickets_w_type AS (
SELECT
jt.*,
jtype.Journey_Type__c AS Journey_Type__c
FROM journey_tickets jt
JOIN journey_type jtype ON jt.Product_Code__c = jtype.Product_Code__c
),
temp_journey_one_way AS (
SELECT
Product_Code__c AS Journey_Id__c,
Journey_Type__c,
MIN(Operation_Date_Time__c) AS Booking_Date__c,
MIN(CASE WHEN Journey_Type__c = 'One Way' THEN Departure_Date_Time__c ELSE NULL END) AS Departure_Date__c,
MAX(CASE WHEN Journey_Type__c = 'One Way' THEN Arrival_Date_Time__c ELSE NULL END) AS Arrival_Date__c,
MAX(CASE WHEN Journey_Type__c = 'One Way' THEN Journey_Origin_Stop_Code__c ELSE NULL END) AS Ticket_Origin__c,
MAX(CASE WHEN Journey_Type__c = 'One Way' THEN Destination_Stop_Code__c ELSE NULL END) AS Ticket_Destination__c,
MAX(CASE WHEN Journey_Type__c = 'One Way' THEN Number_of_leg_for_Outbound__c ELSE NULL END) AS Nr_Legs__c,
CASE WHEN SUM(Cancelled__c) = 0 THEN 'false' ELSE 'true' END AS Cancelled__c,
MAX(Traveler_Account_Id__c) AS Account_Master_Detail__r,
MAX(Nr_Tickets__c) AS Nr_Tickets__c,
NULL AS Departure_Date_Outbound__c,
NULL AS Departure_Date_Inbound__c,
NULL AS Arrival_Date_Outbound__c,
NULL AS Arrival_Date_Inbound__c,
NULL AS Nr_Legs_Outbound__c,
NULL AS Nr_Legs_Inbound__c,
'0123Y000000dbzhQAA' AS RecordTypeId
FROM journey_tickets_w_type
WHERE Journey_Type__c = 'One Way'
GROUP BY Product_Code__c, Journey_Type__c, Cancelled__c
),
temp_journey_two_way AS (
SELECT
Product_Code__c AS Journey_Id__c,
Journey_Type__c,
MIN(Operation_Date_Time__c) AS Booking_Date__c,
MIN(CASE WHEN Journey_Type__c = 'Two Way' AND Is_Inbound_Journey__c = FALSE THEN Departure_Date_Time__c ELSE NULL END) AS Departure_Date_Outbound__c,
MIN(CASE WHEN Journey_Type__c = 'Two Way' AND Is_Inbound_Journey__c = TRUE THEN Departure_Date_Time__c ELSE NULL END) AS Departure_Date_Inbound__c,
MAX(CASE WHEN Journey_Type__c = 'Two Way' AND Is_Inbound_Journey__c = FALSE THEN Arrival_Date_Time__c ELSE NULL END) AS Arrival_Date_Outbound__c,
MAX(CASE WHEN Journey_Type__c = 'Two Way' AND Is_Inbound_Journey__c = TRUE THEN Arrival_Date_Time__c ELSE NULL END) AS Arrival_Date_Inbound__c,
MAX(CASE WHEN Journey_Type__c = 'Two Way' AND Is_Inbound_Journey__c = FALSE THEN Journey_Origin_Stop_Code__c ELSE NULL END) AS Ticket_Origin__c,
MAX(CASE WHEN Journey_Type__c = 'Two Way' AND Is_Inbound_Journey__c = FALSE THEN Journey_Destination_Stop_Code__c ELSE NULL END) AS Ticket_Destination__c,
MAX(CASE WHEN Journey_Type__c = 'Two Way' THEN Number_of_leg_for_Outbound__c ELSE NULL END) AS Nr_Legs_Outbound__c,
MAX(CASE WHEN Journey_Type__c = 'Two Way' THEN Number_of_leg_for_Inbound__c ELSE NULL END) AS Nr_Legs_Inbound__c,
CASE WHEN SUM(Cancelled__c) = 0 THEN 'false' ELSE 'true' END AS Cancelled__c,
MAX(Traveler_Account_Id__c) AS Account_Master_Detail__r,
MAX(Nr_Tickets__c) AS Nr_Tickets__c,
NULL AS Departure_Date__c,
NULL AS Arrival_Date__c,
NULL AS Nr_Legs__c,
'0123Y000000dbzrQAA' AS RecordTypeId
FROM journey_tickets_w_type
WHERE Journey_Type__c = 'Two Way'
GROUP BY Product_Code__c, Journey_Type__c, Cancelled__c
),
journey AS (
SELECT
Journey_Id__c,
Journey_Type__c,
Booking_Date__c,
Departure_Date_Outbound__c,
Departure_Date_Inbound__c,
Arrival_Date_Outbound__c,
Arrival_Date_Inbound__c,
Ticket_Origin__c,
Ticket_Destination__c,
Nr_Legs_Outbound__c,
Nr_Legs_Inbound__c,
Cancelled__c,
Account_Master_Detail__r,
Nr_Tickets__c,
Departure_Date__c,
Arrival_Date__c,
Nr_Legs__c,
RecordTypeId
FROM temp_journey_one_way
UNION ALL
SELECT
Journey_Id__c,
Journey_Type__c,
Booking_Date__c,
Departure_Date_Outbound__c,
Departure_Date_Inbound__c,
Arrival_Date_Outbound__c,
Arrival_Date_Inbound__c,
Ticket_Origin__c,
Ticket_Destination__c,
Nr_Legs_Outbound__c,
Nr_Legs_Inbound__c,
Cancelled__c,
Account_Master_Detail__r,
Nr_Tickets__c,
Departure_Date__c,
Arrival_Date__c,
Nr_Legs__c,
RecordTypeId
FROM temp_journey_two_way
)
SELECT * FROM journey