Scenario:
The table contains data inserted over a 7-day window where data from the 11th to the 17th of each month is inserted daily. Sometimes, overlapping data is inserted, and the challenge is to retrieve the latest data for the start_period
.
Start_period ID Management_unit business_unit activity actual_start_date actual_end_date
2024-04-15T00:00:00.000Z 13be8b33 MU - INZ Group BU-MB Break 17-04-2024 23:51:45 17-04-2024 23:59:59
2024-04-12T00:00:00.000Z 13be8b33 MU - INZ Group BU-MB Break 17-04-2024 23:51:45 17-04-2024 23:40:59
2024-04-11T00:00:00.000Z 13be8b33 MU - INZ Group BU-MB Break 17-04-2024 23:22:45 17-04-2024 23:59:59
2024-04-15T00:00:00.000Z 13be8b33 MU - INZ Group BU-MB Break 17-04-2024 00:02:57 17-04-2024 00:24:05
Query:
select distinct
start_period,
agent_id,
MANAGEMENTUNIT_NAME,
BUSINESSUNIT_NAME,
ACTUAL_ACTIVITY_CATEGORY,
actual_start_date ,
DATE_PART('epoch_second', TO_TIMESTAMP_NTZ(ACTUAL_START_DATE)) as Start_offset,
DATE_PART('epoch_second', TO_TIMESTAMP_NTZ(ACTUAL_END_DATE)) as end_offset,
end_offset - Start_offset as actual_offset
from (
with date_dim as (
select seq4() as id,
row_number() over (order by id) as row_num,
dateadd(day,row_num,'1999-12-31'::timestamp) as date
from TABLE(GENERATOR(ROWCOUNT => 36500))
),
data_table as (
select seq4() as id,
start_date,
agent_id,
BUSINESSUNIT_NAME,
MANAGEMENTUNIT_NAME,
ACTUAL_ACTIVITY_CATEGORY,
ACTUAL_START_DATE,
ACTUAL_END_DATE,
ACTIVITY_DURATION
from MYTABLE
)
select distinct
row_number() over (partition by dt.id order by dd.id) as id,
dt.start_date as start_period,
dt.agent_id,
dt.MANAGEMENTUNIT_NAME,
dt.BUSINESSUNIT_NAME,
dt.ACTUAL_ACTIVITY_CATEGORY,
case when dt.actual_start_date > dd.date then dt.actual_start_date else dd.date end as actual_start_date,
case when dt.actual_end_date < dateadd(d,1,dd.date) then dt.actual_end_date else dateadd(s,-1,dateadd(d,1,dd.date)) end as actual_end_date
from data_table dt
join date_dim dd
on dd.date between date_trunc(day,dt.actual_start_date) and dt.actual_end_date
) QUALIFY ROW_NUMBER() OVER(PARTITION BY AGENT_ID,MANAGEMENTUNIT_NAME,ACTUAL_ACTIVITY_CATEGORY,actual_start_date
ORDER BY Start_period desc) = 1;
--expectedOutput
| Start_period | ID | Management_unit | Business_unit | Activity | Actual_start_date | Actual_end_date |
|------------------------|---------|-----------------|---------------|----------|----------------------|----------------------|
| 2024-04-15T00:00:00.000Z | 13be8b33 | MU - INZ Group | BU-MB | Break | 17-04-2024 23:51:45 | 17-04-2024 23:59:59 |
| 2024-04-15T00:00:00.000Z | 13be8b33 | MU - INZ Group | BU-MB | Break | 17-04-2024 00:02:57 | 17-04-2024 00:24:05 |
4