I’m running the following query in BigQuery:
WITH mock AS (
SELECT
TIMESTAMP '2024-04-01 22:26:25' AS event_timestamp,
'h6p2ylt679me5pcz' AS user_pseudo_id,
'session start' AS event_name,
1712010385 AS ga_session_id,
'example.com' AS hostname
UNION ALL
SELECT
TIMESTAMP '2024-04-01 22:26:28' AS event_timestamp,
'h6p2ylt679me5pcz' AS user_pseudo_id,
'view' AS event_name,
1712010385 AS ga_session_id,
'example.com' AS hostname
UNION ALL
SELECT
TIMESTAMP '2024-04-01 22:26:31' AS event_timestamp,
'h6p2ylt679me5pcz' AS user_pseudo_id,
'click' AS event_name,
1712010385 AS ga_session_id,
'example.com' AS hostname
UNION ALL
SELECT
TIMESTAMP '2024-04-01 22:26:50' AS event_timestamp,
'h6p2ylt679me5pcz' AS user_pseudo_id,
'click' AS event_name,
1712010385 AS ga_session_id,
'example.com' AS hostname
UNION ALL
SELECT
TIMESTAMP '2024-04-01 22:27:02' AS event_timestamp,
'h6p2ylt679me5pcz' AS user_pseudo_id,
'session start' AS event_name,
2939469694 AS ga_session_id,
'example2.com' AS hostname
UNION ALL
SELECT
TIMESTAMP '2024-04-01 22:27:14' AS event_timestamp,
'h6p2ylt679me5pcz' AS user_pseudo_id,
'view' AS event_name,
2939469694 AS ga_session_id,
'example2.com' AS hostname
UNION ALL
SELECT
TIMESTAMP '2024-04-01 22:27:50' AS event_timestamp,
'h6p2ylt679me5pcz' AS user_pseudo_id,
'click' AS event_name,
2939469694 AS ga_session_id,
'example2.com' AS hostname
),
raw AS (
SELECT
event_timestamp,
user_pseudo_id,
event_name,
hostname,
ga_session_id,
MIN(event_timestamp) OVER (PARTITION BY user_pseudo_id, ga_session_id) AS session_start
FROM mock
)
SELECT
*,
LAG(ga_session_id) OVER (PARTITION BY user_pseudo_id ORDER BY session_start) AS ga_session_id_fix
FROM raw
ORDER BY event_timestamp ASC;
This is the output I have
| Row | event_timestamp | user_pseudo_id | event_name | hostname | ga_session_id | session_start | ga_session_id_fix |
|-----|--------------------------|---------------------|----------------|---------------|---------------|------------------------|-------------------|
| 1 | 2024-04-01 22:26:25 UTC | h6p2ylt679me5pcz | session start | example.com | 1712010385 | 2024-04-01 22:26:25 UTC| null |
| 2 | 2024-04-01 22:26:28 UTC | h6p2ylt679me5pcz | view | example.com | 1712010385 | 2024-04-01 22:26:25 UTC| 1712010385 |
| 3 | 2024-04-01 22:26:31 UTC | h6p2ylt679me5pcz | click | example.com | 1712010385 | 2024-04-01 22:26:25 UTC| 1712010385 |
| 4 | 2024-04-01 22:26:50 UTC | h6p2ylt679me5pcz | click | example.com | 1712010385 | 2024-04-01 22:26:25 UTC| 1712010385 |
| 5 | 2024-04-01 22:27:02 UTC | h6p2ylt679me5pcz | session start | example2.com | 2939469694 | 2024-04-01 22:27:02 UTC| 1712010385 |
| 6 | 2024-04-01 22:27:14 UTC | h6p2ylt679me5pcz | view | example2.com | 2939469694 | 2024-04-01 22:27:02 UTC| 2939469694 |
| 7 | 2024-04-01 22:27:50 UTC | h6p2ylt679me5pcz | click | example2.com | 2939469694 | 2024-04-01 22:27:02 UTC| 2939469694 |
Why the last two rows ga_session_id_fix
column doesn’t return the session id of the first session.