I have created this query to grab data from all purchase events from GA4. It shows each product(s) for each order, and has the correct data. However, it looks like it is querying each individual product data 15 times, so there are lots of duplicates. I have been stuck on this. I tried using distinct so there cannot be duplicate item_id’s in a purchase, but get the error: Aggregate functions with DISTINCT cannot be used with arguments of type STRUCT. Any help would be much appreciated. Code is below.
WITH
_source AS (
SELECT
PARSE_DATE('%Y%m%d', event_date) AS event_date,
TIMESTAMP_MICROS(event_timestamp) AS event_ts,
MAX(CASE WHEN params.key = 'ga_session_id' THEN params.value.int_value ELSE NULL END) OVER (PARTITION BY event_timestamp, user_pseudo_id) AS ga_session_id,
user_id,
user_pseudo_id,
TIMESTAMP_MICROS(user_first_touch_timestamp) AS user_first_touch_ts,
event_name,
params.key AS param_key,
params.value.string_value AS params_string_value,
params.value.float_value AS params_float_value,
params.value.double_value AS params_double_value,
-- Traffic columns
traffic_source.name AS channel,
traffic_source.medium AS utm_medium,
traffic_source.source AS utm_source,
-- Add parameters
MAX(CASE WHEN params.key = 'page_title' THEN params.value.string_value ELSE NULL END) OVER (PARTITION BY event_timestamp, user_pseudo_id) AS page_title,
MAX(CASE WHEN params.key = 'page_location' THEN params.value.string_value ELSE NULL END) OVER (PARTITION BY event_timestamp, user_pseudo_id) AS page_location,
MAX(CASE WHEN params.key = 'page_referrer' THEN params.value.string_value ELSE NULL END) OVER (PARTITION BY event_timestamp, user_pseudo_id) AS page_referrer,
MAX(CASE WHEN params.key = 'value' THEN params.value.double_value ELSE NULL END) OVER (PARTITION BY event_timestamp, user_pseudo_id) AS ecomm_value,
MAX(CASE WHEN params.key = 'transaction_id' THEN params.value.string_value ELSE NULL END) OVER (PARTITION BY event_timestamp, user_pseudo_id) AS ecomm_id,
MAX(CASE WHEN params.key = 'search_term' THEN params.value.string_value ELSE NULL END) OVER (PARTITION BY event_timestamp, user_pseudo_id) AS search_term,
-- Ecommerce columns
ecommerce.total_item_quantity as quantity,
ecommerce.purchase_revenue_in_usd as usd_revenue,
ecommerce.purchase_revenue as revenue,
ecommerce.refund_value_in_usd as usd_refund,
ecommerce.refund_value as refund_value,
ecommerce.shipping_value_in_usd as usd_shipping,
ecommerce.shipping_value as shipping_value,
ecommerce.tax_value_in_usd as usd_tax,
ecommerce.tax_value as tax_value,
ecommerce.unique_items as unique_items,
-- Ecommerce Items columns
items.item_id,
items.item_name,
items.item_brand,
items.item_variant,
items.item_category,
items.item_category2,
items.item_category3,
items.item_category4,
items.item_category5,
items.quantity as item_quantity,
items.price,
items.item_revenue,
items.item_refund,
items.price_in_usd,
items.item_revenue_in_usd,
items.item_refund_in_usd,
items.coupon,
items.affiliation,
items.location_id,
items.item_list_id,
items.item_list_name,
items.item_list_index,
items.promotion_id,
items.promotion_name,
items.creative_name,
items.creative_slot,
-- Geological + device columns
geo.continent AS continent,
geo.country AS country,
geo.region AS region,
geo.city AS city,
device.web_info.hostname AS domain,
device.category AS device_category,
device.mobile_brand_name AS device_brand,
device.mobile_model_name AS device_model,
device.operating_system AS device_os,
device.operating_system_version AS device_os_version,
device.language AS device_language,
device.is_limited_ad_tracking AS device_is_limited_ad_tracking,
platform AS platform,
device.web_info.browser AS browser,
device.web_info.browser_version AS browser_version,
dataset_name
FROM (
SELECT *, 'hidden' AS dataset_name FROM `hidden`
WHERE PARSE_DATE('%Y%m%d',_TABLE_SUFFIX) > DATE_ADD(CURRENT_DATE(), INTERVAL -3 DAY)
UNION ALL
SELECT *, 'hidden' AS dataset_name FROM `hidden`
WHERE PARSE_DATE('%Y%m%d',_TABLE_SUFFIX) > DATE_ADD(CURRENT_DATE(), INTERVAL -3 DAY)
) AS events
LEFT JOIN UNNEST (items) AS items
LEFT JOIN UNNEST(event_params) AS params
),
event_aggregated AS (
SELECT
event_date,
event_ts,
ga_session_id,
user_pseudo_id,
user_first_touch_ts,
event_name,
MAX(dataset_name) as dataset_name,
MAX(channel) AS channel,
MAX(utm_medium) AS utm_medium,
MAX(utm_source) AS utm_source,
MAX(page_title) AS page_title,
MAX(page_location) AS page_location,
MAX(page_referrer) AS page_referrer,
MAX(continent) AS continent,
MAX(country) AS country,
MAX(region) AS region,
MAX(city) AS city,
MAX(domain) AS domain,
MAX(device_category) AS device_category,
MAX(device_brand) AS device_brand,
MAX(device_model) AS device_model,
MAX(device_os) AS device_os,
MAX(device_os_version) AS device_os_version,
MAX(device_language) AS device_language,
MAX(device_is_limited_ad_tracking) AS device_is_limited_ad_tracking,
MAX(platform) AS platform,
MAX(browser) AS browser,
MAX(browser_version) AS browser_version,
MAX(ecomm_value) AS ecomm_value,
MAX(ecomm_id) AS ecomm_id,
MAX(usd_revenue) AS usd_revenue,
MAX(revenue) AS revenue,
MAX(quantity) AS quantity,
MAX(usd_refund) AS usd_refund,
MAX(refund_value) AS refund_value,
MAX(usd_shipping) AS usd_shipping,
MAX(shipping_value) AS shipping_value,
MAX(usd_tax) AS usd_tax,
MAX(tax_value) AS tax_value,
MAX(unique_items) AS unique_items,
MAX(search_term) AS search_term,
ARRAY_AGG(STRUCT(
item_id,
item_name,
item_brand,
item_variant,
item_category,
item_category2,
item_category3,
item_category4,
item_category5,
item_quantity,
price,
item_revenue,
item_refund,
price_in_usd,
item_revenue_in_usd,
item_refund_in_usd,
coupon, affiliation,
location_id,
item_list_id,
item_list_name,
item_list_index,
promotion_id,
promotion_name,
creative_name,
creative_slot)) AS items_info
FROM _source
WHERE event_name = 'purchase'
GROUP BY 1, 2, 3, 4, 5, 6
)
SELECT * FROM event_aggregated
ORDER BY event_date DESC, ga_session_id, event_ts ASC
I tried using distinct, but get the error: Aggregate functions with DISTINCT cannot be used with arguments of type STRUCT
Luke Bright is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.