I have created a SQL query which will show the current sales and sales from previous year with the LAG window function, this per customer, product, year and month. A product is not always sold each year and I know that LAG does not care about missing years, therefore the CASE statement, based on this stackoverflow post: LAG function alternative. I need the results for the missing year in between
Now the problem is that on detailed level the previous year is correct with the LAG function but when it is grouped on year it does not match anymore (See table results below)
These are my tables:
DIM_CUSTOMER
DECLARE @DIM_CUSTOMERS TABLE([BusinessKey] INT,[Customer] NVARCHAR(255))
INSERT INTO @DIM_CUSTOMERS
VALUES
(10000, 'Kevin N.V.'),
(10001, 'V.Z.W. Frederik'),
(10002, 'Klaas N.V.')
SELECT * FROM @DIM_CUSTOMERS
DIM_PRODUCTS
INSERT INTO @DIM_PRODUCTS
VALUES
(9000, 'PH114'),
(9001, 'PH272'),
(9002, 'PH878'),
(9003, 'PH900')
SELECT * FROM @DIM_PRODUCTS
DIM_DATES
DECLARE @DIM_DATES TABLE([BusinessKey] INT, [Year] INT, [Month] INT, [YearMonth] INT, [YearMonthText] NVARCHAR(20))
INSERT INTO @DIM_DATES
VALUES
(202201, 2022, 1, 202201, '2022.01'),
(202202, 2022, 2, 202202, '2022.02'),
(202203, 2022, 3, 202203, '2022.03'),
(202204, 2022, 4, 202204, '2022.04'),
(202205, 2022, 5, 202205, '2022.05'),
(202206, 2022, 6, 202206, '2022.06'),
(202207, 2022, 7, 202207, '2022.07'),
(202208, 2022, 8, 202208, '2022.08'),
(202209, 2022, 9, 202209, '2022.09'),
(202210, 2022, 10, 202210, '2022.10'),
(202211, 2022, 11, 202211, '2022.11'),
(202212, 2022, 12, 202212, '2022.12'),
(202301, 2023, 1, 202301, '2023.01'),
(202302, 2023, 2, 202302, '2023.02'),
(202303, 2023, 3, 202303, '2023.03'),
(202304, 2023, 4, 202304, '2023.04'),
(202305, 2023, 5, 202305, '2023.05'),
(202306, 2023, 6, 202306, '2023.06'),
(202307, 2023, 7, 202307, '2023.07'),
(202308, 2023, 8, 202308, '2023.08'),
(202309, 2023, 9, 202309, '2023.09'),
(202310, 2023, 10, 202310, '2023.10'),
(202311, 2023, 11, 202311, '2023.11'),
(202312, 2023, 12, 202312, '2023.12'),
(202401, 2024, 1, 202401, '2024.01'),
(202402, 2024, 2, 202402, '2024.02'),
(202403, 2024, 3, 202403, '2024.03'),
(202404, 2024, 4, 202404, '2024.04'),
(202405, 2024, 5, 202405, '2024.05'),
(202406, 2024, 6, 202406, '2024.06'),
(202407, 2024, 7, 202407, '2024.07'),
(202408, 2024, 8, 202408, '2024.08'),
(202409, 2024, 9, 202409, '2024.09'),
(202410, 2024, 10, 202410, '2024.10'),
(202411, 2024, 11, 202411, '2024.11'),
(202412, 2024, 12, 202412, '2024.12')
SELECT * FROM @DIM_DATES
FACT_SALES
DECLARE @FACT_SALES TABLE([ID] INT, [FK_Product] INT, [FK_Customer] INT, [FK_Date] INT, [Sales] FLOAT)
INSERT INTO @FACT_SALES
VALUES
(1, 9000, 10000, 202303, 90.48),
(2, 9000, 10000, 202304, 20.40),
(3, 9002, 10000, 202305, 250.85),
(4, 9002, 10000, 202303, 100.50),
(5, 9000, 10000, 202403, 38.40),
(6, 9000, 10000, 202406, 474.50),
(7, 9001, 10000, 202403, 128.60),
(8, 9001, 10000, 202404, 144.97),
(9, 9000, 10002, 202303, 199.60),
(10, 9001, 10002, 202302, 58.97),
(11, 9001, 10002, 202402, 40.88),
(12, 9001, 10000, 202203, 14.5)
SELECT * FROM @FACT_SALES
MY_ATTEMPT
;WITH CustProdYears
AS(
SELECT DISTINCT
d.[Year] as SaleYear, s.FK_Product, s.FK_Customer
FROM @FACT_SALES s
JOIN @DIM_DATES d on s.FK_Date = d.BusinessKey
)
, CustomerSales
AS (
SELECT cpy.FK_Customer, cpy.FK_Product, d.YearMonthText, s.[Sales], d.year, d.month
FROM CustProdYears cpy
JOIN @DIM_DATES d on cpy.[SaleYear] = d.[Year]
LEFT JOIN @FACT_SALES s
on s.FK_Customer = cpy.FK_Customer
and s.FK_Product = cpy.FK_Product
and s.FK_Date = d.BusinessKey
)
SELECT b.Year,
SUM(b.[Sales]) AS [Sales],
SUM(b.[SalesLastYear]) AS [SalesLastYear]
FROM
(
SELECT *,
LAG(a.year, 1, 0) OVER (PARTITION BY a.Customer, a.Product, a.month ORDER BY a.year) AS [PreviousYear],
CASE WHEN 1 = a.year - LAG(a.year, 1, 0) OVER (PARTITION BY a.Customer, a.Product, a.month ORDER BY a.year)
THEN LAG(a.[Sales], 1, 0) OVER (PARTITION BY a.Customer, a.Product, a.month ORDER BY a.year)
ELSE 0 END AS [SalesLastYear]
FROM
(
SELECT
Customer, Product, Year, Month
, [Sales] = ISNULL([Sales], 0)
FROM @DIM_CUSTOMERS c
LEFT JOIN CustomerSales s ON c.BusinessKey = s.FK_Customer
LEFT JOIN @DIM_PRODUCTS p on s.FK_Product = p.BusinessKey
WHERE Customer = 'Kevin N.V.'
) a
--ORDER BY a.Customer, a.Product, a.year, a.month
) b
GROUP BY b.Year
ORDER BY b.Year
My result:
Year | Sales | SalesLastYear |
---|---|---|
2022 | 14,5 | 0 |
2023 | 462,23 | 0 |
2024 | 786,47 | 110,88 |
Expected result:
Year | Sales | SalesLastYear |
---|---|---|
2022 | 14,5 | 0 |
2023 | 462,23 | 0 |
2024 | 786,47 | 462,23 |
Is there a way to fix this issue?