I have two queries of which I need to create one, unified without any filters applied on them. The code for each is as below
The query1, which consist of 5400 rows
SELECT
CAST(CustInvoice.InvoiceDate AS DATE) AS Date,
CAST(CustInvoice.InvoiceDate AS DATE) AS InvoiceDate,
CustInvoice.InvoiceId AS DocumentNo,
Sales.SalesId AS SalesID,
<rest of the columns>
FROM erpschema.CustInvoiceTrans AS CustInvoice
LEFT JOIN erpschema.SalesTable AS Sales ON CustInvoice.SalesId = Sales.SalesId
LEFT JOIN erpschema.RetailChannelTable AS RetailChannel ON Sales.RetailChannelTable = RetailChannel.RECID
LEFT JOIN erpschema.DirPartyTable AS DirParty ON RetailChannel.OMOPERATINGUNITID = DirParty.RECID
AND DirParty.LanguageId = 'en-US'
LEFT JOIN erpschema.SRSAnalysisEnums AS SRSAnalysisEnums ON RetailChannel.ChannelType = SRSAnalysisEnums.EnumItemValue
AND SRSAnalysisEnums.EnumName LIKE 'RetailChannelType'
LEFT JOIN v_FirstTerminalPerSalesOrderID AS v_FirstTerminal ON v_FirstTerminal.salesorderid = CustInvoice.salesId
LEFT JOIN erpschema.InventItemGroupItem AS InventItem ON CustInvoice.ITEMID = InventItem.ITEMID
AND CustInvoice.DATAAREAID = InventItem.ITEMDATAAREAID
LEFT JOIN erpschema.InventTable AS Invent ON CustInvoice.ITEMID = Invent.ITEMID
AND CustInvoice.DATAAREAID = Invent.DATAAREAID
LEFT JOIN erpschema.EcoResProduct AS EcoResProduct ON Invent.PRODUCT = EcoResProduct.RECID
LEFT JOIN erpschema.SRSAnalysisEnums AS SRSAnalysisEnums2 ON EcoResProduct.PRODUCTTYPE = SRSAnalysisEnums2.ENUMITEMVALUE
AND SRSAnalysisEnums2.ENUMNAME = 'EcoResProductType'
LEFT JOIN erpschema.InventTransOrigin AS InventTransOrg ON CustInvoice.InventTransID = InventTransOrg.InventTransID
LEFT JOIN view_COGS_CustInvoiceTrans AS COGSCustInvoice ON InventTransOrg.RECID = COGSCustInvoice.INVENTTRANSORIGIN
LEFT JOIN core.DataFeed_DimDefaultFinancialDimensions AS DefaultFinDim ON CustInvoice.DefaultDimension = DefaultFinDim.DefaultDimensionRecID
LEFT JOIN view_latestSalesGroup AS lstSalesGrp ON CustInvoice.SalesId = lstSalesGrp.salesOrderId
LEFT JOIN erpschema.RetailTransactionSalesTrans AS RetailTransSalesTrans ON lstSalesGrp.recid = RetailTransSalesTrans.RECID
LEFT JOIN erpschema.CommissionSalesRep AS CommSalesRep ON RetailTransSalesTrans.SalesGroup = CommSalesRep.GroupId
LEFT JOIN erpschema.DirPartyTable AS DirParty2 ON CommSalesRep.Person = DirParty2.RECID
LEFT JOIN erpschema.HcmWorker AS HcmWorker ON CommSalesRep.Person = HcmWorker.Person
LEFT JOIN view_salesWithoutSalesGroup AS salesWoSalesGrp ON salesWoSalesGrp.salesOrderId = CustInvoice.SalesId
LEFT JOIN erpschema.HcmWorker AS HcmWorker2 ON salesWoSalesGrp.staffid = HcmWorker2.PersonnelNumber
LEFT JOIN erpschema.DirPartyTable AS DirParty3 ON HcmWorker2.Person = DirParty3.RECID
LEFT JOIN erpschema.HcmWorker AS HcmWorker3 ON Sales.WorkerSalesResponsible = HcmWorker3.RECID
LEFT JOIN erpschema.DirPartyTable AS DirParty4 ON HcmWorker3.Person = DirParty4.RECID
LEFT JOIN erpschema.CompanyInfo AS CompanyInfo ON CustInvoice.DataAreaId = CompanyInfo.DataArea
LEFT JOIN erpschema.Ledger AS Ledger ON CompanyInfo.RecId = Ledger.PrimaryForLegalEntity
+ FILTERS with WHERE clause
Second query, which consist of 6000 rows
SELECT
T1.SalesId AS SalesOrderId,
T1.InvoiceId AS SalesInvoiceId,
date_format(T1.InvoiceDate, 'yyyyMMdd') AS Day,
<rest of the columns>
FROM erpschema.CustInvoiceTrans T1
LEFT OUTER JOIN erpschema.DimensionAttributeValueSet T2
ON T1.DefaultDimension = T2.RecId
LEFT JOIN erpschema.DimensionFinancialTag T3
ON T2.CLIENTTYPE = T3.RECID
LEFT JOIN erpschema.CompanyInfo T4
ON T1.DataAreaId = T4.DataArea
LEFT JOIN erpschema.Ledger T5
ON T4.RecId = T5.PrimaryForLegalEntity
LEFT JOIN erpschema.InventTable T6
ON T1.ItemId = T6.ItemId AND T1.DataAreaId = T6.DataAreaId AND T1.PARTITION = T6.PARTITION
LEFT JOIN erpschema.InventDim T7
ON T1.InventDimId = T7.InventDimId AND T1.DataAreaId = T7.DataAreaId AND T1.PARTITION = T7.PARTITION
LEFT JOIN erpschema.InventLocation T8
ON T7.InventLocationId = T8.InventLocationId AND T7.DataAreaId = T8.DataAreaId AND T7.PARTITION = T8.PARTITION
LEFT JOIN erpschema.CustInvoiceJour T9
ON T1.InvoiceId = T9.InvoiceId
AND T1.ParentRecId = T9.RECID
AND T1.DataAreaId = T9.DataAreaId
LEFT JOIN erpschema.CustTable T10
ON T9.InvoiceAccount = T10.AccountNum
AND T9.DataAreaId = T10.DataAreaId
LEFT JOIN erpschema.LogisticsPostalAddress T11
ON T9.InvoicePostalAddress = T11.RECID
LEFT JOIN erpschema.LogisticsAddressCountryRegionTranslation T12
ON T11.CountryRegionId = T12.CountryRegionId AND T12.LanguageId = 'en-US'
+ FILTERS with WHERE clause
As you can see, they share lot of tables. Because of filter applied, there are rows which are available in query1 but not query2 and vice versa,
What I’ve done so far is to create unified view on two ways, and here is the result
way1:
SELECT
CustInvoice.RecId AS SalesInvoiceLine_KEY,
CustInvoice.SalesId AS SalesOrderId,--same in both queries
CustInvoice.InvoiceId AS SalesInvoiceId, --same in both queries
date_format(CustInvoice.InvoiceDate, 'yyyyMMdd') AS Date, --same in both queries, converted to INT like 2022-01-01 to 20220101
FROM erpschema.CustInvoiceTrans AS CustInvoice
LEFT JOIN erpschema.DimensionAttributeValueSet AS DimAttrValueSet ON CustInvoice.DefaultDimension = DimAttrValueSet.RecId
LEFT JOIN erpschema.DimensionFinancialTag AS DimFinTag ON DimAttrValueSet.CLIENTTYPE = DimFinTag.RECID
LEFT JOIN erpschema.CompanyInfo AS CompanyInfo ON CustInvoice.DataAreaId = CompanyInfo.DataArea
LEFT JOIN erpschema.Ledger AS Ledger ON CompanyInfo.RecId = Ledger.PrimaryForLegalEntity
LEFT JOIN erpschema.InventTable AS InventTable ON CustInvoice.ItemId = InventTable.ItemId AND CustInvoice.DataAreaId = InventTable.DataAreaId AND CustInvoice.PARTITION = InventTable.PARTITION
LEFT JOIN erpschema.InventDim AS InventDim ON CustInvoice.InventDimId = InventDim.InventDimId AND CustInvoice.DataAreaId = InventDim.DataAreaId AND CustInvoice.PARTITION = InventDim.PARTITION
LEFT JOIN erpschema.InventLocation AS InventLocation ON InventDim.InventLocationId = InventLocation.InventLocationId AND InventDim.DataAreaId = InventLocation.DataAreaId AND InventDim.PARTITION = InventLocation.PARTITION
LEFT JOIN erpschema.CustInvoiceJour AS CustInvoiceJour ON CustInvoice.InvoiceId = CustInvoiceJour.InvoiceId AND CustInvoice.ParentRecId = CustInvoiceJour.RECID AND CustInvoice.DataAreaId = CustInvoiceJour.DataAreaId
LEFT JOIN erpschema.CustTable AS CustTable ON CustInvoiceJour.InvoiceAccount = CustTable.AccountNum AND CustInvoiceJour.DataAreaId = CustTable.DataAreaId
LEFT JOIN erpschema.LogisticsPostalAddress AS LogPostalAddress ON CustInvoiceJour.InvoicePostalAddress = LogPostalAddress.RECID
LEFT JOIN erpschema.LogisticsAddressCountryRegionTranslation AS LogAddressCountryTrans ON LogPostalAddress.CountryRegionId = LogAddressCountryTrans.CountryRegionId AND LogAddressCountryTrans.LanguageId = 'en-US'
LEFT JOIN erpschema.SalesTable AS Sales ON CustInvoice.SalesId = Sales.SalesId
LEFT JOIN erpschema.RetailChannelTable AS RetailChannel ON Sales.RetailChannelTable = RetailChannel.RECID
LEFT JOIN erpschema.DirPartyTable AS DirParty ON RetailChannel.OMOPERATINGUNITID = DirParty.RECID AND DirParty.LanguageId = 'en-US'
LEFT JOIN erpschema.SRSAnalysisEnums AS SRSAnalysisEnums ON RetailChannel.ChannelType = SRSAnalysisEnums.EnumItemValue AND SRSAnalysisEnums.EnumName LIKE 'RetailChannelType'
LEFT JOIN erpschema.InventItemGroupItem AS InventItem ON CustInvoice.ItemId = InventItem.ItemId AND CustInvoice.DataAreaId = InventItem.ItemDataAreaId
LEFT JOIN erpschema.EcoResProduct AS EcoResProduct ON InventTable.PRODUCT = EcoResProduct.RECID
LEFT JOIN erpschema.SRSAnalysisEnums AS SRSAnalysisEnums2 ON EcoResProduct.PRODUCTTYPE = SRSAnalysisEnums2.ENUMITEMVALUE AND SRSAnalysisEnums2.ENUMNAME = 'EcoResProductType'
LEFT JOIN erpschema.InventTransOrigin AS InventTransOrg ON CustInvoice.InventTransID = InventTransOrg.InventTransID
LEFT JOIN v_COGS_CustInvoiceTrans AS COGSCustInvoice ON InventTransOrg.RECID = COGSCustInvoice.INVENTTRANSORIGIN
LEFT JOIN udm.DimDefaultFinancialDimensions AS DefaultFinDim ON CustInvoice.DefaultDimension = DefaultFinDim.DefaultDimensionRecID
LEFT JOIN v_FirstTerminalPerSalesOrderID AS v_FirstTerminal ON v_FirstTerminal.salesorderid = CustInvoice.salesId
LEFT JOIN v_latestSalesGroup AS lstSalesGrp ON CustInvoice.SalesId = lstSalesGrp.salesOrderId
LEFT JOIN erpschema.RetailTransactionSalesTrans AS RetailTransSalesTrans ON lstSalesGrp.recid = RetailTransSalesTrans.RECID
LEFT JOIN erpschema.CommissionSalesRep AS CommSalesRep ON RetailTransSalesTrans.SalesGroup = CommSalesRep.GroupId
LEFT JOIN erpschema.DirPartyTable AS DirParty2 ON CommSalesRep.Person = DirParty2.RECID
LEFT JOIN erpschema.HcmWorker AS HcmWorker ON CommSalesRep.Person = HcmWorker.Person
LEFT JOIN v_salesWithoutSalesGroup AS salesWoSalesGrp ON salesWoSalesGrp.salesOrderId = CustInvoice.SalesId
LEFT JOIN erpschema.HcmWorker AS HcmWorker2 ON salesWoSalesGrp.staffid = HcmWorker2.PersonnelNumber
LEFT JOIN erpschema.DirPartyTable AS DirParty3 ON HcmWorker2.Person = DirParty3.RECID
LEFT JOIN erpschema.HcmWorker AS HcmWorker3 ON Sales.WorkerSalesResponsible = HcmWorker3.RECID
LEFT JOIN erpschema.DirPartyTable AS DirParty4 ON HcmWorker3.Person = DirParty4.RECID
way2:
`SELECT
t1.*, t2.*
FROM query1 t1
FULL OUTER JOIN query2 t2
ON t1.SalesID = t2.SalesOrderId
AND t1.DocumentNo = t2.SalesInvoiceId
AND CAST(date_format(t1.InvoiceDate, 'yyyyMMdd') AS INT) = t2.Date;`
I am not sure which way is better and which is more accurate (I keen to 1st method and would prefer that) but I am not sure if its properly created, and whats more important – how to check if it is accurate to be honest.
Any ideas in how to merge two queries into one and how to test the accuracy would be highly appreciated