Purpose
Determine data quality issues within the RELATIONSHIP_MANAGER_VW (RM) and RATING_VW (R).
1. Data quality issues
1.1 Overlapping Dates per Account (RM)
Example:
WITH Overlaps AS ( SELECT A."ACCOUNT ID", A."USER" AS "User_A", B."USER" AS "User_B", A."STARTDATE" AS "Start_A", COALESCE(A."ENDDATE", CURRENT_DATE) AS "End_A", B."STARTDATE" AS "Start_B", COALESCE(B."ENDDATE", CURRENT_DATE) AS "End_B", GREATEST(A."STARTDATE", B."STARTDATE") AS "Overlap_Start", LEAST(COALESCE(COALESCE(A."ENDDATE", CURRENT_DATE), CURRENT_DATE), COALESCE(COALESCE(B."ENDDATE", CURRENT_DATE), CURRENT_DATE)) AS "Overlap_End" FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RELATIONSHIP_MANAGER_VW A JOIN SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RELATIONSHIP_MANAGER_VW B ON A."ACCOUNT ID" = B."ACCOUNT ID" -- Same account AND A."USER" <= B."USER" -- Prevent double-checking AND A."PRIMARY_MANAGER" = 'Primary Manager' AND B."PRIMARY_MANAGER" = 'Primary Manager' AND ( (A."STARTDATE" < COALESCE(B."ENDDATE", CURRENT_DATE) AND COALESCE(A."ENDDATE", CURRENT_DATE) > B."STARTDATE") ) ) SELECT "ACCOUNT ID", "User_A" AS "User Name", "Start_A" AS "STARTDATE", "End_A" AS "End Date Updated", "User_B" AS "Overlapping User", "Start_B" AS "Overlapping Start Date", "End_B" AS "Overlapping End Date", "Overlap_Start", "Overlap_End", DATEDIFF(day, "Overlap_Start", "Overlap_End") AS "Days of Overlap" -- Calculate the number of overlap days FROM Overlaps WHERE "User_A" <> "User_B";
Output:
Provides Overlapping User, Overlapping Start/ End date per Account ID.
Provides no. of days overlap.
1.2 Multiple Active Managers (RM)
Example:
WITH ActivePrimaryManagers AS ( SELECT A."ACCOUNT ID", A."USER" AS "User_A", B."USER" AS "User_B", A."STARTDATE" AS "Start_A", A."End Date Updated" AS "End_A", B."STARTDATE" AS "Start_B", B."End Date Updated" AS "End_B", GREATEST(A."STARTDATE", B."STARTDATE") AS "Overlap_Start", LEAST(A."End Date Updated", B."End Date Updated") AS "Overlap_End", DATEDIFF(day, GREATEST(A."STARTDATE", B."STARTDATE"), LEAST(A."End Date Updated", B."End Date Updated")) AS "Days of Overlap" FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RELATIONSHIP_MANAGER_VW_1 A JOIN SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RELATIONSHIP_MANAGER_VW_1 B ON A."ACCOUNT ID" = B."ACCOUNT ID" -- Same account AND A."USER" < B."USER" -- Prevent double-checking and avoid self-join AND A."ROLE" = 'Primary Manager' -- Only consider Primary Manager role AND B."ROLE" = 'Primary Manager' -- Only consider Primary Manager role AND A."ACTIVE" = 'True' -- Only consider active Primary Managers AND B."ACTIVE" = 'True' -- Only consider active Primary Managers AND ( (A."STARTDATE" <= B."End Date Updated" AND A."End Date Updated" >= B."STARTDATE") ) ) SELECT "ACCOUNT ID", "User_A" AS "Primary Manager 1", "User_B" AS "Primary Manager 2", "Overlap_Start" AS "Overlap Start Date", "Overlap_End" AS "Overlap End Date", "Days of Overlap" FROM ActivePrimaryManagers WHERE "Days of Overlap" > 0;
Output:
Provides overlapping Primary Managers IDs, overlapping dates and no. of days overlap.
1.3 Multiple overall active ratings (R)
Example:
WITH ActiveReadiness AS ( SELECT A."Account ID", A."Readiness" AS "Readiness_A", B."Readiness" AS "Readiness_B", A."Date of Review" AS "Date_A", B."Date of Review" AS "Date_B", DATEDIFF(day, A."Date of Review", B."Date of Review") AS "Days_Between" FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RATING_VW_TEST1 A JOIN SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RATING_VW_TEST1 B ON A."Account ID" = B."Account ID" -- Same account AND A."Readiness" <> B."Readiness" -- Different readiness states AND A."Active" = 'True' -- Only consider active readiness AND B."Active" = 'True' AND YEAR(A."Date of Review") >= 2021 -- Filter for Date of Review Year >= 2021 AND YEAR(B."Date of Review") >= 2021 AND A."Record Type" = 'Overall Account Rating' -- Filter for Record Type = 'Overall Account Rating' AND B."Record Type" = 'Overall Account Rating' AND ( (A."Date of Review" <= B."Date of Review") ) ) SELECT "Account ID", "Readiness_A" AS "Readiness 1", "Readiness_B" AS "Readiness 2", "Date_A" AS "Review Date 1", "Date_B" AS "Review Date 2", "Days_Between" AS "Days Between Reviews" FROM ActiveReadiness WHERE "Days_Between" >= 0;
Output:
Provides overlapping active readiness and no. of days overlap per Account ID.
1.4 Readiness with an active overall account rating is not with the most recent ‘Date of Review’ (R)
Example:
WITH OverallActive AS ( SELECT "Account ID", "Readiness", "Date of Review" AS "Overall_Date", "Active", "Record Type" FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RATING_VW_TEST1 WHERE "Record Type" = 'Overall Account Rating' AND "Active" = 'True' ), NonOverallActive AS ( SELECT "Account ID", MAX("Date of Review") AS "Most_Recent_Non_Overall_Date" FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RATING_VW_TEST1 WHERE "Record Type" <> 'Overall Account Rating' AND "Active" = 'True' GROUP BY "Account ID" ) SELECT O."Account ID", O."Readiness" AS "Overall Readiness", O."Overall_Date", N."Most_Recent_Non_Overall_Date", CASE WHEN N."Most_Recent_Non_Overall_Date" > O."Overall_Date" THEN 'More Recent Active Non-Overall Exists' ELSE 'No More Recent Active Non-Overall' END AS "Check Result" FROM OverallActive O LEFT JOIN NonOverallActive N ON O."Account ID" = N."Account ID" WHERE "Check Result" = 'More Recent Active Non-Overall Exists';
Output:
flag ‘More Recent Active Non-Overall Exists’ indicates that there is an active non overall account rating with a more recent Date of Review.
1.5 Accounts with blank readiness between filled readiness (R)
Example:
WITH RankedData AS ( SELECT "Account ID", "Active", "Readiness", "Record Type", "Date of Review", LAG("Readiness") OVER (PARTITION BY "Account ID" ORDER BY "Date of Review") AS PrevReadiness, LEAD("Readiness") OVER (PARTITION BY "Account ID" ORDER BY "Date of Review") AS NextReadiness FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RATING_VW_TEST1 WHERE "Record Type" = 'Overall Account Rating' ) SELECT "Account ID", "Active", "Readiness", "Record Type", "Date of Review", CASE WHEN "Readiness" IS NULL AND PrevReadiness IS NOT NULL AND NextReadiness IS NOT NULL THEN 'Flag' ELSE NULL END AS "Flag" FROM RankedData WHERE "Flag" = 'Flag';
Output:
Provides all accounts with blank readiness between filled readiness.
Note: blank readiness with the same date as filled readiness also gets picked up.
2. Handling data quality issues
2.1 Handling multiple active managers
Rank active primary managers per account based on their start dates
If there are multiple active primary managers - pick the one with the most recent start date
WITH RankedPrimaryManagers AS ( SELECT A."ACCOUNT ID", A."USER" AS "Primary_Manager", A."STARTDATE", A."End Date Updated", ROW_NUMBER() OVER ( PARTITION BY A."ACCOUNT ID" ORDER BY A."STARTDATE" DESC ) AS "Manager_Rank" FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RELATIONSHIP_MANAGER_VW_1 A WHERE A."ROLE" = 'Primary Manager' -- Only consider Primary Manager role AND A."ACTIVE" = 'True' -- Only consider active Primary Managers ) SELECT "ACCOUNT ID", "Primary_Manager", "STARTDATE", "End Date Updated" FROM RankedPrimaryManagers WHERE "Manager_Rank" = 1;
2.2 Handling overlapping primary managers
if primary managers (not necessarily active) have multiple rows with consecutive dates, take the earliest start date per user and the latest end date per that user within an account to get one row per primary manager.
WITH LaggedDates AS ( SELECT "ACCOUNT ID", "USER", PRIMARY_MANAGER "Primary Manager", "ACTIVE", "STARTDATE", "End Date Updated", LAG("End Date Updated") OVER ( PARTITION BY "ACCOUNT ID", "USER" ORDER BY "STARTDATE" ASC NULLS LAST ) AS "Prev_End_Date" FROM SALESFORCE_DB.TDA_JARVIS_DEV_SCH.RELATIONSHIP_MANAGER_VW_1 WHERE "Primary Manager" = 'Primary Manager' -- Only consider Primary Manager role ), GroupedManagers AS ( SELECT "ACCOUNT ID", "USER", "Primary Manager", "ACTIVE", "STARTDATE", "End Date Updated", -- Create a new group when the current STARTDATE is greater than the previous row's End Date Updated SUM( CASE WHEN "STARTDATE" > "Prev_End_Date" THEN 1 ELSE 0 END ) OVER (PARTITION BY "ACCOUNT ID", "USER" ORDER BY "STARTDATE") AS "Group_Flag" FROM LaggedDates ) SELECT "ACCOUNT ID", "USER", MIN("STARTDATE") AS "Consolidated_Start", MAX("End Date Updated") AS "Consolidated_End", "Primary Manager", "ACTIVE" FROM GroupedManagers GROUP BY "ACCOUNT ID", "USER", "Primary Manager", "ACTIVE", "Group_Flag" ORDER BY "ACCOUNT ID", "USER", "Consolidated_Start";;
Example:
Peta and Nalan will both have one row, with start and end dates as 15/02/2019 - 07/04/2021 and 15/02/2019 - 21/12/2022 accordingly.