1

I have table that stores payment changed records. So each time the payment method is changed the payment used and date is stored. The data comes in bulk, but i only grab the first date the new payment was used.

CREATE TABLE #payments 
(
    pay_ID uniqueidentifier, 
    pay_type int, 
    pay_account varchar(max), 
    pay_routing varchar(max),  
    pay_date datetime
);

DECLARE @payID uniqueidentifier = newid();

--Actual payments made
INSERT INTO #payments (pay_ID, pay_type, pay_account, pay_routing, pay_date) VALUES
(@payID, 1, 'e121', '0101', '09/18/2020'),
(@payID, 1, 'e121', '0101', '09/19/2020'),
(@payID, 1, 'e121', '0101', '09/20/2020'),
(@payID, 2, 'e122', '0102', '09/21/2020'),
(@payID, 2, 'e122', '0102', '09/22/2020'),
(@payID, 1, 'e121', '0101', '09/23/2020'),
(@payID, 1, 'e121', '0101', '09/24/2020'),
(@payID, 1, 'e121', '0101', '09/25/2020'),
(@payID, 2, 'e122', '0102', '09/26/2020'),
(@payID, 2, 'e122', '0102', '09/27/2020'),
(@payID, 3, 'e123', '0103', '09/28/2020'),
(@payID, 1, 'e121', '0101', '09/29/2020'),
(@payID, 1, 'e121', '0101', '09/30/2020'),
(@payID, 1, 'e121', '0101', '10/01/2020'),
(@payID, 1, 'e121', '0101', '10/02/2020')


SELECT * 
FROM #payments 
ORDER BY pay_ID ASC, pay_date ASC;

This code can be used to create a set for each payment changed, but I'm not sure how I can get the start and end dates with this.

SELECT
    p.*
FROM
    (SELECT
         p.*,
         LAG(pay_date) OVER (PARTITION BY pay_id, ORDER BY pay_date) AS prev_pd,
         LAG(pay_date) OVER (PARTITION BY pay_id, pay_account, pay_type, pay_routing ORDER BY pay_date) AS prev_pd_grp
     FROM
         #payments p) p
WHERE 
    prev_pd_grp IS NULL OR prev_pd_grp <> prev_pd

The desired result would be so that the first payment has a stamp for the start and end date for each section that the payment was changed.

ID                                  PayType account routing       CreatedDate       start   end
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-18 00:00:00.000 2020-09-18 00:00:00.000 2020-09-20 00:00:00.000 
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-19 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-20 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    2   e122    0102    2020-09-21 00:00:00.000 2020-09-21 00:00:00.000 2020-09-22 00:00:00.000 
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    2   e122    0102    2020-09-22 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-23 00:00:00.000 2020-09-23 00:00:00.000 2020-09-25 00:00:00.000
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-24 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-25 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    2   e122    0102    2020-09-26 00:00:00.000 2020-09-26 00:00:00.000 2020-09-27 00:00:00.000
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    2   e122    0102    2020-09-27 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    3   e123    0103    2020-09-28 00:00:00.000 2020-09-28 00:00:00.000 2020-09-28 00:00:00.000 
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-29 00:00:00.000 2020-09-29 00:00:00.000 2020-10-02 00:00:00.000 
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-09-30 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-10-01 00:00:00.000 NULL    NULL
FB4FE2A7-3609-4E35-AFB9-908B2D3072E9    1   e121    0101    2020-10-02 00:00:00.000 NULL    NULL
GMB
  • 216,147
  • 25
  • 84
  • 135
Going-gone
  • 282
  • 1
  • 14

1 Answers1

1

This is a gaps-and-island problem. Here is an approach that uses the difference between row numbers to identify the groups. You can then use row_number() again in the outer query to identify the first record per group, and a window min() and max() to exhibit the corresponding date range:

select pay_id, pay_type, pay_account, pay_routing, pay_date,
    case when row_number() over(partition by pay_id, pay_type, rn1 - rn2 order by pay_date) = 1
        then min(pay_date) over(partition by pay_id, pay_type, rn1 - rn2)
    end as pay_date_start,
    case when row_number() over(partition by pay_id, pay_type, rn1 - rn2 order by pay_date) = 1
        then max(pay_date) over(partition by pay_id, pay_type, rn1 - rn2)
    end as pay_date_end
from (
    select p.*,
        row_number() over(partition by pay_id order by pay_date) rn1,
        row_number() over(partition by pay_id, pay_type order by pay_date) rn2
    from #payments p
) p
order by pay_id, pay_date

Demo on DB Fiddle:

pay_id                               | pay_type | pay_account | pay_routing | pay_date                | pay_date_start          | pay_date_end           
:----------------------------------- | -------: | :---------- | :---------- | :---------------------- | :---------------------- | :----------------------
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-18 00:00:00.000 | 2020-09-18 00:00:00.000 | 2020-09-20 00:00:00.000
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-19 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-20 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        2 | e122        | 0102        | 2020-09-21 00:00:00.000 | 2020-09-21 00:00:00.000 | 2020-09-22 00:00:00.000
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        2 | e122        | 0102        | 2020-09-22 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-23 00:00:00.000 | 2020-09-23 00:00:00.000 | 2020-09-25 00:00:00.000
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-24 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-25 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        2 | e122        | 0102        | 2020-09-26 00:00:00.000 | 2020-09-26 00:00:00.000 | 2020-09-27 00:00:00.000
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        2 | e122        | 0102        | 2020-09-27 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        3 | e123        | 0103        | 2020-09-28 00:00:00.000 | 2020-09-28 00:00:00.000 | 2020-09-28 00:00:00.000
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-29 00:00:00.000 | 2020-09-29 00:00:00.000 | 2020-10-02 00:00:00.000
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-09-30 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-10-01 00:00:00.000 | null                    | null                   
2c1a463f-198b-41bd-a1a4-30aafda21d4f |        1 | e121        | 0101        | 2020-10-02 00:00:00.000 | null                    | null                   
GMB
  • 216,147
  • 25
  • 84
  • 135
  • 1
    @Going-gone: yes, it does. `pay_id` is in the partition already. – GMB Sep 23 '20 at 19:55
  • How does that rn1-rn2 work? Confused how it determines the row number as 1 per group like that. rn1- rn2 for row 1 would be 0. row_number() over(partition by pay_id, pay_type, rn1 - rn2 order by pay_date) = 1 – Going-gone Sep 24 '20 at 02:01