0

SQL Server

Data:

 Row | Type | Start Date | End Date   | Dollars
------------------------------------------
1    |  1   | 01/01/2017 | 01/05/2017 |  10
2    |  1   | 01/08/2017 | 01/12/2017 |  10
3    |  1   | 01/17/2017 | 01/19/2017 |  10
4    |  1   | 01/28/2017 | 02/10/2017 |  10
5    |  1   | 02/20/2017 | 03/10/2017 |  10
6    |  2   | 10/01/2017 | 10/03/2017 |  10
7    |  2   | 10/20/2017 | 10/23/2017 |  10
8    |  2   | 10/25/2017 | 10/29/2017 |  10

Within types, I need to group consecutive date periods as long as they are separated by less than 7 days, summing the dollars and the total gap days within each group.

Intermediate Table:

 Row | Type | Start Date | End Date   | Dollars | Grouping | GapDays
------------------------------------------
1    |  1   | 01/01/2017 | 01/05/2017 |  10     |  1       |  null
2    |  1   | 01/08/2017 | 01/12/2017 |  10     |  1       |  3
3    |  1   | 01/17/2017 | 01/19/2017 |  10     |  1       |  5
4    |  1   | 01/28/2017 | 02/10/2017 |  10     |  2       |  9
5    |  1   | 02/20/2017 | 03/10/2017 |  10     |  3       |  10
6    |  2   | 10/01/2017 | 10/03/2017 |  10     |  1       |  null
7    |  2   | 10/20/2017 | 10/23/2017 |  10     |  2       |  17
8    |  2   | 10/25/2017 | 10/29/2017 |  10     |  2       |  2

Result:

----------------------------------------------------
 Type | Start Date |  End Date  | Dollars | GapDays
----------------------------------------------------
  1   | 01/01/2017 | 01/19/2017 |   30    |   8
  1   | 01/28/2017 | 02/10/2017 |   10    |   0
  1   | 02/20/2017 | 03/10/2017 |   10    |   0
  2   | 10/01/2017 | 10/03/2017 |   10    |   0
  2   | 10/20/2017 | 10/29/2017 |   20    |   2

Solution: Use binary grouping for "consecutive" date criteria (in this case < 7 days) and then use that grouping in a sum over with rows unbounded preceding

with cte as (
select
  *,
  COALESCE(DATEDIFF(dd, LAG(EndDate, 1, NULL) OVER (PARTITION BY [Type] ORDER BY StartDate), StartDate),0) AS GapDays
from
  #data
),
cte2 as (
select
  *,
  case when GapDays < 7 then 0 else 1 end as group1
from
  cte
),
cte3 as (
select
  *,
  sum(group1) over (partition by [type] order by startDate, endDate rows unbounded preceding) as group2
from
  cte2
)
select
  [TYPE],
  MIN(StartDate) AS StartDate,
  MAX(EndDate) AS EndDate,
  SUM(Dollars) AS Dollars,
  SUM(CASE WHEN GapDays > 7 THEN 0 ELSE GapDays END) AS GapDays
from
  cte3
group by
  [Type], group2
nbot
  • 184
  • 7

4 Answers4

0

You have to first get the date difference group by type and then apply aggregate functions on column as below:

Look at here for DENSE_RANK (Returns the rank of rows within the partition of a result set, without any gaps in the ranking. The rank of a row is one plus the number of distinct ranks that come before the row in question)

;WITH T AS
(
    SELECT
        *,      
        CASE WHEN Type=LAG(Type) OVER (ORDER BY Type) AND DATEDIFF(d,StartDate,EndDate) < 7 THEN DATEDIFF(d,lag(EndDate)  OVER (ORDER BY Type),StartDate) ELSE 0 END  AS GapDays,
        DENSE_RANK() OVER(ORDER BY Type,CASE WHEN DATEDIFF(d,StartDate,EndDate) < 7 THEN 1 ELSE 2 END) AS PartNo
    FROM @tblTest
)

SELECT
    Type,
    MIN(StartDate) AS StartDate,
    MAX(EndDate) AS EndDate, 
    ISNULL(SUM(Dollars),0) AS Dollars, 
    --DATEDIFF(d,MIN(StartDate),MAX(EndDate)) AS GapDays
    SUM(GapDays) AS GapDays
FROM T
GROUP BY Type,PartNo

Output:

enter image description here

Note: It seems you have given wrong result in your question

0

you can use a query like below: See working demo

create table data (Type int,StartDate date,EndDate date,  Dollars int)
insert into data values
(1,'01/01/2017','01/05/2017',10)
,(1,'01/08/2017','01/12/2017',10)
,(1,'01/17/2017','01/19/2017',10)
,(1,'01/28/2017','02/10/2017',10)
,(2,'10/01/2017','10/03/2017',10)
,(2,'10/20/2017','10/23/2017',10)
,(2,'10/25/2017','10/29/2017',10)

; with uniquerowset as
(
    select
    days=datediff(d,startdate,endDate)+1,
    grouped= 
        case 
            when 
                datediff(d,lag(EndDate) over(partition by Type order by StartDate asc),StartDate) >7
            then 0
            else 1
        end ,
    days_missed=
        datediff(d,lag(EndDate) over(partition by Type order by StartDate asc),StartDate),
    *
    from data
 ),
 finalresult as
 (
     select 
         *,
         rn =row_number() over ( partition by Type order by StartDate asc),
         rn2= row_number() over ( partition by Type order by grouped asc),
         days_missed_corrected= grouped * isnull(days_missed,0)
    from 
         uniquerowset
     )

 select 
     Type,
     StartDate = Min(StartDate),
     EndDate = Max(EndDate),
     Dollars = Sum(Dollars),
     GapDays = Sum(days_missed_corrected)
 from finalresult 
 group by rn2-rn, type
 order by min(StartDate)
DhruvJoshi
  • 17,041
  • 6
  • 41
  • 60
  • You are on the right track, but the binary grouping is incorrect. Each GapDay > 7 should increment the grouping by 1 within Type. – nbot Feb 14 '18 at 06:59
0

If output is correct across all sample data then optimization can be discuss and implemented.

Try this script,

create table #data (Type int,StartDate date,EndDate date,  Dollars int)
insert into #data values
(1,'01/01/2017','01/05/2017',10)
,(1,'01/08/2017','01/12/2017',10)
,(1,'01/17/2017','01/19/2017',10)
,(1,'01/28/2017','02/10/2017',10)
,(2,'10/01/2017','10/03/2017',10)
,(2,'10/20/2017','10/23/2017',10)
,(2,'10/25/2017','10/29/2017',10)

;with CTE as
(
select [Type],StartDate,EndDate,Dollars
,ROW_NUMBER()over(PARTITION by [Type] order by StartDate)rn

from #data  
)
,CTE1 AS
(
select [Type],StartDate,EndDate,Dollars,rn 
,1 grp
,0 gapDays
from cte
where rn=1

union ALL

select c.[Type],c.StartDate,c.EndDate,c.Dollars
,c.rn  
,case when DATEDIFF(day,c1.EndDate,c.StartDate)<7 
then grp else grp+1 end
,case when DATEDIFF(day,c1.EndDate,c.StartDate)<7 
then DATEDIFF(day,c1.EndDate,c.StartDate)
 else 0 end
from CTE c
inner join cte1 c1
on c.[type]=c1.[type]
and c.rn=c1.rn+1
)
,CTE2 AS(
select [type],[grp]
,sum(Dollars)Dollars
,sum(gapDays)gapDays
 from cte1
group by [type],[grp]
)

select c.[type],
c1.startDate, c1.EndDate 
,c.[grp] 
,c.Dollars,c.gapDays
from cte2 c
cross apply(
select min(startDate) startDate
,max(EndDate) EndDate from cte1 c1
where c1.[type]=c.[type]
and c1.grp=c.grp
)c1
--order by [type]
drop table #data
KumarHarsh
  • 5,046
  • 1
  • 18
  • 22
0

Solution: Use binary grouping for "consecutive" date criteria (in this case < 7 days) and then use that grouping in a sum over with rows unbounded preceding

with cte as (
select
  *,
  COALESCE(DATEDIFF(dd, LAG(EndDate, 1, NULL) OVER (PARTITION BY [Type] ORDER BY StartDate), StartDate),0) AS GapDays
from
  #data
),
cte2 as (
select
  *,
  case when GapDays < 7 then 0 else 1 end as group1
from
  cte
),
cte3 as (
select
  *,
  sum(group1) over (partition by [type] order by startDate, endDate rows unbounded preceding) as group2
from
  cte2
)
select
  [TYPE],
  MIN(StartDate) AS StartDate,
  MAX(EndDate) AS EndDate,
  SUM(Dollars) AS Dollars,
  SUM(CASE WHEN GapDays > 7 THEN 0 ELSE GapDays END) AS GapDays
from
  cte3
group by
  [Type], group2
nbot
  • 184
  • 7
  • you should have mention that you are using Sql server 2012+.nonethless it is better than recursive CTE.Hope it is working fine across all sample data.Main trick is "grouping Criteria".you can also keep calculated Group2 column in table itself if and only if "Gapdays<7" requirement is fix. – KumarHarsh Feb 15 '18 at 05:42