1

I have a voucher status history table as a type2 slowly changing dimension table I am trying to get the summary total value of each status by each month before a particular date. This is my schema and insert code:

CREATE TABLE #HDimVouchers(
       [HVoucherKey] [bigint] IDENTITY(1,1) NOT NULL,
       [Voucher_id] [bigint] NOT NULL,
       [VoucherStatusKey] [int] NOT NULL,
       [Voucher_amt] [decimal](18, 2) NULL,    
       [DateStatusStart] [date] NULL,       
       [DateStatusEnd] [date] NULL
     
)
--drop table #HDimVouchers
insert #HDimVouchers 
values
(10,2,10.00,'2019-01-01','2019-02-15'),
(10,4,10.00,'2019-02-16',null),
(13,4,10.00,'2019-01-10',null),
(11,2,15.00,'2019-01-01',null),
(12,2,20.00,'2019-03-12','2019-03-12'),
(12,4,20.00,'2019-03-13',null),
(15,2,205.00,'2019-05-25','2020-04-24'),
(15,6,205.00,'2020-04-25',null),
(21,2,100.00,'2019-02-16',null)

I would like to get a summary to total value by year-month by voucherstatuskey something like the below:

[Year-Month] [VoucherStatusKey] [Amount]
201901 2 25
201901 4 10
201902 2 100
201902 4 10
201903 4 20
201905 2 205
201906 2 205
201907 2 205
201908 2 205
201909 2 205
201910 2 205
201911 2 205
201912 2 205
202001 2 205
202002 2 205
202003 2 205

I have had many attempts to get the data as above, but I am struggling to get the correct format and values. Below is something I have tried

SELECT  convert(nvarchar(4),Year([DateStatusStart])) + RIGHT('00' + CONVERT(NVARCHAR(2), DATEPART(Month, [DateStatusStart])), 2)
,[VoucherStatusKey]
,SUM([Voucher_amt]) OVER (PARTITION BY Year([DateStatusStart]),Month([DateStatusStart]), [VoucherStatusKey] ORDER BY [DateStatusStart]) AS running_total 
FROM #HDimVouchers where [DateStatusStart] < '2020-03-31';
Paul
  • 1,103
  • 1
  • 13
  • 18
  • The numbers don't make sense. Where does "25" in the first row come from? It is not in the original data. – Gordon Linoff Mar 10 '21 at 12:44
  • do you want to get grouped data by month and status key? if yes you can use s.th like ( GROUP BY DATEPART(Month, [DateStatusStart]), [VoucherStatusKey] ) – mhkarami97 Mar 10 '21 at 12:48

2 Answers2

1

Let me assume that you want the value at the end of the month. Then, you can take the following approach:

  • Generate all appropriate months for each voucher.
  • Use a join to bring in the appropriate value.

For the first part, you could use a tally or calendar table if one is available. However a recursive CTE is also convenient:

with vdates as (
      select voucher_id, eomonth(min(DateStatusStart)) as eom
      from HDimVouchers
      group by voucher_id
      union all
      select voucher_id, eomonth(dateadd(month, 1, eom))
      from vdates
      where eom < '2020-03-01'
     )
select vd.*, hv.Voucher_amt
from vdates vd join
     HDimVouchers hv
     on hv.voucher_id = vd.voucher_id and
        vd.eom >= hv.DateStatusStart and
        (vd.eom <= hv.DateStatusEnd or hv.DateStatusEnd is null)
order by vd.eom, vd.voucher_id;

Here is a db<>fiddle.

Gordon Linoff
  • 1,242,037
  • 58
  • 646
  • 786
  • Bit dodgy just using `eomonth` because eg `select dateadd(month, 1, '2021-02-28')` is `2021-03-28` admittedly you've got out of that with `>= and <=`, but probably better to use `datefromparts(year(DateStatusStart), month(DateStatusStart), 1` – Charlieface Mar 10 '21 at 13:13
  • @Charlieface . . . Good point. I should use `eomonth()` in both parts of the CTE. – Gordon Linoff Mar 10 '21 at 13:43
  • thanks @GordonLinoff ...I assume then to get a total by voucher status by eom - we would just need to group by `vd.eom , hv.VoucherStatusKey ` – Paul Mar 10 '21 at 14:16
  • @Paul . . . Yes, I think that would do what you want. – Gordon Linoff Mar 10 '21 at 14:22
1

My take on this would be:

;with [dates] as (
    select YEAR(MIN([DateStatusStart]))*100+MONTH(MIN([DateStatusStart])) [YM] from #HDimVouchers
    union all
    select case when ([dates].[YM] % 100) = 12 then [dates].[YM] + 100 - 11 else [dates].[YM] + 1 end from [dates] where [YM] < 202112
), [dimkeys] as (
    select
        [Voucher_id],
        YEAR(MIN([DateStatusStart]))*100+MONTH(MIN([DateStatusStart])) [DateStatusStart],
        YEAR(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31))))*100+MONTH(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) [DateStatusEnd]
        from [#HDimVouchers] group by [Voucher_id]
), [map] as (
    select
        [dimkeys].[Voucher_id],
        [dates].[YM],
        COALESCE(
            MAX([d].[DateStatusStart]),
            (select MAX([i].[DateStatusStart]) from [#HDimVouchers] [i] where [i].[Voucher_id] = [dimkeys].[Voucher_id] and YEAR([i].[DateStatusStart])*100+MONTH([i].[DateStatusStart]) < [dates].[YM]),
            (select MIN([i].[DateStatusStart]) from [#HDimVouchers] [i] where [i].[Voucher_id] = [dimkeys].[Voucher_id])
        ) [MappingDate]
    from [dates]
    cross join [dimkeys]
    left join [#HDimVouchers] [d] on [d].[Voucher_id] = [dimkeys].[Voucher_id] and YEAR([d].[DateStatusStart])*100+MONTH([d].[DateStatusStart]) = [dates].[YM]
    where [dates].[YM] >= [dimkeys].[DateStatusStart] and [dates].[YM] <= [dimkeys].[DateStatusEnd]
    group by [dimkeys].[Voucher_id], [dates].[YM]
)
select [map].[YM], [fact].[VoucherStatusKey], SUM([fact].[Voucher_amt]) [Sum]
from [map] join [#HDimVouchers] [fact] on [fact].[Voucher_id] = [map].[Voucher_id] and [fact].[DateStatusStart] = [map].[MappingDate]
group by [map].[YM], [fact].[VoucherStatusKey]
order by [YM], [VoucherStatusKey];

So:

  • Get all year-month values from start to end
  • Get all distinct keys with their overall min/max dates (aka when the member exists)
  • cross join them to get an entry of every key for every yearmonth (within lifetime of member)
  • add the date which should be used for mapping (this is used to decide to which month to add a member that was changed in a year-month)
  • only then join this up with the full dimension and group by the SCD Type 2 attribute

Update

For big tables, you can split it up in multiple temp. tables instead of going all out on CTEs. That usually helps a lot on performance.

select *,
    YEAR([DateStatusStart])*100+MONTH([DateStatusStart]) [YmStart],
    YEAR([DateStatusEnd])*100+MONTH([DateStatusEnd]) [YmEnd]
into [#withYm]
from [#HDimVouchers];

;with [dates] as (
    select MIN([YmStart]) [YM] from [#withYm]
    union all
    select case when ([dates].[YM] % 100) = 12 then [dates].[YM] + 100 - 11 else [dates].[YM] + 1 end from [dates] where [YM] < 202112
), [dimkeys] as (
    select
        [Voucher_id],
        MIN([YmStart]) [YmStart],
        MAX(ISNULL([YmEnd], 299912)) [YmEnd]
    from [#withYm]
    group by [Voucher_id]
)
select
    [dimkeys].[Voucher_id],
    [dates].[YM]
into [#all]
from [dates]
cross join [dimkeys]
where [dates].[YM] >= [dimkeys].[YmStart] and [dates].[YM] <= [dimkeys].[YmEnd]

;with [map] as (
    select
        [#all].[Voucher_id],
        [#all].[YM],
        ISNULL(
            MAX([d].[DateStatusStart]),
            (select MAX([i].[DateStatusStart]) from [#withYm] [i] where [i].[Voucher_id] = [#all].[Voucher_id] and [i].[YmStart] < [#all].[YM])
        ) [MappingDate]
    from [#all]
    left join [#HDimVouchers] [d] on [d].[Voucher_id] = [#all].[Voucher_id] and YEAR([d].[DateStatusStart])*100+MONTH([d].[DateStatusStart]) = [#all].[YM]
    group by [#all].[Voucher_id], [#all].[YM]
)
select [map].[YM], [fact].[VoucherStatusKey], SUM([fact].[Voucher_amt]) [Sum]
from [map] join [#HDimVouchers] [fact] on [fact].[Voucher_id] = [map].[Voucher_id] and [fact].[DateStatusStart] = [map].[MappingDate]
group by [map].[YM], [fact].[VoucherStatusKey]
order by [YM], [VoucherStatusKey];
Andreas P.
  • 115
  • 2
  • 9
  • Thanks @Andreas-P I have checked this with the example data, and it seems to be doing the summation correctly, but when I take to Data warehouse with many rows I stopped the query after 10mins... :-( – Paul Mar 10 '21 at 14:49
  • Thanks for the update @Andreas it is a lot quicker appreciate your help and effort. – Paul Mar 11 '21 at 08:09