1

I have to fetch duplicate record if date difference between duplicate record is more than 96 hours or 4 days otherwise ignore the duplicate entry and return record with first entry or oldest date. My table look like this :

ID           SDATE
----------- -----------------------
1           2016-04-13 14:54:18.983
1           2016-04-08 12:55:47.907
2           2016-04-13 14:54:18.983
3           2016-04-13 14:54:18.983
4           2016-04-13 14:54:18.983
5           2016-04-13 14:54:18.983
5           2016-04-11 12:55:47.907
6           2016-04-13 14:54:18.983
6           2016-04-13 14:54:18.983

Expected result:

ID           SDATE
----------- -----------------------
1           2016-04-13 14:54:18.983
1           2016-04-08 12:55:47.907
2           2016-04-13 14:54:18.983
3           2016-04-13 14:54:18.983
4           2016-04-13 14:54:18.983    
5           2016-04-11 12:55:47.907
6           2016-04-13 14:54:18.983  

i tried following query but it is not working.

WITH tt AS (
SELECT 1 as ID, GETDATE() as SDATE
UNION ALL
SELECT 1 as ID, '2016-04-09 12:55:47.907' as SDATE
UNION ALL
SELECT 2 as ID, GETDATE() as SDATE
UNION ALL
SELECT 3 as ID, GETDATE() as SDATE
UNION ALL
SELECT 4 as ID, GETDATE() as SDATE
UNION ALL
SELECT 5 as ID, GETDATE() as SDATE
UNION ALL
SELECT 5 as ID, '2016-04-11 12:55:47.907' as SDATE
UNION ALL
SELECT 6 as ID, GETDATE() as SDATE
UNION ALL
SELECT 6 as ID, GETDATE() as SDATE
)
SELECT MIN(SDATE) as SDATE, ID FROM tt as tbl
GROUP BY ID,  DATEADD(HH, DATEDIFF(HH,0,SDATE) + 96,0) 
ravi
  • 1,300
  • 2
  • 11
  • 17
  • 1
    What should happen if one `id` has the following three records? `{'2016-04-01 06:00:00.000', '2016-04-03 18:00:00.000', '2016-04-06 06:00:00.000'}`? Each record is less than 4 days apart, but the first and last record are 5 days apart. – MatBailie Apr 13 '16 at 09:51
  • What happens if there are 3 duplicates ID, first date - `1/4/2016` , second date - `3/4/2016` and third date `6/4/2016` , by your definition, second is a duplicate of first and third is a duplicate of second. Which should be deleted? – sagi Apr 13 '16 at 09:52
  • 2
    @MatBailie Hah, same question, same dates. – sagi Apr 13 '16 at 09:52
  • http://stackoverflow.com/questions/13196190/mysql-subtracting-value-from-previous-row-group-by Start from here – Deep Kalra Apr 13 '16 at 09:54
  • Hi @MatBailie , if there are more than 2 records than we have look at first and last date. – ravi Apr 13 '16 at 09:59

3 Answers3

1

The below query returns the expected result, added the inline comments:

-- Simply grouping each ID and get unique row with minimum date
SELECT MIN(SDATE) [SDate], ID
FROM tt
GROUP BY ID

UNION 

-- Get the row with each ID's difference is more than 96 hours
SELECT D.MaxDate  [SDate], D.ID
FROM (
    SELECT MIN(SDATE) [MinDate], MAX(SDATE) [MaxDate], ID
    FROM tt
    GROUP BY ID
) D
WHERE DATEDIFF(HH, D.MinDate, D.MaxDate) >= 96
Arulkumar
  • 12,966
  • 14
  • 47
  • 68
  • I'm not convinced. What about if you have the dates `1st, 4th, 7th, 10th, 13th`. This answer will return `1st, 7th, 10th, 13th` even though the last three results all have a gap of only 3 days each. That's not to say this is wrong, just that the OP is far from clear on the requirements for such a case. – MatBailie Apr 13 '16 at 14:10
0

Please try below query, I have tested it is working fine.

in ignore columns you can change period (HH or DAY)

-- drop table #temptbl

WITH tt AS (
SELECT 1 as ID, GETDATE() as SDATE
UNION ALL
SELECT 1 as ID, '2016-04-09 12:55:47.907' as SDATE
UNION ALL
SELECT 2 as ID, GETDATE() as SDATE
UNION ALL
SELECT 3 as ID, GETDATE() as SDATE
UNION ALL
SELECT 4 as ID, GETDATE() as SDATE
UNION ALL
SELECT 5 as ID, GETDATE() as SDATE
UNION ALL
SELECT 5 as ID, '2016-04-11 12:55:47.907' as SDATE
UNION ALL
SELECT 6 as ID, GETDATE() as SDATE
UNION ALL
SELECT 6 as ID, GETDATE() as SDATE
)
SELECT Id,SDATE,case when DATEDIFF(HH,SDATE,GETDATE()) >94 THEN 0 else 1 end AS ignore,
  ROW_NUMBER() OVER ( PARTITION BY tt.ID ORDER BY tt.SDATE desc ) as Rowid
INTO #temptbl
FROM tt 
SELECT Id, sdate   from #temptbl
WHERE (#temptbl.ignore = 0) or (#temptbl.Rowid = 1)

0
declare @table table
(
ID int,           SDATE datetime)
insert into @table
(
ID      ,SDATE )
values 
(1,'2016-04-13 14:54:18'),
(1,'2016-04-08 12:55:47'),
(2,'2016-04-13 14:54:18'),
(3,'2016-04-13 14:54:18'),
(4,'2016-04-13 14:54:18'),
(5,'2016-04-13 14:54:18'),
(5,'2016-04-11 12:55:47'),
(6,'2016-04-13 14:54:18'),
(6,'2016-04-13 14:54:18')


;with cte as
(
select  id,min(sdate) mindate,max(sdate) maxdate, datediff(dd,min(sdate),max(sdate)) daysdiff,count(*) as Dups
 from @table
 group by   id 
 )
select  cte.id, t.sdate
from cte
join    @table t on t.id = cte.id 
where   cte.dups > 1 and cte.daysdiff > 4
union all
select  cte.id, 
        mindate
from cte
where   (cte.dups > 1 and cte.daysdiff <= 4) or
        cte.dups = 1
P.Salmon
  • 17,104
  • 2
  • 12
  • 19