3

I'm working on a report and started off by creating this summary SQL statement:

SELECT  o.description, 
        sum(t.total_minutes) total_minutes, 
        sum(n.total_new_minutes) total_new_minutes
FROM    job i
            INNER JOIN
        operation o ON i.operation_id = o.operation_id
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_minutes 
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
        ) t
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_new_minutes 
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
            AND     NOT EXISTS (SELECT 1 FROM job_time v WHERE v.record_date < DATEADD(month, DATEDIFF(month, 0, t.record_date), 0) AND v.job_id = t.job_id)
        ) n
GROUP BY o.description

Table structure:

Table: job
job_id       | operation_id
-------------|-------------
1            | 1
2            | 1
3            | 2
4            | 2

Table: operation
operation_id | description
-------------|-------------
1            | 'OP1'
2            | 'OP2'

Table: job_time
job_id       | record_date  | minutes
-------------|--------------|---------
1            | '1 JAN 2016' | 20
1            | '2 JAN 2016' | 20
2            | '1 JAN 2016' | 20
2            | '1 FEB 2016' | 20
3            | '1 FEB 2016' | 20
3            | '2 FEB 2016' | 20
4            | '2 JAN 2016' | 20
4            | '2 FEB 2016' | 20

Result of my summary query:

description  | total_minutes | total_new_minutes
-------------|---------------|-----------------
'OP1'        | 80            | 60
'OP2'        | 80            | 60

SQL fiddle: http://sqlfiddle.com/#!6/f28f7e/1/0

The idea is to have the total hours spent on jobs in a given data range, while also getting the total hours spent on new jobs every month.

Example job 2: 20 minutes in January are considered to be on a new job, however in February, those 20 minutes spent are on an old job.

My issue is that I want to split this result per month on the given range as shown below:

description  | month     |total_minutes | total_new_minutes
-------------|-----------|--------------|-----------------
'OP1'        | '01/2016' | 60           | 60
'OP1'        | '02/2016' | 20           | 0
'OP2'        | '01/2016' | 20           | 20
'OP2'        | '02/2016' | 60           | 40

The query change to reflect this result:

SELECT  o.description, 
        sum(t.total_minutes) total_minutes, 
        sum(n.total_new_minutes) total_new_minutes,
        t.record_month      
FROM    job i
            INNER JOIN
        operation o ON i.operation_id = o.operation_id
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_minutes, right(convert(varchar(10),t.record_date,103),7) record_month
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
            GROUP BY right(convert(varchar(10),t.record_date,103),7)
        ) t
            CROSS APPLY
        (
            SELECT  SUM(minutes) total_new_minutes, right(convert(varchar(10),t.record_date,103),7) record_month
            FROM    job_time t 
            WHERE   i.job_id = t.job_id 
            AND     record_date between '1 JAN 2016' and '29 FEB 2016'
            AND     NOT EXISTS (SELECT 1 FROM job_time v WHERE v.record_date < DATEADD(month, DATEDIFF(month, 0, t.record_date), 0) AND v.job_id = t.job_id)
           GROUP BY right(convert(varchar(10),t.record_date,103),7)
        ) n     
GROUP BY o.description, t.record_month

SQL Fiddle: http://sqlfiddle.com/#!6/f28f7e/3/0

The problem is that I'm not understanding how the join is being made between t and n, and in my dev db with actual data, one operation is reporting more minutes in total_new_minutes than total_minutes which is can never happen, doesn't matter how bad the data is.

Any idea what I'm doing wrong here or if I should change the query completely?

Keith Mifsud
  • 725
  • 5
  • 16
  • There are more than one subquery named t in your example. If you asked about the big t and n, I don't think there is any join directly between them. They are indirectly related based on the base record they connected to from the temporary data set formed by the join between the tables job and operation. – DVT Oct 28 '16 at 17:14

1 Answers1

1

I don't know why you want to use CROSS APPLY here. My strategy would be to calculate the total_minutes and total_new_minutes in separate subqueries, then join them together. The final query is this:

WITH new_job AS (
    SELECT
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE) AS mon
        , SUM(jt1."MINUTES") AS "MINUTES"
    FROM
        #JOB_TIME jt1
    WHERE
        NOT EXISTS (
            SELECT 1
            FROM
                #JOB_TIME jt2
            WHERE
                jt2.JOB_ID = jt1.JOB_ID
                AND EOMONTH(jt1.RECORD_DATE, -1) = EOMONTH(jt2.RECORD_DATE)
        )
    GROUP BY
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE)
), new_total AS (
    SELECT
        j.OPERATION_ID
        , nj.mon
        , SUM(nj."MINUTES") AS total_new_minutes
    FROM
        new_job nj
        JOIN #JOB j ON j.JOB_ID = nj.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , nj.mon
), total AS (
    SELECT
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE) AS mon
        , SUM(jt."MINUTES") AS total_minutes
    FROM
        #JOB_TIME jt
        JOIN #JOB j ON jt.JOB_ID = j.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE)
)
SELECT
    o."DESCRIPTION"
    , COALESCE(t.mon, nt.mon)
    , COALESCE(t.total_minutes,0) AS total_minutes
    , COALESCE(nt.total_new_minutes,0) AS total_new_minutes
FROM
    #OPERATION o
    LEFT JOIN total t ON o.OPERATION_ID = t.OPERATION_ID
    LEFT JOIN new_total nt ON o.OPERATION_ID = nt.OPERATION_ID AND nt.mon=t.mon;

All the query needed to test it would be this:

CREATE TABLE #JOB (
    JOB_ID INT
    , OPERATION_ID INT
);

CREATE TABLE #OPERATION (
    OPERATION_ID INT
    , "DESCRIPTION" NCHAR(5)
);

CREATE TABLE #JOB_TIME (
    JOB_ID INT
    , RECORD_DATE DATE
    , "MINUTES" INT
)

INSERT INTO #JOB (JOB_ID, OPERATION_ID)
VALUES
(1,1)
,(2,1)
,(3,2)
,(4,2);

INSERT INTO #OPERATION (OPERATION_ID, "DESCRIPTION")
VALUES
(1, 'OP1')
, (2, 'OP2');

INSERT INTO #JOB_TIME (JOB_ID, RECORD_DATE, "MINUTES")
VALUES
(1, '20160101', 20)
, (1, '20160102', 20)
, (2, '20160101', 20)
, (2, '20160201', 20)
, (3, '20160201', 20)
, (3, '20160202', 20)
, (4, '20160102', 20)
, (4, '20160202', 20);

WITH new_job AS (
    SELECT
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE) AS mon
        , SUM(jt1."MINUTES") AS "MINUTES"
    FROM
        #JOB_TIME jt1
    WHERE
        NOT EXISTS (
            SELECT 1
            FROM
                #JOB_TIME jt2
            WHERE
                jt2.JOB_ID = jt1.JOB_ID
                AND EOMONTH(jt1.RECORD_DATE, -1) = EOMONTH(jt2.RECORD_DATE)
        )
    GROUP BY
        jt1.JOB_ID
        , EOMONTH(jt1.RECORD_DATE)
), new_total AS (
    SELECT
        j.OPERATION_ID
        , nj.mon
        , SUM(nj."MINUTES") AS total_new_minutes
    FROM
        new_job nj
        JOIN #JOB j ON j.JOB_ID = nj.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , nj.mon
), total AS (
    SELECT
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE) AS mon
        , SUM(jt."MINUTES") AS total_minutes
    FROM
        #JOB_TIME jt
        JOIN #JOB j ON jt.JOB_ID = j.JOB_ID
    GROUP BY
        j.OPERATION_ID
        , EOMONTH(jt.RECORD_DATE)
)
SELECT
    o."DESCRIPTION"
    , COALESCE(t.mon, nt.mon)
    , COALESCE(t.total_minutes,0) AS total_minutes
    , COALESCE(nt.total_new_minutes,0) AS total_new_minutes
FROM
    #OPERATION o
    LEFT JOIN total t ON o.OPERATION_ID = t.OPERATION_ID
    LEFT JOIN new_total nt ON o.OPERATION_ID = nt.OPERATION_ID AND nt.mon=t.mon;


DROP TABLE #JOB;
DROP TABLE #JOB_TIME;
DROP TABLE #OPERATION;
DVT
  • 3,014
  • 1
  • 13
  • 19