I have my SQL Query is like this
INSERT INTO staging.lps_data
(
col1
,col2
,col3
,col4
,col5
)
SELECT DISTINCT
col1
,col2
,col3
,col4
,col5
FROM tbl1 r WITH ( NOLOCK )
INNER JOIN tbl2 p WITH ( NOLOCK ) ON p.col1= r.col1
INNER JOIN tbl3 l WITH ( NOLOCK ) ON l.col2 = r.col2
r.col1 NOT IN ( 'Foreclosure Deed',
'Foreclosure Deed - Judicial',
'Foreclosure RESPA',
'Foreclosure Vendor Assignment Review',
'Foreclosure Stop',
'Foreclosure Screenprints Other',
'Foreclosure Sale Audit',
'Foreclosure Property Preservation',
'Foreclosure Acquisition',
'Foreclosure Notices Attorney Certification' )
AND ( r.col1 LIKE 'foreclosure%'
OR r.col1 = 'Vesting CT'
);
My tbl1 contains 100 million records , tbl2 contains 100 million records and tbl3 contains 1000million records. I gone thru the estimated execution plan The more load shows in Distinct. Note : I applied proper indexing on the tables.
I just try to solve this using batch process some thing like below
INSERT INTO TEMP1
SELECT SK_ID from tbl1 r where ( r.processname LIKE 'foreclosure%' OR r.processname = 'Vesting CT')
EXCEPT
SELECT SK_ID from tbl1 r where r.processname NOT IN ( 'Foreclosure Deed','Foreclosure Deed - Judicial',
'Foreclosure RESPA',
'Foreclosure Vendor Assignment Review',
'Foreclosure Stop',
'Foreclosure Screenprints Other',
'Foreclosure Sale Audit',
'Foreclosure Property Preservation',
'Foreclosure Acquisition',
'Foreclosure Notices Attorney Certification' )
-- Load data into staging table in batch mode
DECLARE @STARTID BIGINT=1, @LASTID BIGINT, @ENDID BIGINT;
DECLARE @SPLITCONFIG BIGINT =1000 -- Process 1000 records as batch
SELECT @LASTID = MAX(ID) FROM TEMP1
WHILE @STARTID < @LASTID
BEGIN
IF(@STARTID + @SPLITCONFIG > @LASTID)
SET @ENDID = @LASTID
ELSE
SET @ENDID = @STARTID + @SPLITCONFIG
INSERT INTO staging.lps_data
( col1
,col2
,col3
,col4
,col5)
SELECT DISTINCT
col1
,col2
,col3
,col4
,col5
FROM tbl1 r WITH (NOLOCK)
INNER JOIN TEMP1 SK WITH(NOLOCK) ON (r.SK_ID=SK.SK_ID AND SK.ID >=@STARTID AND SK.ID < @ENDID)
INNER JOIN tbl2 p WITH (NOLOCK) ON p.refinfoidentifier = r.refinfoidentifier
INNER JOIN tbl3 l WITH (NOLOCK) ON l.loaninfoidentifier = r.loaninfoidentifier
SET @STARTID = @ENDID
END
With first approach my server got crashed with out of memeory, With second approach I could able to process complete records in 4 Hours.
Please suggest me if any thing else i can do to complete this process less than hour