I am trying to implement type 2 SCD for my work in T-SQL. I'm following the wonderful approach given in this link: https://www.mssqltips.com/sqlservertip/2883/using-the-sql-server-merge-statement-to-process-type-2-slowly-changing-dimensions/
However, in my case there is no BinaryCheckSum column inbuilt in the tables. I'm required to calculate the same on the fly. I'm posting my code below (with some sample data created) for this implementation.
Could someone let me know if this approach works fine? Or do I need to have DimensionCheckSum in the column itself? Also, in my case I don't need a surrogate ID. (Please pardon for any syntax errors)
/*
--CREATING A SAMPLE SCD TABLE WITH NO DATA
CREATE TABLE SCD_ACCOUNT
(
SCD_ACCOUNT_ID INT IDENTITY(1,1) NOT NULL,
SCD_SRC_ID INT NULL,
SCD_SF_SRC_ID INT NULL,
SCD_SF_ATTR1 VARCHAR(100) NULL,
SCD_SF_ATTR2 VARCHAR(100) NULL,
SCD_SF_ATTR3 VARCHAR(100) NULL,
SCD_SF_ATTR4 VARCHAR(100) NULL,
SCD_SF_ATTR5 VARCHAR(100) NULL,
SCD_EFF_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_EFF_DATE DEFAULT GETDATE(),
SCD_END_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_END_DATE DEFAULT '12/31/9999',
SCD_IS_ACTIVE VARCHAR(10) NULL CONSTRAINT DF_SCD_IS_ACTIVE DEFAULT 'Y',
SCD_LAST_UPD_DATE DATETIME NOT NULL CONSTRAINT DF_SCD_LAST_UPD_DATE DEFAULT GETDATE(),
SCD_LAST_UPD_BY VARCHAR(100) NOT NULL CONSTRAINT DF_SCD_LAST_UPD_BY DEFAULT SUSER_SNAME());
SELECT * FROM SCD_ACCOUNT;
--CREATING A SOURCE TABLE WHICH WILL INSERT/UPDATE DATA INTO SCD TABLE
--THIS WILL HAVE THE SAME COLUMNS AS ABOVE TABLE EXCEPT SCD_ACCOUNT_ID, SCD_SRC_ID, SCD_IS_ACTIVE, SCD_EFF_DATE, SCD_END_DATE
CREATE TABLE SF_SRC
(
SF_SRC_ID INT NULL,
SF_ATTR1 VARCHAR(100) NULL,
SF_ATTR2 VARCHAR(100) NULL,
SF_ATTR3 VARCHAR(100) NULL,
SF_ATTR4 VARCHAR(100) NULL,
SF_ATTR5 VARCHAR(100) NULL);
--INSERTING DATA INTO SF_SRC TABLE
INSERT INTO SF_SRC VALUES(100,'Row1Attr1','Row1Attr2','Row1Attr3','Row1Attr4','Row1Attr5');
INSERT INTO SF_SRC VALUES(200,'Row2Attr1','Row2Attr2','Row2Attr3','Row2Attr4','Row2Attr5');
INSERT INTO SF_SRC VALUES(300,'Row3Attr1','Row3Attr2','Row3Attr3','Row3Attr4','Row3Attr5');
SELECT * FROM SF_SRC;
--DROP TABLE SCD_ACCOUNT;
--DROP TABLE SF_SRC;
*/
-- BEGIN OF INSERT USING MERGE
INSERT INTO dbo.SCD_ACCOUNT
( --TABLE AND COLUMNS IN WHICH TO INSERT THE DATA
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE,
BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
)
-- SELECT THE ROWS/COLUMNS TO INSERT THAT ARE OUTPUT FROM THIS MERGE STATEMENT
-- IN THIS EXAMPLE, THE ROWS TO BE INSERTED ARE THE ROWS THAT HAVE CHANGED (UPDATE).
SELECT
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE,
BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SCD_SF_ATTR5) AS SCD_DIMENSION_CHECKSUM
FROM
(
-- THIS IS THE BEGINNING OF THE MERGE STATEMENT.
-- THE TARGET MUST BE DEFINED, IN THIS EXAMPLE IT IS OUR SLOWLY CHANGING
-- DIMENSION TABLE
MERGE INTO dbo.SCD_ACCOUNT AS target
-- THE SOURCE MUST BE DEFINED WITH THE USING CLAUSE
USING
(
-- THE SOURCE IS MADE UP OF THE ATTRIBUTE COLUMNS FROM THE STAGING TABLE.
SELECT
SF_SRC_ID,
SF_ATTR1,
SF_ATTR2,
SF_ATTR3,
SF_ATTR4
SF_ATTR5,
BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
FROM dbo.SF_SRC
) AS SOURCE
(
SF_SRC_ID,
SF_ATTR1,
SF_ATTR2,
SF_ATTR3,
SF_ATTR4,
SF_ATTR5,
BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5) AS SRC_DIMENSION_CHECKSUM
) ON --WE ARE MATCHING ON THE SOURCESYSTEMID IN THE TARGET TABLE AND THE SOURCE TABLE.
(
TARGET.SCD_SF_SRC_ID = SOURCE.SF_SRC_ID
)
-- IF THE ID'S MATCH BUT THE CHECKSUMS ARE DIFFERENT, THEN THE RECORD HAS CHANGED;
-- THEREFORE, UPDATE THE EXISTING RECORD IN THE TARGET, END DATING THE RECORD
-- AND SET THE CURRENTRECORD FLAG TO N
WHEN MATCHED AND TARGET.BINARY_CHECKSUM(SCD_SF_SRC_ID, SCD_SF_ATTR1, SCD_SF_ATTR2, SCD_SF_ATTR3, SCD_SF_ATTR4, SF_ATTR5)
<> SOURCE.BINARY_CHECKSUM(SF_SRC_ID, SF_ATTR1, SF_ATTR2, SF_ATTR3, SF_ATTR4, SF_ATTR5)
AND TARGET.SCD_IS_ACTIVE = 'Y'
THEN
UPDATE SET
SCD_END_DATE = GETDATE()-1,
SCD_IS_ACTIVE = 'N',
SCD_LAST_UPD_DATE = GETDATE(),
SCD_LAST_UPD_BY = SUSER_SNAME()
-- IF THE ID'S DO NOT MATCH, THEN THE RECORD IS NEW;
-- THEREFORE, INSERT THE NEW RECORD INTO THE TARGET USING THE VALUES FROM THE SOURCE.
WHEN NOT MATCHED THEN
INSERT
(
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5
)
VALUES
(
SOURCE.SF_SRC_ID,
SOURCE.SF_ATTR1,
SOURCE.SF_ATTR2,
SOURCE.SF_ATTR3,
SOURCE.SF_ATTR4,
SOURCE.SF_ATTR5
)
OUTPUT $ACTION,
SOURCE.SF_SRC_ID,
SOURCE.SF_ATTR1,
SOURCE.SF_ATTR2,
SOURCE.SF_ATTR3,
SOURCE.SF_ATTR4,
SOURCE.SF_ATTR5,
GETDATE(),
'12/31/9999'
) -- THE END OF THE MERGE STATEMENT
--THE CHANGES OUTPUT BELOW ARE THE RECORDS THAT HAVE CHANGED AND WILL NEED
--TO BE INSERTED INTO THE SLOWLY CHANGING DIMENSION.
AS CHANGES
(
ACTION,
SCD_SF_SRC_ID,
SCD_SF_ATTR1,
SCD_SF_ATTR2,
SCD_SF_ATTR3,
SCD_SF_ATTR4,
SCD_SF_ATTR5,
SCD_EFF_DATE,
SCD_END_DATE
)
WHERE ACTION='UPDATE';