-2

Can anyone recommend how I can speed up this code and primarily the cursor? The code is an SQL Server db query that creates a trigger on INSERT, UPDATE, or DELETE. It writes a record to a changlog table identifying the type of change (I, U, or D) and then saves the old value and new value of each affected column for each row in a details table.

I want this to be generic so I can easily reuse it for any table I throw at it that has a unique column I can filter on. Writing the whole row of changes to a cloned structure audit table is not an option unfortunately.

Any help is greatly appreciated, I am not the greatest at query optimization and welcome any feedback or rewrites.. Thanks!

ALTER TRIGGER [dbo].[tbl_Address_ChangeTracking] ON [dbo].[tbl_Address]
AFTER INSERT, DELETE, UPDATE
AS
BEGIN
       -- SET NOCOUNT ON added to prevent extra result sets from
       -- interfering with SELECT statements.
       SET NOCOUNT ON
       --SET XACT_ABORT ON
       -- Get the table name of the current process
       DECLARE @TableName VARCHAR(25)
       SET @TableName = COALESCE(
       (
              SELECT SCHEMA_NAME(schema_id) + '.' + OBJECT_NAME(parent_object_id)
              FROM sys.objects
              WHERE sys.objects.name = OBJECT_NAME(@@PROCID) AND
                       SCHEMA_NAME(sys.objects.schema_id) = OBJECT_SCHEMA_NAME(@@PROCID)
       ), 'Unknown')

       --Declare our cursor to navigate the records in inserted and deleted
       DECLARE @cursorSQL AS NVARCHAR(MAX) = ''
       DECLARE @PrimaryID AS VARCHAR(MAX) = ''
       DROP TABLE IF EXISTS #inserted1TableTemp
       DROP TABLE IF EXISTS #inserted2TableTemp
       DROP TABLE IF EXISTS #deletedTableTemp
       DECLARE @ourLogCursor CURSOR

       --If we have a record in inserted and deleted this is an update record and we should pull from the inserted table and assume
       --this is one update or many update statements
       IF EXISTS
       (
              SELECT 1
              FROM inserted
       ) AND
          EXISTS
       (
              SELECT 1
              FROM deleted
       )
       BEGIN
              SELECT *
              INTO #inserted1TableTemp
              FROM inserted

              SET @cursorSQL = 'SET @ourLogCursor = CURSOR FOR SELECT AddressID FROM #inserted1TableTemp; OPEN @ourLogCursor;'
       END

       --If we have an inserted record and no deleted record this is an insert and we pull from the inserted table
       IF EXISTS
       (
              SELECT 1
              FROM inserted
       ) AND
          NOT EXISTS
       (
              SELECT 1
              FROM deleted
       )
       BEGIN
              DROP TABLE IF EXISTS #inserted2TableTemp
              DROP TABLE IF EXISTS #inserted1TableTemp
              DROP TABLE IF EXISTS #deletedTableTemp

              SELECT *
              INTO #inserted2TableTemp
              FROM inserted

              SET @cursorSQL = 'SET @ourLogCursor = CURSOR FOR SELECT AddressID FROM #inserted2TableTemp; OPEN @ourLogCursor;'
       END

       --If we have a deleted record and no insert record this is a deletion and we pull from the deleted table
       IF NOT EXISTS
       (
              SELECT 1
              FROM inserted
       ) AND
          EXISTS
       (
              SELECT 1
              FROM deleted
       )
       BEGIN
              DROP TABLE IF EXISTS #inserted1TableTemp
              DROP TABLE IF EXISTS #inserted2TableTemp
              DROP TABLE IF EXISTS #deletedTableTemp

              SELECT *
              INTO #deletedTableTemp
              FROM deleted

              SET @cursorSQL = 'SET @ourLogCursor = CURSOR FOR SELECT AddressID FROM #deletedTableTemp; OPEN @ourLogCursor;'
       END

       --If we have a deleted record and no insert record this is a deletion and we pull from the deleted table
       IF NOT EXISTS
       (
              SELECT 1
              FROM inserted
       ) AND
          NOT EXISTS
       (
              SELECT 1
              FROM deleted
       )
       BEGIN
              RETURN;
       END

       --Execute our dynamic SQL that sets the correct FOR SELECT statment for the cursor. Pass @ourCursorLog as an input param, and then grab the output
       --so the results are available outside the scope of the executesql call
       EXEC sys.sp_executesql @cursorSQL, N'@ourLogCursor CURSOR OUTPUT', @ourLogCursor OUTPUT;

       FETCH NEXT FROM @ourLogCursor INTO @PrimaryID
       DECLARE @xmlOld XML
       DECLARE @xmlNew XML
       DECLARE @SummaryID INT
       SET @TableName = COALESCE(
       (
              SELECT SCHEMA_NAME(schema_id) + '.' + OBJECT_NAME(parent_object_id)
              FROM sys.objects
              WHERE sys.objects.name = OBJECT_NAME(@@PROCID) AND
                       SCHEMA_NAME(sys.objects.schema_id) = OBJECT_SCHEMA_NAME(@@PROCID)
       ), 'Unknown')
       --Navigate all our rows
       WHILE @@FETCH_STATUS = 0
       BEGIN
              DROP TABLE IF EXISTS #tmp_AddressesChangelogTrigger
              DROP TABLE IF EXISTS #tmp_AddressesChangelogTriggerXML1
              DROP TABLE IF EXISTS #tmp_AddressesChangelogTriggerXML2
              DROP TABLE IF EXISTS #tmp_AddressesChangelogTriggerXMLsWithDifferences

              --Get the deleted and inserted records as xml for comparison against each other
              SET @xmlNew =
              (
                     SELECT *
                     FROM deleted AS [TABLE]
                     WHERE AddressID = @PrimaryID
                     ORDER BY AddressID FOR XML AUTO, ELEMENTS
              )
              SET @xmlOld =
              (
                     SELECT *
                     FROM inserted AS [TABLE]
                     WHERE AddressID = @PrimaryID
                     ORDER BY AddressID FOR XML AUTO, ELEMENTS
              )
              
              CREATE TABLE #tmp_AddressesChangelogTriggerXML1
              (
                                  NodeName VARCHAR(MAX), Value VARCHAR(MAX)
              )
              CREATE TABLE #tmp_AddressesChangelogTriggerXML2
              (
                                  NodeName VARCHAR(MAX), Value VARCHAR(MAX)
              )

              --Extract the values and column names
              INSERT INTO #tmp_AddressesChangelogTriggerXML2( NodeName, Value )
                        --Throw the XML into temp tables with the column name and value
                        SELECT N.value( 'local-name(.)', 'nvarchar(MAX)' ) AS NodeName, N.value( 'text()[1]', 'nvarchar(MAX)' ) AS VALUE
                        FROM @xmlNew.nodes( '/TABLE/*' ) AS T(N)

              INSERT INTO #tmp_AddressesChangelogTriggerXML1( NodeName, Value )
                        SELECT N.value( 'local-name(.)', 'nvarchar(MAX)' ) AS NodeName, N.value( 'text()[1]', 'nvarchar(MAX)' ) AS VALUE
                        FROM @xmlOld.nodes( '/TABLE/*' ) AS T(N)

              --Get the differences into a temp table
              SELECT *
              INTO #tmp_AddressesChangelogTriggerXMLsWithDifferences
              FROM
              (
                     SELECT COALESCE(A.NodeName, B.NodeName) AS NodeName, B.Value AS OldValue, A.Value AS NewValue
                     FROM #tmp_AddressesChangelogTriggerXML1 AS A
                           FULL OUTER JOIN #tmp_AddressesChangelogTriggerXML2 AS B ON A.NodeName = B.NodeName
                     WHERE A.Value <> B.Value
              ) AS tmp

              --If anything changed thhen start our write statments
              IF
              (
                     SELECT COUNT(*)
                     FROM #tmp_AddressesChangelogTriggerXMLsWithDifferences
              ) > 0
              BEGIN
                     BEGIN TRY
                           -- Now create the Summary record
                           --BEGIN TRANSACTION WRITECHANGELOGRECORDS
                           INSERT INTO TableChangeLogSummary( ID, ModifiedDate, ChangeType, TableName )
                                     --Get either insert, or if no insert value, get the delete value
                                     --Set the update type, I, D, U
                                     --Compare values with a full outer join
                                     --Filter on the ID we are on in the CURSOR
                                    SELECT COALESCE(I.AddressID, D.AddressID), GETDATE(),
                                       CASE
                                       WHEN D.AddressID IS NULL THEN 'I'
                                       WHEN I.AddressID IS NULL THEN 'D'
                                       ELSE 'U'
                                       END, @TableName
                                     FROM inserted AS I
                                                FULL OUTER JOIN deleted AS D ON I.AddressID = D.AddressID
                                     WHERE( I.AddressID = @PrimaryID OR
                                                  I.AcesAddressID IS NULL
                                                ) AND
                                                ( D.AddressID = @PrimaryID OR
                                                  D.AcesAddressID IS NULL
                                                )

                           --Get the last summary id that was inserted so we can use it in the detail record
                           SET @SummaryID = (SELECT IDENT_CURRENT('TableChangeLogSummary'))

                           --Insert our 
                           INSERT INTO TableChangeLogDetail( SummaryID, ColumnName, OldValue, NewValue )
                                     SELECT @SummaryID, T.NodeName, T.OldValue, T.NewValue
                                     FROM #tmp_AddressesChangelogTriggerXMLsWithDifferences AS T
                           --COMMIT TRANSACTION WRITECHANGELOGRECORDS
                           --PRINT 'RECORD WRITTEN'
                     END TRY
                     BEGIN CATCH
                           DECLARE @errorXML XML
                           SET @errorXML = (SELECT ERROR_NUMBER() AS ErrorNumber, ERROR_STATE() AS ErrorState, ERROR_SEVERITY() AS ErrorSeverity, ERROR_PROCEDURE() AS ErrorProcedure, ERROR_LINE() AS ErrorLine, ERROR_MESSAGE() AS ErrorMessage FOR XML RAW)
                           DECLARE @errorXMLText NVARCHAR(MAX) = ''
                           SET @errorXMLText = (SELECT CAST(@errorXML AS NVARCHAR(MAX)))
                           RAISERROR(@errorXMLText, 16, 1) WITH NOWAIT
                     END CATCH
              END
              --Go to the next record and process
              FETCH NEXT FROM @ourLogCursor INTO @PrimaryID
       END
       CLOSE @ourLogCursor
       DEALLOCATE @ourLogCursor
END
General Grievance
  • 4,555
  • 31
  • 31
  • 45
  • 4
    By not using a `CURSOR` is a start. SQL is a set based language, and so it performs great at set based solutions; a `CURSOR`/`WHILE` is the *complete opposite* of that methodology. – Thom A Jan 09 '22 at 18:17
  • 4
    You absolutely don't need any looping constructs here, I've used change tracking and CDC and never once thought I needed a cursor. You first need to identify which parts are the most costly by divide and conquer approach, then post a [Minimal, Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example) - emphasis on minimal. Include the object definitions and the actual execution plan (using [PasteThePlan](https://www.brentozar.com/pastetheplan)) - in fact I read *change tracking* and assumed you were using it, you're not so perhaps review what SQL offers out of the box? – Stu Jan 09 '22 at 18:21
  • 1
    If you want a history of the data, why not use temporal tables? – Thom A Jan 09 '22 at 18:23
  • @Larnu I can't use temporal tables, it's for a data export and the structure is a requirement of the vendor :( – Adam Source Jan 09 '22 at 18:26
  • 3
    This is much too big a procedure to fully optimize in a little [so] question Things I can see wrong here off the top of my head: Probably easier to make 3 separate triggers rather than mush them all together. Cursor is just bad, bad, you can almost certainly do this set-based. Better to write a customized trigger for each table with all its columns, rather than doing `SELECT * ... FOR XML` and dynamic (maybe generate the trigger dynamically). This doesn't mean you can't generate XML, just specify every column properly. ..... – Charlieface Jan 09 '22 at 18:36
  • 3
    ..... `IDENT_CURRENT` is not the best way to get last inserted ID, it has issues with concurrency. Why catch an error only to raise some weird XML error, just let the error propagate to the client. `IF (SELECT COUNT ... ) > 0` look inefficient, you should use `EXISTS` (even if SQL Server does normally optimize this). Change Data Capture would definitely be less invasive, you could periodically poll all the changes and batch them up. – Charlieface Jan 09 '22 at 18:36
  • 1
    Also, side note: a **trigger** is executed in the context and transaction of the statement that caused it to fire - it should be ***small and nimble*** - and it should **NOT** do any heavy lifting at all! Having a **cursor** inside a trigger is a sure-fire recipe for horribly bad performance and all sorts of troubles - **don't do it!** You need to re-architect your solution to **avoid** such heavy triggers - otherwise, you'll never be able to get any decent performance. – marc_s Jan 09 '22 at 19:13
  • 1
    @AdamSource I agree with other comments that set-based approach is the "correct" solution, but if you cannot rewrite everything, recommend trying setting cursor options FORWARD_ONLY, STATIC, and READ_ONLY. These options can greatly increase looping through large datasets. – Stephan Jan 09 '22 at 22:38
  • 1
    I may be able to help, but it will take a couple of days. Although a change-data-capture approach would be better, I have worked with trigger-based change logging before. Using XML as a way to generically capture column data is new to me, so I'll have to experiment. – T N Jan 10 '22 at 21:39

1 Answers1

1

Acknowledging the recommendation for using change data tracking and caution against putting too much logic into triggers, the following is a refactoring (and some outright rewriting) of your change capture logic.

The updated logic makes a single pass through the data, handing all affected records at once. Given the requirements, I think it is pretty close to optimal, but there may still be room for improvements. The conversion to and from XML likely adds a significant bit of overhead. The alternative would be to dynamically generate and apply custom triggers for each table that explicitly reference all of the data columns individually to get the details and UNION them together.

I also refined the value comparison to better handle nulls, case sensitivity, and potential trailing space changes.

The code below is not in the form of a trigger, but in a form suitable for stand-alone testing. I figured you (and any others who may be interested) would want to test Once checked out, you should be able to retrofit it back into your trigger.

Note that this is not a 100% generalized solution. Some column types may not be supported. The logic currently assumes a single column primary key of type integer. Changes would be required to handle deviations from these (and possibly some currently unidentified) constraints.

-- Simulated change log tables
DECLARE @TableChangeLogSummary TABLE (ID INT IDENTITY(1,1), KeyValue INT NOT NULL, ModifiedDate DATETIME NOT NULL, ChangeType CHAR(1) NOT NULL, TableName NVARCHAR(1000) NOT NULL )
DECLARE @TableChangeLogDetails TABLE (ID INT IDENTITY(1,1), SummaryID int NOT NULl, ColumnName NVARCHAR(1000) NOT NULL, OldValue NVARCHAR(MAX), NewValue NVARCHAR(MAX))

-- Simulated system defined inserted/deleted tables
DECLARE @inserted TABLE (ID INTEGER, Value1 NVARCHAR(100), Value2 BIT, Value3 FLOAT)
DECLARE @deleted TABLE (ID INTEGER, Value1 NVARCHAR(100), Value2 BIT, Value3 FLOAT)

-- Test data
INSERT @inserted
VALUES
    (1, 'AAA', 0, 3.14159), -- Insert
    (2, 'BBB', 1, null),    -- Mixed updates including null to non-null and non-null to null
    (3, 'CCC', 0, 0),       -- Trailing space change
    (4, 'DDD', null, 1.68), -- No changes
    (5, '', 0, null),       -- No changes with blanks and nulls
    (6, null, null, null),  -- No changes all nulls
    (7, null, null, null)   -- Insert all nulls (summary with key, but no details will be logged)

INSERT @deleted
VALUES
    (2, 'bbb', null, 2.73),
    (3, 'CCC ', 0, 0),
    (4, 'DDD', null, 1.68),
    (5, '', 0, null),
    (6, null, null, null),
    (8, null, null, null), -- Delete all null values (summary with key, but no details will be logged)
    (9, 'ZZZ', 999, 999.9) -- Delete non-nulls

--- Now the real work begins...

-- Set table and information. Assumes table has exactly one PK column. Later logic assumes an INT.
DECLARE @TableName NVARCHAR(1000) = 'MyTable' -- To be extracted from the parent object of the trigger
DECLARE @KeyColumnName SYSNAME = 'ID' -- This can be fixed if known or derived on the fly from the primary key definition

-- Extract inserted and/or deleted data
DECLARE @InsertedXml XML = (
        SELECT *
        FROM @inserted
        FOR XML PATH('inserted'), TYPE
)
DECLARE @DeletedXml XML = (
        SELECT *
        FROM @deleted
        FOR XML PATH('deleted'), TYPE
)

-- Parse and reassange the captured key and data values
DECLARE @TempDetails TABLE(
    KeyValue INT NOT NULL,
    ChangeType CHAR(1) NOT NULL,
    ColumnName VARCHAR(1000) NOT NULL,
    IsKeyColumn BIT NOT NULL,
    NewValue NVARCHAR(MAX),
    OldValue NVARCHAR(MAX))
INSERT @TempDetails
SELECT
    KeyValue = COALESCE(I.KeyValue, D.KeyValue),
    ChangeType = CASE WHEN D.KeyValue IS NULL THEN 'I' WHEN I.KeyValue IS NULL THEN 'D' ELSE 'U' END,
    ColumnName = COALESCE(I.ColumnName, D.ColumnName),
    IsKeyColumn = K.IsKeyColumn,
    NewValue = I.Value,
    OldValue = D.Value
FROM (
    SELECT K.KeyValue, C.ColumnName, C.Value
    FROM @InsertedXml.nodes( '/inserted' ) R(Row)
    CROSS APPLY (
        SELECT KeyValue = C.Col.value('text()[1]', 'int')
        FROM R.Row.nodes( './*' ) C(Col)
        WHERE C.Col.value( 'local-name(.)', 'nvarchar(MAX)' ) = @KeyColumnName
    ) K
    CROSS APPLY (
        SELECT ColumnName = C.Col.value('local-name(.)', 'nvarchar(MAX)'), Value = C.Col.value('text()[1]', 'nvarchar(MAX)')
        FROM R.Row.nodes( './*' ) C(Col)
    ) C
) I
FULL OUTER JOIN (
    SELECT K.KeyValue, C.ColumnName, C.Value
    FROM @DeletedXml.nodes( '/deleted' ) R(Row)
    CROSS APPLY (
        SELECT KeyValue = C.Col.value('text()[1]', 'int')
        FROM R.Row.nodes( './*' ) C(Col)
        WHERE C.Col.value( 'local-name(.)', 'nvarchar(MAX)' ) = @KeyColumnName
    ) K
    CROSS APPLY (
        SELECT ColumnName = C.Col.value('local-name(.)', 'nvarchar(MAX)'), Value = C.Col.value('text()[1]', 'nvarchar(MAX)')
        FROM R.Row.nodes( './*' ) C(Col)
    ) C
) D
    ON D.KeyValue = I.KeyValue
    AND D.ColumnName = I.ColumnName
CROSS APPLY (
    SELECT IsKeyColumn = CASE WHEN COALESCE(I.ColumnName, D.ColumnName) = @KeyColumnName THEN 1 ELSE 0 END
) K
WHERE ( -- We need to be careful about edge cases here
    (I.Value IS NULL AND D.Value IS NOT NULL)
    OR (I.Value IS NOT NULL AND D.Value IS NULL)
    OR I.Value <> D.Value COLLATE Latin1_General_Bin -- Precise compare (case and accent sensitive)
    OR DATALENGTH(I.Value) <> DATALENGTH(D.Value) -- Catch trailing space cases
    OR K.IsKeyColumn = 1
    )

-- Get rid of updates with no changes, but keep key-only inserts or deletes
DELETE T
FROM @TempDetails T
WHERE T.IsKeyColumn = 1
AND T.ChangeType = 'U'
AND NOT EXISTS (
    SELECT *
    FROM @TempDetails T2
    WHERE T2.KeyValue = T.KeyValue
    AND T2.IsKeyColumn = 0
)

-- Local table to capture and link SummaryID between the summary and details tables
DECLARE @CaptureSummaryID TABLE (SummaryID int, KeyValue INT NOT NULL)

-- Insert change summary and capture the assigned Summary ID via the OUTPUT clause
INSERT INTO @TableChangeLogSummary (KeyValue, ModifiedDate, ChangeType, TableName)
OUTPUT INSERTED.id, INSERTED.KeyValue INTO @CaptureSummaryID
SELECT T.KeyValue, ModifiedDate = GETDATE(), T.ChangeType, TableName = @TableName
FROM @TempDetails T
WHERE T.IsKeyColumn = 1
ORDER BY T.KeyValue  -- Optional, but adds consistancy

-- Insert change details
INSERT INTO @TableChangeLogDetails (SummaryID, ColumnName, OldValue, NewValue)
SELECT S.SummaryID, T.ColumnName, T.OldValue, T.NewValue
FROM @CaptureSummaryID S
JOIN @TempDetails T ON T.KeyValue = S.KeyValue
WHERE T.IsKeyColumn = 0
ORDER BY T.ColumnName  -- Optional, but adds consistancy

-- View test results
SELECT 'Change Log:', *
FROM @TableChangeLogSummary S
LEFT JOIN @TableChangeLogDetails D ON D.SummaryID = S.ID
ORDER BY S.ID, D.ID
T N
  • 4,322
  • 1
  • 5
  • 18