I've seen a couple of similar threads, but they all seem to be about massive databases. I've created a dummy database to demonstrate the issue after seeing this crop up in a small live database this morning.
The basis for this data is as follows: a company keeps track of stock portfolios for its 100 clients. Each of 1000 stocks has a daily record that lists the four investors that own it, along with their percentage. Unfortunately, it has a glitch that allows for an owner to show up multiple times. The procedure parses the data and separates the records out so there are 4 records for each stock on each day, and it will then add up the portfolio total for each owner. However, because there are multiple records, that can overstate the value for that owner. So a flag is being inserted to identify any of these duplicates. Later in the code, the value of each line is multiplied by that flag, which is 0 for a duplicate and 1 if not.
I have five methods of updating that flag. I start with 0, which is just to use a CTE with a SELECT statement as a baseline; it takes about 0.07 seconds. 1 uses that CTE with a JOIN to update the table and takes about 48 seconds. 2 uses a nested select statement instead of the CTE and takes about 48 seconds. 3 dumps that CTE to a table variable and and joins to that and takes about 0.13 seconds. 4 I had thought would be the least efficient because it uses a counter loop and updates one row at a time, but it only took 0.17 seconds. 5 uses a CASE statement to update all rows, joined to a CTE, and takes about 48 seconds.
DECLARE @OwnRec TABLE (
StockID INT
, TradeDate DATE
, Shares DECIMAL(4,0)
, Price DECIMAL(4,2)
, Owner1 INT
, Owner1Pct DECIMAL(3,2)
, Owner2 INT
, Owner2Pct DECIMAL(3,2)
, Owner3 INT
, Owner3Pct DECIMAL(3,2)
, Owner4 INT
, Owner4Pct DECIMAL(3,2)
)
DECLARE @OwnRec2 TABLE (
RecID INT IDENTITY
, StockID INT
, TradeDate DATE
, Shares DECIMAL(4,0)
, Price DECIMAL(4,2)
, Owner0 INT
, Owner0Pct DECIMAL(3,2)
, OwnerNum INT
, DupeOwner TINYINT
)
DECLARE @CullDupe TABLE (
ID INT IDENTITY
, RecID INT
)
DECLARE @Method INT
, @Counter1 INT = 0
, @StartTime DATETIME
--Populate tables with dummy data
WHILE @Counter1 < 1000
BEGIN
SET @Counter1 += 1
INSERT INTO @OwnRec (
StockID
, TradeDate
, Shares
, Price
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
)
SELECT @Counter1
, '2016-09-26'
, ROUND((RAND() * 1000 + 500)/25,0)*25
, ROUND((RAND() * 30 + 20),2)
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
, ROUND((RAND() * 100 + .5),0)
, CAST(ROUND((RAND() * 5 + .5),0)*.05 AS DECIMAL(3,2))
END
SET @Counter1 = 0
WHILE @Counter1 < 1000
BEGIN
SET @Counter1 += 1
INSERT INTO @OwnRec (
StockID
, TradeDate
, Shares
, Price
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
)
SELECT @Counter1 + 1000
, '2016-09-27'
, Shares
, ROUND(Price * ROUND(RAND()*10 + .5,0)*.01+.95,2)
, Owner1
, Owner1Pct
, Owner2
, Owner2Pct
, Owner3
, Owner3Pct
, Owner4
, Owner4Pct
FROM @OwnRec WHERE StockID = @Counter1
END
UPDATE orx
SET Owner2Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner2
UPDATE orx
SET Owner3Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner3
UPDATE orx
SET Owner4Pct = Owner1Pct
FROM @OwnRec orx
WHERE Owner1 = Owner4
UPDATE orx
SET Owner3Pct = Owner2Pct
FROM @OwnRec orx
WHERE Owner2 = Owner3
UPDATE orx
SET Owner4Pct = Owner2Pct
FROM @OwnRec orx
WHERE Owner2 = Owner4
UPDATE orx
SET Owner4Pct = Owner3Pct
FROM @OwnRec orx
WHERE Owner3 = Owner4
INSERT INTO @OwnRec2
SELECT StockID, TradeDate, Shares, Price, Owner1 AS Owner0, Owner1Pct, 1, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner2 AS Owner0, Owner2Pct, 2, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner3 AS Owner0, Owner3Pct, 3, 1 AS Owner0Pct
FROM @OwnRec
UNION
SELECT StockID, TradeDate, Shares, Price, Owner4 AS Owner0, Owner4Pct, 4, 1 AS Owner0Pct
FROM @OwnRec
--END Populate tables with dummy data
SET @StartTime = GETDATE()
SET @Method = 5 -- Choose which method to test
--CASE 0: Just identify duplicates
IF @Method = 0
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
SELECT * FROM CullDupe WHERE rn > 1
END
--CASE 1: Update on JOIN to CTE
IF @Method = 1
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN CullDupe cd
ON OR2.RecID = cd.RecID
WHERE rn > 1
END
--CASE 2: Update on JOIN to nested SELECT
IF @Method = 2
BEGIN
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN (SELECT RecID, ROW_NUMBER() OVER
(PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2) cd
ON OR2.RecID = cd.RecID
WHERE rn > 1
END
--CASE 3: Update on JOIN to temp table
IF @Method = 3
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
INSERT INTO @CullDupe SELECT RecID FROM CullDupe WHERE rn > 1
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
JOIN @CullDupe cd
ON OR2.RecID = cd.RecID
END
--CASE 4: Update using counted loop
IF @Method = 4
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
INSERT INTO @CullDupe SELECT RecID FROM CullDupe WHERE rn > 1
SET @Counter1 = 0
WHILE @Counter1 < (SELECT MAX(ID) FROM @CullDupe)
BEGIN
SET @Counter1 += 1
UPDATE OR2
SET DupeOwner = 0
FROM @OwnRec2 OR2
WHERE RecID = (SELECT RecID FROM @CullDupe WHERE ID = @Counter1)
END
END
--CASE 5: Update using JOIN to CTE, but updating all rows (CASE to identify)
IF @Method = 5
BEGIN
; WITH CullDupe
AS (
SELECT RecID, ROW_NUMBER() OVER (PARTITION BY StockID, TradeDate, Owner0 ORDER BY OwnerNum) AS rn
FROM @OwnRec2
)
UPDATE OR2
SET DupeOwner = CASE WHEN rn > 1 THEN 0 ELSE 1 END
FROM @OwnRec2 OR2
JOIN CullDupe cd
ON OR2.RecID = cd.RecID
END
SELECT 'Method ' + CAST(@Method AS NVARCHAR(1)) + ': ' + CAST(DATEDIFF(ms,@StartTime,GETDATE()) AS NVARCHAR(10)) + ' milliseconds'