This question has been moved here and been revised as more logging has been added.
This is the structure of the SQL database:
a) Table called tblPPObject
which has a bigint
column called InsertionID
and this is the PK and there is a clustered index on this column.
b) Table called tblPPObjectChildObjectList
which has a bigint
column called InsertionID
and bigint
column called ChildInsertionID
. There is a composite PK on these columns and there is a clustered index on these columns. There are no FKs on this table.
d) The database isolation level is read committed snapshot
.
e) Any procedure that inserts/updates/deletes rows from the tblPPObjectChildObjectList table uses using (rowlock, updlock)
when accessing rows.
f) All read operations are done in SNAPSHOT
mode. All write operations are done in READ_COMMITTED_SNAPSHOT
mode and so in the database this is effectively READ_COMMITTED_SNAPSHOT
as SNAPSHOT
is switched on. So in the code the transaction is set like this:
m_sqlTransaction = m_sqlConnection.BeginTransaction(m_readOnly ? IsolationLevel.Snapshot : IsolationLevel.ReadCommitted);
Periodically, rows are removed from the tblPPObjectChildObjectList
. These are rows that are no longer part of the list and have not been accessed for 2 mins.
In the procedure that removes these redundant rows, we sometimes get the following error:
[2022-01-05 08:27:45.123 TC:08:27:45:01] DEBUG ( 87) PP.Workflow.Service.Database.MetadataProvider - Purge failed ('Failed to purge data due to an SQL error ('Error purging items from the database. Snapshot isolation transaction aborted due to update conflict. You cannot use snapshot isolation to access table 'dbo.tblPPObjectChildObjectList' directly or indirectly in database 'PPGallium_Metadata' to update, delete, or insert the row that has been mod')').
As I mentioned above there are no FKs on this table and I have proved from logging that nothing is changing the rows that are being removed (new rows are being updated and inserted but not the same ones as those being removed).
From my understanding, this means that we should never get the update conflict error.
We are looking for advice/suggestions on how to prevent this error as there is no reason that the remove procedure should raise this exception.
It is worth adding that I have a test system where I can reproduce this issue within one hour consistently.
As requested I have added the create table statements below for the tblPPObject
table and the tblPPObjectChildObjectList
table and the purge procedure that is doing the removal.
Please note that there other other tables in the database but the tblPPObjectChildObjectList
table is the one that is producing the snapshot errors.
Please note that the snapshot error occurs after the 'Removing rows from tblPPObjectChildObjectList' log statement. As you can see a huge amount of logging has been added to make sure that we are not removing rows that are currently being updated.
-- Parent table
CREATE TABLE [dbo].[tblPPObject](
[ObjectID] [uniqueidentifier] NOT NULL,
[UpdateTime] [datetime] NULL,
[InsertionID] [bigint] NOT NULL,
[SourceInterpreterID] [uniqueidentifier] NULL,
[LevelID] [nvarchar](260) NULL,
[UpdateID] [bigint] NULL,
[TypeNameID] [int] NOT NULL,
[SearchParentInsertionID] [bigint] NULL,
[GroupID] [bigint] NULL,
CONSTRAINT [PK_tblPPObject] PRIMARY KEY CLUSTERED
(
[InsertionID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 85, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[tblPPObject] WITH CHECK ADD CONSTRAINT [FK_tblPPObject_tblPPTypeName] FOREIGN KEY([TypeNameID])
REFERENCES [dbo].[tblPPTypeName] ([TypeNameID])
GO
ALTER TABLE [dbo].[tblPPObject] CHECK CONSTRAINT [FK_tblPPObject_tblPPTypeName]
GO
-- Child list table
CREATE TABLE [dbo].[tblPPObjectChildObjectList](
[SortIndex] [int] NOT NULL,
[UpdateTime] [datetime] NULL,
[InsertionID] [bigint] NOT NULL,
[ChildInsertionID] [bigint] NOT NULL,
[SortText] [nvarchar](260) NULL,
[UpdateID] [bigint] NULL,
[RemovalThreshold] [bigint] NULL,
CONSTRAINT [PK_tblPPObjectChildObjectList] PRIMARY KEY CLUSTERED
(
[InsertionID] ASC,
[ChildInsertionID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, FILLFACTOR = 85, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Remove procedure
ALTER procedure [dbo].[spPurge](@modelinsertionid bigint)
as
begin
begin try
print 'Start Purge'
set deadlock_priority low
set nocount on
declare @currenttime datetime
set @currenttime = GetUTCDate()
-- It is possible for "redundant" items to come back again due to the service communication so allow for this
declare @thresholddatetime datetime
set @thresholddatetime = DATEADD(mi, -2, @currenttime)
-- Constants
declare @maximumprocedureruntimeinseconds int
declare @purgecount int
declare @objectsremainingtoremove int
declare @starttime datetime
set @purgecount = 500
set @maximumprocedureruntimeinseconds = 1
set @starttime = @currenttime
set @objectsremainingtoremove = @purgecount
-- Create the required temporary tables
declare @InsertionIDs table (InsertionID bigint, PRIMARY KEY (InsertionID))
declare @ObjectIDs table (ObjectID uniqueidentifier, PRIMARY KEY (ObjectID))
declare @RedundantInsertionIDs table (InsertionID bigint, PRIMARY KEY (InsertionID))
declare @pass int
set @pass = 0
while (@objectsremainingtoremove > 0)
begin
set @pass = @pass + 1
delete from @InsertionIDs
delete from @RedundantInsertionIDs
-- Get the next set of items to remove from the previous iteration of the procedure. This will be the child rows of the children removed in the previous iteration
insert into @RedundantInsertionIDs select top (@purgecount) InsertionID from tblPPRedundantObjects
declare @count int
select @count = count(*) from @RedundantInsertionIDs
if (@count > 0)
begin
print 'Taken ' + cast(@count as varchar) + ' rows from tblPPRedundantObjects'
end
insert into @InsertionIDs select InsertionID from @RedundantInsertionIDs
set @objectsremainingtoremove = @objectsremainingtoremove - @@rowcount
if (@objectsremainingtoremove > 0)
begin
print 'Get child objects'
-- Look for entries that have become redundant because the update id has been modified
insert into @InsertionIDs
select top (@objectsremainingtoremove) poco.ChildInsertionID from tblPPObjectChildObject as poco with (rowlock, updlock)
where (poco.UpdateID = -1) and (poco.UpdateTime < @thresholddatetime)
set @objectsremainingtoremove = @objectsremainingtoremove - @@rowcount
if (@objectsremainingtoremove > 0)
begin
print 'Get child list objects'
-- Look for entries that have become redundant because the update id has been modified
insert into @InsertionIDs
select top (@objectsremainingtoremove) pocol.ChildInsertionID from tblPPObjectChildObjectList as pocol with (rowlock, updlock)
where (pocol.UpdateID = -1) and (pocol.UpdateTime < @thresholddatetime)
set @objectsremainingtoremove = @objectsremainingtoremove - @@rowcount
end
end
if ((select count(*) from @InsertionIDs) = 0)
begin
break
end
declare @debug_insertionid int
DECLARE debug_cursor CURSOR FOR
SELECT InsertionID from @InsertionIDs
OPEN debug_cursor
FETCH NEXT FROM debug_cursor INTO @debug_insertionid
WHILE @@FETCH_STATUS = 0
BEGIN
PRINT 'InsertionID to remove is ' + cast(@debug_insertionid as varchar(max))
FETCH NEXT FROM debug_cursor INTO @debug_insertionid
END
CLOSE debug_cursor
DEALLOCATE debug_cursor
print 'Get objects'
insert into @ObjectIDs
select ObjectID from tblPPObject as po
join @InsertionIDs as ii
on po.InsertionID = ii.InsertionID
print 'Merge in redundant objects 1'
-- Insert the next level of child items to remove the next time the procedure is called
merge tblPPRedundantObjects as ro
using (select ChildInsertionID from tblPPObjectChildObject as poco with (rowlock, updlock) join @InsertionIDs as ii on poco.InsertionID = ii.InsertionID) as s
on ro.InsertionId = s.ChildInsertionId
when not matched then
insert (InsertionId) values (s.ChildInsertionId);
print 'Merge in redundant objects 2'
merge tblPPRedundantObjects as ro
using (select ChildInsertionID from tblPPObjectChildObjectList as pocol with (rowlock, updlock) join @InsertionIDs as ii on pocol.InsertionID = ii.InsertionID) as s
on ro.InsertionId = s.ChildInsertionId
when not matched then
insert (InsertionId) values (s.ChildInsertionId);
print 'Start removing rows...'
delete from tblPPObjectChildObject with (rowlock, updlock)
from tblPPObjectChildObject as poco
join @InsertionIDs as ii
on poco.InsertionID = ii.InsertionID
print 'Rows removed from tblPPObjectChildObject is ' + cast(@@rowcount as varchar(max))
delete from tblPPObjectChildObjectList with (rowlock, updlock)
from tblPPObjectChildObjectList as pocol
join @InsertionIDs as ii
on pocol.InsertionID = ii.InsertionID
print 'Rows removed from tblPPObjectChildObjectList is ' + cast(@@rowcount as varchar(max))
delete from tblPPObjectProperty with (rowlock, updlock)
from tblPPObjectProperty as op
join @InsertionIDs as ii
on op.InsertionID = ii.InsertionID
delete from tblPPObjectBlobProperty with (rowlock, updlock)
from tblPPObjectBlobProperty as bop
join @InsertionIDs as ii
on bop.InsertionID = ii.InsertionID
print 'Removing rows from tblPPObjectChildObject'
delete from tblPPObjectChildObject with (rowlock, updlock)
from tblPPObjectChildObject as poco
join @InsertionIDs as ii
on poco.ChildInsertionID = ii.InsertionID
print 'Removing rows from tblPPObjectChildObjectList'
delete from tblPPObjectChildObjectList with (rowlock, updlock)
from tblPPObjectChildObjectList as pocol
join @InsertionIDs as ii
on pocol.ChildInsertionID = ii.InsertionID
print 'Removing rows from tblPPObject'
delete from tblPPObject with (rowlock, updlock)
from tblPPObject as po
join @InsertionIDs as ii
on po.InsertionID = ii.InsertionID
-- Finally we can now remove the objects that we took from the tblRedundantObjects table as they have now been acted upon
delete from tblPPRedundantObjects
from tblPPRedundantObjects as ro
join @RedundantInsertionIDs as ri
on ro.InsertionID = ri.InsertionID
print 'Pass processed'
-- If this procedure has been running too long then stop processing
if (DATEADD(s, @maximumprocedureruntimeinseconds, @starttime) < GetUtcDate())
begin
print 'Stopping due to overrun after ' + cast(@pass as varchar) + ' pass(es) with ' + cast((@purgecount - @objectsremainingtoremove) as varchar) + ' object(s) purged'
break
end
end
print 'End Purge'
-- Return all the objects that have been removed
select ObjectID
from @ObjectIDs
end try
begin catch
declare @errormessage varchar(256)
select @errormessage = ERROR_MESSAGE()
-- Raise an error and return
raiserror('Error purging items from the database. %s', 16, 1, @errormessage)
end catch
end