2

I have the following small MVWE for a basic micro-auditing system which works fine but lacks a functionality:

DROP TABLE IF EXISTS audit CASCADE;
CREATE TABLE audit(
    Id          BIGSERIAL  NOT NULL
   ,TimeValue   TIMESTAMP  NOT NULL
   ,RoleName    NAME       NOT NULL
   ,Operation   NAME       NOT NULL
   ,SchemaName  NAME       NOT NULL
   ,TableName   NAME       NOT NULL
   ,Identifiers BIGINT[]
    ---
   ,PRIMARY KEY(Id)
);

-- Audit Trigger:
DROP FUNCTION IF EXISTS audit_trigger() CASCADE;
CREATE OR REPLACE FUNCTION audit_trigger()
RETURNS TRIGGER AS
$BODY$
BEGIN

INSERT INTO audit(TimeValue, RoleName, Operation, SchemaName, TableName) VALUES
(now()::TIMESTAMP, current_user, TG_OP, TG_TABLE_SCHEMA, TG_RELNAME);
RETURN NULL;

END;
$BODY$
LANGUAGE plpgsql SECURITY DEFINER;

-- Channels:
DROP TABLE IF EXISTS channels CASCADE;
CREATE TABLE channels(
    Id          INTEGER    NOT NULL
   ,UserKey     TEXT       NOT NULL
   ,Active      BOOLEAN    NOT NULL   DEFAULT(TRUE)
    ---
   ,PRIMARY KEY(Id)
   ,UNIQUE(UserKey)
);

CREATE TRIGGER channel_audit_trigger BEFORE INSERT OR UPDATE OR DELETE ON channels
FOR EACH STATEMENT EXECUTE PROCEDURE audit_trigger();

-- Perform some operations:
INSERT INTO channels(
SELECT C.Id, 'Channel-' || C.Id
FROM generate_series(1, 300, 10) AS C(Id)
);

DELETE FROM channels WHERE id < 10;

UPDATE channels
SET UserKey = 'wild channel'
WHERE id = 21;

I would like to add into the last column of audit table, identifiers of rows that have been inserted/updated/deleted in channels.

I have used STATEMENT level because I just need to collects identifiers in a array. But I do not find how to access DML statistics. Conversely at the ROW level I must handle OLD and NEW cases and I cannot succeed in aggregate all touched identifier.

How can I proceed in order to fill the last column of audit table with touched identifiers?

Update

Finally I reached my goal, but this solution might not be scalable and may have some unwanted drawbacks (I am open to any constructive feedback and advice).

Basically, how I have solved my problem:

  1. Log at ROW level BEFORE DML is performed into a table audit_rowlevel;
  2. Aggregate the freshly added content of audit_rowlevel into audit_statementlevel at STATEMENT level AFTER DML is performed;

Minimal Working Example is now:

DROP TABLE IF EXISTS audit_rowlevel CASCADE;
CREATE TABLE audit_rowlevel(
    Id          BIGSERIAL  NOT NULL
   ,Aggregated  BOOLEAN    NOT NULL    DEFAULT(FALSE)
   ,TimeValue   TIMESTAMP  NOT NULL
    -- https://www.postgresql.org/docs/current/static/functions-info.html
   ,RoleName    NAME       NOT NULL
   ,ClientIP    INET       NOT NULL
   ,ClientPid   INTEGER    NOT NULL
    -- https://www.postgresql.org/docs/current/static/plpgsql-trigger.html
   ,Operation   TEXT       NOT NULL
   ,SchemaName  NAME       NOT NULL
   ,TableName   NAME       NOT NULL
   ,RowId       BIGINT     NOT NULL
    -- https://www.postgresql.org/docs/current/static/functions-json.html
   ,OldValue    JSONB
   ,NewValue    JSONB
    ---
   ,PRIMARY KEY(Id)
);

-- Row Level Trigger:
DROP FUNCTION IF EXISTS audit_rowlevel_trigger() CASCADE;
CREATE OR REPLACE FUNCTION audit_rowlevel_trigger()
RETURNS TRIGGER AS
$BODY$
DECLARE

history  BOOLEAN := (TG_NARGS > 0) AND (TG_ARGV[0]::BOOLEAN);
rowid    BIGINT;
oldvalue JSONB;
newvalue JSONB;

BEGIN 

-- Handle NEW:
IF TG_OP = ANY('{INSERT,UPDATE}') THEN
    IF history THEN
        newvalue := to_jsonb(NEW);
    END IF;
    rowid := NEW.Id::BIGINT;
END IF;

-- Handle OLD:
IF TG_OP = ANY('{UPDATE,DELETE}') THEN
    IF history THEN
        oldvalue := to_jsonb(OLD);
    END IF;
    rowid := OLD.Id::BIGINT;
END IF;

-- INSERT:
INSERT INTO audit_rowlevel(TimeValue, RoleName, ClientIP, ClientPID, Operation, SchemaName, TableName, RowId, NewValue, OldValue) VALUES
(now()::TIMESTAMP, current_user, inet_client_addr(), pg_backend_pid(), TG_OP, TG_TABLE_SCHEMA, TG_RELNAME, RowId, NewValue, OldValue);

-- RETURN:
IF TG_OP = ANY('{INSERT,UPDATE}') THEN
    RETURN NEW;
ELSIF TG_OP = 'DELETE' THEN
    RETURN OLD;
ELSE
    RETURN NULL;
END IF; 

END;
$BODY$
LANGUAGE plpgsql SECURITY DEFINER;

-- Statement Level Trigger:
DROP TABLE IF EXISTS audit_statementlevel CASCADE;
CREATE TABLE audit_statementlevel(
    Id          BIGSERIAL  NOT NULL
   ,TimeValue   TIMESTAMP  NOT NULL
   ,RoleName    NAME       NOT NULL
   ,ClientIP    INET       NOT NULL
   ,ClientPid   INTEGER    NOT NULL
   ,Operation   TEXT       NOT NULL
   ,SchemaName  NAME       NOT NULL
   ,TableName   NAME       NOT NULL
   ,RowCount    BIGINT     NOT NULL
   ,RowIds      BIGINT[]   NOT NULL
   ,AuditIds    BIGINT[]   NOT NULL
    ---
   ,PRIMARY KEY(Id)
);


-- Row Level Trigger:
DROP FUNCTION IF EXISTS audit_statementlevel_trigger() CASCADE;
CREATE OR REPLACE FUNCTION audit_statementlevel_trigger()
RETURNS TRIGGER AS
$BODY$
DECLARE

rowcount BIGINT;

BEGIN  

WITH

A AS (
SELECT
    TimeValue, RoleName, ClientIP, ClientPid, Operation, SchemaName, TableName
   ,COUNT(*)
   ,array_agg(RowId)
   ,array_agg(Id)
FROM
    audit_rowlevel
WHERE
    NOT Aggregated
GROUP BY
    TimeValue, RoleName, ClientIP, ClientPid, Operation, SchemaName, TableName
ORDER BY
    TimeValue
),

B AS (
INSERT INTO audit_statementlevel(TimeValue, RoleName, ClientIP, ClientPid, Operation, SchemaName, TableName, RowCount, RowIds, AuditIds)
(SELECT * FROM A)
RETURNING AuditIds
),

C AS (
SELECT array_agg(DISTINCT T.id) AS Ids FROM B, unnest(B.AuditIds) AS T(id)
)

UPDATE
    audit_rowlevel
SET
    Aggregated = TRUE
FROM
    C
WHERE
    Id = ANY(C.Ids);

RETURN NULL;

END;
$BODY$
LANGUAGE plpgsql SECURITY DEFINER;


-- Channels:
DROP TABLE IF EXISTS channels CASCADE;
CREATE TABLE channels(
    Id          INTEGER    NOT NULL
   ,UserKey     TEXT       NOT NULL
   ,Active      BOOLEAN    NOT NULL   DEFAULT(TRUE)
    ---
   ,PRIMARY KEY(Id)
   ,UNIQUE(UserKey)
);

CREATE TRIGGER channel_audit_rowlevel_trigger BEFORE INSERT OR UPDATE OR DELETE ON channels
FOR EACH ROW EXECUTE PROCEDURE audit_rowlevel_trigger(TRUE);

CREATE TRIGGER channel_audit_statementlevel_trigger AFTER INSERT OR UPDATE OR DELETE ON channels
FOR EACH STATEMENT EXECUTE PROCEDURE audit_statementlevel_trigger();


-- Perform some operations:
INSERT INTO channels(
SELECT C.Id, 'Channel-' || C.Id
FROM generate_series(1, 300, 10) AS C(Id)
);

DELETE FROM channels WHERE id < 10;

UPDATE channels
SET UserKey = 'wild channel'
WHERE id = 21;

I am interested knowing if this solution looks right to professional developers. Am I going into the good direction or is this solution evil?

jlandercy
  • 7,183
  • 1
  • 39
  • 57
  • 1
    Unrelated, but: the cast to bigint in e.g. `NEW.Id::BIGINT` is useless. `new.id` already **is** a `bigint` –  Aug 21 '17 at 09:40
  • 1
    Postgres 10 will support "transition tables" in statement level triggers that will contain all changed rows (similar to e.g. the "inserted" or "deleted" pseudo-tables in SQL Server triggers) –  Aug 21 '17 at 09:43
  • @a_horse_with_no_name Thinking more about it, I feel that my solution will not properly handle concurrence because of the non selective boolean flag `aggregated`. Am I right? – jlandercy Aug 21 '17 at 09:45
  • @a_horse_with_no_name good to know about psql 10 – jlandercy Aug 21 '17 at 09:45

0 Answers0