0

What type of data structure can help compress JSON patches?

We can compress JSON patches by removing redundant patches. Redundant patches are patches which are fully replaced later, either by a patch with the same path, or with a patch with a shorter prefix.

This is like Last Writer Wins with Highest Ancestors.

As an example, take the following list

[
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 792
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 264
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 784
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 296
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 760
    },
    {
        "op": "remove",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d"
    },
]

This can be compressed to the following since "/layouts/13da6470-dbec-57e7-b412-0d75110b145d" is a prefix key for patches before it.

[
    {
        "op": "remove",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d"
    },
]

On the other hand, if we had a different list,

[
    {
        "op": "add",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d",
        "value": { x: 0, y: 0 } 
    },
        {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 792
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 264
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 784
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/y",
        "value": 296
    },
    {
        "op": "replace",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d/x",
        "value": 760
    },
]

it could be compressed to

[
    {
        "op": "add",
        "path": "/layouts/13da6470-dbec-57e7-b412-0d75110b145d",
        "value": { x: 760, y: 296 } 
    },
]

I've thought of using a trie where each path segment is treated as a character, but in the same thought, could I use some type of radix sort by path segment + patch index too?

Sentient
  • 781
  • 2
  • 10
  • 23
  • I don't understand what you're asking for. You've already identified methods for reducing the data in a JSON file, and it seems that you can change the format of the source file to facilitate that. Only you can judge what works best for you (by whatever definition of 'best' you're using). Is there some specific question you need answering here? – Tangentially Perpendicular Jul 27 '23 at 22:35
  • There is the obvious solution of using a trie to compress json patches according to a last writer wins with highest ancestor, but I wonder if it's the most efficient way of doing so in terms of runtime and memory. – Sentient Jul 28 '23 at 07:24

1 Answers1

0

Here's an implementation of a Radix Trie; not sure if it's the fastest way.

/* eslint-disable max-classes-per-file */
import { IJsonPatch, splitJsonPath } from "mobx-state-tree";
import { applyPatches } from "immer";

class TrieNode {
  children: Map<string, TrieNode>;

  label: string[];

  patch: IJsonPatch | undefined;

  // eslint-disable-next-line default-param-last
  constructor(label = [""], patch?: IJsonPatch) {
    this.children = new Map();
    this.label = label;
    this.patch = patch;
  }
}

/**
 * Finds index at which arrays differ. Returns -1 if they are equal or one contains the other.
 */
function splitIndex(partsA: string[], partsB: string[]) {
  const maxLength = Math.min(partsA.length, partsB.length);
  for (let i = 0; i < maxLength; i++) {
    if (partsA[i] !== partsB[i]) {
      return i;
    }
  }
  return -1;
}

export class PatchRadixTrie {
  root: TrieNode;

  constructor(patches: IJsonPatch[] = []) {
    this.root = new TrieNode([""]);
    patches.forEach(patch => this.insert(patch));
  }

  insert(patch: IJsonPatch) {
    let node = this.root;
    const parts = splitJsonPath(patch.path);
    let depth = 0;
    while (depth < parts.length) {
      // Get path that is currently being processed, and unprocessed path parts.
      const path = parts[depth];
      const pathSuffix = parts.slice(depth);
      if (!node.children.has(path)) {
        node.children.set(path, new TrieNode(pathSuffix, patch));
        return;
      }
      const parent = node;
      node = node.children.get(path)!;
      // Check if existing path contains incoming path or vice versa.
      let index = splitIndex(pathSuffix, node.label);
      if (index === -1) {
        // Incoming patch should replace existing node.
        if (pathSuffix.length === node.label.length) {
          node.patch = patch;
          node.children = new Map();
          return;
        }
        // Incoming patch should be parent of existing node.
        if (pathSuffix.length < node.label.length) {
          const prefix = pathSuffix;
          const prefixNode = new TrieNode(prefix, patch);
          const suffix = node.label.slice(prefix.length);
          node.label = suffix;
          prefixNode.children.set(suffix[0], node);
          parent.children.set(path, prefixNode);
          return;
        }
        // Incoming patch should be child of existing node.
        if (pathSuffix.length > node.label.length) {
          index = node.label.length;
        }
      } else {
        // Incoming patch splits existing node into two.
        const prefix = node.label.slice(0, index);
        const suffix = node.label.slice(index);
        node.label = suffix;
        const prefixNode = new TrieNode(prefix);
        prefixNode.children.set(suffix[0], node);
        parent.children.set(path, prefixNode);
        // Insert incoming patch as child of prefix node in next iteration.
        node = prefixNode;
      }
      depth += index;
    }
  }

  getPatches() {
    return this.traverseTrie(this.root);
  }

  traverseTrie(node: TrieNode) {
    const { patch, children } = node;
    const childPatches: IJsonPatch[] = Array.from(children.values()).flatMap(this.traverseTrie.bind(this));
    if (patch) {
      if (children.size) {
        if (patch.op === "remove") throw RangeError("Patch with op remove cannot have children patches.");
        return [
          {
            ...patch,
            value: applyPatches(
              patch.value,
              childPatches.map(patch => ({ ...patch, path: splitJsonPath(patch.path) }))
            ),
          },
        ];
      }
      return [patch];
    }
    return childPatches;
  }
}
Sentient
  • 781
  • 2
  • 10
  • 23