DROP TABLE IF EXISTS t;
CREATE TABLE t(
mypath varchar(100),
parent_path varchar(100)
);
INSERT INTO t VALUES ('a', NULL),('a/b', 'a'),('a/b/c', 'a/b');
-- Listing all parent paths
1) using LIKE, not making use of parent_path column:
SELECT a.mypath, b.mypath aS parent_path
FROM t a
JOIN t b ON a.mypath LIKE b.mypath + '%' AND a.mypath != b.mypath
2) Using a recursive cte, making use of parent_path column
WITH cte AS (
SELECT mypath, parent_path
FROM t
UNION ALL
SELECT a.mypath, b.parent_path
FROM cte a
JOIN t b ON a.parent_path = b.mypath
)
SELECT * FROM cte WHERE parent_path IS NOT NULL;
On large datasets, what are the pros and cons of each method, performance wise ? Am I right to thing the rcte method should be faster ? Should LIKE be able to make use of an index, since there is only a trailing wildcard ?