I have approx. 200 sql statements and I need to analyze what columns and tables are used in those columns. I've found there is XML explain plan available in PostgreSQL 9.0+.
Is there a known way how I can get list of used colums and tables from that plan?
Updated version:
Test data
CREATE TABLE tmp.a (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.b (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.c (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.d (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
CREATE TABLE tmp.e (id integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer, i integer);
insert into tmp.a values (1,1,1,1,1,1,1,1,1);
insert into tmp.a values (2,1,1,1,1,1,1,1,1);
insert into tmp.a values (3,1,1,1,1,1,1,1,1);
insert into tmp.a values (4,1,1,1,1,1,1,1,1);
insert into tmp.b values (1,1,1,1,1,1,1,1,1);
insert into tmp.b values (2,1,1,1,1,1,1,1,1);
insert into tmp.b values (3,1,1,1,1,1,1,1,1);
insert into tmp.b values (4,1,1,1,1,1,1,1,1);
insert into tmp.c values (1,1,1,1,1,1,1,1,1);
insert into tmp.c values (2,1,1,1,1,1,1,1,1);
insert into tmp.c values (3,1,1,1,1,1,1,1,1);
insert into tmp.c values (4,1,1,1,1,1,1,1,1);
insert into tmp.d values (1,1,1,1,1,1,1,1,1);
insert into tmp.d values (2,1,1,1,1,1,1,1,1);
insert into tmp.d values (3,1,1,1,1,1,1,1,1);
insert into tmp.e values (2,1,1,1,1,1,1,1,1);
insert into tmp.e values (3,1,1,1,1,1,1,1,1);
insert into tmp.e values (4,1,1,1,1,1,1,1,1);
Sample SQL and explain plan
explain (verbose true, format xml, costs false)
select
a.b,
a.c,
b.c,
d.b,
e.f
from
tmp.a a
join tmp.b b using (id)
join tmp.c c using (id)
left join tmp.d d on (a.id = d.id)
left join tmp.e e on (b.id = e.id)
where
c.d = 1 and (d.f > 0 or e.g is null)
XML results stored in table
create table tmp.file (fcontent text);
insert into tmp.file values ('
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Merge Join</Node-Type>
<Join-Type>Left</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>b.c</Item>
<Item>d.b</Item>
<Item>e.f</Item>
</Output>
<Merge-Cond>(b.id = e.id)</Merge-Cond>
<Filter>((d.f > 0) OR (e.g IS NULL))</Filter>
<Plans>
<Plan>
<Node-Type>Merge Join</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Left</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>b.c</Item>
<Item>b.id</Item>
<Item>d.b</Item>
<Item>d.f</Item>
</Output>
<Merge-Cond>(a.id = d.id)</Merge-Cond>
<Plans>
<Plan>
<Node-Type>Sort</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>b.c</Item>
<Item>b.id</Item>
</Output>
<Sort-Key>
<Item>a.id</Item>
</Sort-Key>
<Plans>
<Plan>
<Node-Type>Hash Join</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Inner</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>b.c</Item>
<Item>b.id</Item>
</Output>
<Hash-Cond>(b.id = a.id)</Hash-Cond>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>b</Relation-Name>
<Schema>tmp</Schema>
<Alias>b</Alias>
<Output>
<Item>b.id</Item>
<Item>b.b</Item>
<Item>b.c</Item>
<Item>b.d</Item>
<Item>b.e</Item>
<Item>b.f</Item>
<Item>b.g</Item>
<Item>b.h</Item>
<Item>b.i</Item>
</Output>
</Plan>
<Plan>
<Node-Type>Hash</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>c.id</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Hash Join</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Inner</Join-Type>
<Output>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.id</Item>
<Item>c.id</Item>
</Output>
<Hash-Cond>(a.id = c.id)</Hash-Cond>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>a</Relation-Name>
<Schema>tmp</Schema>
<Alias>a</Alias>
<Output>
<Item>a.id</Item>
<Item>a.b</Item>
<Item>a.c</Item>
<Item>a.d</Item>
<Item>a.e</Item>
<Item>a.f</Item>
<Item>a.g</Item>
<Item>a.h</Item>
<Item>a.i</Item>
</Output>
</Plan>
<Plan>
<Node-Type>Hash</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>c.id</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>c</Relation-Name>
<Schema>tmp</Schema>
<Alias>c</Alias>
<Output>
<Item>c.id</Item>
</Output>
<Filter>(c.d = 1)</Filter>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
<Plan>
<Node-Type>Sort</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>d.b</Item>
<Item>d.id</Item>
<Item>d.f</Item>
</Output>
<Sort-Key>
<Item>d.id</Item>
</Sort-Key>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>d</Relation-Name>
<Schema>tmp</Schema>
<Alias>d</Alias>
<Output>
<Item>d.b</Item>
<Item>d.id</Item>
<Item>d.f</Item>
</Output>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
<Plan>
<Node-Type>Sort</Node-Type>
<Parent-Relationship>Inner</Parent-Relationship>
<Output>
<Item>e.f</Item>
<Item>e.id</Item>
<Item>e.g</Item>
</Output>
<Sort-Key>
<Item>e.id</Item>
</Sort-Key>
<Plans>
<Plan>
<Node-Type>Seq Scan</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Relation-Name>e</Relation-Name>
<Schema>tmp</Schema>
<Alias>e</Alias>
<Output>
<Item>e.f</Item>
<Item>e.id</Item>
<Item>e.g</Item>
</Output>
</Plan>
</Plans>
</Plan>
</Plans>
</Plan>
</Query>
</explain>
');
Items in explain plan
with elements as (
SELECT trim(a[rn]) AS elem, rn
FROM (
SELECT *, generate_series(1, array_upper(a, 1)) AS rn
FROM (
SELECT string_to_array(fcontent, chr(10)) AS a
FROM tmp.file
) x
) y
)
select
regexp_replace(elem, E'<Item>|</Item>', '', 'g' ) as sql_line
from
elements where elem like '<Item>%'
group by
regexp_replace(elem, E'<Item>|</Item>', '', 'g' )
order by
regexp_replace(elem, E'<Item>|</Item>', '', 'g' )
There are 25 columns in Item
tag. However, to perform this query you need only 13: a.b, a.c, b.c, d.b, e.f, a.id, b.id, c.id, d.id, e.id, c.d, d.f, e.g
. Is ther a way how I can get only these columns out of explain plan?
Original version
For example I do have the following query (more for illustration, no need to understand it):
select
dd.id_databox_data,
dd.recipient_id,
dd.sender_id,
lp.business_name,
fa.repayment_identification,
perform_time,
dd.subject as dd_subject,
ca.subject as ca_subject,
d.unique_name,
s.name,
s.long_name,
lld.legal_template,
lld.issue_date,
ca.perform_time,
dd.id_recipient_document_ident,
dd.id_sender_document_ident,
dci1.ref_number,
dci2.ref_number
from
databox_data as dd
join databox_data_attachments as dda on (dd.id_databox_data = dda.id_databox_data)
join databox_attachment as da on (da.id_databox_attachment = dda.id_databox_attachment)
join document as d on (d.id_document = da.id_document)
join external_file_letter_data as fld on (fld.id_document = d.id_document)
join letter_data as ld on (ld.id_letter_data = fld.id_letter_data)
join legal_letter_data lld on (ld.id_letter_data = lld.id_letter_data)
join legal_instrument li using (id_legal_instrument)
left join execution e using (id_legal_instrument)
join v_communication_act as ca on (ca.id_letter_data = ld.id_letter_data)
join solver s using (id_solver)
join responsibility as r on (r.id_responsibility = ca.id_related_responsibility)
join party pr on (r.id_responsible = pr.id_party)
join financial_accountability fa using (id_accountability)
join flight f using (id_flight)
join portfolio p using (id_portfolio)
join legal_person lp on (pr.id_source = lp.id_party)
left join v_authority va on (dd.recipient_id = va.data_box_id)
left join databox_document_ident dci1 on (dd.id_recipient_document_ident = dci1.id_databox_document_ident)
left join databox_document_ident dci2 on (dd.id_recipient_document_ident = dci2.id_databox_document_ident)
where
ca.perform_time > (Now()::date - 1)
and s.id_solver = 41
I'm using explain (verbose true, format xml, costs false)
.
This results in the following explain plan (XML version), unfortunately I'm not able to insert the whole explain plan (SO limitation on post length), please use this pastebin if you need the full explain plan version:
<explain xmlns="http://www.postgresql.org/2009/explain">
<Query>
<Plan>
<Node-Type>Nested Loop</Node-Type>
<Join-Type>Left</Join-Type>
<Output>
<Item>dd.id_databox_data</Item>
<Item>dd.recipient_id</Item>
<Item>dd.sender_id</Item>
<Item>lp.business_name</Item>
<Item>fa.repayment_identification</Item>
<Item>act.perform_time</Item>
<Item>dd.subject</Item>
<Item>communication_act.subject</Item>
<Item>d.unique_name</Item>
<Item>s.name</Item>
<Item>s.long_name</Item>
<Item>lld.legal_template</Item>
<Item>lld.issue_date</Item>
<Item>act.perform_time</Item>
<Item>dd.id_recipient_document_ident</Item>
<Item>dd.id_sender_document_ident</Item>
<Item>dci1.ref_number</Item>
<Item>dci2.ref_number</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Nested Loop</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Left</Join-Type>
<Output>
<Item>dd.id_databox_data</Item>
<Item>dd.recipient_id</Item>
<Item>dd.sender_id</Item>
<Item>dd.subject</Item>
<Item>dd.id_recipient_document_ident</Item>
<Item>dd.id_sender_document_ident</Item>
<Item>d.unique_name</Item>
<Item>lld.legal_template</Item>
<Item>lld.issue_date</Item>
<Item>communication_act.subject</Item>
<Item>act.perform_time</Item>
<Item>s.name</Item>
<Item>s.long_name</Item>
<Item>fa.repayment_identification</Item>
<Item>lp.business_name</Item>
<Item>dci1.ref_number</Item>
</Output>
<Plans>
<Plan>
<Node-Type>Nested Loop</Node-Type>
<Parent-Relationship>Outer</Parent-Relationship>
<Join-Type>Inner</Join-Type>
<Output>
<Item>dd.id_databox_data</Item>
<Item>dd.recipient_id</Item>
<Item>dd.sender_id</Item>
<Item>dd.subject</Item>
<Item>dd.id_recipient_document_ident</Item>
<Item>dd.id_sender_document_ident</Item>
<Item>d.unique_name</Item>
<Item>lld.legal_template</Item>
<Item>lld.issue_date</Item>
<Item>communication_act.subject</Item>
<Item>act.perform_time</Item>
<Item>s.name</Item>
<Item>s.long_name</Item>
<Item>fa.repayment_identification</Item>
Is there a way (preferably SQL way) how can I get list of used columns and tables from such plan? It is not enough just query <Item>
rows, because when the table appears first time in the explain plan (at the lowest level), then all columns are listed in <Item>
tag although many of them are not needed to complete the query.
I have used the following SQL to list unique <Item>
tags:
with elements as (
SELECT trim(a[rn]) AS elem, rn
FROM (
SELECT *, generate_series(1, array_upper(a, 1)) AS rn
FROM (
SELECT string_to_array(fcontent, chr(10)) AS a
FROM tmp.file
) x
) y
)
select
regexp_replace(elem, E'<Item>|</Item>', '', 'g' ) as sql_line
from
elements where elem like '<Item>%'
group by
regexp_replace(elem, E'<Item>|</Item>', '', 'g' )
order by
regexp_replace(elem, E'<Item>|</Item>', '', 'g' )