sqlfluff can generate a parsed query as a dict using the function .parse
, like in the example:
import json
import sqlfluff
print(json.dumps(sqlfluff.parse(
"""with cte1 as (select colA from table1), cte2 as (select colB from table2 inner join table1 on table2.colB = table1.colA) select * from cte2""",
"postgres"
), indent=4))
This results in the following
{
"file": {
"statement": {
"with_compound_statement": [
{
"keyword": "with"
},
{
"whitespace": " "
},
{
"common_table_expression": [
{
"naked_identifier": "cte1"
},
{
"whitespace": " "
},
{
"keyword": "as"
},
{
"whitespace": " "
},
{
"bracketed": {
"start_bracket": "(",
"select_statement": {
"select_clause": {
"keyword": "select",
"whitespace": " ",
"select_clause_element": {
"column_reference": {
"naked_identifier": "colA"
}
}
},
"whitespace": " ",
"from_clause": {
"keyword": "from",
"whitespace": " ",
"from_expression": {
"from_expression_element": {
"table_expression": {
"table_reference": {
"naked_identifier": "table1"
}
}
}
}
}
},
"end_bracket": ")"
}
}
]
},
{
"comma": ","
},
{
"whitespace": " "
},
{
"common_table_expression": [
{
"naked_identifier": "cte2"
},
{
"whitespace": " "
},
{
"keyword": "as"
},
{
"whitespace": " "
},
{
"bracketed": {
"start_bracket": "(",
"select_statement": {
"select_clause": {
"keyword": "select",
"whitespace": " ",
"select_clause_element": {
"column_reference": {
"naked_identifier": "colB"
}
}
},
"whitespace": " ",
"from_clause": {
"keyword": "from",
"whitespace": " ",
"from_expression": {
"from_expression_element": {
"table_expression": {
"table_reference": {
"naked_identifier": "table2"
}
}
},
"whitespace": " ",
"join_clause": [
{
"keyword": "inner"
},
{
"whitespace": " "
},
{
"keyword": "join"
},
{
"whitespace": " "
},
{
"from_expression_element": {
"table_expression": {
"table_reference": {
"naked_identifier": "table1"
}
}
}
},
{
"whitespace": " "
},
{
"join_on_condition": {
"keyword": "on",
"whitespace": " ",
"expression": [
{
"column_reference": [
{
"naked_identifier": "table2"
},
{
"dot": "."
},
{
"naked_identifier": "colB"
}
]
},
{
"whitespace": " "
},
{
"comparison_operator": {
"raw_comparison_operator": "="
}
},
{
"whitespace": " "
},
{
"column_reference": [
{
"naked_identifier": "table1"
},
{
"dot": "."
},
{
"naked_identifier": "colA"
}
]
}
]
}
}
]
}
}
},
"end_bracket": ")"
}
}
]
},
{
"whitespace": " "
},
{
"select_statement": {
"select_clause": {
"keyword": "select",
"whitespace": " ",
"select_clause_element": {
"wildcard_expression": {
"wildcard_identifier": {
"star": "*"
}
}
}
},
"whitespace": " ",
"from_clause": {
"keyword": "from",
"whitespace": " ",
"from_expression": {
"from_expression_element": {
"table_expression": {
"table_reference": {
"naked_identifier": "cte2"
}
}
}
}
}
}
}
]
}
}
}
Is there a way to reconstrunct the query string given one of these dicts? I couldn't find anything on the official API reference.