Uses tokenized query returned by python-sqlparse and generates query metadata
Project description
sql-metadata
Uses tokenized query returned by python-sqlparse
and generates query metadata.
Extracts column names and tables used by the query. Provides helpers for normalizing SQL queries and resolving table aliases.
Supported queries syntax:
- MySQL
- PostgreSQL
- Apache Hive
Usage
pip install sql-metadata
Extracting raw sql-metadata tokens
from sql_metadata import Parser
# extract raw sql-metadata tokens
Parser("SELECT * FROM foo").tokens
# ['SELECT', '*', 'FROM', 'foo']
Extracting columns from query
from sql_metadata import Parser
# get columns from query - for more examples see `tests/test_getting_columns.py`
Parser("SELECT test, id FROM foo, bar").columns
# ['test', 'id']
Parser("INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) VALUES ('442001','27574631','20180228130846')").columns
# ['article_id', 'user_id', 'time']
parser = Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address")
# note that aliases are auto-resolved
parser.columns
# ['product_a.users.*', 'product_a.users.ip_address', 'product_b.users.ip_address']
# note that you can also extract columns with their place in the query
# which will return dict with lists divided into select, where, order_by, group_by, join, insert and update
parser.columns_dict
# {'select': ['product_a.users.*'], 'join': ['product_a.users.ip_address', 'product_b.users.ip_address']}
Extracting columns aliases from query
from sql_metadata import Parser
parser = Parser("SELECT a, (b + c - u) as alias1, custom_func(d) alias2 from aa, bb order by alias1")
# note that the columns list does not contain the column aliases
parser.columns
# ["a", "b", "c", "u", "d"]
# but you can still extract aliases names
parser.columns_aliases_names
# ["alias1", "alias2"]
# aliases are resolved to the columns which they refer to
parser.columns_aliases
# {"alias1": ["b", "c", "u"], "alias2": "d"}
# you can also extract aliases grouped by the section of the query in which they are used
parser.columns_aliases_dict
# {"order_by": ["alias1"], "select": ["alias1", "alias2"]}
# the same applies to aliases used in query sections when you extract columns_dict
# here the alias is only used in ORDER BY, but it is resolved to the actual columns
assert parser.columns_dict == {'order_by': ['b', 'c', 'u'],
'select': ['a', 'b', 'c', 'u', 'd']}
Extracting tables from query
from sql_metadata import Parser
# get tables from query - for more examples see `tests/test_getting_tables.py`
Parser("SELECT a.* FROM product_a.users AS a JOIN product_b.users AS b ON a.ip_address = b.ip_address").tables
# ['product_a.users', 'product_b.users']
Parser("SELECT test, id FROM foo, bar").tables
# ['foo', 'bar']
# you can also extract aliases of the tables as a dictionary
parser = Parser("SELECT f.test FROM foo AS f")
# get table aliases
parser.tables_aliases
# {'f': 'foo'}
# note that aliases are auto-resolved for columns
parser.columns
# ["foo.test"]
Extracting values from query
from sql_metadata import Parser
parser = Parser(
"INSERT /* VoteHelper::addVote xxx */ INTO `page_vote` (article_id,user_id,`time`) "
"VALUES ('442001','27574631','20180228130846')"
)
# extract values from query
parser.values
# ["442001", "27574631", "20180228130846"]
# extract a dictionary with column-value pairs
parser.values_dict
#{"article_id": "442001", "user_id": "27574631", "time": "20180228130846"}
# if column names are not set, placeholders are auto-added
parser = Parser(
"INSERT IGNORE INTO `table` VALUES (9, 2.15, '123', '2017-01-01');"
)
parser.values
# [9, 2.15, "123", "2017-01-01"]
parser.values_dict
#{"column_1": 9, "column_2": 2.15, "column_3": "123", "column_4": "2017-01-01"}
Extracting limit and offset
from sql_metadata import Parser
Parser('SELECT foo_limit FROM bar_offset LIMIT 50 OFFSET 1000').limit_and_offset
# (50, 1000)
Parser('SELECT foo_limit FROM bar_offset limit 2000,50').limit_and_offset
# (50, 2000)
Extracting with names
from sql_metadata import Parser
parser = Parser(
"""
WITH
database1.tableFromWith AS (SELECT aa.* FROM table3 as aa
left join table4 on aa.col1=table4.col2),
test as (SELECT * from table3)
SELECT
"xxxxx"
FROM
database1.tableFromWith alias
LEFT JOIN database2.table2 ON ("tt"."ttt"."fff" = "xx"."xxx")
"""
)
# get names / aliases of the WITH statements
parser.with_names
# ["database1.tableFromWith", "test"]
# note that names of with statements do not appear in tables
parser.tables
# ["table3", "table4", "database2.table2"]
Extracting sub-queries
from sql_metadata import Parser
parser = Parser(
"""
SELECT COUNT(1) FROM
(SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1) a
JOIN (SELECT st.task_id FROM some_task st WHERE task_type_id = 80) b
ON a.task_id = b.task_id;
"""
)
# get sub-queries dictionary
parser.subqueries
# {"a": "SELECT std.task_id FROM some_task_detail std WHERE std.STATUS = 1",
# "b": "SELECT st.task_id FROM some_task st WHERE task_type_id = 80"}
# get names / aliases of sub-queries / derived tables
parser.subqueries_names
# ["a", "b"]
# note that you can also exclude columns coming from sub-queries
# all columns
parser.columns
#["some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id",
# "task_type_id", "a.task_id", "b.task_id"]
# without subqueries
parser.columns_without_subqueries
#["some_task_detail.task_id", "some_task_detail.STATUS", "some_task.task_id",
# "task_type_id"]
See tests
file for more examples of a bit more complex queries.
Queries normalization and comments extraction
from sql_metadata import Parser
parser = Parser('SELECT /* Test */ foo FROM bar WHERE id in (1, 2, 56)')
# generalize query
parser.generalize
# 'SELECT foo FROM bar WHERE id in (XYZ)'
# remove comments
parser.without_comments
# 'SELECT foo FROM bar WHERE id in (1, 2, 56)'
# extract comments
parser.comments
# ['/* Test */']
See test/test_normalization.py
file for more examples of a bit more complex queries.
Migrating from sql_metadata
1.x
sql_metadata.compat
module has been implemented to make the introduction of sql-metadata v2.0 smoother.
You can use it by simply changing the imports in your code from:
from sql_metadata import get_query_columns, get_query_tables
into:
from sql_metadata.compat import get_query_columns, get_query_tables
The following functions from the old API are available in the sql_metadata.compat
module:
generalize_sql
get_query_columns
(since #131 column aliases ARE NOT returned by this function)
get_query_limit_and_offset
get_query_tables
get_query_tokens
preprocess_query
Authors and contributors
Created and maintained by @macbre, with great contributions from @collerek and others.
- aborecki (https://github.com/aborecki)
- collerek (https://github.com/collerek)
- dylanhogg (https://github.com/dylanhogg)
- macbre (https://github.com/macbre)
Stargazers over time
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Hashes for sql_metadata-2.1.0-py3-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 9ada59ffe1f1e24caaf1820f9b4180778e84a28ba8c288ca3c3dc44e3958c868 |
|
MD5 | d8f11f76b7d19c3a973ef1910fa7d845 |
|
BLAKE2b-256 | b2906fa23a49df4e5dcc8aff8b4f38df4636968dc619a8ea9c71476db5ffba3c |