Python module for validating BigQuery SQL queries, with support for Jinja-templated variables
Project description
Python BigQuery Validator
Python module for validating BigQuery SQL queries, with support for Jinja-templated variables.
This package was built to automate the testing of SQL used in Apache Airflow DAGs.
Installation Instructions
pip install python-bigquery-validator
Validate SQL using unit tests
class BigqueryValidatorTest(unittest.TestCase):
    """Example unit tests that check SQL validity with ``BigQueryValidator``.

    ``validate_query`` takes a SQL string and ``validate_query_from_file``
    takes a path to a SQL file; both are asserted here to return a truthy
    value for valid SQL and a falsy value for invalid SQL.
    """

    @classmethod
    def setUpClass(cls):
        super().setUpClass()
        # Build the shared validator when the test class starts running rather
        # than in the class body, so a construction failure is reported as a
        # test error instead of breaking import/collection of the module.
        cls.bigquery_validator = BigQueryValidator()

    def test_valid_query_returns_true(self):
        # A well-formed query against a public dataset should validate.
        query = "SELECT count(*) FROM `bigquery-public-data.samples.github_timeline`"
        valid_sql = self.bigquery_validator.validate_query(query)
        self.assertTrue(valid_sql)

    def test_bad_query_returns_false(self):
        # "ROM" instead of "FROM" is a deliberate syntax error.
        query = "SELECT count(*) ROM `bigquery-public-data.samples.github_timeline`"
        bad_sql = self.bigquery_validator.validate_query(query)
        self.assertFalse(bad_sql)

    def test_valid_query_from_file_returns_true(self):
        # Path is relative to the working directory the tests are run from.
        valid_sql = self.bigquery_validator.validate_query_from_file("./valid_query.sql")
        self.assertTrue(valid_sql)

    def test_bad_query_from_file_returns_false(self):
        bad_sql = self.bigquery_validator.validate_query_from_file("./bad_query.sql")
        self.assertFalse(bad_sql, 'assert_bad_sql_from_file_fails_validation')
Validate the output of query results using unit tests
The BigqueryValidatorTest example above is taken from bigquery_validator/tests/bigquery_validator_test.py
class BigqueryResultTest(unittest.TestCase):
    """Example unit tests that inspect query output through ``BigQueryResult``.

    The tests read ``BigQueryResult.result`` and the dictionary returned by
    ``BigQueryResult.metadata()``, using its ``nrows``, ``unique_values``,
    ``null_values`` and ``value_counts`` entries.
    """

    def test_query_executes_by_default(self):
        # The query contains a Jinja placeholder ({{ params.project }});
        # presumably BigQueryResult renders it before running -- confirm.
        query = "SELECT count(*) AS nrows FROM `{{ params.project }}.samples.github_timeline`"
        bqr = BigQueryResult(query)
        print(bqr.result)
        self.assertIsNotNone(bqr.result)

    def test_query_auto_executes_set_to_false_returns_empty_array(self):
        # With auto_execute=False the result is expected to stay an empty list.
        query = "SELECT count(*) AS nrows FROM `{{ params.project }}.samples.github_timeline`"
        bqr = BigQueryResult(query, auto_execute=False)
        self.assertEqual(bqr.result, [])

    def test_query_metadata_is_not_none(self):
        query = "SELECT count(*) AS nrows FROM `{{ params.project }}.samples.github_timeline`"
        bqr = BigQueryResult(query)
        result_metadata = bqr.metadata()
        # Check for None before indexing, so a missing metadata object fails
        # with a clear assertion rather than a TypeError on subscripting.
        self.assertIsNotNone(result_metadata)
        # 'nrows' is both the query's column alias (under 'unique_values') and
        # a top-level metadata key, read here as the total row count.
        unique_rows = len(result_metadata['unique_values']['nrows'])
        total_rows = result_metadata['nrows']
        self.assertEqual(unique_rows, total_rows)

    def test_query_from_file_metadata_is_not_none(self):
        bqr = BigQueryResult(file_path='./sql/bigquery_result_metadata.sql')
        result_metadata = bqr.metadata()
        self.assertIsNotNone(result_metadata)
        unique_rows = len(result_metadata['unique_values']['nrows'])
        total_rows = result_metadata['nrows']
        self.assertEqual(unique_rows, total_rows)

    def test_query_metadata_returns_correct_unique_values(self):
        query = '''
select 'andrew' as name, 21 as age
union all
select 'james' as name, 20 as age
'''
        bqr = BigQueryResult(query)
        result_metadata = bqr.metadata()
        unique_names = result_metadata['unique_values']['name']
        # Compare regardless of order: the query has no ORDER BY, so the order
        # in which rows (and hence unique values) come back is not guaranteed.
        self.assertCountEqual(unique_names, ['andrew', 'james'])

    def test_query_from_file_metadata_returns_correct_unique_values(self):
        bqr = BigQueryResult(file_path='./sql/bigquery_result_test.sql')
        result_metadata = bqr.metadata()
        unique_names = result_metadata['unique_values']['name']
        # Order-insensitive for consistency with the inline-query test above;
        # NOTE(review): confirm the SQL file does not rely on a fixed ordering.
        self.assertCountEqual(unique_names, ['john', 'peter', 'andrew', 'james'])

    def test_query_metadata_returns_correct_null_values(self):
        query = '''
select 'andrew' as name, null as age
union all
select null as name, null as age
'''
        bqr = BigQueryResult(query)
        result_metadata = bqr.metadata()
        # One of the two rows has a null name; both rows have a null age.
        null_names = result_metadata['null_values']['name']
        self.assertEqual(null_names, 1)
        null_age = result_metadata['null_values']['age']
        self.assertEqual(null_age, 2)

    def test_query_metadata_returns_correct_value_counts(self):
        query = '''
select 'andrew' as name, 20 as age
union all
select 'john' as name, 20 as age
'''
        bqr = BigQueryResult(query)
        result_metadata = bqr.metadata()
        value_counts = result_metadata['value_counts']
        # Each name appears once; the age 20 appears in both rows.
        name_value_counts = value_counts['name']
        self.assertEqual(name_value_counts, {'john': 1, 'andrew': 1})
        age_value_counts = value_counts['age']
        self.assertEqual(age_value_counts, {20: 2})
Run functions using the command line
The BigqueryResultTest example above is taken from bigquery_validator/tests/bigquery_result_test.py
# Continuously monitor a SQL file and automatically validate the SQL on every
# saved change to the file
python -m bigquery_validator auto_validate_query_from_file './valid_query.sql'
# Convert Jinja-templated SQL into a valid query
python -m bigquery_validator render_templated_query 'select date("{{ params.date }}") as date'
# Check whether a query string is valid
python -m bigquery_validator validate_query 'select true'
# Check whether a SQL file contains a valid query
python -m bigquery_validator validate_query_from_file './valid_query.sql'
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Close
Hashes for python-bigquery-validator-0.0.8.tar.gz
Algorithm | Hash digest |
---|---|
SHA256 | d2303c9bd948c062f51f6ceba79492396ce2469c4ed95a80e587ded463489fac |
MD5 | eac71b4271476e4146da45a33eec3019 |
BLAKE2b-256 | e60aacc4ecab9559540b8006952603a83c7d8a407b8e855da52194e7a57bfaca |
Close
Hashes for python_bigquery_validator-0.0.8-py3-none-any.whl
Algorithm | Hash digest |
---|---|
SHA256 | c0de50ff8eeeac9591007f3ad009aaa3046d992152affcb453d2e07d0ca26135 |
MD5 | a8cfae81b0a3bcd1a9943009c9f3bcaf |
BLAKE2b-256 | e0cae24d7083306a57cdc8bef3a4e58bddec347bad8cca0b4ee3448cd8a25ebe |