Data Quality Framework provides by Jabar Digital Service
Project description
DataSae
Data Quality Framework provides by Jabar Digital Service
Converter
Local Computer
pip install 'DataSae[converter]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Local computer file to DataFrame
local = config('test_local')
df = local('path/file_name.csv', sep=',')
df = local('path/file_name.json')
df = local('path/file_name.parquet')
df = local('path/file_name.xlsx', sheet_name='Sheet1')
df = local('path/file_name.csv') # Default: sep = ','
df = local('path/file_name.json')
df = local('path/file_name.parquet')
df = local('path/file_name.xlsx') # Default: sheet_name = 'Sheet1'
Google Spreadsheet
pip install 'DataSae[converter,gsheet]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Google Spreadsheet to DataFrame
gsheet = config('test_gsheet')
df = gsheet('Sheet1')
df = gsheet('Sheet1', 'gsheet_id')
S3
pip install 'DataSae[converter,s3]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# S3 object to DataFrame
s3 = config('test_s3')
df = s3('path/file_name.csv', sep=',')
df = s3('path/file_name.json')
df = s3('path/file_name.parquet')
df = s3('path/file_name.xlsx', sheet_name='Sheet1')
df = s3('path/file_name.csv', 'bucket_name') # Default: sep = ','
df = s3('path/file_name.json', 'bucket_name')
df = s3('path/file_name.parquet', 'bucket_name')
df = s3('path/file_name.xlsx', 'bucket_name') # Default: sheet_name = 'Sheet1'
SQL
pip install 'DataSae[converter,sql]'
MariaDB or MySQL
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# MariaDB or MySQL to DataFrame
mariadb_or_mysql = config('test_mariadb_or_mysql')
df = mariadb_or_mysql('select 1 column_name from schema_name.table_name;')
df = mariadb_or_mysql('path/file_name.sql')
PostgreSQL
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# PostgreSQL to DataFrame
postgresql = config('test_postgresql')
df = postgresql('select 1 column_name from schema_name.table_name;')
df = postgresql('path/file_name.sql')
Checker for Data Quality
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Check all data qualities on configuration
config.checker # dict result
# Check data quality by config name
config('test_local').checker # list of dict result
config('test_gsheet').checker # list of dict result
config('test_s3').checker # list of dict result
config('test_mariadb_or_mysql').checker # list of dict result
config('test_postgresql').checker # list of dict result
Example results: https://github.com/jabardigitalservice/DataSae/blob/46ef80072b98ca949084b4e1ae50bcf23d07d646/tests/data/checker.json#L1-L432
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
DataSae-0.4.0.tar.gz
(32.9 kB
view hashes)
Built Distribution
DataSae-0.4.0-py3-none-any.whl
(34.3 kB
view hashes)