Data Quality Framework provides by Jabar Digital Service
Project description
DataSae
Data Quality Framework provides by Jabar Digital Service
Converter
Local Computer
pip install 'DataSae[converter]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Local computer file to DataFrame
local = config('test_local')
df = local('path/file_name.csv', sep=',')
df = local('path/file_name.json')
df = local('path/file_name.parquet')
df = local('path/file_name.xlsx', sheet_name='Sheet1')
df = local('path/file_name.csv') # Default: sep = ','
df = local('path/file_name.json')
df = local('path/file_name.parquet')
df = local('path/file_name.xlsx') # Default: sheet_name = 'Sheet1'
Google Spreadsheet
pip install 'DataSae[converter,gsheet]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Google Spreadsheet to DataFrame
gsheet = config('test_gsheet')
df = gsheet('Sheet1')
df = gsheet('Sheet1', 'gsheet_id')
S3
pip install 'DataSae[converter,s3]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# S3 object to DataFrame
s3 = config('test_s3')
df = s3('path/file_name.csv', sep=',')
df = s3('path/file_name.json')
df = s3('path/file_name.parquet')
df = s3('path/file_name.xlsx', sheet_name='Sheet1')
df = s3('path/file_name.csv', 'bucket_name') # Default: sep = ','
df = s3('path/file_name.json', 'bucket_name')
df = s3('path/file_name.parquet', 'bucket_name')
df = s3('path/file_name.xlsx', 'bucket_name') # Default: sheet_name = 'Sheet1'
SQL
pip install 'DataSae[converter,sql]'
MariaDB or MySQL
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# MariaDB or MySQL to DataFrame
mariadb_or_mysql = config('test_mariadb_or_mysql')
df = mariadb_or_mysql('select 1 column_name from schema_name.table_name;')
df = mariadb_or_mysql('path/file_name.sql')
PostgreSQL
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# PostgreSQL to DataFrame
postgresql = config('test_postgresql')
df = postgresql('select 1 column_name from schema_name.table_name;')
df = postgresql('path/file_name.sql')
Checker for Data Quality
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Check all data qualities on configuration
config.checker # dict result
# Check data quality by config name
config('test_local').checker # list of dict result
config('test_gsheet').checker # list of dict result
config('test_s3').checker # list of dict result
config('test_mariadb_or_mysql').checker # list of dict result
config('test_postgresql').checker # list of dict result
Example results: https://github.com/jabardigitalservice/DataSae/blob/46ef80072b98ca949084b4e1ae50bcf23d07d646/tests/data/checker.json#L1-L432
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
DataSae-0.5.0a2.tar.gz
(33.8 kB
view hashes)
Built Distribution
DataSae-0.5.0a2-py3-none-any.whl
(35.8 kB
view hashes)
Close
Hashes for DataSae-0.5.0a2-py3-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 272349b6317944c838bf2d9e9fdf6bf7603b9b67e4c6c8298dc4bdf3d1f85cf4 |
|
MD5 | b9bb4752c26f0238a058b9d4f02c6683 |
|
BLAKE2b-256 | 5905df081382e4180ea999f270e0f0b451a4ffce9b7f9f495c0fc328e48173ab |