Data Quality Framework provided by Jabar Digital Service
Project description
DataSae
Data Quality Framework provided by Jabar Digital Service
Converter
Local Computer
pip install 'DataSae[converter]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Local computer file to DataFrame
local = config('test_local')
df = local('path/file_name.csv', sep=',')
df = local('path/file_name.json')
df = local('path/file_name.parquet')
df = local('path/file_name.xlsx', sheet_name='Sheet1')
df = local('path/file_name.csv') # Default: sep = ','
df = local('path/file_name.json')
df = local('path/file_name.parquet')
df = local('path/file_name.xlsx') # Default: sheet_name = 'Sheet1'
Google Spreadsheet
pip install 'DataSae[converter,gsheet]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Google Spreadsheet to DataFrame
gsheet = config('test_gsheet')
df = gsheet('Sheet1')
df = gsheet('Sheet1', 'gsheet_id')
S3
pip install 'DataSae[converter,s3]'
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# S3 object to DataFrame
s3 = config('test_s3')
df = s3('path/file_name.csv', sep=',')
df = s3('path/file_name.json')
df = s3('path/file_name.parquet')
df = s3('path/file_name.xlsx', sheet_name='Sheet1')
df = s3('path/file_name.csv', 'bucket_name') # Default: sep = ','
df = s3('path/file_name.json', 'bucket_name')
df = s3('path/file_name.parquet', 'bucket_name')
df = s3('path/file_name.xlsx', 'bucket_name') # Default: sheet_name = 'Sheet1'
SQL
pip install 'DataSae[converter,sql]'
MariaDB or MySQL
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# MariaDB or MySQL to DataFrame
mariadb_or_mysql = config('test_mariadb_or_mysql')
df = mariadb_or_mysql('select 1 column_name from schema_name.table_name;')
df = mariadb_or_mysql('path/file_name.sql')
PostgreSQL
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# PostgreSQL to DataFrame
postgresql = config('test_postgresql')
df = postgresql('select 1 column_name from schema_name.table_name;')
df = postgresql('path/file_name.sql')
Checker for Data Quality
from datasae.converter import Config
# From JSON
config = Config('DataSae/tests/data/config.json')
# From YAML
config = Config('DataSae/tests/data/config.yaml')
# Check all data qualities on configuration
config.checker # dict result
# Check data quality by config name
config('test_local').checker # list of dict result
config('test_gsheet').checker # list of dict result
config('test_s3').checker # list of dict result
config('test_mariadb_or_mysql').checker # list of dict result
config('test_postgresql').checker # list of dict result
Example results: https://github.com/jabardigitalservice/DataSae/blob/46ef80072b98ca949084b4e1ae50bcf23d07d646/tests/data/checker.json#L1-L432
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
DataSae-0.5.0a3.tar.gz (33.8 kB, view details)
Built Distribution
DataSae-0.5.0a3-py3-none-any.whl (35.8 kB, view details)
File details
Details for the file DataSae-0.5.0a3.tar.gz.
File metadata
- Download URL: DataSae-0.5.0a3.tar.gz
- Upload date:
- Size: 33.8 kB
- Tags: Source
- Uploaded using Trusted Publishing? Yes
- Uploaded via: twine/4.0.2 CPython/3.11.8
File hashes
Algorithm | Hash digest
---|---
SHA256 | 7660b2643679c321f26d9bc5732363b3ecb93fabb41649440961e6847efefb0e
MD5 | 1f42d4219d7dca28e0d8e848efe84bb9
BLAKE2b-256 | b1645293c0729537c4f755af8f4f212db51950d0b16f24d0fbcf98b07f2cd1e2
File details
Details for the file DataSae-0.5.0a3-py3-none-any.whl.
File metadata
- Download URL: DataSae-0.5.0a3-py3-none-any.whl
- Upload date:
- Size: 35.8 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? Yes
- Uploaded via: twine/4.0.2 CPython/3.11.8
File hashes
Algorithm | Hash digest
---|---
SHA256 | 29c697db1c950fc281334e10788ca4580ccbb765a21c779f3829807b40184351
MD5 | 65d8c2e2bde171604d655a6e8435e5f1
BLAKE2b-256 | 766384c3de246a6e423be52fcd2571adad9277c384780f2b6d6acdb4ff08f954