A DQ package
Project description
Requirements
pip install -r requirements.txt
Env
.env File
API_KEY=xyz
API_URL=xyz
Or Export variables
export API_KEY=xyz
export API_URL=xyz
Database User
GRANT USAGE on schema "validation" to anon;
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA "validation" TO anon;
GRANT ALL ON SEQUENCE validation.rule_output_rule_output_id_seq TO anon;
GRANT ALL ON SEQUENCE validation.connections_connection_id_seq TO anon;
GRANT ALL ON SEQUENCE validation.owlcheck_q_job_id_seq TO anon;
GRANT ALL ON SEQUENCE validation.job_log_log_id_seq TO anon;
GRANT ALL ON SEQUENCE validation.assignment_q_id_seq TO anon;
Examples
# Individual Usage Examples
#Client instantiation
api = APIClient(api_client)
engine = duckdb.connect(':memory:')
engine.sql(" select * from read_csv_auto('./data/fake_customers.csv') limit 10").show()
engine.close()
# owl_check_history, Delete, Insert, Read
api.delete_owl_check_history("test")
api.insert_owl_check_history("test", "2024-09-16")
rs = api.get_owl_check_history("test")
print(rs)
# owl_catalog, Delete, Insert, Read
api.delete_owl_catalog("test")
api.insert_owl_catalog("test")
rs = api.get_owl_catalog("test")
print(rs)
# dataset_schema, Delete, Insert, Read
api.delete_dataset_schema("test")
api.insert_dataset_schema("test")
rs = api.get_dataset_schema("test")
print(rs)
# dataset_field, Delete, Insert, Read
api.delete_dataset_field("test", "2024-09-16")
api.insert_dataset_field("test", "2024-09-16")
rs = api.get_dataset_field("test", "2024-09-16")
print(rs)
# Print the result
df = pd.DataFrame(rs.data)
print(df[['dataset','run_id','rc']])
# run rules
api.delete_rule_output("test", "2024-09-16")
api.run_rules("test", "2024-09-16")
# scoring
rule_output = api.get_rule_output("test", "2024-09-16")
rule_score = 0
for r in rule_output.data:
rule_score += r['score']
print(rule_score)
# dataset_scan, Delete, Insert, Read
delete_record = api.delete_dataset_scan("test", "2024-09-16")
add_record = api.insert_dataset_scan("test", "2024-09-16", 100, 100 - rule_score)
Register
dataset = 'test'
# opt_spark
api.delete_opt_spark(dataset)
api.insert_opt_spark(dataset)
rs = api.get_opt_spark(dataset)
print(rs)
df = pd.DataFrame(rs.data)
display(df)
# opt_pushdown
api.delete_opt_pushdown(dataset)
api.insert_opt_pushdown(dataset)
rs = api.get_opt_pushdown(dataset)
print(rs)
df = pd.DataFrame(rs.data)
display(df)
# opt_profile
api.delete_opt_profile(dataset)
api.insert_opt_profile(dataset)
rs = api.get_opt_profile(dataset)
print(rs)
df = pd.DataFrame(rs.data)
display(df)
# opt_load
api.delete_opt_load(dataset)
api.insert_opt_load(dataset)
rs = api.get_opt_load(dataset)
print(rs)
df = pd.DataFrame(rs.data)
display(df)
# opt_profile
api.delete_opt_profile(dataset)
api.insert_opt_profile(dataset)
rs = api.get_opt_profile(dataset)
print(rs)
df = pd.DataFrame(rs.data)
display(df)
# opt_env
api.delete_opt_env(dataset)
api.insert_opt_env(dataset)
rs = api.get_opt_env(dataset)
print(rs)
df = pd.DataFrame(rs.data)
display(df)
# opt_owl
api.delete_opt_owl(dataset)
api.insert_opt_owl(dataset)
rs = api.get_opt_owl(dataset)
print(rs)
df = pd.DataFrame(rs.data)
display(df)
Job
dataset = 'test'
run_id = '2024-09-20'
conn.sql(f"create table if not exists {dataset} as select * from read_csv_auto('./data/fake_customers.csv') ")
# owl_check_history,
# Delete, Insert, Read
api.delete_owl_check_history(dataset)
api.insert_owl_check_history(dataset, run_id)
rs = api.get_owl_check_history(dataset)
print(rs)
# owl_catalog,
# Delete, Insert, Read
api.delete_owl_catalog(dataset)
api.insert_owl_catalog(dataset)
rs = api.get_owl_catalog(dataset)
print(rs)
# dataset_schema,
# Delete, Insert, Read
api.delete_dataset_schema(dataset)
api.insert_dataset_schema(dataset)
rs = api.get_dataset_schema(dataset)
print(rs)
# dataset_field,
# Delete, Insert, Read
api.delete_dataset_field(dataset, run_id)
api.insert_dataset_field(dataset, run_id)
rs = api.get_dataset_field(dataset, run_id)
print(rs)
# run rules
api.delete_rule_output(dataset, run_id)
api.run_rules(dataset, run_id)
# scoring
rule_output = api.get_rule_output(dataset, run_id)
print(rule_output.data)
rule_score = 0
for r in rule_output.data:
rule_score += r['score']
print(str(rule_score))
# dataset_scan,
# Delete, Insert, Read
delete_record = api.delete_dataset_scan(dataset, run_id)
add_record = api.insert_dataset_scan(dataset, run_id, 100, 100 - rule_score)
rs = api.get_dataset_scan(dataset, run_id)
print(rs.data)
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
duckdq_mearnsb-0.0.3.tar.gz
(2.5 kB
view details)
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file duckdq_mearnsb-0.0.3.tar.gz.
File metadata
- Download URL: duckdq_mearnsb-0.0.3.tar.gz
- Upload date:
- Size: 2.5 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/5.1.1 CPython/3.12.4
File hashes
| Algorithm | Hash digest | |
|---|---|---|
| SHA256 |
66cf8af5c69e8b27305dd4baf53946dd58c7663dcd12d14a6ed1c54fa6121804
|
|
| MD5 |
ec672a580ac8b1032af057158c2c7c14
|
|
| BLAKE2b-256 |
065e4e7a7024c29e34c59b9292c87812b40cab8492929a274cbccf004396828f
|
File details
Details for the file duckdq_mearnsb-0.0.3-py3-none-any.whl.
File metadata
- Download URL: duckdq_mearnsb-0.0.3-py3-none-any.whl
- Upload date:
- Size: 2.3 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/5.1.1 CPython/3.12.4
File hashes
| Algorithm | Hash digest | |
|---|---|---|
| SHA256 |
0984e8ce2fb7a9ad529a9bb5404a4ce58429cda3dcaaa782fa0d61d57f74f693
|
|
| MD5 |
db656f76359e064d05875a1efad226a1
|
|
| BLAKE2b-256 |
0d20fddd25a641852ae851343fa85aa04891d80acdbb0ec71fba8d34c7c82737
|