A library for flattening json objects
Project description
DictCruncher - a library for flattening json objects
dictcruncher is a library for flattening objects into a format that can be easily imported into other libraries, e.g. pandas.DataFrame.from_records().
Usage:
We can easily convert lists of JSON objects into flattened "rows" that we can iterate over:
import dictcruncher as dm
in_list = [
{
"id": "000001337",
"tripUpdate": {
"trip": {
"tripId": "1337",
"startTime": "02:17:23",
"startDate": "20231207",
"routeId": "Blue"
}
}
}
]
table_config = {
'trip_config': [
dm.MapperLocation(location='root::id', column_name='route_id'),
dm.MapperLocation(location='root::tripUpdate::trip::tripId', column_name='trip_id'),
dm.MapperLocation(location='root::tripUpdate::trip::startTime', column_name='start_time'),
dm.MapperLocation(location='root::tripUpdate::trip::startDate', column_name='start_date'),
dm.MapperLocation(location='root::tripUpdate::trip::routeId', column_name='routeId'),
]
}
in_dictcruncher = dm.dictcruncher(in_dict_list=in_list, mapper=table_config)
flattened = in_dictcruncher.get_records(table_name='trip_config')
# flattened
[
{
"route_id": "000001337",
"trip_id": "1337",
"start_time": "02:17:23",
"start_date": "20231207",
"routeId": "Blue"
}
]
We can iterate over nested lists that are one level down. (Currently, more than one level of list nesting is not supported.)
import dictcruncher as dm
in_list = [
{
"id": "000001337",
"timestamp": "1701938918",
"[realtime_data]": {
"tripRouteData": [
{
"routeId": "Blue",
"nextTrain": {
"start": "1701940718"
}
},
{
"routeId": "Red",
"nextTrain": {
"start": "1701940718"
}
},
{
"routeId": "Orange",
"nextTrain": {
"start": "1701940718"
}
}
]
}
}
]
table_config = {
'routes': [
dm.MapperLocation(location='root::id', column_name='route_id'),
dm.MapperLocation(location='root::timestamp', column_name='route_timestamp'),
dm.MapperLocation(location='root::[realtime_data]::tripRouteData[]::routeId', column_name='line_name'),
dm.MapperLocation(location='root::[realtime_data]::tripRouteData[]::nextTrain::start', column_name='next_train_start'),
]
}
in_dictcruncher = dm.dictcruncher(in_dict_list=in_list, mapper=table_config)
flattened = in_dictcruncher.get_records(table_name='routes')
# flattened:
[
{
"route_id": "000001337",
"route_timestamp": "1701938918",
"line_name": "Blue",
"next_train_start": "1701940718"
},
{
"route_id": "000001337",
"route_timestamp": "1701938918",
"line_name": "Red",
"next_train_start": "1701940718"
},
{
"route_id": "000001337",
"route_timestamp": "1701938918",
"line_name": "Orange",
"next_train_start": "1701940718"
}
]
We also support passing in default values and performing operations on them:
import dictcruncher as dm
in_dict = [
{
"transaction_id": 1,
"total_amount": 1000,
"tip_amount": 300
},
{
"transaction_id": 2,
"total_amount": 1000
},
]
table_config = {
'transactions': [
dm.MapperLocation(location='root::transaction_id', column_name='transaction_id'),
dm.MapperLocation(location='root::total_amount', column_name='total_amount', coalesce_value=0),
dm.MapperLocation(location='root::tip_amount', column_name='tip_amount', coalesce_value=0),
dm.MapperLocation(location='root::tip_amount', column_name='tip_amount_negative', coalesce_value=0, convert_function=lambda x: x * -1)
]
}
in_dictcruncher = dm.dictcruncher(in_dict_list=in_dict, mapper=table_config)
flattened = in_dictcruncher.get_records(table_name='transactions')
# Flattened
[
{
"transaction_id": 1,
"total_amount": 1000,
"tip_amount": 300,
"tip_amount_negative": -300
},
{
"transaction_id": 2,
"total_amount": 1000,
"tip_amount": 0,
"tip_amount_negative": 0
}
]
Finally, we support error handling on incomplete objects
import dictcruncher as dm
in_dict = [
{
"transaction_id": 1,
"total_amount": 1000,
"tip_amount": 300
},
{
"transaction_id": 2,
"total_amount": 1000
},
]
table_config = {
'tips_only': [
dm.MapperLocation(location='root::transaction_id', column_name='transaction_id'),
dm.MapperLocation(location='root::total_amount', column_name='total_amount', coalesce_value=0),
# if_missing supports 'drop' to drop object or 'fail' to raise an exception.
dm.MapperLocation(location='root::tip_amount', column_name='tip_amount', if_missing='drop')
]
}
in_dictcruncher = dm.dictcruncher(in_dict_list=in_dict, mapper=table_config)
flattened = in_dictcruncher.get_records(table_name='tips_only')
# Flattened
[
{
"transaction_id": 1,
"total_amount": 1000,
"tip_amount": 300
}
]
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
dictcruncher-0.1.1.tar.gz
(7.1 kB
view details)
File details
Details for the file dictcruncher-0.1.1.tar.gz.
File metadata
- Download URL: dictcruncher-0.1.1.tar.gz
- Upload date:
- Size: 7.1 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.2 CPython/3.10.12
File hashes
Algorithm | Hash digest
---|---
SHA256 | 2d67d14a42cebef6f2564994a0aee68d2468502a1b6b9aa05b9f58d87532ad14
MD5 | a9c752d0fd84167d4bace8f93f47e104
BLAKE2b-256 | fcea4c1208ce5198998fa7ffc03691f5e8220497ca621663ed630c8596b70807