A simple library to import data into a database from different sources (extensible)
Project description
simpletasks-data
Additional tasks for simpletasks to handle data.
Provides an `ImportTask` to import data into a Flask-SQLAlchemy model, from any source of data.
Data sources provided are:
- CSV (`ImportCsv`)
- SQLAlchemy query (`ImportTable`)

Custom data sources can easily be implemented by inheriting from `ImportSource`.
Other data sources are provided by other libraries:
- gapi-helper provides Google Sheets as source.
Sample:
import contextlib
from typing import Iterable, Iterator, List, Optional, Sequence
import click
from simpletasks import Cli, CliParams
from simpletasks_data import ImportSource, ImportTask, Mapping
from myapp import db
@click.group()
def cli():
    """Root click command group that the sample import task is attached to."""
    pass
class Asset(db.Model):
    """Model to import to"""

    id = db.Column(db.Integer, primary_key=True)
    # Indexed; the sample mapping below uses this column as the import key.
    serialnumber = db.Column(db.String(128), index=True)
    warehouse = db.Column(db.String(128))
    status = db.Column(db.String(128))
    product = db.Column(db.String(128))
    guid = db.Column(db.String(36))
class AssetHistory(db.Model):
    """Model to keep track of changes"""

    id = db.Column(db.Integer, primary_key=True)
    date = db.Column(db.DateTime)
    # Link back to the asset this history row belongs to.
    asset_id = db.Column(db.Integer, db.ForeignKey("asset.id"), nullable=False, index=True)
    asset = db.relationship("Asset", foreign_keys=asset_id)
    # Old/new value pairs for the fields mapped with keep_history=True in the sample mapping.
    old_warehouse = db.Column(db.String(128))
    new_warehouse = db.Column(db.String(128))
    old_status = db.Column(db.String(128))
    new_status = db.Column(db.String(128))
@Cli(cli, params=[CliParams.progress(), CliParams.dryrun()])
class ImportAssetsTask(ImportTask):
    """Sample task importing rows into ``Asset`` and recording changes in ``AssetHistory``."""

    class _AssetsSource(ImportSource):
        """Custom data source feeding the task with rows built in ``getGeneratorData``."""

        # NOTE(review): the nesting of the two `get_*` overrides under the mapping
        # (rather than directly under the source) is reconstructed from the statement
        # order of a flattened sample - confirm against the library's base classes.
        class _AssetMapping(Mapping):
            """Maps the positional columns of the input rows to ``Asset`` fields."""

            def __init__(self) -> None:
                super().__init__()

                # Defines mapping between the input data and the fields from the model
                # self.<name of the field in the model> = self.auto() -- in the order of the input data
                self.serialnumber = self.auto()
                self.status = self.auto(keep_history=True)
                self.warehouse = self.auto(keep_history=True)
                self.product = self.auto()
                self.guid = self.auto()

                # If there are gaps in the input data (i.e. fields not being used in the model), you can either:
                # - use `self.foobar = self.col()` instead of `self.foobar = self.auto()` to specify the column name after the gap
                # - use `foobar = self.auto()` to still register the gap/column, but not use it in the model

            def get_key_column_name(self) -> str:
                # By default, we use the "id" field - this overrides it
                return "serialnumber"

            def get_header_line_number(self) -> int:
                # By default we skip the first (0-index) line (header) - setting to -1 includes all lines
                return -1

        @contextlib.contextmanager
        def getGeneratorData(self) -> Iterator[Iterable[Sequence[str]]]:
            """Yield the rows to import, one sequence of column values per row."""
            # Custom data generator
            # NOTE: `o` and the per-row values (serialnumber, status, ...) are
            # placeholders in this sample - replace them with your own data.
            output: List[Sequence[str]] = []
            for x in o:
                output.append([serialnumber, status, warehouse, product, guid])
            yield output

        def __init__(self) -> None:
            super().__init__(self._AssetMapping())

    def createModel(self) -> Asset:
        """Return a new, empty ``Asset``."""
        return Asset()

    def createHistoryModel(self, base: Asset) -> Optional[AssetHistory]:
        """Return a new ``AssetHistory`` whose ``asset_id`` points at ``base``."""
        o = AssetHistory()
        o.asset_id = base.id
        return o

    def __init__(self, *args, **kwargs):
        super().__init__(model=Asset(), keep_history=True, *args, **kwargs)

    def get_sources(self) -> Iterable[ImportSource]:
        # Here we can have multiple sources if we wish
        return [self._AssetsSource()]
Contributing
To initialize the environment:
poetry install --no-root
poetry install -E geoalchemy
To run tests (including linting and code formatting checks), please run:
poetry run pytest --mypy --flake8 && poetry run black --check .
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
simpletasks-data-0.2.0.tar.gz
(19.6 kB
view details)
Built Distribution
File details
Details for the file `simpletasks-data-0.2.0.tar.gz`.
File metadata
- Download URL: simpletasks-data-0.2.0.tar.gz
- Upload date:
- Size: 19.6 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: poetry/1.1.4 CPython/3.6.7 Linux/4.15.0-1077-gcp
File hashes
Algorithm | Hash digest
---|---
SHA256 | d23dd57f826ff7e8080a818dc11bc7a77ce1d1e3c464112db300fae70177d60d
MD5 | 6cb13c985c1e4b44567cbfe11316857c
BLAKE2b-256 | 12f66daa1eee5c70f63d0312b1b74925459ddfd1a84ada0da5b6fc73e969e27d
File details
Details for the file `simpletasks_data-0.2.0-py3-none-any.whl`.
File metadata
- Download URL: simpletasks_data-0.2.0-py3-none-any.whl
- Upload date:
- Size: 20.3 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: poetry/1.1.4 CPython/3.6.7 Linux/4.15.0-1077-gcp
File hashes
Algorithm | Hash digest
---|---
SHA256 | 7c2b3dd3d4576653daedf05fbf19365efd22cdf0d54b42bd32cf2ee214d18bb1
MD5 | 8236f41b94dd3c25d0eb75374a13aad5
BLAKE2b-256 | fad8ab31f2682f7d5f55d2a04326f734b232faa3e1796516feef5fa03e83b31e