Skip to main content

A simple library to import data into a database from different sources (extensible)

Project description

simpletasks-data

Additional tasks for simpletasks to handle data.

Provides an ImportTask to import data into a Flask-SQLAlchemy model, from any source of data.

Data sources provided are:

  • CSV (ImportCsv)
  • SQLAlchemy query (ImportTable) Custom data sources can easily be implemented via inheriting ImportSource.

Other data sources are provided by other libraries:

Sample:

import contextlib
from typing import Iterable, Iterator, List, Optional, Sequence

import click

from simpletasks import Cli, CliParams
from simpletasks_data import ImportSource, ImportTask, Mapping

from myapp import db

@click.group()
def cli():
    pass


class Asset(db.Model):
    """Model to import to"""
    id = db.Column(db.Integer, primary_key=True)
    serialnumber = db.Column(db.String(128), index=True)
    warehouse = db.Column(db.String(128))
    status = db.Column(db.String(128))
    product = db.Column(db.String(128))
    guid = db.Column(db.String(36))


class AssetHistory(db.Model):
    """Model to keep track of changes"""
    id = db.Column(db.Integer, primary_key=True)
    date = db.Column(db.DateTime)
    asset_id = db.Column(db.Integer, db.ForeignKey("asset.id"), nullable=False, index=True)
    asset = db.relationship("Asset", foreign_keys=asset_id)

    old_warehouse = db.Column(db.String(128))
    new_warehouse = db.Column(db.String(128))
    old_status = db.Column(db.String(128))
    new_status = db.Column(db.String(128))


@Cli(cli, params=[CliParams.progress(), CliParams.dryrun()])
class ImportAssetsTask(ImportTask):
    class _AssetsSource(ImportSource):
        class _AssetMapping(Mapping):
            def __init__(self) -> None:
                super().__init__()

                # Defines mapping between the input data and the fields from the model
                # self.<name of the field in the model> = self.auto() -- in the order of the input data
                self.serialnumber = self.auto()
                self.status = self.auto(keep_history=True)
                self.warehouse = self.auto(keep_history=True)
                self.product = self.auto()
                self.guid = self.auto()

                # If there are gaps in the input data (i.e. fields not being used in the model), you can either:
                # - use `self.foobar = self.col()` instead of `self.foobar = self.auto()` to specify the column name after the gap
                # - use `foobar = self.auto()` to still register the gap/column, but not use it in the model

            def get_key_column_name(self) -> str:
                # By default, we use the "id" field - this overrides it
                return "serialnumber"

            def get_header_line_number(self) -> int:
                # By default we skip the first (0-index) line (header) - setting to -1 includes all lines
                return -1

        @contextlib.contextmanager
        def getGeneratorData(self) -> Iterator[Iterable[Sequence[str]]]:
            # Custom data generator
            output: List[Sequence[str]] = []

            for x in o:
                output.append([serialnumber, status, warehouse, product, guid])

            yield output

        def __init__(self) -> None:
            super().__init__(self._AssetMapping())

    def createModel(self) -> Asset:
        return Asset()

    def createHistoryModel(self, base: Asset) -> Optional[AssetHistory]:
        o = AssetHistory()
        o.asset_id = base.id
        return o

    def __init__(self, *args, **kwargs):
        super().__init__(model=Asset(), keep_history=True, *args, **kwargs)

    def get_sources(self) -> Iterable[ImportSource]:
        # Here we can have multiple sources if we wish
        return [self._AssetsSource()]

Contributing

To initialize the environment:

poetry install --no-root
poetry install -E geoalchemy

To run tests (including linting and code formatting checks), please run:

poetry run pytest --mypy --flake8 && poetry run black --check .

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

simpletasks-data-0.2.0.tar.gz (19.6 kB view hashes)

Uploaded Source

Built Distribution

simpletasks_data-0.2.0-py3-none-any.whl (20.3 kB view hashes)

Uploaded Python 3

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page