Skip to main content

Abstraction for reading from and writing to LMDB files

Project description

Python Lmdbsystem

Testing Latest Version Downloads Pypi Status Python Versions

About Lmdbsystem

Lmdbsystem is an LMDB file-handling library for Python. It provides methods to read from and write to LMDB files.

Install

$ pip install lmdbsystem

Usage

from dataset_loaders import ImageLoader

from lmdbsystem.lmdb import Lmdb
from lmdbsystem.write_adapters.text import TextWriteAdapter
from lmdbsystem.write_adapters.image import ImageWriteAdapter
from lmdbsystem.read_adapters.cv2_image import Cv2ImageReadAdapter
from lmdbsystem.read_adapters.pil_image import PilImageReadAdapter
from lmdbsystem.read_adapters.bytes_image import BytesImageReadAdapter
from lmdbsystem.read_adapters.text import TextReadAdapter
from lmdbsystem.dataloader import DataLoader


class ImageLoader(DataLoader):
    """Loader that yields ``(key, value)`` byte pairs for files under a directory.

    The files are those matched by a recursive glob of
    ``{directory}/**/*{suffix}``, visited in sorted order.  A mapping from
    relative file name (suffix removed) to an md5 string is kept in
    ``dict_filename_md5``; how it is used depends on ``fn_md5_mode``.
    """

    def __init__(
        self,
        directory: str,
        suffix: str,
        fn_md5_mode: str,
        fn_md5_path: str,
    ):
        """Store the configuration, prepare the md5 mapping and list the files.

        Args:
            directory: Root directory that is searched recursively.
            suffix: File-name suffix used both in the glob pattern and when
                deriving the mapping key from a path.
            fn_md5_mode: ``"r"`` to load the mapping through
                ``json_reader(fn_md5_path)``; ``"w"`` to start from an empty
                mapping that is filled while items are read.
            fn_md5_path: Path handed to ``json_reader`` in ``"r"`` mode.

        Raises:
            ValueError: If ``fn_md5_mode`` is neither ``"r"`` nor ``"w"``.
        """
        self.directory, self.suffix = directory, suffix
        self.fn_md5_mode, self.fn_md5_path = fn_md5_mode, fn_md5_path

        # Reject unknown modes before touching the mapping or the file system.
        if fn_md5_mode not in ("r", "w"):
            raise ValueError(f"Don't support fn_md5_mode: {fn_md5_mode}")
        self.dict_filename_md5 = json_reader(fn_md5_path) if fn_md5_mode == "r" else {}

        pattern = f"{directory}/**/*{suffix}"
        self.file_paths = sorted(glob(pattern, recursive=True))

    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
        """Yield the ``(key, value)`` pair of every listed file, in order."""
        yield from (self[path] for path in self.file_paths)

    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
        """Build the ``(key, value)`` pair for a single file.

        In ``"r"`` mode the key comes from the stored mapping and the value is
        ``dump_pickle((str2bytes(get_md5_file(file_path)), raw))``.  In any
        other mode the key is ``str2bytes(get_md5_file(file_path))``, the value
        is the raw content returned by ``raw_reader``, and the mapping is
        updated with the file's md5.
        """
        relative_name = get_relative_path(self.directory, file_path).removesuffix(self.suffix)
        payload = raw_reader(file_path)

        if self.fn_md5_mode != "r":
            # Write mode: remember the md5 of this file under its relative name.
            digest = get_md5_file(file_path)
            self.dict_filename_md5[relative_name] = digest
            return str2bytes(digest), payload

        # Read mode: the stored md5 is the key; the file's own md5 travels
        # alongside the raw content inside the pickled value.
        digest = self.dict_filename_md5[relative_name]
        wrapped = dump_pickle((str2bytes(get_md5_file(file_path)), payload))
        return str2bytes(digest), wrapped


class LabelInFilenameLoader(DataLoader):
    """Loader that builds label text from pieces of each file's base name.

    Files are found with a recursive glob of ``{directory}/**/*{suffix}`` and
    visited in sorted order.  For every file the base name (suffix removed) is
    split on ``delimiter``; the pieces whose position is listed in
    ``values_index`` become the labels.
    """

    def __init__(
        self,
        directory: str,
        suffix: str,
        fn_md5_path: str,
        values_index: List[int],
        values_map: Dict[str, str],
        delimiter: str,
    ):
        """Store the configuration, load the md5 mapping and list the files.

        Args:
            directory: Root directory that is searched recursively.
            suffix: File-name suffix used in the glob pattern and stripped
                from names before they are split or looked up.
            fn_md5_path: Path handed to ``json_reader`` to obtain the mapping
                from relative file name to md5 string.
            values_index: Positions of the split base-name pieces to keep.
            values_map: Optional label post-processing.  If it contains the
                key ``"type"``, that value is evaluated to a callable which is
                applied to every label; otherwise it is used as a lookup
                table, leaving unmapped labels unchanged.
            delimiter: Separator used to split the base name.
        """
        self.directory = directory
        self.suffix = suffix
        self.values_map = values_map
        self.delimiter = delimiter
        self.values_index = values_index
        self.dict_filename_md5 = json_reader(fn_md5_path)
        self.file_paths = sorted(glob(f"{directory}/**/*{suffix}", recursive=True))

    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
        """Yield the ``(key, value)`` pair of every listed file, in order."""
        for file_path in self.file_paths:
            yield self[file_path]

    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
        """Build the ``(key, value)`` pair for a single file.

        The key is the stored md5 for the file's relative name.  The value is
        ``dump_pickle((sub_key, label_bytes))`` where ``sub_key`` is
        ``str2bytes(get_md5_file(file_path))`` and ``label_bytes`` is the
        space-joined labels passed through ``str2bytes``.

        Raises:
            KeyError: If the relative name is missing from the md5 mapping.
        """
        relative_name = get_relative_path(self.directory, file_path).removesuffix(self.suffix)
        md5_file = self.dict_filename_md5[relative_name]
        key = str2bytes(md5_file)
        sub_key = str2bytes(get_md5_file(file_path))

        line_values = os.path.basename(file_path).removesuffix(self.suffix).split(self.delimiter)
        labels = [value.strip() for index, value in enumerate(line_values) if index in self.values_index]

        if self.values_map:
            if "type" in self.values_map:
                if labels:
                    # SECURITY: eval() executes arbitrary Python.  Only pass a
                    # trusted ``values_map["type"]`` (e.g. "int" or "float");
                    # never forward unvalidated user input here.
                    # The converter is loop-invariant, so resolve it once
                    # instead of re-evaluating the string for every label.
                    converter = eval(self.values_map["type"])
                    labels = [str(converter(item)) for item in labels]
            else:
                labels = [self.values_map.get(item, item) for item in labels]

        value = dump_pickle((sub_key, str2bytes(" ".join(labels))))
        return key, value


# Write an LMDB file whose values are labels taken from the file names.
# NOTE: `args`, `values_map` and `values_index` are not defined in this
# snippet; the surrounding script is expected to provide them.
lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
lmdb_obj.write_loader(
    LabelInFilenameLoader(
        directory=args.folder,
        suffix=args.suffix,
        fn_md5_path=args.fn_md5_path,
        values_map=values_map,
        delimiter=args.delimiter,
        values_index=values_index,
    ),
)

# Write an LMDB file from the files found in an image directory.
# NOTE: `args` is not defined in this snippet; the surrounding script is
# expected to provide it.  `fn_md5_mode` must be "r" or "w" (ImageLoader
# raises ValueError for anything else).
lmdb_obj = Lmdb(ImageWriteAdapter(path=args.lmdb_file, map_size=args.lmdb_map_size))
lmdb_obj.write_loader(
    ImageLoader(
        directory=args.folder,
        suffix=args.suffix,
        fn_md5_mode=args.fn_md5_mode,
        fn_md5_path=args.fn_md5_path,
    ),
)


# Read image
# The three lines below are alternatives: each one rebinds `value`, so pick
# the adapter you need.  `path` and `index` are not defined in this snippet.
# NOTE(review): presumably the adapters differ in the returned image type
# (OpenCV array, PIL image, raw bytes) -- confirm against the adapter classes.
value = Lmdb(Cv2ImageReadAdapter(path=path)).read_index(index)
value = Lmdb(PilImageReadAdapter(path=path)).read_index(index)
value = Lmdb(BytesImageReadAdapter(path=path)).read_index(index)

# Read text
# Uses the same `path` / `index` placeholders as the image examples above.
value = Lmdb(TextReadAdapter(path=path)).read_index(index)

Changelog

Please see CHANGELOG for more information on what has changed recently.

Contributing

Please see CONTRIBUTING for details.

Security Vulnerabilities

Please review our security policy on how to report security vulnerabilities.

Credits

License

The MIT License (MIT). Please see License File for more information.

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

lmdbsystem-0.2.2.tar.gz (9.9 kB view details)

Uploaded Source

Built Distribution

lmdbsystem-0.2.2-py3-none-any.whl (13.3 kB view details)

Uploaded Python 3

File details

Details for the file lmdbsystem-0.2.2.tar.gz.

File metadata

  • Download URL: lmdbsystem-0.2.2.tar.gz
  • Upload date:
  • Size: 9.9 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/5.0.0 CPython/3.12.3

File hashes

Hashes for lmdbsystem-0.2.2.tar.gz
Algorithm Hash digest
SHA256 c680d2bb4f538aa5a47339d327de7f4c18413aea9f526418b2f22a6770eeb53f
MD5 990fca4be34cf8360d9178ea31192bfd
BLAKE2b-256 6ad17f723cd7b54b1570a397fa20b966862e17b35f280ec78a14e376811e26f9

See more details on using hashes here.

File details

Details for the file lmdbsystem-0.2.2-py3-none-any.whl.

File metadata

  • Download URL: lmdbsystem-0.2.2-py3-none-any.whl
  • Upload date:
  • Size: 13.3 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/5.0.0 CPython/3.12.3

File hashes

Hashes for lmdbsystem-0.2.2-py3-none-any.whl
Algorithm Hash digest
SHA256 1615a149016c4e3f1398a194afb51fa64f3ec0023aa833d9885992540927a5ea
MD5 9eaf7f24aac07e88d3453b6906161918
BLAKE2b-256 0734d404a0df8220fa52108474b49c3a64e5390770f5a8ea93931955ce851012

See more details on using hashes here.

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page