An abstraction for reading from and writing to LMDB files
Project description
Python Lmdbsystem
About Lmdbsystem
LMDB Convertor is an LMDB file-handling library for Python. It provides methods to read from and write to LMDB files.
Install
$ pip install lmdbsystem
Usage
from dataset_loaders import ImageLoader
from lmdbsystem.lmdb import Lmdb
from lmdbsystem.write_adapters.text import TextWriteAdapter
from lmdbsystem.write_adapters.image import ImageWriteAdapter
from lmdbsystem.read_adapters.cv2_image import Cv2ImageReadAdapter
from lmdbsystem.read_adapters.pil_image import PilImageReadAdapter
from lmdbsystem.read_adapters.bytes_image import BytesImageReadAdapter
from lmdbsystem.read_adapters.text import TextReadAdapter
from lmdbsystem.dataloader import DataLoader
class ImageLoader(DataLoader):
    """Produce ``(key, value)`` pairs for every file under a directory.

    Files are discovered recursively with the pattern
    ``{directory}/**/*{suffix}`` and visited in sorted path order.

    ``fn_md5_mode`` selects how the key of each entry is obtained:

    * ``"r"`` -- the filename -> md5 mapping is loaded from ``fn_md5_path``
      with ``json_reader``; the key is the md5 stored for the file's
      relative name, and the value is ``dump_pickle`` applied to the tuple
      ``(md5 of the file as bytes, raw file content)``.
    * ``"w"`` -- the mapping starts empty; the key is the md5 computed from
      the file itself, which is also recorded in ``dict_filename_md5``, and
      the value is the raw file content.

    Any other mode raises ``ValueError``.

    NOTE(review): nothing in this class writes ``dict_filename_md5`` back to
    ``fn_md5_path``; presumably the caller persists it in ``"w"`` mode --
    confirm.
    """

    def __init__(
        self,
        directory: str,
        suffix: str,
        fn_md5_mode: str,
        fn_md5_path: str,
    ):
        # Reject unknown modes up front; only "r" and "w" are supported.
        if fn_md5_mode not in ("r", "w"):
            raise ValueError(f"Don't support fn_md5_mode: {fn_md5_mode}")

        self.directory = directory
        self.suffix = suffix
        self.fn_md5_mode = fn_md5_mode
        self.fn_md5_path = fn_md5_path
        self.dict_filename_md5 = json_reader(fn_md5_path) if fn_md5_mode == "r" else {}

        pattern = f"{directory}/**/*{suffix}"
        self.file_paths = sorted(glob(pattern, recursive=True))

    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
        """Lazily yield the ``(key, value)`` pair of each discovered file."""
        yield from (self[path] for path in self.file_paths)

    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
        """Return the ``(key, value)`` pair for the file at ``file_path``.

        Raises ``KeyError`` in ``"r"`` mode when the file's relative name
        (suffix removed) is absent from the loaded mapping.
        """
        relative_path = get_relative_path(self.directory, file_path)
        name = relative_path.removesuffix(self.suffix)
        content = raw_reader(file_path)

        if self.fn_md5_mode != "r":
            # Write mode: derive the md5 from the file and remember it.
            digest = get_md5_file(file_path)
            self.dict_filename_md5[name] = digest
            return str2bytes(digest), content

        # Read mode: the key comes from the stored mapping, while the md5 of
        # this file travels inside the value next to the raw content.
        digest = self.dict_filename_md5[name]
        content = dump_pickle((str2bytes(get_md5_file(file_path)), content))
        return str2bytes(digest), content
class LabelInFilenameLoader(DataLoader):
    """Produce ``(key, value)`` pairs whose value carries labels parsed from file names.

    Files are discovered recursively with the pattern
    ``{directory}/**/*{suffix}`` and visited in sorted path order. The key of
    an entry is the md5 stored in the mapping loaded from ``fn_md5_path``
    (looked up by the file's relative name with ``suffix`` removed).

    The labels are obtained by stripping ``suffix`` from the file's base
    name, splitting it on ``delimiter`` and keeping the pieces whose position
    is listed in ``values_index`` (in file-name order). If ``values_map`` is
    non-empty, the labels are post-processed:

    * when it has a ``"type"`` entry, every label is passed through the
      callable named by that entry and converted back to ``str``;
    * otherwise every label is replaced by its ``values_map`` entry, if any.

    The value is ``dump_pickle`` applied to the tuple
    ``(md5 of the file as bytes, space-joined labels as bytes)``.
    """

    def __init__(
        self,
        directory: str,
        suffix: str,
        fn_md5_path: str,
        values_index: List[int],
        values_map: Dict[str, str],
        delimiter: str,
    ):
        self.directory = directory
        self.suffix = suffix
        self.values_index = values_index
        self.values_map = values_map
        self.delimiter = delimiter
        self.dict_filename_md5 = json_reader(fn_md5_path)

        pattern = f"{directory}/**/*{suffix}"
        self.file_paths = sorted(glob(pattern, recursive=True))

    def iterator(self) -> Generator[Tuple[Optional[bytes], Optional[bytes]], Any, None]:
        """Lazily yield the ``(key, value)`` pair of each discovered file."""
        yield from (self[path] for path in self.file_paths)

    def __getitem__(self, file_path: str) -> Tuple[Optional[bytes], Optional[bytes]]:
        """Return the ``(key, value)`` pair for the file at ``file_path``.

        Raises ``KeyError`` when the file's relative name (suffix removed) is
        absent from the loaded filename -> md5 mapping.
        """
        relative_name = get_relative_path(self.directory, file_path).removesuffix(self.suffix)
        key = str2bytes(self.dict_filename_md5[relative_name])
        sub_key = str2bytes(get_md5_file(file_path))

        # Only the base name (without suffix) carries the labels.
        stem = os.path.basename(file_path).removesuffix(self.suffix)
        labels = []
        for position, piece in enumerate(stem.split(self.delimiter)):
            if position in self.values_index:
                labels.append(piece.strip())

        if self.values_map:
            if "type" in self.values_map:
                value_type = self.values_map["type"]
                # NOTE(review): eval() executes whatever expression the
                # "type" entry holds (e.g. "int"); make sure values_map never
                # comes from an untrusted source.
                labels = [str(eval(value_type)(item)) for item in labels]
            else:
                translate = self.values_map.get
                labels = [translate(label, label) for label in labels]

        joined = " ".join(labels)
        return key, dump_pickle((sub_key, str2bytes(joined)))
# Write an LMDB file whose values are labels parsed from the file names
lmdb_obj = Lmdb(TextWriteAdapter(path=args.lmdb_file))
label_loader = LabelInFilenameLoader(
    directory=args.folder,
    suffix=args.suffix,
    fn_md5_path=args.fn_md5_path,
    values_map=values_map,
    delimiter=args.delimiter,
    values_index=values_index,
)
lmdb_obj.write_loader(label_loader)

# Write an LMDB file from a directory of images
lmdb_obj = Lmdb(ImageWriteAdapter(path=args.lmdb_file, map_size=args.lmdb_map_size))
image_loader = ImageLoader(
    directory=args.folder,
    suffix=args.suffix,
    fn_md5_mode=args.fn_md5_mode,
    fn_md5_path=args.fn_md5_path,
)
lmdb_obj.write_loader(image_loader)

# Read the image stored at `index`; pick the adapter matching the
# representation you want back (OpenCV, PIL or bytes adapter).
value = Lmdb(Cv2ImageReadAdapter(path=path)).read_index(index)
value = Lmdb(PilImageReadAdapter(path=path)).read_index(index)
value = Lmdb(BytesImageReadAdapter(path=path)).read_index(index)

# Read the text stored at `index`
value = Lmdb(TextReadAdapter(path=path)).read_index(index)
Changelog
Please see CHANGELOG for more information on what has changed recently.
Contributing
Please see CONTRIBUTING for details.
Security Vulnerabilities
Please review our security policy on how to report security vulnerabilities.
Credits
License
The MIT License (MIT). Please see License File for more information.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
lmdbsystem-0.2.2.tar.gz (9.9 kB, view details)
Built Distribution
File details
Details for the file lmdbsystem-0.2.2.tar.gz.
File metadata
- Download URL: lmdbsystem-0.2.2.tar.gz
- Upload date:
- Size: 9.9 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/5.0.0 CPython/3.12.3
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | c680d2bb4f538aa5a47339d327de7f4c18413aea9f526418b2f22a6770eeb53f |
| MD5 | 990fca4be34cf8360d9178ea31192bfd |
| BLAKE2b-256 | 6ad17f723cd7b54b1570a397fa20b966862e17b35f280ec78a14e376811e26f9 |
File details
Details for the file lmdbsystem-0.2.2-py3-none-any.whl.
File metadata
- Download URL: lmdbsystem-0.2.2-py3-none-any.whl
- Upload date:
- Size: 13.3 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/5.0.0 CPython/3.12.3
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 1615a149016c4e3f1398a194afb51fa64f3ec0023aa833d9885992540927a5ea |
| MD5 | 9eaf7f24aac07e88d3453b6906161918 |
| BLAKE2b-256 | 0734d404a0df8220fa52108474b49c3a64e5390770f5a8ea93931955ce851012 |