On-disk Dict With RocksDB (dbm alternative)
Project description
RocksDict
Key-value storage supporting any python object
Abstract
This package enables users to store, query, and delete a large number of key-value pairs on disk.
This is especially useful when the data cannot fit into RAM. If you have hundreds of GBs or many TBs of key-value data to store and query, this is the package for you.
Installation
This package is built for macOS (x86-64/arm64), Windows (64-bit/32-bit), and Linux (x86-64).
It can be installed from PyPI with `pip install rocksdict`.
Plans
- set, get, del
- multi get
- support string, float, int, bytes
- support other python objects through pickle
- support BigInt
- compare BigInt by value size
- keys, values, items iterator
- options, read options, write options, all options
- SstFileWriter and bulk ingest
- column families
- open as secondary
- write batch
Introduction
Below is a code example that shows how to do the following:
- Create Rdict
- Store something on disk
- Close Rdict
- Open Rdict again
- Check Rdict elements
- Iterate from Rdict
- Batch get
- Delete storage
from rocksdict import Rdict
import numpy as np
import pandas as pd
# Basic usage: store values of several types, reopen the store, verify, clean up.
path = "./test_dict"

# create a Rdict with default options at `path`
db = Rdict(path)
db[1.0] = 1
db[1] = 1.0
db["huge integer"] = 2343546543243564534233536434567543
db["good"] = True
db["bad"] = False
db["bytes"] = b"bytes"
db["this is a list"] = [1, 2, 3]
db["store a dict"] = {0: 1}
db[b"numpy"] = np.array([1, 2, 3])
db["a table"] = pd.DataFrame({"a": [1, 2], "b": [2, 1]})

# close Rdict
db.close()

# reopen Rdict from disk
db = Rdict(path)
assert db[1.0] == 1
assert db[1] == 1.0
assert db["huge integer"] == 2343546543243564534233536434567543
assert db["good"] == True
assert db["bad"] == False
assert db["bytes"] == b"bytes"
assert db["this is a list"] == [1, 2, 3]
assert db["store a dict"] == {0: 1}
# element-wise comparisons must be reduced to a single bool before asserting
assert np.all(db[b"numpy"] == np.array([1, 2, 3]))
assert np.all(db["a table"] == pd.DataFrame({"a": [1, 2], "b": [2, 1]}))

# iterate through all elements
for k, v in db.items():
    print(f"{k} -> {v}")

# batch get: indexing with a list of keys returns a list of values
print(db[["good", "bad", 1.0]])
# [True, False, 1]

# delete Rdict from disk (the handle must be closed first)
db.close()
Rdict.destroy(path)
Supported types:
- key:
int, float, bool, str, bytes
- value:
int, float, bool, str, bytes
and anything that supports `pickle`.
Rocksdb Options
Since the backend is implemented using RocksDB, most RocksDB options are supported:
Example of tuning
from rocksdict import Rdict, Options, SliceTransform, PlainTableFactoryOptions
import os
def db_options():
    """Return an ``Options`` object carrying the tuning applied below."""
    opt = Options()
    # create the database if it does not exist yet
    opt.create_if_missing(True)
    # one background job per CPU core; os.cpu_count() may return None,
    # so fall back to a single job in that case
    opt.set_max_background_jobs(os.cpu_count() or 1)
    # configure mem-table to a large value (256 MB)
    opt.set_write_buffer_size(0x10000000)
    opt.set_level_zero_file_num_compaction_trigger(4)
    # configure l0 and l1 size, let them have the same size (1 GB)
    opt.set_max_bytes_for_level_base(0x40000000)
    # 256 MB file size
    opt.set_target_file_size_base(0x10000000)
    # use a smaller compaction multiplier
    opt.set_max_bytes_for_level_multiplier(4.0)
    # use 8-byte prefix (2 ^ 64 is far enough for transaction counts)
    opt.set_prefix_extractor(SliceTransform.create_max_len_prefix(8))
    # set to plain-table for better performance
    opt.set_plain_table_factory(PlainTableFactoryOptions())
    return opt


# open (or create) a Rdict at the given path using the tuned options
db = Rdict("./some_path", db_options())
Example of Column Families
from rocksdict import Rdict, Options, SliceTransform, PlainTableFactoryOptions
import random
# NOTE: random.randbytes (used below) requires Python 3.9+.
path = "tmp"
cf1_name = "cf1"
cf2_name = "cf2"

# set cf2 as a plain table
cf2_opt = Options()
cf2_opt.set_prefix_extractor(SliceTransform.create_max_len_prefix(8))
p_opt = PlainTableFactoryOptions()
# every key written to cf2 below is exactly 200 bytes long
p_opt.user_key_length = 200
cf2_opt.set_plain_table_factory(p_opt)

# create column families if missing
opt = Options()  # create_if_missing=True by default
opt.create_missing_column_families(True)
db = Rdict(
    path,
    options=opt,
    column_families={cf1_name: Options(), cf2_name: cf2_opt},
)

# add column families
db_cf1 = db.get_column_family(cf1_name)
db_cf2 = db.get_column_family(cf2_name)
db_cf3 = db.create_column_family("cf3")  # with default Options
db_cf4 = db.create_column_family("cf4", cf2_opt)  # custom options

# remove column families
db.drop_column_family("cf3")
db.drop_column_family("cf4")
del db_cf3, db_cf4

# insert into column families
for i in range(10000):
    db_cf1[i] = i ** 2

rand_bytes = [random.randbytes(200) for _ in range(100000)]
for b in rand_bytes:
    db_cf2[b] = b

# close database (column family handles first, then the main handle)
db_cf1.close()
db_cf2.close()
db.close()

# reopen db
db = Rdict(path, column_families={cf1_name: Options(), cf2_name: cf2_opt})
db_cf1 = db.get_column_family(cf1_name)
db_cf2 = db.get_column_family(cf2_name)

# check keys: cf1 is expected to iterate its integer keys in ascending order
for count, (k, v) in enumerate(db_cf1.items()):
    assert k == count
    assert v == count ** 2

# cf2 is expected to iterate its bytes keys in sorted order
rand_bytes.sort()
assert list(db_cf2.keys()) == rand_bytes

# delete db (same close order as above: column family handles, then main handle)
db_cf1.close()
db_cf2.close()
db.close()
Rdict.destroy(path)
Example of Bulk Ingestion By SstFileWriter
from rocksdict import Rdict, Options, SstFileWriter
import random
# generate some rand bytes (random.randbytes requires Python 3.9+);
# each list is sorted before being written to its SST file
rand_bytes1 = [random.randbytes(200) for _ in range(100000)]
rand_bytes1.sort()
rand_bytes2 = [random.randbytes(200) for _ in range(100000)]
rand_bytes2.sort()

# write to file1.sst (each key is stored with itself as the value)
writer = SstFileWriter()
writer.open("file1.sst")
for k in rand_bytes1:
    writer[k] = k
writer.finish()

# write to file2.sst, this time passing Options explicitly
writer = SstFileWriter(Options())
writer.open("file2.sst")
for k in rand_bytes2:
    writer[k] = k
writer.finish()

# Create a new Rdict with default options
d = Rdict("tmp")
d.ingest_external_file(["file1.sst", "file2.sst"])
d.close()

# reopen, check if all key-values are there
d = Rdict("tmp")
for k in rand_bytes2 + rand_bytes1:
    assert d[k] == k
d.close()

# delete tmp
Rdict.destroy("tmp")
Contribution
This project is still in an early stage of development. People are welcome to add tests, benchmarks and new features.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distributions
Built Distributions
Hashes for rocksdict-0.2.11-cp310-none-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | c565cb32b351c7078690d719c73c5f5b02ed63db6c3c768ff16f40351318cf42 |
|
MD5 | 5d03478e0b8a49db92724cee4ac3a335 |
|
BLAKE2b-256 | b4496f71de600a55b9e33f18ecacad43fb2d9e4073d29c700e3c953da39b9625 |
Hashes for rocksdict-0.2.11-cp310-none-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | a84250f2c63f0f7a61ad4c022f3558d06701a2a014851bada9d2d82061cd04db |
|
MD5 | f481c59bdcb2340aced68454db7b8e1d |
|
BLAKE2b-256 | c5709366bf74428802e7e74b311074905a831fd0ac1a90377a5e2ba51fb5572a |
Hashes for rocksdict-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | c1d7aff646cc81edb4e5ebf15ab4658942e890859a51860ea1ec339017eaecfd |
|
MD5 | 080843cb384f664feed9c30012625475 |
|
BLAKE2b-256 | f0afc5c3b926a839104a2ea8b49827565ce5dab80baceef749bd2b9d4b2d7428 |
Hashes for rocksdict-0.2.11-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | e8099dd417a2e6e936b7f0efb05433365677ee4277fb2be63c0548a42dd3b439 |
|
MD5 | 6978a0bf61967d14f18e2491a5d7dcd8 |
|
BLAKE2b-256 | 9fb170c8fe3519a772e82e5b219f18931c6820cf1158eecbd8ca784c4c1992bd |
Hashes for rocksdict-0.2.11-cp39-none-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 85d0b508b8b0e6176927509bb9dc7073d66bbe6128bd740acb7c5042199daada |
|
MD5 | 5c149605bbba623cc77a374652ad6550 |
|
BLAKE2b-256 | c6160e31ccac3784a32bf1c5df341114a3706751a6a684d7ade0a68d06247b89 |
Hashes for rocksdict-0.2.11-cp39-none-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 8c7027ea247021dede2aa7d04b752251314ca5fe078116f849eedd98b3f38032 |
|
MD5 | ab845cdf8fa465c6bf501fe13f26a350 |
|
BLAKE2b-256 | 8f32072f3aca0e98aefbfebd43ff21e255dc43aad81aee047d8df1cf3e813165 |
Hashes for rocksdict-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | a2fa68a7d20fb41705fef76a32c60b030487c325a90a94585e993fcaf3b7a064 |
|
MD5 | e1438008d75ceadc73821adbf495bd73 |
|
BLAKE2b-256 | 82a18de8037c2b5ce0ead3fc5e79e3ad58df52eb4bba8fc77ef38d6674ed3eb7 |
Hashes for rocksdict-0.2.11-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 019960913acce6be888f15da1a0f8c6ecfda279d31f7b6706053256a2870e83e |
|
MD5 | 538ea86b1df7bb7814c3a5cc56509aad |
|
BLAKE2b-256 | d247d966d650f581c597ba74d53c2f3e9edbb07b5f833fbe9e88c9c15c25befd |
Hashes for rocksdict-0.2.11-cp38-none-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 24396b993cb7dc56ac5f825ec21083bb84436bb4b340b4fefe7bd78171065b8d |
|
MD5 | 9a8b251cdd0a2022e27e7f10142ac257 |
|
BLAKE2b-256 | e2dd755fa8646e9ddb602e2d6e63c2df0fe5556e54ca634e1ea6980e0ea0451e |
Hashes for rocksdict-0.2.11-cp38-none-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | c328f54bfe534ddbb361172cce391a2915b5cd3b9881139b66edaac5c8a96bca |
|
MD5 | a95992afa632cf8d2e554136eef0fe8e |
|
BLAKE2b-256 | 0017066edaa2eac79bfe88a982f69cfbe5bab4d730996a514293eb4db2a2d688 |
Hashes for rocksdict-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 9acbffea7709411a0ba248cbe8b19d6950e20ebaa124af07699216f0c5486232 |
|
MD5 | 4a13cc1e563e6707aa1270533365fdac |
|
BLAKE2b-256 | 21d9a5b14a7a090f559fa4d39b2524876c50d72825fe8fae350e6a37c5f9a03a |
Hashes for rocksdict-0.2.11-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 2161c9c0ca42cbeb1c402f9387cb2d05d3977b037f44d05b10cb02d70db9bfcb |
|
MD5 | 687fa18b16a89fd5f254fd8885dbbac6 |
|
BLAKE2b-256 | b7bf7296220cc2af1be8f35a771b2bedeca4147dd256a8348cf978422df68d21 |
Hashes for rocksdict-0.2.11-cp37-none-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | b1dbdcc4096dde3781fddc44e15052dddecd55b70e427c076f4ed83aa309aa98 |
|
MD5 | 4466645f98e85c0f2ef90228b68b8180 |
|
BLAKE2b-256 | f262830271f519763ee0214f5d8a0c0ca9f775118fc3ee62b894165908f86fc3 |
Hashes for rocksdict-0.2.11-cp37-none-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | c5dc2effce35e413d98ca67acc228b56dd5eeee52509cd9bd96000a0b08b6f60 |
|
MD5 | 4e4708b25a87993e5a6d79430a733b0f |
|
BLAKE2b-256 | d926a3e340702e861f572a49b185f47cb9f14244af552e000ce7f40b4f70c2ef |
Hashes for rocksdict-0.2.11-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 92f146c4b7bd9d0cfd30ad5c62d732c6642b6aa4f61873131fe7f8820589899c |
|
MD5 | 8235e59678ff31918eb16bb9919be2db |
|
BLAKE2b-256 | 09a20dd8ce440bfd6186428bb42af8b1557c95c26d812e8f428c16e0f4ff9a9d |
Hashes for rocksdict-0.2.11-cp37-cp37m-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 1e78ed40e93239317d7b352504fd8e512d280d05d081726a9bbd52566099e98c |
|
MD5 | e3196e84bfa392cfd8fe7a2f477902d7 |
|
BLAKE2b-256 | f1f559caa345d6db38f40cf07db29c6052ed391d99a563cc778beb23a58da590 |
Hashes for rocksdict-0.2.11-cp36-none-win_amd64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | cc7f0fa0e6b70048a25f69f7248b1fbcfd338886c687a13111a92d19c3b84656 |
|
MD5 | 7a242163ed5755ba324e096928b9842f |
|
BLAKE2b-256 | f1af9e54aedb2261fd4c8bc8d675e595978f44d91d64f4cc5f93ba29652b236e |
Hashes for rocksdict-0.2.11-cp36-none-win32.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 036b95801fc3ed678b0a9cb2f95e5cc2bd852c2dd957f7b0a7b7b1593b4ef8c0 |
|
MD5 | 38ef12dde2410d255da318f9e045558a |
|
BLAKE2b-256 | f091524d3675e9883652200666dcf345341285ef60765fb5a93d7d3840957c9a |
Hashes for rocksdict-0.2.11-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 17fa9f5b244a1a2701637cee97c5182e1ab4813ee17642a96db678b83acd268a |
|
MD5 | 91805c090c8eb1fedfa724983efaa41e |
|
BLAKE2b-256 | 8904b9c6d69bf11ed1ef8c36a9303184cec99ee7aa69f6074995dd74f43b9f34 |
Hashes for rocksdict-0.2.11-cp36-cp36m-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 2cb467c41b632663200509359051d7e8c74b7e4046b7b17a78abb0482809c199 |
|
MD5 | 6dbc28773c29e807fd93fec72a165750 |
|
BLAKE2b-256 | a2ffdf2765b9d809cbc6cc50fb0b8a82df1cbe7a420c80e4da92c787184e82d3 |