Sparse and dense arrays backed by on-disk storage in Zarr or HDF5
Project description
Sparse (csc and csr) and dense arrays backed by on-disk storage in Zarr or HDF5. Allows accessing slices of larger than memory arrays. Inspired by h5sparse.
Examples
import backedarray as ba
import scipy.sparse
import numpy as np
import h5py
import zarr
Create Dataset
csr_matrix = scipy.sparse.random(100, 50, format="csr", density=0.2)
dense_array = csr_matrix.toarray()
HDF5 Backend
# Write sparse matrix in csc or csr format to hdf5 file
h5_csr_path = 'csr.h5'
with h5py.File(h5_csr_path, "w") as f:
ba.write_sparse(f.create_group("X"), csr_matrix)
# Write 2-d numpy array to hdf5
h5_dense_path = 'dense.h5'
with h5py.File(h5_dense_path, "w") as f:
f["X"] = dense_array
Zarr Backend
# Write sparse matrix in csc or csr format to zarr file
zarr_csr_path = 'csr.zarr'
with zarr.open(zarr_csr_path, mode="w") as f:
ba.write_sparse(f.create_group("X"), csr_matrix)
# Write 2-d numpy array to zarr format
zarr_dense_path = 'dense.zarr'
with zarr.open(zarr_dense_path, mode="w") as f:
f["X"] = dense_array
Read Dataset
HDF5 Backend
h5_csr_file = h5py.File(h5_csr_path, "r")
h5_csr_disk = ba.open(h5_csr_file["X"])
h5_dense_file = h5py.File(h5_dense_path, "r")
h5_dense_disk = ba.open(h5_dense_file["X"])
Zarr Backend
zarr_csr_disk = ba.open(zarr.open(zarr_csr_path)["X"])
zarr_dense_disk = ba.open(zarr.open(zarr_dense_path)["X"])
Numpy Style Indexing
zarr_csr_disk[1:3].toarray()
array([[0. , 0.25620103, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.57643237, 0.7628611 , 0. , 0. , 0. , 0.99872378, 0. , 0. , 0. , 0. , 0. , 0. , 0.82040632, 0. , 0.09788999, 0. , 0. , 0.67186548, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.24171919, 0. , 0. , 0. , 0. , 0.5893689 , 0. , 0. ], [0. , 0. , 0. , 0. , 0.1650544 , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.98852861, 0. , 0.01475572, 0. , 0.82875194, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.28405987, 0. , 0. , 0.72342298, 0. , 0. , 0. , 0.12985154, 0. ]])
zarr_dense_disk[-2:]
array([[0.51141143, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.87214978, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.95867897, 0. , 0.00825137, 0. , 0. , 0. , 0. , 0. , 0. , 0.29541905, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.68913921, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.87239577, 0. , 0.93164802, 0. , 0. ], [0. , 0. , 0. , 0.04102313, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.81888661, 0. , 0. , 0. , 0. , 0. , 0. , 0.18858683, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.83726992, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.60594181, 0.61483901, 0. , 0. , 0.37080615, 0.62691013]])
h5_csr_disk[2:].toarray()
array([[0. , 0. , 0. , ..., 0. , 0.12985154, 0. ], [0. , 0. , 0.56872386, ..., 0. , 0. , 0.36926708], [0. , 0. , 0.75702799, ..., 0.97589322, 0. , 0.34865313], ..., [0. , 0.14634835, 0. , ..., 0. , 0. , 0. ], [0.51141143, 0. , 0. , ..., 0.93164802, 0. , 0. ], [0. , 0. , 0. , ..., 0. , 0.37080615, 0.62691013]])
h5_csr_disk[...].toarray()
array([[0. , 0. , 0. , ..., 0. , 0. , 0. ], [0. , 0.25620103, 0. , ..., 0.5893689 , 0. , 0. ], [0. , 0. , 0. , ..., 0. , 0.12985154, 0. ], ..., [0. , 0.14634835, 0. , ..., 0. , 0. , 0. ], [0.51141143, 0. , 0. , ..., 0.93164802, 0. , 0. ], [0. , 0. , 0. , ..., 0. , 0.37080615, 0.62691013]])
h5_dense_disk[:2]
array([[0. , 0. , 0. , 0. , 0. , 0.71493443, 0.20460768, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.68284516, 0. , 0. , 0. , 0. , 0. , 0.93012152, 0. , 0. , 0.2165738 , 0. , 0. , 0. , 0.93954512, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.1808206 , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ], [0. , 0.25620103, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.57643237, 0.7628611 , 0. , 0. , 0. , 0.99872378, 0. , 0. , 0. , 0. , 0. , 0. , 0.82040632, 0. , 0.09788999, 0. , 0. , 0.67186548, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.24171919, 0. , 0. , 0. , 0. , 0.5893689 , 0. , 0. ]])
h5_csr_file.close()
h5_dense_file.close()
Append
zarr_csr_disk.append(csr_matrix)
np.testing.assert_array_equal(zarr_csr_disk[...].toarray(), scipy.sparse.vstack((csr_matrix, csr_matrix)).toarray())
Read h5ad files created using anndata
%%bash
if [ ! -f "pbmc3k.h5ad" ]; then
wget https://raw.githubusercontent.com/chanzuckerberg/cellxgene/main/example-dataset/pbmc3k.h5ad
fi
import anndata.experimental
with h5py.File('pbmc3k.h5ad', 'r') as f:
obs = anndata.experimental.read_elem(f['obs'])
var = anndata.experimental.read_elem(f['var'])
X = ba.open(f['X'])
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
backedarray-0.0.1.tar.gz
(10.1 kB
view details)
Built Distribution
File details
Details for the file backedarray-0.0.1.tar.gz
.
File metadata
- Download URL: backedarray-0.0.1.tar.gz
- Upload date:
- Size: 10.1 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.1 CPython/3.10.4
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 1188372a4817dad2c9858d70f0883fd044cb6f2d5ea53003d9aba71e7f4a7181 |
|
MD5 | 027516e55078ed50e8ddd87c3c42be5b |
|
BLAKE2b-256 | 9d8ef5574e5e8c66fbe099a9c0679ed632353b83399e03ae0b4c3d12d254b97c |
File details
Details for the file backedarray-0.0.1-py3-none-any.whl
.
File metadata
- Download URL: backedarray-0.0.1-py3-none-any.whl
- Upload date:
- Size: 9.7 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.1 CPython/3.10.4
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 64deb1c089dbaad2cc0b48a705e1cc8ed458289229e215090bbdc956ccf06476 |
|
MD5 | 896f7513f06191747cae355613a354a0 |
|
BLAKE2b-256 | 1b268b83a41f17138b1c4b7fb5c4937d2a085bcec7490943a47e035109c343ff |