Storage abstraction for SQLAlchemy models with local and S3 backends, streaming uploads, and orphan garbage collection.
Project description
granite-storage
granite-storage is a lightweight, backend-agnostic storage abstraction for SQLAlchemy 2 models. It decouples the physical storage of files from your ORM models with a clean API designed for FastAPI services and any async-friendly Python application.
Features
- Backend-agnostic — swap between local filesystem and Amazon S3 without changing application code.
- Policy-driven routing — configure max_size, key_prefix, and backend per storage slot.
- Streaming uploads — memory-safe streaming with automatic size enforcement via SizeLimitedStream.
- SQLAlchemy integration — StoredContentMixin and StoredObjectRefType turn any ORM model into a file-aware model with zero boilerplate.
- FastAPI support — set_content_from_uploadfile() accepts a FastAPI UploadFile directly.
- Garbage collection — StorageGarbageCollector scans for and removes orphaned objects.
- Alembic helpers — portable JSON/JSONB column type for cross-database migrations.
- Type-safe — full type hints throughout; compatible with mypy strict mode.
Requirements
- Python 3.11+
- SQLAlchemy 2.0+
- boto3 1.34+ (S3 backend)
Installation
pip install granite-storage
With UV:
uv add granite-storage
For development:
git clone https://github.com/impalah/granite-storage.git
cd granite-storage
uv sync
Quick Start
1. Configure backends and policies
from granite_storage import StorageManager, StoragePolicy
from granite_storage.backends.local import LocalStorageBackend
manager = StorageManager(
backends={
"local": LocalStorageBackend("./var/storage"),
},
policies={
"avatars": StoragePolicy(
storage_key="avatars",
backend_key="local",
max_size=2 * 1024 * 1024, # 2 MB
key_prefix="avatars",
),
"documents": StoragePolicy(
storage_key="documents",
backend_key="local",
max_size=10 * 1024 * 1024, # 10 MB
key_prefix="docs",
),
},
)
2. Define a file-aware SQLAlchemy model
import uuid
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from sqlalchemy.dialects.postgresql import UUID
from granite_storage import (
StoredContentMixin,
StoredObjectRef,
StoredObjectRefType,
)
class Base(DeclarativeBase):
pass
class Document(StoredContentMixin, Base):
__tablename__ = "document"
__stored_content_field_name__ = "_file_ref"
__stored_content_storage_key__ = "documents"
id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
title: Mapped[str] = mapped_column()
_file_ref: Mapped[StoredObjectRef | None] = mapped_column(
StoredObjectRefType(), nullable=True
)
# Inject the manager once at startup
Document.configure_storage_manager(manager)
3. Store and retrieve content
with Session(engine) as session:
doc = Document(title="Annual Report")
session.add(doc)
session.flush() # ensure id is set before storing the file
# From bytes
doc.set_content(pdf_bytes, filename="report.pdf", content_type="application/pdf")
session.commit()
# From a stream (memory-safe for large files)
with open("video.mp4", "rb") as f:
doc.set_content_from_stream(f, filename="video.mp4", content_type="video/mp4")
session.commit()
# From a FastAPI UploadFile
await doc.set_content_from_uploadfile(upload_file)
session.commit()
# Read back
data: bytes = doc.get_content()
with doc.open_content() as fh:
first_chunk = fh.read(8192)
# Replace (returns previous ref for manual cleanup or GC)
result = doc.replace_content(new_bytes, filename="report-v2.pdf")
# result.previous_ref — the old StoredObjectRef
# result.new_ref — the new StoredObjectRef
# Remove the reference (does NOT delete the physical file)
old_ref = doc.clear_content_reference()
session.commit()
4. Use the manager directly (without the mixin)
ref = manager.put_stream(
storage_key="avatars",
model_name="user",
entity_id=str(user.id),
field_name="avatar",
stream=image_stream,
content_type="image/jpeg",
original_filename="photo.jpg",
)
# ref is a StoredObjectRef — persist it in your database as JSON
data = manager.get(ref)
manager.delete(ref)
5. S3 backend
import boto3
from granite_storage.backends.s3 import S3StorageBackend
manager = StorageManager(
backends={
"s3": S3StorageBackend(
bucket="my-app-uploads",
prefix="production",
client=boto3.client("s3", region_name="us-east-1"),
),
},
policies={
"avatars": StoragePolicy("avatars", backend_key="s3", max_size=2*1024*1024),
},
)
Recommended Workflow
- Create the entity and call session.add(obj).
- Call session.flush() so the primary key is available before storing the file.
- Call set_content(), set_content_from_stream(), or set_content_from_uploadfile().
- Call session.commit().
- If the transaction fails, the physical object may become orphaned — the garbage collector will clean it up later.
Error Handling
from granite_storage import ContentTooLargeError, StorageError
try:
ref = manager.put_bytes(storage_key="avatars", ..., content=huge_bytes)
except ContentTooLargeError:
# Content exceeds the policy max_size — return HTTP 413
...
except StorageError:
# Unknown policy, backend failure, etc.
...
Garbage Collection
from granite_storage.gc import StorageGarbageCollector
from granite_storage.integrations.sqlalchemy import iter_model_storage_refs
with Session(engine) as session:
gc = StorageGarbageCollector(
manager=manager,
iter_references=lambda: iter_model_storage_refs(session, Document, "_file_ref"),
)
# Dry run
report = gc.collect(storage_key="documents", dry_run=True)
print(f"Orphaned: {report.orphaned} / {report.scanned}")
# Delete orphans
report = gc.collect(storage_key="documents", dry_run=False)
print(f"Deleted: {report.deleted}")
Alembic Migration Helper
from alembic import op
import sqlalchemy as sa
from granite_storage import portable_storage_ref_type
def upgrade():
op.add_column(
"document",
sa.Column("_file_ref", portable_storage_ref_type(), nullable=True),
)
Development
# Run tests
make test
# Run tests with coverage
make test-cov
# Lint and format
make lint
make format
# Type check
make type-check
# Build documentation
make docs
# Serve documentation locally (http://localhost:8000)
make docs-serve
Documentation
Full documentation (API reference, user guide, SQLAlchemy integration, garbage collection,
and a guide for implementing custom backends) is available in docs/ after running make docs,
or in the source files under docs_source/.
License
MIT — see LICENSE for details.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file granite_storage-0.1.0.tar.gz.
File metadata
- Download URL: granite_storage-0.1.0.tar.gz
- Upload date:
- Size: 8.7 MB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: uv/0.11.8 {"installer":{"name":"uv","version":"0.11.8","subcommand":["publish"]},"python":null,"implementation":{"name":null,"version":null},"distro":{"name":"Ubuntu","version":"24.04","id":"noble","libc":null},"system":{"name":null,"release":null},"cpu":null,"openssl_version":null,"setuptools_version":null,"rustc_version":null,"ci":true}
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | b00d38964ec0cfde37d37d00b86c6175d6ae51e67310fea87aee8fd2d2757301 |
| MD5 | d7965af463408bd3e01d9b9e76f57b5a |
| BLAKE2b-256 | 7abe47dc7c479a60c5b579cca59ffe93fa3ad236582777ec0daf92d70ff095f2 |
File details
Details for the file granite_storage-0.1.0-py3-none-any.whl.
File metadata
- Download URL: granite_storage-0.1.0-py3-none-any.whl
- Upload date:
- Size: 15.9 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: uv/0.11.8 {"installer":{"name":"uv","version":"0.11.8","subcommand":["publish"]},"python":null,"implementation":{"name":null,"version":null},"distro":{"name":"Ubuntu","version":"24.04","id":"noble","libc":null},"system":{"name":null,"release":null},"cpu":null,"openssl_version":null,"setuptools_version":null,"rustc_version":null,"ci":true}
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | a59aac12f2aec0d632fc010c6fe1cdc4488acb246399c2b948dfa9150f3bb5b3 |
| MD5 | 985e0cf09f035f6a52ec318100581c00 |
| BLAKE2b-256 | 51d55bff06d0ba4e0638a2c22c985c84fe60f1e11f779c182d1d9f655ac47f03 |