An embeddable, in-process search engine written in Rust
Project description
lucisearch
The SQLite of Search — an embeddable, in-process search engine.
No cluster to manage. No HTTP layer. No JVM. pip install and search.
pip install lucisearch
Quick Start
import luci
# Create an index with field mappings
index = luci.Index.create("products.luci", {
"properties": {
"title": {"type": "text"},
"description": {"type": "text"},
"category": {"type": "keyword"},
"price": {"type": "float"},
"in_stock": {"type": "boolean"},
}
})
# Index documents
index.bulk([
{"title": "Wireless Headphones", "description": "Noise-cancelling bluetooth headphones", "category": "electronics", "price": 79.99, "in_stock": True},
{"title": "Running Shoes", "description": "Lightweight trail running shoes", "category": "sports", "price": 129.99, "in_stock": True},
{"title": "Coffee Maker", "description": "Programmable drip coffee maker", "category": "kitchen", "price": 49.99, "in_stock": False},
])
# Search
results = index.search({"match": {"title": "headphones"}}, 10)
for hit in results.hits:
print(f'{hit.score:.2f} {hit.source["title"]}')
Queries
Luci supports the Elasticsearch query DSL. Pass any query as a Python dict.
Full-text search
# Single field
index.search({"match": {"title": "running shoes"}}, 10)
# Multiple fields
index.search({"multi_match": {"query": "wireless", "fields": ["title", "description"]}}, 10)
# Exact phrase
index.search({"match_phrase": {"description": "trail running"}}, 10)
Filtering and boolean logic
# Term query (exact match on keyword fields)
index.search({"term": {"category": "electronics"}}, 10)
# Bool query — combine must, should, must_not, filter
index.search({
"bool": {
"must": [{"match": {"title": "shoes"}}],
"should": [{"term": {"brand": "nike"}}, {"term": {"brand": "adidas"}}],
"filter": [
{"term": {"in_stock": True}},
{"range": {"price": {"lte": 100}}},
],
"minimum_should_match": 1,
}
}, 10)
# Prefix, wildcard, regexp, fuzzy
index.search({"prefix": {"category": "elec"}}, 10)
index.search({"fuzzy": {"title": {"value": "headphoens", "fuzziness": 1}}}, 10)
Sorting and pagination
# Sort by field
results = index.search({
"query": {"match_all": {}},
"sort": [{"price": "asc"}],
"size": 10
})
# Pagination with from/size
results = index.search({
"query": {"match_all": {}},
"sort": ["price"],
"from": 20,
"size": 10
})
# Cursor-based pagination with search_after
results = index.search({
"query": {"match_all": {}},
"sort": ["price"],
"size": 10,
"search_after": [49.99]
})
Aggregations
# Terms aggregation
results = index.search({
"query": {"match_all": {}},
"aggs": {"categories": {"terms": {"field": "category"}}},
"size": 0
})
for bucket in results.aggregations["categories"]["buckets"]:
print(f'{bucket["key"]}: {bucket["doc_count"]}')
# Metric aggregations
results = index.search({
"query": {"match_all": {}},
"aggs": {
"avg_price": {"avg": {"field": "price"}},
"price_stats": {"stats": {"field": "price"}},
"price_ranges": {"range": {
"field": "price",
"ranges": [{"to": 50}, {"from": 50, "to": 100}, {"from": 100}]
}},
},
"size": 0
})
# Nested aggregations
results = index.search({
"query": {"match_all": {}},
"aggs": {"by_category": {
"terms": {"field": "category"},
"aggs": {"avg_price": {"avg": {"field": "price"}}},
}},
"size": 0
})
Highlighting
Highlighting is a lazy per-hit method, not a request-body parameter.
Call hit.highlight(field) to get a list of structured Highlight
spans (text, start, end) — the consumer chooses how to render
them.
results = index.search({"query": {"match": {"description": "coffee"}}})
for hit in results.hits:
for span in hit.highlight("description"):
print(f"matched {span.text!r} at {span.start}..{span.end}")
Vector search (kNN)
# Create index with vector field
index = luci.Index.create("vectors.luci", {
"properties": {
"title": {"type": "text"},
"embedding": {"type": "dense_vector", "dims": 384},
}
})
# kNN search
results = index.search({
"query": {"knn": {
"field": "embedding",
"query_vector": [0.1, 0.2, ...], # 384-dim vector
"k": 10,
}}
}, 10)
# kNN with similarity threshold
results = index.search({
"query": {"knn": {
"field": "embedding",
"query_vector": query_vector,
"k": 50,
"threshold": 0.7, # exclude low-similarity results
}}
}, 10)
# kNN inside bool (vector as filter)
results = index.search({
"query": {"bool": {
"must": [{"match": {"title": "headphones"}}],
"filter": [{"knn": {
"field": "embedding",
"query_vector": query_vector,
"k": 100,
}}],
}}
}, 10)
Hybrid search (RRF fusion)
# Reciprocal Rank Fusion — combine text + vector results
results = index.search({
"query": {"fusion": {
"sources": [
{"match": {"title": "wireless headphones"}},
{"knn": {
"field": "embedding",
"query_vector": query_vector,
"k": 50,
}},
],
"method": "rrf", # or "sum", "arithmetic_mean"
}}
}, 10)
# Weighted fusion with 3 sources
results = index.search({
"query": {"fusion": {
"sources": [
{"match": {"title": "headphones"}},
{"term": {"brand": "sony"}},
{"knn": {"field": "embedding", "query_vector": qv, "k": 50}},
],
"method": "rrf",
"weights": [1.0, 0.5, 2.0],
"rank_window_size": 100,
}}
}, 10)
Geospatial queries
# Create index with geo fields
index = luci.Index.create("places.luci", {
"properties": {
"name": {"type": "text"},
"location": {"type": "geo_point"},
}
})
# Geo distance
index.search({
"geo_distance": {
"distance": "10km",
"location": {"lat": 40.7128, "lon": -74.0060}
}
}, 10)
# Geo bounding box
index.search({
"geo_bounding_box": {
"location": {
"top_left": {"lat": 41.0, "lon": -74.5},
"bottom_right": {"lat": 40.5, "lon": -73.5}
}
}
}, 10)
Document CRUD
# Add with explicit ID
index.add({"_id": "prod-1", "title": "Widget", "price": 9.99})
# Get by ID
doc = index.get("prod-1")
# Update (partial merge)
index.update("prod-1", {"price": 7.99})
# Delete by ID
index.delete("prod-1")
# Delete by query
index.delete_by_query({"term": {"category": "discontinued"}})
# Count
count = index.count({"term": {"in_stock": True}})
Transactions
By default, add() and bulk() auto-commit after every call. For batch
operations with atomic commit/rollback semantics, use a transaction:
# Sync transaction
with index.transaction() as txn:
txn.add({"title": "doc 1", "category": "tech"})
txn.add({"title": "doc 2", "category": "science"})
# commits on clean exit, rolls back on exception
# Async transaction (for asyncio)
async with index.async_transaction() as txn:
txn.add({"title": "doc 3"})
txn.add({"title": "doc 4"})
While a transaction is open, add() and bulk() from other threads
block until the transaction completes.
Multi-process Safety
Multiple processes can read the same .luci file concurrently. Writes
are serialized via a cross-process lock — the second writer blocks until
the first finishes (with a configurable timeout).
# Set write lock timeout (default: 5 seconds)
index = luci.Index.create("shared.luci", write_timeout=10.0)
# Change mid-session
index.set_write_timeout(2.0)
# Per-operation override
index.add(doc, write_timeout=1.0)
index.bulk(docs, write_timeout=30.0)
Field Types
| Type | Description |
|---|---|
| text | Full-text search with BM25 scoring and analysis |
| keyword | Exact match, sorting, aggregations |
| integer, long | Signed integers |
| float, double | Floating point numbers |
| boolean | true / false |
| date | Date/time values |
| dense_vector | Fixed-dimension float vectors (cosine, L2, dot product; int8 quantization) |
| geo_point | Latitude/longitude pairs |
| geo_shape | Polygons, multipolygons with spatial relations |
| nested | Arrays of objects with independent field scoping |
Features
- Full-text search with BM25 scoring, analyzers, phrase queries, fuzzy matching
- Vector search with HNSW, int8 quantization, pre-filtering
- Hybrid search with Reciprocal Rank Fusion (RRF)
- 20+ aggregation types — terms, avg, sum, min, max, stats, range, histogram, cardinality, percentiles, date_histogram, geo_bounds, filters, nested, and more
- Geospatial — geo_distance, geo_bounding_box, geo_shape with all spatial relations
- Nested documents with block-join queries and inner_hits
- Highlighting with custom tags, per-field configuration
- Sort by field — keyword, numeric, score, with multi-level sort
- Pagination — from/size and cursor-based search_after
- Collapse — deduplicate results by a keyword field
- Explain — BM25 score breakdowns
- Rescore — two-phase scoring with custom query weights
- Single-file storage — one .luci file, no directory sprawl
- Auto-commit — documents are searchable immediately after add() or bulk()
- Transactions — batch writes with atomic commit/rollback (sync and async)
- Multi-process safe — cross-process file locking with configurable timeout
- ES-compatible JSON query DSL — same queries, same field types
License
MIT
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distributions
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file lucisearch-0.7.0-cp311-abi3-macosx_11_0_arm64.whl.
File metadata
- Download URL: lucisearch-0.7.0-cp311-abi3-macosx_11_0_arm64.whl
- Upload date:
- Size: 2.2 MB
- Tags: CPython 3.11+, macOS 11.0+ ARM64
- Uploaded using Trusted Publishing? No
- Uploaded via: uv/0.9.26 {"installer":{"name":"uv","version":"0.9.26","subcommand":["publish"]},"python":null,"implementation":{"name":null,"version":null},"distro":{"name":"macOS","version":null,"id":null,"libc":null},"system":{"name":null,"release":null},"cpu":null,"openssl_version":null,"setuptools_version":null,"rustc_version":null,"ci":null}
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | acd4b30fbb69220d438aa1e520d7c04a729762b1af280752d516637bfe4ffb03 |
| MD5 | 4a10109181d115d97ba60d54dace6c50 |
| BLAKE2b-256 | ed656efc2069ea8c7091a0077af56ab58472299d795183723a6c5046dcf1f1d0 |