Skip to main content

An embeddable, in-process search engine written in Rust

Project description

lucisearch

The SQLite of Search — an embeddable, in-process search engine.

No cluster to manage. No HTTP layer. No JVM. pip install and search.

pip install lucisearch

Quick Start

import luci

# Create an index with field mappings
index = luci.Index.create("products.luci", {
    "properties": {
        "title": {"type": "text"},
        "description": {"type": "text"},
        "category": {"type": "keyword"},
        "price": {"type": "float"},
        "in_stock": {"type": "boolean"},
    }
})

# Index documents
index.bulk([
    {"title": "Wireless Headphones", "description": "Noise-cancelling bluetooth headphones", "category": "electronics", "price": 79.99, "in_stock": True},
    {"title": "Running Shoes", "description": "Lightweight trail running shoes", "category": "sports", "price": 129.99, "in_stock": True},
    {"title": "Coffee Maker", "description": "Programmable drip coffee maker", "category": "kitchen", "price": 49.99, "in_stock": False},
])

# Search
results = index.search({"match": {"title": "headphones"}}, 10)
for hit in results["hits"]:
    print(f'{hit["_score"]:.2f}  {hit["_source"]["title"]}')

Queries

Luci supports the Elasticsearch query DSL. Pass any query as a Python dict.

Full-text search

# Single field
index.search({"match": {"title": "running shoes"}}, 10)

# Multiple fields
index.search({"multi_match": {"query": "wireless", "fields": ["title", "description"]}}, 10)

# Exact phrase
index.search({"match_phrase": {"description": "trail running"}}, 10)

Filtering and boolean logic

# Term query (exact match on keyword fields)
index.search({"term": {"category": "electronics"}}, 10)

# Bool query — combine must, should, must_not, filter
index.search({
    "bool": {
        "must": [{"match": {"title": "shoes"}}],
        "should": [{"term": {"brand": "nike"}}, {"term": {"brand": "adidas"}}],
        "filter": [
            {"term": {"in_stock": True}},
            {"range": {"price": {"lte": 100}}},
        ],
        "minimum_should_match": 1,
    }
}, 10)

# Prefix, wildcard, regexp, fuzzy
index.search({"prefix": {"category": "elec"}}, 10)
index.search({"fuzzy": {"title": {"value": "headphoens", "fuzziness": 1}}}, 10)

Sorting and pagination

# Sort by field
results = index.search({
    "query": {"match_all": {}},
    "sort": [{"price": "asc"}],
    "size": 10
})

# Pagination with from/size
results = index.search({
    "query": {"match_all": {}},
    "sort": ["price"],
    "from": 20,
    "size": 10
})

# Cursor-based pagination with search_after
results = index.search({
    "query": {"match_all": {}},
    "sort": ["price"],
    "size": 10,
    "search_after": [49.99]
})

Aggregations

# Terms aggregation
results = index.search({
    "query": {"match_all": {}},
    "aggs": {"categories": {"terms": {"field": "category"}}},
    "size": 0
})
for bucket in results["aggregations"]["categories"]["buckets"]:
    print(f'{bucket["key"]}: {bucket["doc_count"]}')

# Metric aggregations
results = index.search({
    "query": {"match_all": {}},
    "aggs": {
        "avg_price": {"avg": {"field": "price"}},
        "price_stats": {"stats": {"field": "price"}},
        "price_ranges": {"range": {
            "field": "price",
            "ranges": [{"to": 50}, {"from": 50, "to": 100}, {"from": 100}]
        }},
    },
    "size": 0
})

# Nested aggregations
results = index.search({
    "query": {"match_all": {}},
    "aggs": {"by_category": {
        "terms": {"field": "category"},
        "aggs": {"avg_price": {"avg": {"field": "price"}}},
    }},
    "size": 0
})

Highlighting

results = index.search({
    "query": {"match": {"description": "coffee"}},
    "highlight": {
        "fields": {"description": {}},
        "pre_tags": ["<b>"],
        "post_tags": ["</b>"],
    }
})
for hit in results["hits"]:
    print(hit.get("highlight", {}))

Vector search (kNN)

# Create index with vector field
index = luci.Index.create("vectors.luci", {
    "properties": {
        "title": {"type": "text"},
        "embedding": {"type": "dense_vector", "dims": 384},
    }
})

# kNN search
results = index.search({
    "query": {"knn": {
        "field": "embedding",
        "query_vector": [0.1, 0.2, ...],  # 384-dim vector
        "k": 10,
    }}
}, 10)

# kNN with similarity threshold
results = index.search({
    "query": {"knn": {
        "field": "embedding",
        "query_vector": query_vector,
        "k": 50,
        "threshold": 0.7,  # exclude low-similarity results
    }}
}, 10)

# kNN inside bool (vector as filter)
results = index.search({
    "query": {"bool": {
        "must": [{"match": {"title": "headphones"}}],
        "filter": [{"knn": {
            "field": "embedding",
            "query_vector": query_vector,
            "k": 100,
        }}],
    }}
}, 10)

Hybrid search (RRF fusion)

# Reciprocal Rank Fusion — combine text + vector results
results = index.search({
    "query": {"fusion": {
        "sources": [
            {"match": {"title": "wireless headphones"}},
            {"knn": {
                "field": "embedding",
                "query_vector": query_vector,
                "k": 50,
            }},
        ],
        "method": "rrf",  # or "sum", "arithmetic_mean"
    }}
}, 10)

# Weighted fusion with 3 sources
results = index.search({
    "query": {"fusion": {
        "sources": [
            {"match": {"title": "headphones"}},
            {"term": {"brand": "sony"}},
            {"knn": {"field": "embedding", "query_vector": qv, "k": 50}},
        ],
        "method": "rrf",
        "weights": [1.0, 0.5, 2.0],
        "rank_window_size": 100,
    }}
}, 10)

Geospatial queries

# Create index with geo fields
index = luci.Index.create("places.luci", {
    "properties": {
        "name": {"type": "text"},
        "location": {"type": "geo_point"},
    }
})

# Geo distance
index.search({
    "geo_distance": {
        "distance": "10km",
        "location": {"lat": 40.7128, "lon": -74.0060}
    }
}, 10)

# Geo bounding box
index.search({
    "geo_bounding_box": {
        "location": {
            "top_left": {"lat": 41.0, "lon": -74.5},
            "bottom_right": {"lat": 40.5, "lon": -73.5}
        }
    }
}, 10)

Document CRUD

# Add with explicit ID
index.add({"_id": "prod-1", "title": "Widget", "price": 9.99})

# Get by ID
doc = index.get("prod-1")

# Update (partial merge)
index.update("prod-1", {"price": 7.99})

# Delete by ID
index.delete("prod-1")

# Delete by query
index.delete_by_query({"term": {"category": "discontinued"}})

# Count
count = index.count({"term": {"in_stock": True}})

Transactions

By default, add() and bulk() auto-commit after every call. For batch operations with atomic commit/rollback semantics, use a transaction:

# Sync transaction
with index.transaction() as txn:
    txn.add({"title": "doc 1", "category": "tech"})
    txn.add({"title": "doc 2", "category": "science"})
    # commits on clean exit, rolls back on exception

# Async transaction (for asyncio)
async with index.async_transaction() as txn:
    txn.add({"title": "doc 3"})
    txn.add({"title": "doc 4"})

While a transaction is open, add() and bulk() from other threads block until the transaction completes.

Multi-process Safety

Multiple processes can read the same .luci file concurrently. Writes are serialized via a cross-process lock — the second writer blocks until the first finishes (with a configurable timeout).

# Set write lock timeout (default: 5 seconds)
index = luci.Index.create("shared.luci", write_timeout=10.0)

# Change mid-session
index.set_write_timeout(2.0)

# Per-operation override
index.add(doc, write_timeout=1.0)
index.bulk(docs, write_timeout=30.0)

Field Types

Type | Description
text | Full-text search with BM25 scoring and analysis
keyword | Exact match, sorting, aggregations
integer, long | Signed integers
float, double | Floating point numbers
boolean | true / false
date | Date/time values
dense_vector | Fixed-dimension float vectors (cosine, L2, dot product; int8 quantization)
geo_point | Latitude/longitude pairs
geo_shape | Polygons, multipolygons with spatial relations
nested | Arrays of objects with independent field scoping

Features

  • Full-text search with BM25 scoring, analyzers, phrase queries, fuzzy matching
  • Vector search with HNSW, int8 quantization, pre-filtering
  • Hybrid search with Reciprocal Rank Fusion (RRF)
  • 20+ aggregation types — terms, avg, sum, min, max, stats, range, histogram, cardinality, percentiles, date_histogram, geo_bounds, filters, nested, and more
  • Geospatial — geo_distance, geo_bounding_box, geo_shape with all spatial relations
  • Nested documents with block-join queries and inner_hits
  • Highlighting with custom tags, per-field configuration
  • Sort by field — keyword, numeric, score, with multi-level sort
  • Pagination — from/size and cursor-based search_after
  • Collapse — deduplicate results by a keyword field
  • Explain — BM25 score breakdowns
  • Rescore — two-phase scoring with custom query weights
  • Single-file storage — one .luci file, no directory sprawl
  • Auto-commit — documents are searchable immediately after add() or bulk()
  • Transactions — batch writes with atomic commit/rollback (sync and async)
  • Multi-process safe — cross-process file locking with configurable timeout
  • ES-compatible JSON query DSL — same queries, same field types

License

MIT

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

lucisearch-0.6.1.tar.gz (576.5 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

lucisearch-0.6.1-cp39-cp39-macosx_11_0_arm64.whl (1.8 MB view details)

Uploaded CPython 3.9 macOS 11.0+ ARM64

File details

Details for the file lucisearch-0.6.1.tar.gz.

File metadata

  • Download URL: lucisearch-0.6.1.tar.gz
  • Upload date:
  • Size: 576.5 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: maturin/1.12.6

File hashes

Hashes for lucisearch-0.6.1.tar.gz
Algorithm Hash digest
SHA256 0ef52a8cb544d1cedd544b1ea14f19fcc742cd480d4d6cde1aed3283c826dbf1
MD5 7d82b8af2a5b82af179fa4fd1cf18c0a
BLAKE2b-256 dec0452f4ac335e19205a16f273efb9741a033348a9d6a5acbf2b0b7e8011d4b

See more details on using hashes here.

File details

Details for the file lucisearch-0.6.1-cp39-cp39-macosx_11_0_arm64.whl.

File metadata

File hashes

Hashes for lucisearch-0.6.1-cp39-cp39-macosx_11_0_arm64.whl
Algorithm Hash digest
SHA256 c251a43eadb08a15c514b6900f7f6a5cb146f944a2fdaf01fb0121de9456d841
MD5 d4a0bee7b00767d7964683aed68db5ea
BLAKE2b-256 b0f82c9d19e8a26bc23a3692eabfcb50b3878d5c16e28c599260f24afc104ee1

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page