No project description provided
Project description
Milvus Document Store for Haystack
Installation
pip install milvus-haystack
Usage
First, to start up a Milvus service, follow the 'Start Milvus' instructions in the documentation.
Then, to use the MilvusDocumentStore
in a Haystack pipeline"
from haystack import Document
from milvus_haystack import MilvusDocumentStore
document_store = MilvusDocumentStore()
documents = [Document(
content="A Foo Document",
meta={"page": "100", "chapter": "intro"},
embedding=[-10.0] * 128,
)]
document_store.write_documents(documents)
document_store.count_documents() # 1
Dive deep usage
Here are the ways to build index, retrieval, and build rag pipeline respectively.
Create the indexing Pipeline and index some documents
import glob
import os
from haystack import Pipeline
from haystack.components.converters import MarkdownToDocument
from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.writers import DocumentWriter
from milvus_haystack import MilvusDocumentStore
from milvus_haystack.milvus_embedding_retriever import MilvusEmbeddingRetriever
file_paths = glob.glob("./your_docs.md")
document_store = MilvusDocumentStore(
connection_args={
"host": "localhost",
"port": "19530",
"user": "",
"password": "",
"secure": False,
},
drop_old=True,
)
indexing_pipeline = Pipeline()
indexing_pipeline.add_component("converter", MarkdownToDocument())
indexing_pipeline.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
indexing_pipeline.add_component("embedder", SentenceTransformersDocumentEmbedder())
indexing_pipeline.add_component("writer", DocumentWriter(document_store))
indexing_pipeline.connect("converter", "splitter")
indexing_pipeline.connect("splitter", "embedder")
indexing_pipeline.connect("embedder", "writer")
indexing_pipeline.run({"converter": {"sources": file_paths}})
print("Number of documents:", document_store.count_documents())
Create the retrieval pipeline and try a query
question = "What is Milvus?"
retrieval_pipeline = Pipeline()
retrieval_pipeline.add_component("embedder", SentenceTransformersTextEmbedder())
retrieval_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
retrieval_pipeline.connect("embedder", "retriever")
retrieval_results = retrieval_pipeline.run({"embedder": {"text": question}})
for doc in retrieval_results["retriever"]["documents"]:
print(doc.content)
print("-" * 10)
Create the RAG pipeline and try a query
from haystack.utils import Secret
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
prompt_template = """Answer the following query based on the provided context. If the context does
not include an answer, reply with 'I don't know'.\n
Query: {{query}}
Documents:
{% for doc in documents %}
{{ doc.content }}
{% endfor %}
Answer:
"""
rag_pipeline = Pipeline()
rag_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder())
rag_pipeline.add_component("retriever", MilvusEmbeddingRetriever(document_store=document_store, top_k=3))
rag_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))
rag_pipeline.add_component("generator", OpenAIGenerator(api_key=Secret.from_token(os.getenv("OPENAI_API_KEY")),
generation_kwargs={"temperature": 0}))
rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder", "generator")
results = rag_pipeline.run(
{
"text_embedder": {"text": question},
"prompt_builder": {"query": question},
}
)
print('RAG answer:', results["generator"]["replies"][0])
License
milvus-haystack
is distributed under the terms of the Apache-2.0 license.
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
milvus_haystack-0.0.5.tar.gz
(17.7 kB
view hashes)
Built Distribution
Close
Hashes for milvus_haystack-0.0.5-py3-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | d250309b8d33598fefc64198c1c1d761c2a4c42612278499658e3bc3b94d8638 |
|
MD5 | 8138476464501663e7facbc9a34fc52c |
|
BLAKE2b-256 | 2fc9bfacb4242b398f6c1a93466ff9e062053a13529f10f97ac625af61f9ed02 |