llama-index vector_stores oracle database integration
Project description
LlamaIndex VectorStore Integration for Oracle
This is a very basic example of how to use Oracle as a vector store with LlamaIndex. For a detailed guide, see https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/orallamavs.ipynb
pip install llama-index-vector-stores-oracledb
A sample example
import sys
from typing import TYPE_CHECKING

# NOTE: oracledb is used at runtime below (oracledb.connect), so it must be a
# real import. Guarding it behind `if TYPE_CHECKING:` (as the original did)
# raises NameError the moment the connection is attempted.
import oracledb

from llama_index.core.schema import Document, TextNode
from llama_index.readers.oracleai import OracleReader, OracleTextSplitter
from llama_index.embeddings.oracleai import OracleEmbeddings
from llama_index.utils.oracleai import OracleSummary
from llama_index.vector_stores.oracledb import OraLlamaVS, DistanceStrategy
from llama_index.vector_stores.oracledb import base as orallamavs
"""
In this sample example, we will use 'database' provider for both summary and embeddings.
So, we don't need to do the following:
- set proxy for 3rd party providers
- create credential for 3rd party providers
If you choose to use 3rd party provider,
please follow the necessary steps for proxy and credential.
"""
# Oracle connection
# Please update with your username, password, hostname, and service_name.
username = "testuser"
password = "testuser"
dsn = "<hostname/service_name>"
try:
    conn = oracledb.connect(user=username, password=password, dsn=dsn)
    print("Connection successful!")
except Exception as e:
    # Surface the underlying driver error instead of swallowing it — a bare
    # "Connection failed!" gives the user nothing to debug with.
    print(f"Connection failed: {e}")
    sys.exit(1)
# Load the ONNX embedding model into the database.
# Please update with your directory object, file name, and model name.
onnx_dir = "DEMO_PY_DIR"
onnx_file = "tinybert.onnx"
model_name = "demo_model"
try:
    OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)
    print("ONNX model loaded.")
except Exception as e:
    # Report the actual failure reason; the exception was previously
    # captured but never shown.
    print(f"ONNX model loading failed: {e}")
    sys.exit(1)
# Parameters for each pipeline stage.
# Please update the fields below with your own schema and model information.
# OracleReader: reads rows from <owner>.<tablename>, using <colname> as the
# document body.
loader_params = {
    "owner": "testuser",
    "tablename": "demo_tab",
    "colname": "data",
}
# OracleSummary with the in-database ("database") provider — no external
# summarization service or credential is needed.
# NOTE(review): "glevel": "S" presumably selects sentence-level summaries and
# "numParagraphs" caps the summary length — confirm against Oracle Text docs.
summary_params = {
    "provider": "database",
    "glevel": "S",
    "numParagraphs": 1,
    "language": "english",
}
# OracleTextSplitter: "normalize": "all" — presumably applies all text
# normalizations before chunking; verify against the splitter documentation.
splitter_params = {"normalize": "all"}
# OracleEmbeddings: embeds in-database using the ONNX model registered above
# under the name "demo_model".
embedder_params = {"provider": "database", "model": "demo_model"}
# Instantiate loader, summary, splitter, and embedder — all four share the
# single database connection opened earlier.
loader = OracleReader(conn=conn, params=loader_params)
summary = OracleSummary(conn=conn, params=summary_params)
splitter = OracleTextSplitter(conn=conn, params=splitter_params)
embedder = OracleEmbeddings(conn=conn, params=embedder_params)
# Process the documents: summarize, chunk, and embed each one, wrapping every
# chunk in a TextNode that carries provenance metadata.
# (The redundant second `OracleReader(...)` instantiation was removed — the
# loader created above is reused directly.)
docs = loader.load()
chunks_with_mdata = []
for doc_id, doc in enumerate(docs, start=1):
    summ = summary.get_summary(doc.text)
    chunks = splitter.split_text(doc.text)
    for chunk_id, chunk in enumerate(chunks, start=1):
        chunk_metadata = doc.metadata.copy()
        # Unique chunk id: <source row oid>$<document #>$<chunk #>
        chunk_metadata["id"] = (
            chunk_metadata["_oid"] + "$" + str(doc_id) + "$" + str(chunk_id)
        )
        chunk_metadata["document_id"] = str(doc_id)
        chunk_metadata["document_summary"] = str(summ[0])
        textnode = TextNode(
            text=chunk,
            id_=chunk_metadata["id"],
            # Use the public embedding API rather than the private
            # _get_text_embedding helper.
            embedding=embedder.get_text_embedding(chunk),
            metadata=chunk_metadata,
        )
        chunks_with_mdata.append(textnode)
""" verify """
print(f"Number of total chunks with metadata: {len(chunks_with_mdata)}")
# create Oracle AI Vector Store
vectorstore = OraLlamaVS.from_documents(
client=conn,
docs=chunks_with_mdata,
table_name="oravs",
distance_strategy=DistanceStrategy.DOT_PRODUCT,
)
""" verify """
print(f"Vector Store Table: {vectorstore.table_name}")
# Create Index
orallamavs.create_index(
conn, vectorstore, params={"idx_name": "hnsw_oravs", "idx_type": "HNSW"}
)
print("Index created.")
# Perform Semantic Search
query = "What is Oracle AI Vector Store?"
filter = {"document_id": ["1"]}
# Similarity search without a filter
print(vectorstore.similarity_search(query, 1))
# Similarity search with a filter
print(vectorstore.similarity_search(query, 1, filter=filter))
# Similarity search with relevance score
print(vectorstore.similarity_search_with_score(query, 1))
# Similarity search with relevance score with filter
print(vectorstore.similarity_search_with_score(query, 1, filter=filter))
# Max marginal relevance search
print(
vectorstore.max_marginal_relevance_search(
query, 1, fetch_k=20, lambda_mult=0.5
)
)
# Max marginal relevance search with filter
print(
vectorstore.max_marginal_relevance_search(
query, 1, fetch_k=20, lambda_mult=0.5, filter=filter
)
)
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
File details
Details for the file llama_index_vector_stores_oracledb-0.2.0.tar.gz
.
File metadata
- Download URL: llama_index_vector_stores_oracledb-0.2.0.tar.gz
- Upload date:
- Size: 10.1 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: poetry/1.8.3 CPython/3.11.10 Darwin/22.3.0
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | f3ef32788047ee39c9288fd20b419b3079c4561d3b1213b2f0bd3bf1e98db582 |
|
MD5 | 70c382af824986ba4b53cd125ec64f1f |
|
BLAKE2b-256 | 4a509659e3a1ec90db90d0e83a241d801ff30a2225a28f2ac296e79b5ac45cf2 |
File details
Details for the file llama_index_vector_stores_oracledb-0.2.0-py3-none-any.whl
.
File metadata
- Download URL: llama_index_vector_stores_oracledb-0.2.0-py3-none-any.whl
- Upload date:
- Size: 9.2 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: poetry/1.8.3 CPython/3.11.10 Darwin/22.3.0
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 044dc28b6a1028fa34390ca06f7a2c4b6917152936be4a8683fe28982c374c23 |
|
MD5 | ceb22f5d08962666ae619c205f998973 |
|
BLAKE2b-256 | f88c8b333b8f4681309cbdb3ced4c7527da6130f3f0ab13e4e30ecf6dcfe7954 |