Skip to main content

llama-index vector_stores oracle database integration

Project description

LlamaIndex VectorStore Integration for Oracle

This is a very basic example of how to use Oracle as a vector store with LlamaIndex. For a detailed guide, see https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/orallamavs.ipynb

pip install llama-index-vector-stores-oracledb

Example usage

import sys
from typing import TYPE_CHECKING

# Runtime import: the script calls oracledb.connect(), so the module must be
# importable at run time, not only while type checking.
import oracledb
from llama_index.core.schema import Document, TextNode
from llama_index.readers.oracleai import OracleReader, OracleTextSplitter
from llama_index.embeddings.oracleai import OracleEmbeddings
from llama_index.utils.oracleai import OracleSummary
from llama_index.vector_stores.oracledb import OraLlamaVS, DistanceStrategy
from llama_index.vector_stores.oracledb import base as orallamavs

if TYPE_CHECKING:
    import oracledb

"""
In this example, we use the 'database' provider for both summary and embeddings,
so we don't need to do the following:
    - set a proxy for 3rd party providers
    - create a credential for 3rd party providers

If you choose to use a 3rd party provider,
please follow the necessary steps for proxy and credential.
"""

# oracle connection
# please update with your username, password, hostname, and service_name
username = "testuser"
password = "testuser"
dsn = "<hostname/service_name>"

# Open the database connection used by every later step. This is the
# script's top-level boundary, so any failure is reported and the script
# exits with status 1 instead of continuing without a connection.
try:
    conn = oracledb.connect(user=username, password=password, dsn=dsn)
except Exception as exc:
    print("Connection failed!")
    # Surface the underlying reason (bad credentials, unreachable DSN,
    # missing driver, ...) so the failure can actually be diagnosed.
    print(f"Reason: {exc!r}", file=sys.stderr)
    sys.exit(1)
else:
    print("Connection successful!")


# load onnx model
# please update with your related information
onnx_dir = "DEMO_PY_DIR"
onnx_file = "tinybert.onnx"
model_name = "demo_model"

# Load the ONNX model through OracleEmbeddings using the open connection.
# On failure, report the reason and exit with status 1.
try:
    OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)
except Exception as exc:
    print("ONNX model loading failed!")
    # Keep the original message, but also show why the load failed.
    print(f"Reason: {exc!r}", file=sys.stderr)
    sys.exit(1)
else:
    print("ONNX model loaded.")


# params
# please update necessary fields with related information
# Each dict is passed unchanged as `params` to the matching Oracle helper.
loader_params = dict(owner="testuser", tablename="demo_tab", colname="data")
summary_params = dict(
    provider="database",
    glevel="S",
    numParagraphs=1,
    language="english",
)
splitter_params = dict(normalize="all")
embedder_params = dict(provider="database", model="demo_model")

# instantiate loader, summary, splitter, and embedder
# (all four share the same connection)
loader = OracleReader(params=loader_params, conn=conn)
summary = OracleSummary(params=summary_params, conn=conn)
splitter = OracleTextSplitter(params=splitter_params, conn=conn)
embedder = OracleEmbeddings(params=embedder_params, conn=conn)

# process the documents
# Reuse the `loader` instantiated above; building a second OracleReader with
# the same connection and params is redundant.
docs = loader.load()

# Build one TextNode per chunk, carrying the source document's metadata plus
# a chunk id, the document number and the document summary.
chunks_with_mdata = []
for doc_num, doc in enumerate(docs, start=1):
    summ = summary.get_summary(doc.text)
    chunks = splitter.split_text(doc.text)
    for chunk_num, chunk in enumerate(chunks, start=1):
        # Copy so each chunk gets its own metadata dict rather than
        # mutating the document's.
        chunk_metadata = doc.metadata.copy()
        # id format: <_oid>$<document number>$<chunk number>, both 1-based
        chunk_metadata["id"] = (
            chunk_metadata["_oid"] + "$" + str(doc_num) + "$" + str(chunk_num)
        )
        chunk_metadata["document_id"] = str(doc_num)
        chunk_metadata["document_summary"] = str(summ[0])
        textnode = TextNode(
            text=chunk,
            id_=chunk_metadata["id"],
            # NOTE(review): this calls a private method; a public
            # `get_text_embedding` may be the intended API - confirm.
            embedding=embedder._get_text_embedding(chunk),
            metadata=chunk_metadata,
        )
        chunks_with_mdata.append(textnode)

# verify
print(f"Number of total chunks with metadata: {len(chunks_with_mdata)}")


# create Oracle AI Vector Store
# Builds the store from the prepared chunk nodes in table "oravs",
# using dot product as the distance strategy.
vectorstore = OraLlamaVS.from_documents(
    docs=chunks_with_mdata,
    client=conn,
    distance_strategy=DistanceStrategy.DOT_PRODUCT,
    table_name="oravs",
)

# verify
print(f"Vector Store Table: {vectorstore.table_name}")

# Create Index
# Requests an index named "hnsw_oravs" of type "HNSW" on the vector store.
orallamavs.create_index(
    conn,
    vectorstore,
    params=dict(idx_name="hnsw_oravs", idx_type="HNSW"),
)

print("Index created.")


# Perform Semantic Search
query = "What is Oracle AI Vector Store?"
# Named `doc_filter` so the builtin `filter` is not shadowed at module level.
# It selects on the "document_id" metadata value assigned to each chunk.
doc_filter = {"document_id": ["1"]}

# Similarity search without a filter
print(vectorstore.similarity_search(query, 1))

# Similarity search with a filter
print(vectorstore.similarity_search(query, 1, filter=doc_filter))

# Similarity search with relevance score
print(vectorstore.similarity_search_with_score(query, 1))

# Similarity search with relevance score with filter
print(vectorstore.similarity_search_with_score(query, 1, filter=doc_filter))

# Max marginal relevance search
print(
    vectorstore.max_marginal_relevance_search(
        query, 1, fetch_k=20, lambda_mult=0.5
    )
)

# Max marginal relevance search with filter
print(
    vectorstore.max_marginal_relevance_search(
        query, 1, fetch_k=20, lambda_mult=0.5, filter=doc_filter
    )
)

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

Built Distribution

File details

Details for the file llama_index_vector_stores_oracledb-0.2.0.tar.gz.

File metadata

File hashes

Hashes for llama_index_vector_stores_oracledb-0.2.0.tar.gz
Algorithm Hash digest
SHA256 f3ef32788047ee39c9288fd20b419b3079c4561d3b1213b2f0bd3bf1e98db582
MD5 70c382af824986ba4b53cd125ec64f1f
BLAKE2b-256 4a509659e3a1ec90db90d0e83a241d801ff30a2225a28f2ac296e79b5ac45cf2

See more details on using hashes here.

File details

Details for the file llama_index_vector_stores_oracledb-0.2.0-py3-none-any.whl.

File metadata

File hashes

Hashes for llama_index_vector_stores_oracledb-0.2.0-py3-none-any.whl
Algorithm Hash digest
SHA256 044dc28b6a1028fa34390ca06f7a2c4b6917152936be4a8683fe28982c374c23
MD5 ceb22f5d08962666ae619c205f998973
BLAKE2b-256 f88c8b333b8f4681309cbdb3ced4c7527da6130f3f0ab13e4e30ecf6dcfe7954

See more details on using hashes here.

Supported by

AWS AWS Cloud computing and Security Sponsor Datadog Datadog Monitoring Fastly Fastly CDN Google Google Download Analytics Microsoft Microsoft PSF Sponsor Pingdom Pingdom Monitoring Sentry Sentry Error logging StatusPage StatusPage Status page