Skip to main content

llama-index vector_stores oracle database integration

Project description

LlamaIndex VectorStore Integration for Oracle

This is a very basic example of how to use Oracle as a vector store with LlamaIndex. For a detailed guide, see https://github.com/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/orallamavs.ipynb

pip install llama-index-vector-stores-oracledb

A sample example

import sys
from typing import TYPE_CHECKING

# oracledb is called at runtime (oracledb.connect), so it must be a real
# import rather than one guarded by TYPE_CHECKING.
import oracledb
from llama_index.core.schema import Document, TextNode
from llama_index.core.vector_stores.types import (
    ExactMatchFilter,
    MetadataFilters,
    VectorStoreQuery,
)
from llama_index.embeddings.oracleai import OracleEmbeddings
from llama_index.readers.oracleai import OracleReader, OracleTextSplitter
from llama_index.utils.oracleai import OracleSummary
from llama_index.vector_stores.oracledb import OraLlamaVS, DistanceStrategy
from llama_index.vector_stores.oracledb import base as orallamavs

"""
In this sample example, we will use 'database' provider for both summary and embeddings.
So, we don't need to do the following:
    - set proxy for 3rd party providers
    - create credential for 3rd party providers

If you choose to use 3rd party provider,
please follow the necessary steps for proxy and credential.
"""

# oracle connection
# please update with your username, password, hostname, and service_name
username = "testuser"
password = "testuser"
dsn = "<hostname/service_name>"

try:
    # Only the call that can fail lives inside the try body.
    conn = oracledb.connect(user=username, password=password, dsn=dsn)
except oracledb.Error as exc:
    # Catch only driver errors so that programming mistakes (e.g. a missing
    # import) surface with a traceback instead of being reported as a
    # connection problem.
    print("Connection failed!")
    print(f"Reason: {exc}", file=sys.stderr)
    sys.exit(1)
else:
    print("Connection successful!")


# load onnx model
# please update with your related information
onnx_dir = "DEMO_PY_DIR"
onnx_file = "tinybert.onnx"
model_name = "demo_model"
try:
    OracleEmbeddings.load_onnx_model(conn, onnx_dir, onnx_file, model_name)
except Exception as exc:
    # Top-level script boundary: report the underlying error before exiting,
    # otherwise the cause of the failure is lost.
    print("ONNX model loading failed!")
    print(f"Reason: {exc}", file=sys.stderr)
    sys.exit(1)
else:
    print("ONNX model loaded.")


# params
# please update necessary fields with related information
#
# Each params dict is declared right next to the component that consumes it.
# Components are still created in the order: loader, summary, splitter,
# embedder.

# reader configuration and instance
loader_params = {"owner": "testuser", "tablename": "demo_tab", "colname": "data"}
loader = OracleReader(conn=conn, params=loader_params)

# summary configuration and instance ('database' provider)
summary_params = {
    "provider": "database",
    "glevel": "S",
    "numParagraphs": 1,
    "language": "english",
}
summary = OracleSummary(conn=conn, params=summary_params)

# text splitter configuration and instance
splitter_params = {
    "normalize": "all",
}
splitter = OracleTextSplitter(conn=conn, params=splitter_params)

# embedder configuration and instance ('database' provider)
embedder_params = {
    "provider": "database",
    "model": "demo_model",
}
embedder = OracleEmbeddings(conn=conn, params=embedder_params)

# process the documents
# The reader instance `loader` created earlier in the script is reused here;
# building a second, identical OracleReader is unnecessary.
docs = loader.load()

# Build one TextNode per chunk, carrying the parent document's metadata plus
# a unique id, the document number and the document summary.
chunks_with_mdata = []
for doc_num, doc in enumerate(docs, start=1):
    summ = summary.get_summary(doc.text)
    chunks = splitter.split_text(doc.text)
    for chunk_num, chunk in enumerate(chunks, start=1):
        # Copy so the per-chunk keys do not leak into the document metadata.
        chunk_metadata = doc.metadata.copy()
        # Node id format: <_oid>$<document number>$<chunk number>
        chunk_metadata["id"] = "$".join(
            (chunk_metadata["_oid"], str(doc_num), str(chunk_num))
        )
        chunk_metadata["document_id"] = str(doc_num)
        chunk_metadata["document_summary"] = str(summ[0])
        # NOTE(review): _get_text_embedding is an underscore-prefixed hook;
        # consider the public embedding API — confirm it behaves the same
        # for OracleEmbeddings before switching.
        textnode = TextNode(
            text=chunk,
            id_=chunk_metadata["id"],
            embedding=embedder._get_text_embedding(chunk),
            metadata=chunk_metadata,
        )
        chunks_with_mdata.append(textnode)

# verify
print(f"Number of total chunks with metadata: {len(chunks_with_mdata)}")


# create Oracle AI Vector Store
# The nodes built above are stored in table "oravs" using dot-product distance.
vectorstore = OraLlamaVS.from_documents(
    docs=chunks_with_mdata,
    client=conn,
    distance_strategy=DistanceStrategy.DOT_PRODUCT,
    table_name="oravs",
)

# verify
print(f"Vector Store Table: {vectorstore.table_name}")

# Create Index
# Index settings are named separately so they are easy to spot and adjust.
index_params = {"idx_name": "hnsw_oravs", "idx_type": "HNSW"}
orallamavs.create_index(conn, vectorstore, params=index_params)

print("Index created.")

# Perform Semantic Search
# VectorStoreQuery, MetadataFilters and ExactMatchFilter come from
# llama_index.core.vector_stores.types and must be imported at the top of
# the script; without that import this section raises NameError.
embedding = embedder._get_text_embedding("What is Oracle AI Vector Store?")

# Similarity search without a filter
query = VectorStoreQuery(query_embedding=embedding, similarity_top_k=1)
print(vectorstore.query(query))

# Similarity search with a filter
# Only nodes whose "document_id" metadata equals "1" are considered.
filters = MetadataFilters(
    filters=[ExactMatchFilter(key="document_id", value="1")]
)
query = VectorStoreQuery(
    query_embedding=embedding, filters=filters, similarity_top_k=1
)
print(vectorstore.query(query))

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

llama_index_vector_stores_oracledb-0.3.2.tar.gz (11.2 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

File details

Details for the file llama_index_vector_stores_oracledb-0.3.2.tar.gz.

File metadata

File hashes

Hashes for llama_index_vector_stores_oracledb-0.3.2.tar.gz
Algorithm Hash digest
SHA256 78f18e1718b611e1e18cf9de6d59df1729a177115ed248c9affeb7568f724937
MD5 f9d4fc84cd769e67da400310c06208ff
BLAKE2b-256 c8ee5bbf5d4de634cd883a4a89e05056125de447186ae842ee11935b1923222b

See more details on using hashes here.

File details

Details for the file llama_index_vector_stores_oracledb-0.3.2-py3-none-any.whl.

File metadata

File hashes

Hashes for llama_index_vector_stores_oracledb-0.3.2-py3-none-any.whl
Algorithm Hash digest
SHA256 d55f48dab378999a8e73c2fa13d363ae503a6f204e9e77c06969b5f6e0734c11
MD5 1030c928e07510d31cd6955b300359f9
BLAKE2b-256 8d47557d3438547ad273b959e3a72edbc21c8a83afe627d8e419f65fb91bd753

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page