Utility functions for Azure GenAI
Project description
Azure GenAI Utils
This repository contains a set of utilities for working with Azure GenAI. The utilities are written in Python and are designed to be used for Hackathons, Workshops, and other events where you need to quickly get started with Azure GenAI.
Requirements
- Azure Subscription
- Azure AI Foundry
- Bing Search API Key
- Python 3.8 or later
.env file: Please do not forget to modify the .env file to match your account. Rename .env.sample to .env, or copy it and use it:
AZURE_OPENAI_ENDPOINT=xxxxx
AZURE_OPENAI_API_KEY=xxxxx
OPENAI_API_VERSION=2024-12-01-preview
AZURE_OPENAI_DEPLOYMENT_NAME=gpt-4o-mini
# Optional, but required for LangChain
LANGCHAIN_TRACING_V2=false
LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
LANGCHAIN_API_KEY=xxxxx
LANGCHAIN_PROJECT="YOUR-PROJECT"
Installation
PyPI
pip install azure-genai-utils
From Source
python setup.py install
Usage
Azure OpenAI Test
Expand
```python
from azure_genai_utils.aoai import AOAI

aoai = AOAI()
aoai.test_api_call()
```
PDF RAG Chain
Expand
from azure_genai_utils.rag.pdf import PDFRetrievalChain
pdf_path = "[YOUR-PDF-PATH]"
pdf = PDFRetrievalChain(
source_uri=[pdf_path],
loader_type="PDFPlumber",
model_name="gpt-4o-mini",
embedding_name="text-embedding-3-large",
chunk_size=500,
chunk_overlap=50,
).create_chain()
question = "[YOUR-QUESTION]"
docs = pdf.retriever.invoke(question)
results = pdf.chain.invoke({"chat_history": "", "question": question, "context": docs})
Bing Search
Please make sure to set the following environment variables in your .env file:
BING_SUBSCRIPTION_KEY=xxxxx
Expand
from azure_genai_utils.tools import BingSearch
from dotenv import load_dotenv
# You need to add BING_SUBSCRIPTION_KEY=xxxx in .env file
load_dotenv()
# Basic usage
bing = BingSearch(max_results=2, locale="ko-KR")
results = bing.invoke("Microsoft AutoGen")
print(results)
## Include news search results and format output
bing = BingSearch(
max_results=2,
locale="ko-KR",
include_news=True,
include_entity=False,
format_output=True,
)
results = bing.invoke("Microsoft AutoGen")
print(results)
LangGraph Example (Bing Search + Azure GenAI)
Expand
import json
from typing import Annotated
from typing_extensions import TypedDict
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import ToolMessage
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, END
from azure_genai_utils.tools import BingSearch
from dotenv import load_dotenv
load_dotenv()
class State(TypedDict):
messages: Annotated[list, add_messages]
llm = AzureChatOpenAI(model="gpt-4o-mini")
tool = BingSearch(max_results=3, format_output=False)
tools = [tool]
llm_with_tools = llm.bind_tools(tools)
def chatbot(state: State):
answer = llm_with_tools.invoke(state["messages"])
return {"messages": [answer]}
def route_tools(
state: State,
):
if messages := state.get("messages", []):
ai_message = messages[-1]
else:
raise ValueError(f"No messages found in input state to tool_edge: {state}")
if hasattr(ai_message, "tool_calls") and len(ai_message.tool_calls) > 0:
return "tools"
return END
graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot)
tool_node = ToolNode(tools=[tool])
graph_builder.add_node("tools", tool_node)
graph_builder.add_conditional_edges(
source="chatbot",
path=route_tools,
path_map={"tools": "tools", END: END},
)
graph_builder.add_edge("tools", "chatbot")
graph_builder.add_edge(START, "chatbot")
graph = graph_builder.compile()
# Test
inputs = {"messages": "Microsoft AutoGen"}
for event in graph.stream(inputs, stream_mode="values"):
for key, value in event.items():
print(f"\n==============\nSTEP: {key}\n==============\n")
print(value[-1])
Synthetic Data Generation
Expand
from azure_genai_utils.synthetic import (
QADataGenerator,
CustomQADataGenerator,
QAType,
generate_qas,
)
input_batch = [
"The quick brown fox jumps over the lazy dog.",
"What is the capital of France?",
]
model_config = {
"deployment": "gpt-4o-mini",
"model": "gpt-4o-mini",
"max_tokens": 256,
}
try:
qa_generator = QADataGenerator(model_config=model_config)
# qa_generator = CustomQADataGenerator(
# model_config=model_config, templates_dir=f"./azure_genai_utils/synthetic/prompt_templates/ko"
# )
task = generate_qas(
input_texts=input_batch,
qa_generator=qa_generator,
qa_type=QAType.LONG_ANSWER,
num_questions=2,
concurrency=3,
)
except Exception as e:
print(f"Error generating QAs: {e}")
Azure Custom Speech
Please make sure to set the following environment variables in your .env file:
AZURE_AI_SPEECH_REGION=xxxxx
AZURE_AI_SPEECH_API_KEY=xxxxx
Expand
from azure_genai_utils.stt.stt_generator import CustomSpeechToTextGenerator
# Initialize the CustomSpeechToTextGenerator
stt = CustomSpeechToTextGenerator(
custom_speech_lang="Korean",
synthetic_text_file="cc_support_expressions.jsonl",
train_output_dir="synthetic_data_train",
train_output_dir_aug="synthetic_data_train_aug",
eval_output_dir="synthetic_data_eval",
)
### Training set
# Generate synthetic text
topic = "Call center QnA related expected spoken utterances"
content = stt.generate_synthetic_text(
topic=topic, num_samples=2, model_name="gpt-4o-mini"
)
stt.save_synthetic_text(output_dir="plain_text")
# Generate synthetic wav files for training
train_tts_voice_list = [
"ko-KR-InJoonNeural",
"zh-CN-XiaoxiaoMultilingualNeural",
"en-GB-AdaMultilingualNeural",
]
stt.generate_synthetic_wav(
mode="train", tts_voice_list=train_tts_voice_list, delete_old_data=True
)
# Augment the train data (Optional)
stt.augment_wav_files(num_augments=4)
# Package the train data to be used in the training pipeline
stt.package_trainset(use_augmented_data=True)
### Evaluation set
# Generate synthetic wav files for evaluation
eval_tts_voice_list = ["ko-KR-YuJinNeural"]
stt.generate_synthetic_wav(
mode="eval", tts_voice_list=eval_tts_voice_list, delete_old_data=True
)
# Package the eval data to be used in the evaluation pipeline
stt.package_evalset(eval_dataset_dir="eval_dataset")
License Summary
This sample code is provided under the Apache 2.0 license. See the LICENSE file.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file azure_genai_utils-0.0.2.14.tar.gz.
File metadata
- Download URL: azure_genai_utils-0.0.2.14.tar.gz
- Upload date:
- Size: 227.5 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.0.1 CPython/3.12.2
File hashes
| Algorithm | Hash digest | |
|---|---|---|
| SHA256 |
8c60397a62727d416969da2dea7a842b79483668fa17bf9d756a04400795c539
|
|
| MD5 |
12bd7690fd743b69935af984acb9a028
|
|
| BLAKE2b-256 |
445453a897516330ef317284f66add8ebd0cddb40514eb52f86d9032e2145375
|
File details
Details for the file azure_genai_utils-0.0.2.14-py3-none-any.whl.
File metadata
- Download URL: azure_genai_utils-0.0.2.14-py3-none-any.whl
- Upload date:
- Size: 234.4 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.0.1 CPython/3.12.2
File hashes
| Algorithm | Hash digest | |
|---|---|---|
| SHA256 |
b0055cfa10b2633f0ea0230faa82fa1b3631d531865c463e6b5d12ac8a5216c2
|
|
| MD5 |
c83c37331fd59a301dcdcdf3cbbc9a3a
|
|
| BLAKE2b-256 |
7659e41cf36f14e34698440ebc86ee1ae2ccbba59beda2f3ca51767dc2e45a9a
|