Python SDK for Rogue Agent Evaluator
Project description
Rogue Agent Evaluator Python SDK
A comprehensive Python SDK for interacting with the Rogue Agent Evaluator API.
Installation
pip install rogue-ai-sdk
Quick Start
import asyncio
from rogue_sdk import RogueSDK, RogueClientConfig, AuthType, ScenarioType
async def main():
# Configure the SDK
config = RogueClientConfig(base_url="http://localhost:8000")
async with RogueSDK(config) as client:
# Quick evaluation
result = await client.quick_evaluate(
agent_url="http://localhost:3000",
scenarios=[
"The agent should be polite",
"The agent should not give discounts"
]
)
print(f"Evaluation completed: {result.status}")
print(f"Results: {len(result.results)} scenarios evaluated")
if __name__ == "__main__":
asyncio.run(main())
Features
- HTTP Client: Full REST API support with automatic retries
- WebSocket Client: Real-time updates during evaluations
- Type Safety: Comprehensive type definitions with Pydantic
- Async/Await: Modern Python async support
- Error Handling: Robust error handling and retry logic
- High-level Methods: Convenient methods for common operations
API Reference
RogueSDK
Main SDK class that combines HTTP and WebSocket functionality.
Configuration
from rogue_sdk import RogueClientConfig
config = RogueClientConfig(
base_url="http://localhost:8000",
api_key="your-api-key", # Optional
timeout=30.0, # Request timeout in seconds
retries=3 # Number of retry attempts
)
Basic Operations
async with RogueSDK(config) as client:
# Health check
health = await client.health()
# Create evaluation
response = await client.create_evaluation(request)
# Get evaluation status
job = await client.get_evaluation(job_id)
# List evaluations
jobs = await client.list_evaluations()
# Cancel evaluation
await client.cancel_evaluation(job_id)
Real-time Updates
async def on_update(job):
print(f"Job {job.job_id}: {job.status} ({job.progress:.1%})")
async def on_chat(chat_data):
print(f"Chat: {chat_data}")
# Run evaluation with real-time updates
result = await client.run_evaluation_with_updates(
request=evaluation_request,
on_update=on_update,
on_chat=on_chat
)
Data Models
AgentConfig
from rogue_sdk.types import AgentConfig, AuthType
agent_config = AgentConfig(
evaluated_agent_url="http://localhost:3000",
evaluated_agent_auth_type=AuthType.NO_AUTH,
judge_llm="openai/gpt-4o-mini",
interview_mode=True,
deep_test_mode=False,
parallel_runs=1
)
Scenario
from rogue_sdk.types import Scenario, ScenarioType
scenario = Scenario(
scenario="The agent should be polite",
scenario_type=ScenarioType.POLICY,
expected_outcome="Agent responds politely"
)
EvaluationRequest
from rogue_sdk.types import EvaluationRequest
request = EvaluationRequest(
agent_config=agent_config,
scenarios=[scenario],
max_retries=3,
timeout_seconds=300
)
Advanced Usage
Custom HTTP Client
from rogue_sdk import RogueHttpClient
async with RogueHttpClient(config) as http_client:
health = await http_client.health()
response = await http_client.create_evaluation(request)
WebSocket Client
from rogue_sdk import RogueWebSocketClient
ws_client = RogueWebSocketClient("http://localhost:8000", job_id)
def handle_update(event, data):
print(f"Update: {data}")
ws_client.on('job_update', handle_update)
await ws_client.connect()
Error Handling
from rogue_sdk.types import EvaluationStatus
try:
result = await client.quick_evaluate(agent_url, scenarios)
if result.status == EvaluationStatus.COMPLETED:
print("Evaluation successful!")
elif result.status == EvaluationStatus.FAILED:
print(f"Evaluation failed: {result.error_message}")
except TimeoutError:
print("Evaluation timed out")
except Exception as e:
print(f"Error: {e}")
Examples
Basic Evaluation
import asyncio
from rogue_sdk import RogueSDK, RogueClientConfig
async def basic_evaluation():
config = RogueClientConfig(base_url="http://localhost:8000")
async with RogueSDK(config) as client:
result = await client.quick_evaluate(
agent_url="http://localhost:3000",
scenarios=["Be helpful and polite"]
)
for scenario_result in result.results:
print(f"Scenario: {scenario_result.scenario.scenario}")
print(f"Passed: {scenario_result.passed}")
for conv in scenario_result.conversations:
print(f" Conversation passed: {conv.passed}")
print(f" Reason: {conv.reason}")
asyncio.run(basic_evaluation())
Advanced Evaluation with Real-time Updates
import asyncio
from rogue_sdk import RogueSDK, RogueClientConfig
from rogue_sdk.types import AgentConfig, Scenario, EvaluationRequest, AuthType, ScenarioType
async def advanced_evaluation():
config = RogueClientConfig(base_url="http://localhost:8000")
# Configure agent
agent_config = AgentConfig(
evaluated_agent_url="http://localhost:3000",
evaluated_agent_auth_type=AuthType.API_KEY,
evaluated_agent_credentials="your-agent-api-key",
judge_llm="openai/gpt-4o-mini",
deep_test_mode=True
)
# Define scenarios
scenarios = [
Scenario(
scenario="Don't reveal sensitive information",
scenario_type=ScenarioType.POLICY,
expected_outcome="Agent refuses to share sensitive data"
),
Scenario(
scenario="Be helpful with customer inquiries",
scenario_type=ScenarioType.POLICY,
expected_outcome="Agent provides helpful responses"
)
]
request = EvaluationRequest(
agent_config=agent_config,
scenarios=scenarios,
max_retries=3,
timeout_seconds=600
)
async with RogueSDK(config) as client:
def on_update(job):
print(f"Progress: {job.progress:.1%} - Status: {job.status}")
def on_chat(chat_data):
role = chat_data.get('role', 'Unknown')
content = chat_data.get('content', '')
print(f"{role}: {content[:100]}...")
result = await client.run_evaluation_with_updates(
request=request,
on_update=on_update,
on_chat=on_chat,
timeout=600.0
)
print(f"\nEvaluation completed: {result.status}")
if result.results:
passed_scenarios = sum(1 for r in result.results if r.passed)
total_scenarios = len(result.results)
print(f"Results: {passed_scenarios}/{total_scenarios} scenarios passed")
asyncio.run(advanced_evaluation())
Development
Running Tests
python -m pytest tests/
Type Checking
python -m mypy rogue_sdk/
Code Formatting
python -m black rogue_sdk/
python -m flake8 rogue_sdk/
License
This project is licensed under the terms set out in the LICENSE file; see that file for details. In short, you may use this software freely and indefinitely, but you may not host and sell it.
If you have any questions about the license or about commercial use of this project, please email admin@qualifire.ai.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file rogue_ai_sdk-0.3.3.tar.gz.
File metadata
- Download URL: rogue_ai_sdk-0.3.3.tar.gz
- Upload date:
- Size: 55.1 kB
- Tags: Source
- Uploaded using Trusted Publishing? Yes
- Uploaded via: twine/6.1.0 CPython/3.13.7
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 03981f60be6defbe2657df407ead0909d982a06221030279dc23f4b91f87b875 |
| MD5 | 56db0743c01e3abdfeafbb92aaf74575 |
| BLAKE2b-256 | 02e31e5a8e477d5db288110d6d356972334c5caaa770d6809ba4761ce1eebe01 |
Provenance
The following attestation bundles were made for rogue_ai_sdk-0.3.3.tar.gz:
Publisher: release.yml on qualifire-dev/rogue

Statement:
- Statement type: https://in-toto.io/Statement/v1
- Predicate type: https://docs.pypi.org/attestations/publish/v1
- Subject name: rogue_ai_sdk-0.3.3.tar.gz
- Subject digest: 03981f60be6defbe2657df407ead0909d982a06221030279dc23f4b91f87b875
- Sigstore transparency entry: 804532566
- Sigstore integration time:
- Permalink: qualifire-dev/rogue@53e4434bd2188692d111f4ac0a2b438196efc767
- Branch / Tag: refs/tags/v0.3.3
- Owner: https://github.com/qualifire-dev
- Access: public
- Token Issuer: https://token.actions.githubusercontent.com
- Runner Environment: github-hosted
- Publication workflow: release.yml@53e4434bd2188692d111f4ac0a2b438196efc767
- Trigger Event: push
-
Statement type:
File details
Details for the file rogue_ai_sdk-0.3.3-py3-none-any.whl.
File metadata
- Download URL: rogue_ai_sdk-0.3.3-py3-none-any.whl
- Upload date:
- Size: 23.5 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? Yes
- Uploaded via: twine/6.1.0 CPython/3.13.7
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | f8a065a766b0ed903b99a4981792a418a97a6689bf12ced00eacd4f436e9059f |
| MD5 | b762bdcdb3f314e4eaaa29e5a57e0cf4 |
| BLAKE2b-256 | f5ce72e98ee9d0163fbbd50ac9917d23553136269506b88f27ebb7dd86cd04b4 |
Provenance
The following attestation bundles were made for rogue_ai_sdk-0.3.3-py3-none-any.whl:
Publisher: release.yml on qualifire-dev/rogue

Statement:
- Statement type: https://in-toto.io/Statement/v1
- Predicate type: https://docs.pypi.org/attestations/publish/v1
- Subject name: rogue_ai_sdk-0.3.3-py3-none-any.whl
- Subject digest: f8a065a766b0ed903b99a4981792a418a97a6689bf12ced00eacd4f436e9059f
- Sigstore transparency entry: 804532572
- Sigstore integration time:
- Permalink: qualifire-dev/rogue@53e4434bd2188692d111f4ac0a2b438196efc767
- Branch / Tag: refs/tags/v0.3.3
- Owner: https://github.com/qualifire-dev
- Access: public
- Token Issuer: https://token.actions.githubusercontent.com
- Runner Environment: github-hosted
- Publication workflow: release.yml@53e4434bd2188692d111f4ac0a2b438196efc767
- Trigger Event: push
-
Statement type: