Python SDK for Eddytor - Connect via Apache Arrow Flight SQL, REST API, and MCP
Project description
Eddytor Python SDK
Python SDK for connecting to Eddytor — Master Data Management powered by Apache Arrow and Delta Lake.
Installation
pip install eddytor-sdk
Quick Start
from eddytor_sdk import EddytorClient
# Connect to Eddytor
client = EddytorClient(api_key="eak_xxx")
# Query data (returns pandas DataFrame)
df = client.query("SELECT * FROM my_table LIMIT 10")
print(df)
# Close connection
client.close()
Context Manager
with EddytorClient(api_key="eak_xxx") as client:
    df = client.query("SELECT * FROM my_table")
    print(df)
# Connection automatically closed
Flight SQL Operations
# Query as pandas DataFrame
df = client.query("SELECT * FROM my_table LIMIT 10")
# Query as PyArrow Table
table = client.query_arrow("SELECT * FROM my_table")
# Execute and get raw tuples
rows = client.execute("SELECT id, name FROM my_table")
# DML operations (INSERT, UPDATE, DELETE)
count = client.execute_dml("INSERT INTO my_table VALUES (1, 'hello')")
count = client.execute_dml("DELETE FROM my_table WHERE id = 1")
# Bulk ingest (Arrow or pandas)
import pyarrow as pa
data = pa.table({"id": [1, 2], "name": ["Alice", "Bob"]})
client.ingest("my_table", data, mode="append", catalog="eddytor", schema="cfg_xxx")
# List schemas and tables
schemas = client.list_schemas()
tables = client.list_tables()
# Count rows
total = client.count("my_table")
filtered = client.count("my_table", "status = 'active'")
# Check if table exists
if client.table_exists("my_table"):
    print("Table exists!")
# Interactive SQL session
client.interactive()
DDL via Flight SQL
import pyarrow as pa
# Create a new table
schema = pa.schema([
    pa.field("id", pa.int64(), nullable=False),
    pa.field("name", pa.string()),
    pa.field("created_at", pa.timestamp("us", tz="UTC")),
])
client.create_table("my_table", "s3://bucket/my_table", schema)
# Add columns to an existing table
new_cols = pa.schema([pa.field("email", pa.string())])
client.add_column("my_table", new_cols, catalog="eddytor", schema="cfg_xxx")
REST API Operations
Access REST APIs via client.rest:
Table Management
# Get table metadata
metadata = client.rest.get_table_metadata("catalog", "schema", "table")
print(f"Columns: {[c.name for c in metadata.columns]}")
# Get table history (version history)
history = client.rest.get_table_history("catalog", "schema", "table")
for entry in history.entries:
    print(f"Version {entry.version}: {entry.operation} at {entry.timestamp}")
# Rollback to a specific version
client.rest.rollback_table("catalog", "schema", "table", version=5)
# Add constraints
client.rest.add_constraints("catalog", "schema", "table", [
    {"type": "NOT_NULL", "column": "id"},
])
# Update field metadata
client.rest.update_field_metadata(
    "catalog", "schema", "table",
    field_name="status",
    metadata={"description": "Order status"}
)
# Delete a table (irreversible)
client.rest.delete_table("catalog", "schema", "table")
# Move a table to a different storage configuration
client.rest.move_table(
    "catalog", "schema", "table",
    destination_config_id="cfg_xxx",
    destination_path="new/path/",
)
AI-Powered Analysis
from eddytor_sdk import AIProvider, AIAction
# Summarize table contents
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    action=AIAction.SUMMARY,
    model="claude-sonnet-4-6",
    sample_size=1000,
)
print(result.content)

# Detect anomalies
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.OPENAI,
    action=AIAction.DETECT_ANOMALIES,
    model="gpt-4o",
)

# Find duplicates
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    action=AIAction.FIND_DUPLICATES,
    model="claude-sonnet-4-6",
)

# Explain rows with tagged cell references
result = client.rest.explain_rows(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    model="claude-sonnet-4-6",
    sample_size=20,
)
print(result.explanation)
for ref in result.references:
    print(f" {ref.display}: row {ref.row_index}, column {ref.column}")

# Explain specific rows by primary key
result = client.rest.explain_rows(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    model="claude-sonnet-4-6",
    row_pks=[1, 2, 5],
    pk_column="id",
)
Supported AI providers: Claude, OpenAI, Gemini, Mistral
Storage Operations
# Get all storage configurations
configs = client.rest.get_storage_configs()
for cfg in configs:
    print(f"{cfg.name}: {cfg.path} ({cfg.scheme_type})")
# Register S3 storage
config_id = client.rest.register_s3_storage(
    bucket="my-bucket",
    access_key_id="AKIAXXXXXXXX",
    secret_access_key="secret",
    region="us-east-1",
)

# Register Azure storage
config_id = client.rest.register_azure_storage(
    account_name="myaccount",
    container="mycontainer",
    access_key="key...",
)
# List objects in storage
result = client.rest.list_objects(config_id, path="data/", extensions="csv,parquet")
for obj in result.objects:
    print(f"{obj.name}: {obj.size} bytes")
# Download / delete objects
data, filename = client.rest.download_object(config_id, "data/file.csv")
client.rest.delete_object(config_id, "data/old_file.csv")
# Create folders
client.rest.create_folder(config_id, "data/new/")
# Move objects between storage configurations
client.rest.move_objects(
    source_config_id="cfg_aaa",
    source_path="old/path/",
    destination_config_id="cfg_bbb",
    destination_path="new/path/",
)
# Delete storage configuration
client.rest.delete_storage_config(config_id)
Column Domains
# Get domain configuration for a column
domain = client.rest.get_column_domain("catalog", "schema", "table", "status")
# Set a fixed domain (enum values)
client.rest.set_fixed_domain(
    "catalog", "schema", "table", "status",
    values=["pending", "active", "completed", "cancelled"]
)
# Add / remove values from a fixed domain
client.rest.add_fixed_value("catalog", "schema", "table", "status", "on_hold")
client.rest.remove_fixed_value("catalog", "schema", "table", "status", "cancelled")
# Set hierarchical domain (parent-child relationships)
client.rest.set_hierarchical_inline_domain(
    "catalog", "schema", "table", "subcategory",
    hierarchy={
        "Electronics": ["Phones", "Laptops", "Tablets"],
        "Clothing": ["Shirts", "Pants", "Shoes"],
    }
)

# Get allowed values (optionally filtered by parent)
allowed = client.rest.get_allowed_values(
    "catalog", "schema", "table", "subcategory",
    parent_value="Electronics"
)
print(allowed.values) # ["Phones", "Laptops", "Tablets"]
# Delete domain
client.rest.delete_column_domain("catalog", "schema", "table", "status")
Table and Column Handles
For convenience, you can get typed handles for tables and columns:
# Get a table handle
table = client.table("eddytor", "cfg_xxx", "my_table")
print(table.count())
print(table.history())
print(table.metadata())
df = table.query_all(limit=100)
# Get a column handle
col = table.column("status")
col.set_fixed_domain(["Active", "Inactive"])
print(col.allowed_values())
Storage Handle
store = client.storage("cfg_xxx")
objects = store.list_objects(path="data/")
store.create_folder("data/new/")
MCP Integration
Eddytor supports the Model Context Protocol for AI agent integration. Add to your Claude Desktop config (claude_desktop_config.json):
{
  "mcpServers": {
    "eddytor": {
      "url": "https://mcp.eddytor.com/sse",
      "headers": {
        "Authorization": "Bearer eak_xxx"
      }
    }
  }
}
This gives AI agents access to query, insert, merge, delete, create tables, manage schemas, run aggregations, and more.
Error Handling
from eddytor_sdk import EddytorClient, EddytorConnectionError, EddytorQueryError, EddytorError
try:
    client = EddytorClient(api_key="invalid_key")
except EddytorConnectionError as e:
    print(f"Connection failed: {e}")

try:
    df = client.query("SELECT * FROM nonexistent_table")
except EddytorQueryError as e:
    print(f"Query failed: {e}")

try:
    metadata = client.rest.get_table_metadata("invalid", "schema", "table")
except EddytorError as e:
    print(f"API error: {e}")
Other SDKs
| SDK | Package | Install |
|---|---|---|
| Python | `eddytor-sdk` | `pip install eddytor-sdk` |
| Go | `github.com/eddytor/eddytor-go-sdk` | `go get github.com/eddytor/eddytor-go-sdk` |
API Reference
EddytorClient
EddytorClient(api_key: str) # Your Eddytor API key (eak_xxx)
Flight SQL Methods
| Method | Returns | Description |
|---|---|---|
| `query(sql)` | `pd.DataFrame` | Execute SQL, return pandas DataFrame |
| `query_arrow(sql)` | `pa.Table` | Execute SQL, return PyArrow Table |
| `execute(sql)` | `list[tuple]` | Execute SQL, return raw tuples |
| `execute_dml(sql)` | `int` | Execute DML, return affected row count |
| `ingest(table, data, mode, catalog, schema)` | `int` | Bulk ingest data |
| `create_table(name, location, schema, ...)` | `None` | Create a new table |
| `add_column(table, columns, ...)` | `None` | Add columns to a table |
| `list_schemas()` | `list[str]` | List all schemas |
| `list_tables(schema=None)` | `list[str]` | List tables (optionally in schema) |
| `describe_table(table)` | `pd.DataFrame` | Get table schema |
| `table_exists(table)` | `bool` | Check if table exists |
| `count(table, where=None)` | `int` | Count rows in table |
| `table(catalog, schema, table)` | `Table` | Get a Table handle |
| `storage(config_id)` | `Storage` | Get a Storage handle |
| `interactive()` | `None` | Start interactive SQL session |
| `close()` | `None` | Close connection |
REST API Methods (via client.rest)
Table API
| Method | Description |
|---|---|
| `get_table_metadata(catalog, schema, table)` | Get table metadata |
| `get_table_history(catalog, schema, table)` | Get version history |
| `rollback_table(catalog, schema, table, version)` | Rollback to version |
| `delete_table(catalog, schema, table)` | Delete table permanently |
| `move_table(...)` | Move table to different storage |
| `add_constraints(...)` | Add table constraints |
| `drop_constraint(...)` | Drop a constraint |
| `update_field_metadata(...)` | Update field metadata |
| `infer_schema(file_path, has_header, delimiter)` | Infer schema from CSV |
| `magic_dust(...)` | AI-powered analysis |
| `explain_rows(...)` | AI row explanations with cell references |
Storage API
| Method | Description |
|---|---|
| `get_storage_configs()` | List storage configurations |
| `register_s3_storage(...)` | Register S3 storage |
| `register_azure_storage(...)` | Register Azure storage |
| `delete_storage_config(config_id)` | Delete storage config |
| `get_registered_tables(with_discovery)` | Get registered tables |
| `list_objects(config_id, ...)` | List objects in storage |
| `download_object(config_id, path)` | Download object |
| `delete_object(config_id, path)` | Delete object |
| `create_folder(config_id, path)` | Create folder |
| `move_objects(...)` | Move objects between configs |
| `upload_files(files, path)` | Upload files |
Column Domain API
| Method | Description |
|---|---|
| `get_column_domain(...)` | Get domain configuration |
| `set_fixed_domain(...)` | Set fixed (enum) domain |
| `set_hierarchical_inline_domain(...)` | Set hierarchical domain |
| `set_hierarchical_derived_domain(...)` | Link domain to another column |
| `set_hierarchical_table_domain(...)` | Domain from another table |
| `delete_column_domain(...)` | Remove domain |
| `add_fixed_value(...)` | Add value to fixed domain |
| `set_fixed_values(...)` | Replace all fixed values |
| `remove_fixed_value(...)` | Remove value from fixed domain |
| `get_allowed_values(...)` | Get allowed values |
Requirements
- Python 3.10+
- adbc-driver-flightsql >= 0.10.0
- pyarrow >= 15.0.0
- pandas >= 2.0.0
- requests >= 2.28.0
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file eddytor_sdk-1.0.1.tar.gz.
File metadata
- Download URL: eddytor_sdk-1.0.1.tar.gz
- Upload date:
- Size: 39.5 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.1.0 CPython/3.13.7
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | `abe98691f1d337c8f11bc45eb0834192ae4327581cc711d48b09a5f30cce75be` |
| MD5 | `c00c1a84a04b16ec29e8a2067ec734e2` |
| BLAKE2b-256 | `228db34ad8e6c3db5d146e69e89a030544a4df9690cc60c1ec79657cb3ef0d11` |
File details
Details for the file eddytor_sdk-1.0.1-py3-none-any.whl.
File metadata
- Download URL: eddytor_sdk-1.0.1-py3-none-any.whl
- Upload date:
- Size: 38.8 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.1.0 CPython/3.13.7
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | `5d81d180aabfa4d7064671d529a06c50328a9c68352087e16f85677ded3aebdd` |
| MD5 | `c86448b15473021614cfeadd7fb73176` |
| BLAKE2b-256 | `ff5a253037f44be12c483a6a77b8c3ab2972035d19c255f24de57f5c2c597b73` |