Python SDK for Eddytor - Connect via Apache Arrow Flight SQL, REST API, and MCP
Project description
Eddytor Python SDK
Python SDK for connecting to Eddytor — Master Data Management powered by Apache Arrow and Delta Lake.
Installation
pip install eddytor-sdk
Understanding Table Names (FQN)
Every table in Eddytor has a three-part Fully Qualified Name (FQN):
eddytor.cfg_<config_id_hex>.<hash>_<table_name>
| Part | Value | Example |
|---|---|---|
| Catalog | Always "eddytor" (fixed) | eddytor |
| Schema | cfg_ + storage config UUID (dashes removed) | cfg_550e8400e29b41d4a716446655440000 |
| Table | Hash prefix + human-readable name | abc123_customers |
You don't need to construct these yourself. Use client.tables() to discover all available tables with FQNs already resolved.
Quick Start
from eddytor_sdk import EddytorClient
with EddytorClient(api_key="edd_live_xxx") as client:
# Step 1: Discover your tables
tables = client.tables()
for t in tables:
print(f"{t.name} -> {t.fqn}")
# Output: abc123_customers -> `eddytor`.`cfg_550e8400...`.`abc123_customers`
# Step 2: Work with a table
customers = [t for t in tables if "customers" in t.name][0]
df = customers.query_all(limit=10)
print(df)
# Or query with raw SQL using the FQN
df = client.query(f"SELECT * FROM {customers.fqn} WHERE status = 'active'")
Flight SQL Operations
# Discover tables first
tables = client.tables()
my_table = tables[0]
# Query as pandas DataFrame
df = client.query(f"SELECT * FROM {my_table.fqn} LIMIT 10")
# Query as PyArrow Table
arrow_table = client.query_arrow(f"SELECT * FROM {my_table.fqn}")
# Execute and get raw tuples
rows = client.execute(f"SELECT id, name FROM {my_table.fqn}")
# DML operations (INSERT, UPDATE, DELETE)
count = client.execute_dml(f"INSERT INTO {my_table.fqn} VALUES (1, 'hello')")
count = client.execute_dml(f"DELETE FROM {my_table.fqn} WHERE id = 1")
# Bulk ingest (Arrow or pandas) — uses the table's catalog/schema/name parts
import pyarrow as pa
data = pa.table({"id": [1, 2], "name": ["Alice", "Bob"]})
client.ingest(my_table.name, data, mode="append", catalog=my_table.catalog, schema=my_table.schema)
# List schemas and tables (raw SQL)
schemas = client.list_schemas()
tables_in_schema = client.list_tables(schema="cfg_550e8400e29b41d4a716446655440000")
# Count rows
total = client.count(my_table.fqn)
filtered = client.count(my_table.fqn, "status = 'active'")
# Interactive SQL session
client.interactive()
DDL via Flight SQL
import pyarrow as pa
# Create a new table (provide the object store location)
schema = pa.schema([
pa.field("id", pa.int64(), nullable=False),
pa.field("name", pa.string()),
pa.field("created_at", pa.timestamp("us", tz="UTC")),
])
client.create_table("my_table", "s3://bucket/my_table", schema)
# Add columns — use a discovered table's catalog/schema
tables = client.tables()
my_table = [t for t in tables if "my_table" in t.name][0]
new_cols = pa.schema([pa.field("email", pa.string())])
client.add_column(my_table.name, new_cols, catalog=my_table.catalog, schema=my_table.schema)
REST API Operations
Access REST APIs via client.rest:
Table Management
# Discover tables first (or use a Table handle — see Table Handles section)
tables = client.tables()
t = tables[0] # catalog="eddytor", schema="cfg_550e...", name="abc123_customers"
# Get table metadata
metadata = client.rest.get_table_metadata(t.catalog, t.schema, t.name)
print(f"Columns: {[c.name for c in metadata.columns]}")
# Get table history (version history)
history = client.rest.get_table_history(t.catalog, t.schema, t.name)
for entry in history.entries:
print(f"Version {entry.version}: {entry.operation} at {entry.timestamp}")
# Rollback to a specific version
client.rest.rollback_table(t.catalog, t.schema, t.name, version=5)
# Add constraints
client.rest.add_constraints(t.catalog, t.schema, t.name, [
{"type": "NOT_NULL", "column": "id"},
])
# Update field metadata
client.rest.update_field_metadata(
t.catalog, t.schema, t.name,
field_name="status",
metadata={"description": "Order status"}
)
# Delete a table (irreversible)
client.rest.delete_table(t.catalog, t.schema, t.name)
# Move a table to a different storage configuration
client.rest.move_table(
t.catalog, t.schema, t.name,
destination_config_id="target-config-uuid",
destination_path="new/path/",
)
AI-Powered Analysis
from eddytor_sdk import AIProvider, AIAction
# Use a discovered table
tables = client.tables()
t = tables[0]
# Summarize table contents
result = client.rest.magic_dust(
t.catalog, t.schema, t.name,
provider=AIProvider.CLAUDE,
action=AIAction.SUMMARY,
model="claude-sonnet-4-6",
sample_size=1000,
)
print(result.content)
# Detect anomalies
result = client.rest.magic_dust(
t.catalog, t.schema, t.name,
provider=AIProvider.OPENAI,
action=AIAction.DETECT_ANOMALIES,
model="gpt-4o",
)
# Explain rows with tagged cell references
result = client.rest.explain_rows(
t.catalog, t.schema, t.name,
provider=AIProvider.CLAUDE,
model="claude-sonnet-4-6",
sample_size=20,
)
print(result.explanation)
for ref in result.references:
print(f" {ref.display}: row {ref.row_index}, column {ref.column}")
Supported AI providers: Claude, OpenAI, Gemini, Mistral
Storage Operations
# Get all storage configurations
configs = client.rest.get_storage_configs()
for cfg in configs:
print(f"{cfg.name}: {cfg.path} ({cfg.scheme_type})")
# Register S3 storage
config_id = client.rest.register_s3_storage(
bucket="my-bucket",
access_key_id="AKIAXXXXXXXX",
secret_access_key="secret",
region="us-east-1",
)
# Register Azure storage
config_id = client.rest.register_azure_storage(
account_name="myaccount",
container="mycontainer",
access_key="key...",
)
# List objects in storage
result = client.rest.list_objects(config_id, path="data/", extensions="csv,parquet")
for obj in result.objects:
print(f"{obj.name}: {obj.size} bytes")
# Download / delete objects
data, filename = client.rest.download_object(config_id, "data/file.csv")
client.rest.delete_object(config_id, "data/old_file.csv")
# Create folders
client.rest.create_folder(config_id, "data/new/")
# Move objects between storage configurations
client.rest.move_objects(
source_config_id="cfg_aaa",
source_path="old/path/",
destination_config_id="cfg_bbb",
destination_path="new/path/",
)
# Delete storage configuration
client.rest.delete_storage_config(config_id)
Column Domains
# Use a discovered table (or Table handle)
tables = client.tables()
t = tables[0]
# Get domain configuration for a column
domain = client.rest.get_column_domain(t.catalog, t.schema, t.name, "status")
# Set a fixed domain (enum values)
client.rest.set_fixed_domain(
t.catalog, t.schema, t.name, "status",
values=["pending", "active", "completed", "cancelled"]
)
# Or use the Table/Column handle (simpler):
t.column("status").set_fixed_domain(["pending", "active", "completed", "cancelled"])
# Set hierarchical domain (parent-child relationships)
client.rest.set_hierarchical_inline_domain(
t.catalog, t.schema, t.name, "subcategory",
hierarchy={
"Electronics": ["Phones", "Laptops", "Tablets"],
"Clothing": ["Shirts", "Pants", "Shoes"],
}
)
# Get allowed values (optionally filtered by parent)
allowed = client.rest.get_allowed_values(
t.catalog, t.schema, t.name, "subcategory",
parent_value="Electronics"
)
print(allowed.values) # ["Phones", "Laptops", "Tablets"]
Table and Column Handles
The recommended way to get table handles is via client.tables() (discovery):
# Discover all tables (recommended)
tables = client.tables()
customers = [t for t in tables if "customers" in t.name][0]
print(customers.fqn) # `eddytor`.`cfg_550e...`.`abc123_customers`
print(customers.count())
print(customers.history())
print(customers.metadata())
df = customers.query_all(limit=100)
# Or construct manually if you know the exact FQN parts
table = client.table("eddytor", "cfg_550e8400e29b41d4a716446655440000", "abc123_customers")
# Column handle
col = customers.column("status")
col.set_fixed_domain(["Active", "Inactive"])
print(col.allowed_values())
Storage Handle
store = client.storage("cfg_xxx")
objects = store.list_objects(path="data/")
store.create_folder("data/new/")
MCP Integration
Eddytor supports the Model Context Protocol for AI agent integration. Add to your Claude Desktop config (claude_desktop_config.json):
{
"mcpServers": {
"eddytor": {
"url": "https://mcp.eddytor.com/sse",
"headers": {
"Authorization": "Bearer edd_live_xxx"
}
}
}
}
This gives AI agents access to query, insert, merge, delete, create tables, manage schemas, run aggregations, and more.
Error Handling
from eddytor_sdk import EddytorClient, EddytorConnectionError, EddytorQueryError, EddytorError
try:
client = EddytorClient(api_key="invalid_key")
except EddytorConnectionError as e:
print(f"Connection failed: {e}")
try:
df = client.query("SELECT * FROM nonexistent_table")
except EddytorQueryError as e:
print(f"Query failed: {e}")
try:
metadata = client.rest.get_table_metadata("invalid", "schema", "table")
except EddytorError as e:
print(f"API error: {e}")
Other SDKs
| SDK | Package | Install |
|---|---|---|
| Python | eddytor-sdk | pip install eddytor-sdk |
| Go | github.com/eddytor/eddytor-go-sdk | go get github.com/eddytor/eddytor-go-sdk |
API Reference
EddytorClient
EddytorClient(api_key: str) # Your Eddytor API key (edd_live_xxx)
Flight SQL Methods
| Method | Returns | Description |
|---|---|---|
| query(sql) | pd.DataFrame | Execute SQL, return pandas DataFrame |
| query_arrow(sql) | pa.Table | Execute SQL, return PyArrow Table |
| execute(sql) | list[tuple] | Execute SQL, return raw tuples |
| execute_dml(sql) | int | Execute DML, return affected row count |
| ingest(table, data, mode, catalog, schema) | int | Bulk ingest data |
| create_table(name, location, schema, ...) | None | Create a new table |
| add_column(table, columns, ...) | None | Add columns to a table |
| list_schemas() | list[str] | List all schemas |
| list_tables(schema=None) | list[str] | List tables (optionally in schema) |
| describe_table(table) | pd.DataFrame | Get table schema |
| table_exists(table) | bool | Check if table exists |
| count(table, where=None) | int | Count rows in table |
| tables(with_discovery=True) | list[Table] | Discover all tables (recommended starting point) |
| table(catalog, schema, table) | Table | Get a Table handle by exact FQN parts |
| storage(config_id) | Storage | Get a Storage handle |
| interactive() | None | Start interactive SQL session |
| close() | None | Close connection |
REST API Methods (via client.rest)
Table API
| Method | Description |
|---|---|
| get_table_metadata(catalog, schema, table) | Get table metadata |
| get_table_history(catalog, schema, table) | Get version history |
| rollback_table(catalog, schema, table, version) | Rollback to version |
| delete_table(catalog, schema, table) | Delete table permanently |
| move_table(...) | Move table to different storage |
| add_constraints(...) | Add table constraints |
| drop_constraint(...) | Drop a constraint |
| update_field_metadata(...) | Update field metadata |
| infer_schema(file_path, has_header, delimiter) | Infer schema from CSV |
| magic_dust(...) | AI-powered analysis |
| explain_rows(...) | AI row explanations with cell references |
Storage API
| Method | Description |
|---|---|
| get_storage_configs() | List storage configurations |
| register_s3_storage(...) | Register S3 storage |
| register_azure_storage(...) | Register Azure storage |
| delete_storage_config(config_id) | Delete storage config |
| get_registered_tables(with_discovery) | Get registered tables |
| list_objects(config_id, ...) | List objects in storage |
| download_object(config_id, path) | Download object |
| delete_object(config_id, path) | Delete object |
| create_folder(config_id, path) | Create folder |
| move_objects(...) | Move objects between configs |
| upload_files(files, path) | Upload files |
Column Domain API
| Method | Description |
|---|---|
| get_column_domain(...) | Get domain configuration |
| set_fixed_domain(...) | Set fixed (enum) domain |
| set_hierarchical_inline_domain(...) | Set hierarchical domain |
| set_hierarchical_derived_domain(...) | Link domain to another column |
| set_hierarchical_table_domain(...) | Domain from another table |
| delete_column_domain(...) | Remove domain |
| add_fixed_value(...) | Add value to fixed domain |
| set_fixed_values(...) | Replace all fixed values |
| remove_fixed_value(...) | Remove value from fixed domain |
| get_allowed_values(...) | Get allowed values |
Requirements
- Python 3.10+
- adbc-driver-flightsql >= 0.10.0
- pyarrow >= 15.0.0
- pandas >= 2.0.0
- requests >= 2.28.0
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Filter files by name, interpreter, ABI, and platform.
If you're not sure about the file name format, learn more about wheel file names.
Copy a direct link to the current filters
File details
Details for the file eddytor_sdk-1.0.3.tar.gz.
File metadata
- Download URL: eddytor_sdk-1.0.3.tar.gz
- Upload date:
- Size: 41.1 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.1.0 CPython/3.13.7
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 3d1a0c22b7050cfdb436c2e5b8099c0dff85306dd7ae4754de8bbd4dad5ff947 |
| MD5 | 505da0cbaa0cae7de53eeec73e01080a |
| BLAKE2b-256 | 432073e65973df38b69eb912588c276b61076751e0008f0011237b0ccccf0863 |
File details
Details for the file eddytor_sdk-1.0.3-py3-none-any.whl.
File metadata
- Download URL: eddytor_sdk-1.0.3-py3-none-any.whl
- Upload date:
- Size: 40.6 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/6.1.0 CPython/3.13.7
File hashes
| Algorithm | Hash digest |
|---|---|
| SHA256 | 71d72644f9ed306fc9f6a03fe876b1ef037489b94ebf541812086ad2f4557df8 |
| MD5 | b658d5d150e8ac86d20ed32dbf02dffc |
| BLAKE2b-256 | df8ddf47287a2b977b05825d5e046de98e2043cc85e943afa09dc7ad0e4ee77f |