Skip to main content

Python SDK for Eddytor - Connect via Apache Arrow Flight SQL, REST API, and MCP

Project description

Eddytor Python SDK

Python SDK for connecting to Eddytor — Master Data Management powered by Apache Arrow and Delta Lake.

Installation

pip install eddytor-sdk

Quick Start

from eddytor_sdk import EddytorClient

# Connect to Eddytor
client = EddytorClient(api_key="eak_xxx")

# Query data (returns pandas DataFrame)
df = client.query("SELECT * FROM my_table LIMIT 10")
print(df)

# Close connection
client.close()

Context Manager

with EddytorClient(api_key="eak_xxx") as client:
    df = client.query("SELECT * FROM my_table")
    print(df)
# Connection automatically closed

Flight SQL Operations

# Query as pandas DataFrame
df = client.query("SELECT * FROM my_table LIMIT 10")

# Query as PyArrow Table
table = client.query_arrow("SELECT * FROM my_table")

# Execute and get raw tuples
rows = client.execute("SELECT id, name FROM my_table")

# DML operations (INSERT, UPDATE, DELETE)
count = client.execute_dml("INSERT INTO my_table VALUES (1, 'hello')")
count = client.execute_dml("DELETE FROM my_table WHERE id = 1")

# Bulk ingest (Arrow or pandas)
import pyarrow as pa
data = pa.table({"id": [1, 2], "name": ["Alice", "Bob"]})
client.ingest("my_table", data, mode="append", catalog="eddytor", schema="cfg_xxx")

# List schemas and tables
schemas = client.list_schemas()
tables = client.list_tables()

# Count rows
total = client.count("my_table")
filtered = client.count("my_table", "status = 'active'")

# Check if table exists
if client.table_exists("my_table"):
    print("Table exists!")

# Interactive SQL session
client.interactive()

DDL via Flight SQL

import pyarrow as pa

# Create a new table
schema = pa.schema([
    pa.field("id", pa.int64(), nullable=False),
    pa.field("name", pa.string()),
    pa.field("created_at", pa.timestamp("us", tz="UTC")),
])
client.create_table("my_table", "s3://bucket/my_table", schema)

# Add columns to an existing table
new_cols = pa.schema([pa.field("email", pa.string())])
client.add_column("my_table", new_cols, catalog="eddytor", schema="cfg_xxx")

REST API Operations

Access REST APIs via client.rest:

Table Management

# Get table metadata
metadata = client.rest.get_table_metadata("catalog", "schema", "table")
print(f"Columns: {[c.name for c in metadata.columns]}")

# Get table history (version history)
history = client.rest.get_table_history("catalog", "schema", "table")
for entry in history.entries:
    print(f"Version {entry.version}: {entry.operation} at {entry.timestamp}")

# Rollback to a specific version
client.rest.rollback_table("catalog", "schema", "table", version=5)

# Add constraints
client.rest.add_constraints("catalog", "schema", "table", [
    {"type": "NOT_NULL", "column": "id"},
])

# Update field metadata
client.rest.update_field_metadata(
    "catalog", "schema", "table",
    field_name="status",
    metadata={"description": "Order status"}
)

# Delete a table (irreversible)
client.rest.delete_table("catalog", "schema", "table")

# Move a table to a different storage configuration
client.rest.move_table(
    "catalog", "schema", "table",
    destination_config_id="cfg_xxx",
    destination_path="new/path/",
)

AI-Powered Analysis

from eddytor_sdk import AIProvider, AIAction

# Summarize table contents
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    action=AIAction.SUMMARY,
    model="claude-sonnet-4-6",
    sample_size=1000,
)
print(result.content)

# Detect anomalies
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.OPENAI,
    action=AIAction.DETECT_ANOMALIES,
    model="gpt-4o",
)

# Find duplicates
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    action=AIAction.FIND_DUPLICATES,
    model="claude-sonnet-4-6",
)

# Explain rows with tagged cell references
result = client.rest.explain_rows(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    model="claude-sonnet-4-6",
    sample_size=20,
)
print(result.explanation)
for ref in result.references:
    print(f"  {ref.display}: row {ref.row_index}, column {ref.column}")

# Explain specific rows by primary key
result = client.rest.explain_rows(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    model="claude-sonnet-4-6",
    row_pks=[1, 2, 5],
    pk_column="id",
)

Supported AI providers: Claude, OpenAI, Gemini, Mistral

Storage Operations

# Get all storage configurations
configs = client.rest.get_storage_configs()
for cfg in configs:
    print(f"{cfg.name}: {cfg.path} ({cfg.scheme_type})")

# Register S3 storage
config_id = client.rest.register_s3_storage(
    bucket="my-bucket",
    access_key_id="AKIAXXXXXXXX",
    secret_access_key="secret",
    region="us-east-1",
)

# Register Azure storage
config_id = client.rest.register_azure_storage(
    account_name="myaccount",
    container="mycontainer",
    access_key="key...",
)

# List objects in storage
result = client.rest.list_objects(config_id, path="data/", extensions="csv,parquet")
for obj in result.objects:
    print(f"{obj.name}: {obj.size} bytes")

# Download / delete objects
data, filename = client.rest.download_object(config_id, "data/file.csv")
client.rest.delete_object(config_id, "data/old_file.csv")

# Create folders
client.rest.create_folder(config_id, "data/new/")

# Move objects between storage configurations
client.rest.move_objects(
    source_config_id="cfg_aaa",
    source_path="old/path/",
    destination_config_id="cfg_bbb",
    destination_path="new/path/",
)

# Delete storage configuration
client.rest.delete_storage_config(config_id)

Column Domains

# Get domain configuration for a column
domain = client.rest.get_column_domain("catalog", "schema", "table", "status")

# Set a fixed domain (enum values)
client.rest.set_fixed_domain(
    "catalog", "schema", "table", "status",
    values=["pending", "active", "completed", "cancelled"]
)

# Add / remove values from a fixed domain
client.rest.add_fixed_value("catalog", "schema", "table", "status", "on_hold")
client.rest.remove_fixed_value("catalog", "schema", "table", "status", "cancelled")

# Set hierarchical domain (parent-child relationships)
client.rest.set_hierarchical_inline_domain(
    "catalog", "schema", "table", "subcategory",
    hierarchy={
        "Electronics": ["Phones", "Laptops", "Tablets"],
        "Clothing": ["Shirts", "Pants", "Shoes"],
    }
)

# Get allowed values (optionally filtered by parent)
allowed = client.rest.get_allowed_values(
    "catalog", "schema", "table", "subcategory",
    parent_value="Electronics"
)
print(allowed.values)  # ["Phones", "Laptops", "Tablets"]

# Delete domain
client.rest.delete_column_domain("catalog", "schema", "table", "status")

Table and Column Handles

For convenience, you can get typed handles for tables and columns:

# Get a table handle
table = client.table("eddytor", "cfg_xxx", "my_table")

print(table.count())
print(table.history())
print(table.metadata())

df = table.query_all(limit=100)

# Get a column handle
col = table.column("status")
col.set_fixed_domain(["Active", "Inactive"])
print(col.allowed_values())

Storage Handle

store = client.storage("cfg_xxx")
objects = store.list_objects(path="data/")
store.create_folder("data/new/")

MCP Integration

Eddytor supports the Model Context Protocol (MCP) for AI agent integration. To enable it, add the following to your Claude Desktop config (claude_desktop_config.json):

{
  "mcpServers": {
    "eddytor": {
      "url": "https://mcp.eddytor.com/sse",
      "headers": {
        "Authorization": "Bearer eak_xxx"
      }
    }
  }
}

This lets AI agents query, insert, merge, and delete data, create tables, manage schemas, run aggregations, and more.

Error Handling

from eddytor_sdk import EddytorClient, EddytorConnectionError, EddytorQueryError, EddytorError

try:
    client = EddytorClient(api_key="invalid_key")
except EddytorConnectionError as e:
    print(f"Connection failed: {e}")

try:
    df = client.query("SELECT * FROM nonexistent_table")
except EddytorQueryError as e:
    print(f"Query failed: {e}")

try:
    metadata = client.rest.get_table_metadata("invalid", "schema", "table")
except EddytorError as e:
    print(f"API error: {e}")

Other SDKs

| SDK | Package | Install |
| --- | --- | --- |
| Python | eddytor-sdk | pip install eddytor-sdk |
| Go | github.com/eddytor/eddytor-go-sdk | go get github.com/eddytor/eddytor-go-sdk |

API Reference

EddytorClient

EddytorClient(api_key: str)  # Your Eddytor API key (eak_xxx)

Flight SQL Methods

| Method | Returns | Description |
| --- | --- | --- |
| query(sql) | pd.DataFrame | Execute SQL, return pandas DataFrame |
| query_arrow(sql) | pa.Table | Execute SQL, return PyArrow Table |
| execute(sql) | list[tuple] | Execute SQL, return raw tuples |
| execute_dml(sql) | int | Execute DML, return affected row count |
| ingest(table, data, mode, catalog, schema) | int | Bulk ingest data |
| create_table(name, location, schema, ...) | None | Create a new table |
| add_column(table, columns, ...) | None | Add columns to a table |
| list_schemas() | list[str] | List all schemas |
| list_tables(schema=None) | list[str] | List tables (optionally in schema) |
| describe_table(table) | pd.DataFrame | Get table schema |
| table_exists(table) | bool | Check if table exists |
| count(table, where=None) | int | Count rows in table |
| table(catalog, schema, table) | Table | Get a Table handle |
| storage(config_id) | Storage | Get a Storage handle |
| interactive() | None | Start interactive SQL session |
| close() | None | Close connection |

REST API Methods (via client.rest)

Table API

| Method | Description |
| --- | --- |
| get_table_metadata(catalog, schema, table) | Get table metadata |
| get_table_history(catalog, schema, table) | Get version history |
| rollback_table(catalog, schema, table, version) | Rollback to version |
| delete_table(catalog, schema, table) | Delete table permanently |
| move_table(...) | Move table to different storage |
| add_constraints(...) | Add table constraints |
| drop_constraint(...) | Drop a constraint |
| update_field_metadata(...) | Update field metadata |
| infer_schema(file_path, has_header, delimiter) | Infer schema from CSV |
| magic_dust(...) | AI-powered analysis |
| explain_rows(...) | AI row explanations with cell references |

Storage API

| Method | Description |
| --- | --- |
| get_storage_configs() | List storage configurations |
| register_s3_storage(...) | Register S3 storage |
| register_azure_storage(...) | Register Azure storage |
| delete_storage_config(config_id) | Delete storage config |
| get_registered_tables(with_discovery) | Get registered tables |
| list_objects(config_id, ...) | List objects in storage |
| download_object(config_id, path) | Download object |
| delete_object(config_id, path) | Delete object |
| create_folder(config_id, path) | Create folder |
| move_objects(...) | Move objects between configs |
| upload_files(files, path) | Upload files |

Column Domain API

| Method | Description |
| --- | --- |
| get_column_domain(...) | Get domain configuration |
| set_fixed_domain(...) | Set fixed (enum) domain |
| set_hierarchical_inline_domain(...) | Set hierarchical domain |
| set_hierarchical_derived_domain(...) | Link domain to another column |
| set_hierarchical_table_domain(...) | Domain from another table |
| delete_column_domain(...) | Remove domain |
| add_fixed_value(...) | Add value to fixed domain |
| set_fixed_values(...) | Replace all fixed values |
| remove_fixed_value(...) | Remove value from fixed domain |
| get_allowed_values(...) | Get allowed values |

Requirements

  • Python 3.10+
  • adbc-driver-flightsql >= 0.10.0
  • pyarrow >= 15.0.0
  • pandas >= 2.0.0
  • requests >= 2.28.0

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

eddytor_sdk-1.0.1.tar.gz (39.5 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

eddytor_sdk-1.0.1-py3-none-any.whl (38.8 kB view details)

Uploaded Python 3

File details

Details for the file eddytor_sdk-1.0.1.tar.gz.

File metadata

  • Download URL: eddytor_sdk-1.0.1.tar.gz
  • Upload date:
  • Size: 39.5 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.1.0 CPython/3.13.7

File hashes

Hashes for eddytor_sdk-1.0.1.tar.gz
Algorithm Hash digest
SHA256 abe98691f1d337c8f11bc45eb0834192ae4327581cc711d48b09a5f30cce75be
MD5 c00c1a84a04b16ec29e8a2067ec734e2
BLAKE2b-256 228db34ad8e6c3db5d146e69e89a030544a4df9690cc60c1ec79657cb3ef0d11

See more details on using hashes here.

File details

Details for the file eddytor_sdk-1.0.1-py3-none-any.whl.

File metadata

  • Download URL: eddytor_sdk-1.0.1-py3-none-any.whl
  • Upload date:
  • Size: 38.8 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.1.0 CPython/3.13.7

File hashes

Hashes for eddytor_sdk-1.0.1-py3-none-any.whl
Algorithm Hash digest
SHA256 5d81d180aabfa4d7064671d529a06c50328a9c68352087e16f85677ded3aebdd
MD5 c86448b15473021614cfeadd7fb73176
BLAKE2b-256 ff5a253037f44be12c483a6a77b8c3ab2972035d19c255f24de57f5c2c597b73

See more details on using hashes here.

Supported by

AWS (Cloud computing and Security Sponsor) · Datadog (Monitoring) · Depot (Continuous Integration) · Fastly (CDN) · Google (Download Analytics) · Pingdom (Monitoring) · Sentry (Error logging) · StatusPage (Status page)