Skip to main content

Python SDK for Eddytor - Connect via Apache Arrow Flight SQL, REST API, and MCP

Project description

Eddytor Python SDK

Python SDK for connecting to Eddytor — Master Data Management powered by Apache Arrow and Delta Lake.

Installation

pip install eddytor-sdk

Quick Start

from eddytor_sdk import EddytorClient

# Connect to Eddytor
client = EddytorClient(api_key="edd_live_xxx")

# Query data (returns pandas DataFrame)
df = client.query("SELECT * FROM my_table LIMIT 10")
print(df)

# Close connection
client.close()

Context Manager

with EddytorClient(api_key="edd_live_xxx") as client:
    df = client.query("SELECT * FROM my_table")
    print(df)
# Connection automatically closed

Flight SQL Operations

# Query as pandas DataFrame
df = client.query("SELECT * FROM my_table LIMIT 10")

# Query as PyArrow Table
table = client.query_arrow("SELECT * FROM my_table")

# Execute and get raw tuples
rows = client.execute("SELECT id, name FROM my_table")

# DML operations (INSERT, UPDATE, DELETE)
count = client.execute_dml("INSERT INTO my_table VALUES (1, 'hello')")
count = client.execute_dml("DELETE FROM my_table WHERE id = 1")

# Bulk ingest (Arrow or pandas)
import pyarrow as pa
data = pa.table({"id": [1, 2], "name": ["Alice", "Bob"]})
client.ingest("my_table", data, mode="append", catalog="eddytor", schema="cfg_xxx")

# List schemas and tables
schemas = client.list_schemas()
tables = client.list_tables()

# Count rows
total = client.count("my_table")
filtered = client.count("my_table", "status = 'active'")

# Check if table exists
if client.table_exists("my_table"):
    print("Table exists!")

# Interactive SQL session
client.interactive()

DDL via Flight SQL

import pyarrow as pa

# Create a new table
schema = pa.schema([
    pa.field("id", pa.int64(), nullable=False),
    pa.field("name", pa.string()),
    pa.field("created_at", pa.timestamp("us", tz="UTC")),
])
client.create_table("my_table", "s3://bucket/my_table", schema)

# Add columns to an existing table
new_cols = pa.schema([pa.field("email", pa.string())])
client.add_column("my_table", new_cols, catalog="eddytor", schema="cfg_xxx")

REST API Operations

Access REST APIs via client.rest:

Table Management

# Get table metadata
metadata = client.rest.get_table_metadata("catalog", "schema", "table")
print(f"Columns: {[c.name for c in metadata.columns]}")

# Get table history (version history)
history = client.rest.get_table_history("catalog", "schema", "table")
for entry in history.entries:
    print(f"Version {entry.version}: {entry.operation} at {entry.timestamp}")

# Rollback to a specific version
client.rest.rollback_table("catalog", "schema", "table", version=5)

# Add constraints
client.rest.add_constraints("catalog", "schema", "table", [
    {"type": "NOT_NULL", "column": "id"},
])

# Update field metadata
client.rest.update_field_metadata(
    "catalog", "schema", "table",
    field_name="status",
    metadata={"description": "Order status"}
)

# Delete a table (irreversible)
client.rest.delete_table("catalog", "schema", "table")

# Move a table to a different storage configuration
client.rest.move_table(
    "catalog", "schema", "table",
    destination_config_id="cfg_xxx",
    destination_path="new/path/",
)

AI-Powered Analysis

from eddytor_sdk import AIProvider, AIAction

# Summarize table contents
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    action=AIAction.SUMMARY,
    model="claude-sonnet-4-6",
    sample_size=1000,
)
print(result.content)

# Detect anomalies
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.OPENAI,
    action=AIAction.DETECT_ANOMALIES,
    model="gpt-4o",
)

# Find duplicates
result = client.rest.magic_dust(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    action=AIAction.FIND_DUPLICATES,
    model="claude-sonnet-4-6",
)

# Explain rows with tagged cell references
result = client.rest.explain_rows(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    model="claude-sonnet-4-6",
    sample_size=20,
)
print(result.explanation)
for ref in result.references:
    print(f"  {ref.display}: row {ref.row_index}, column {ref.column}")

# Explain specific rows by primary key
result = client.rest.explain_rows(
    "catalog", "schema", "table",
    provider=AIProvider.CLAUDE,
    model="claude-sonnet-4-6",
    row_pks=[1, 2, 5],
    pk_column="id",
)

Supported AI providers: Claude, OpenAI, Gemini, Mistral

Storage Operations

# Get all storage configurations
configs = client.rest.get_storage_configs()
for cfg in configs:
    print(f"{cfg.name}: {cfg.path} ({cfg.scheme_type})")

# Register S3 storage
config_id = client.rest.register_s3_storage(
    bucket="my-bucket",
    access_key_id="AKIAXXXXXXXX",
    secret_access_key="secret",
    region="us-east-1",
)

# Register Azure storage
config_id = client.rest.register_azure_storage(
    account_name="myaccount",
    container="mycontainer",
    access_key="key...",
)

# List objects in storage
result = client.rest.list_objects(config_id, path="data/", extensions="csv,parquet")
for obj in result.objects:
    print(f"{obj.name}: {obj.size} bytes")

# Download / delete objects
data, filename = client.rest.download_object(config_id, "data/file.csv")
client.rest.delete_object(config_id, "data/old_file.csv")

# Create folders
client.rest.create_folder(config_id, "data/new/")

# Move objects between storage configurations
client.rest.move_objects(
    source_config_id="cfg_aaa",
    source_path="old/path/",
    destination_config_id="cfg_bbb",
    destination_path="new/path/",
)

# Delete storage configuration
client.rest.delete_storage_config(config_id)

Column Domains

# Get domain configuration for a column
domain = client.rest.get_column_domain("catalog", "schema", "table", "status")

# Set a fixed domain (enum values)
client.rest.set_fixed_domain(
    "catalog", "schema", "table", "status",
    values=["pending", "active", "completed", "cancelled"]
)

# Add / remove values from a fixed domain
client.rest.add_fixed_value("catalog", "schema", "table", "status", "on_hold")
client.rest.remove_fixed_value("catalog", "schema", "table", "status", "cancelled")

# Set hierarchical domain (parent-child relationships)
client.rest.set_hierarchical_inline_domain(
    "catalog", "schema", "table", "subcategory",
    hierarchy={
        "Electronics": ["Phones", "Laptops", "Tablets"],
        "Clothing": ["Shirts", "Pants", "Shoes"],
    }
)

# Get allowed values (optionally filtered by parent)
allowed = client.rest.get_allowed_values(
    "catalog", "schema", "table", "subcategory",
    parent_value="Electronics"
)
print(allowed.values)  # ["Phones", "Laptops", "Tablets"]

# Delete domain
client.rest.delete_column_domain("catalog", "schema", "table", "status")

Table and Column Handles

For convenience, you can get typed handles for tables and columns:

# Get a table handle
table = client.table("eddytor", "cfg_xxx", "my_table")

print(table.count())
print(table.history())
print(table.metadata())

df = table.query_all(limit=100)

# Get a column handle
col = table.column("status")
col.set_fixed_domain(["Active", "Inactive"])
print(col.allowed_values())

Storage Handle

store = client.storage("cfg_xxx")
objects = store.list_objects(path="data/")
store.create_folder("data/new/")

MCP Integration

Eddytor supports the Model Context Protocol (MCP) for AI agent integration. To connect Claude Desktop, add the following to your Claude Desktop config (claude_desktop_config.json):

{
  "mcpServers": {
    "eddytor": {
      "url": "https://mcp.eddytor.com/sse",
      "headers": {
        "Authorization": "Bearer edd_live_xxx"
      }
    }
  }
}

This lets AI agents query, insert, merge, and delete data, create tables, manage schemas, run aggregations, and more.

Error Handling

from eddytor_sdk import EddytorClient, EddytorConnectionError, EddytorQueryError, EddytorError

try:
    client = EddytorClient(api_key="invalid_key")
except EddytorConnectionError as e:
    print(f"Connection failed: {e}")

try:
    df = client.query("SELECT * FROM nonexistent_table")
except EddytorQueryError as e:
    print(f"Query failed: {e}")

try:
    metadata = client.rest.get_table_metadata("invalid", "schema", "table")
except EddytorError as e:
    print(f"API error: {e}")

Other SDKs

| SDK | Package | Install |
| --- | --- | --- |
| Python | eddytor-sdk | pip install eddytor-sdk |
| Go | github.com/eddytor/eddytor-go-sdk | go get github.com/eddytor/eddytor-go-sdk |

API Reference

EddytorClient

EddytorClient(api_key: str)  # Your Eddytor API key (edd_live_xxx)

Flight SQL Methods

Method Returns Description
query(sql) pd.DataFrame Execute SQL, return pandas DataFrame
query_arrow(sql) pa.Table Execute SQL, return PyArrow Table
execute(sql) list[tuple] Execute SQL, return raw tuples
execute_dml(sql) int Execute DML, return affected row count
ingest(table, data, mode, catalog, schema) int Bulk ingest data
create_table(name, location, schema, ...) None Create a new table
add_column(table, columns, ...) None Add columns to a table
list_schemas() list[str] List all schemas
list_tables(schema=None) list[str] List tables (optionally in schema)
describe_table(table) pd.DataFrame Get table schema
table_exists(table) bool Check if table exists
count(table, where=None) int Count rows in table
table(catalog, schema, table) Table Get a Table handle
storage(config_id) Storage Get a Storage handle
interactive() None Start interactive SQL session
close() None Close connection

REST API Methods (via client.rest)

Table API

Method Description
get_table_metadata(catalog, schema, table) Get table metadata
get_table_history(catalog, schema, table) Get version history
rollback_table(catalog, schema, table, version) Roll back to a specific version
delete_table(catalog, schema, table) Delete table permanently
move_table(...) Move table to different storage
add_constraints(...) Add table constraints
drop_constraint(...) Drop a constraint
update_field_metadata(...) Update field metadata
infer_schema(file_path, has_header, delimiter) Infer schema from CSV
magic_dust(...) AI-powered analysis
explain_rows(...) AI row explanations with cell references

Storage API

Method Description
get_storage_configs() List storage configurations
register_s3_storage(...) Register S3 storage
register_azure_storage(...) Register Azure storage
delete_storage_config(config_id) Delete storage config
get_registered_tables(with_discovery) Get registered tables
list_objects(config_id, ...) List objects in storage
download_object(config_id, path) Download object
delete_object(config_id, path) Delete object
create_folder(config_id, path) Create folder
move_objects(...) Move objects between configs
upload_files(files, path) Upload files

Column Domain API

Method Description
get_column_domain(...) Get domain configuration
set_fixed_domain(...) Set fixed (enum) domain
set_hierarchical_inline_domain(...) Set hierarchical domain
set_hierarchical_derived_domain(...) Link domain to another column
set_hierarchical_table_domain(...) Domain from another table
delete_column_domain(...) Remove domain
add_fixed_value(...) Add value to fixed domain
set_fixed_values(...) Replace all fixed values
remove_fixed_value(...) Remove value from fixed domain
get_allowed_values(...) Get allowed values

Requirements

  • Python 3.10+
  • adbc-driver-flightsql >= 0.10.0
  • pyarrow >= 15.0.0
  • pandas >= 2.0.0
  • requests >= 2.28.0

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

eddytor_sdk-1.0.2.tar.gz (39.5 kB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

eddytor_sdk-1.0.2-py3-none-any.whl (38.9 kB view details)

Uploaded Python 3

File details

Details for the file eddytor_sdk-1.0.2.tar.gz.

File metadata

  • Download URL: eddytor_sdk-1.0.2.tar.gz
  • Upload date:
  • Size: 39.5 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.1.0 CPython/3.13.7

File hashes

Hashes for eddytor_sdk-1.0.2.tar.gz
Algorithm Hash digest
SHA256 1ea5d3e654e9c787bbe2d32086f6c1c547cff629740ace80330c41962b8c6060
MD5 f844f3057e5c9b8bb852fea9821d16ae
BLAKE2b-256 f9c77020271046eed8cbf91763b7b304912dfe7089149124284a0bc0195ea964

See more details on using hashes here.

File details

Details for the file eddytor_sdk-1.0.2-py3-none-any.whl.

File metadata

  • Download URL: eddytor_sdk-1.0.2-py3-none-any.whl
  • Upload date:
  • Size: 38.9 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/6.1.0 CPython/3.13.7

File hashes

Hashes for eddytor_sdk-1.0.2-py3-none-any.whl
Algorithm Hash digest
SHA256 bcd8c6587b772af3c5ec1c09772443feeab20507ec69fec6abc2160e06c2f6ff
MD5 f890084eb6a46aa62d44c3c7a2da36d3
BLAKE2b-256 444bea75886f563950fbd2f3bf07bf55947c143af779c4b4a892f7f925248952

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page