
Scale EGP Python Client

The official Python client for Scale's Enterprise Generative AI Platform.

Generative AI applications are proliferating in the modern enterprise, but building them can be challenging and expensive, especially when they must meet enterprise security and scalability standards. Scale EGP APIs provide the full-stack capabilities enterprises need to rapidly develop and deploy Generative AI applications for custom use cases: loading custom data sources, indexing data into vector stores, running inference, executing agents, and robustly evaluating application quality.

Install from PyPI:

pip install scale-egp
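As a quick sanity check after installing, you can construct the client directly. A minimal sketch, assuming credentials are supplied through the environment as in the quickstart below (the exact variable names the client reads are an assumption; see the SDK reference):

from egp.sdk.client import EGPClient

# Assumes API credentials are present in the environment, e.g. loaded from a
# .env file with python-dotenv as in the quickstart below.
client = EGPClient()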

Evaluation API

Quickstart
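The script below creates (or reuses) an evaluation dataset, an application spec, and a Studio annotation project, then configures an evaluation with categorical questions and submits one test case result per dataset entry for annotation.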

import hashlib
import json
import os
from datetime import datetime
from typing import List, Union

import dotenv
import questionary as q

from egp.sdk.client import EGPClient
from egp.sdk.enums import TestCaseSchemaType, EvaluationType
from egp.sdk.models import (
    CategoricalChoice,
    CategoricalQuestion,
    EvaluationConfig,
    TestCaseResultRequest,
)
from egp.utils.model_utils import BaseModel

ENV_FILE = "../../.env.local"
dotenv.load_dotenv(ENV_FILE, override=True)

DATASET_ID = None
APP_SPEC_ID = None
STUDIO_PROJECT_ID = None


def timestamp():
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def dump_model(model: Union[BaseModel, List[BaseModel]]):
    if isinstance(model, list):
        return json.dumps([m.dict() for m in model], indent=2, sort_keys=True, default=str)
    return json.dumps(model.dict(), indent=2, sort_keys=True, default=str)


# Not part of our SDK. This is a scratch example of what a user might write as an application.
class MyGenerativeAIApplication:

    name = "AI@MS"
    description = "AI Chatbot to help Wealth Management Advisors"
    embedding_model = "openai/text-embedding-ada-002"
    llm_model = "gpt-3.5-turbo-0613"

    @staticmethod
    def generate(input: str):
        """
        This can be an arbitrarily complex AI application and can return any
        type of output. In general, your application should return the output
        string of the generated response plus a JSON-serializable dict with any
        extra information you want annotators to see about what the application
        used to generate the output.
        """
        output = f"Output for: {input}"
        extra_info = {
            "info": "This is a string",
            "schema": "string",  # Validate that this schema is something we support.
        }
        return output, extra_info

    def tags(self):
        return {
            "embedding_model": self.embedding_model,
            "llm_model": self.llm_model,
        }

    @property
    def version(self):
        """
        Returns a hash of the application configuration that is stable across
        processes.
        """
        # Hash the tag dict deterministically so the result is stable across
        # processes and Python versions.
        return hashlib.sha256(
            json.dumps(self.tags(), sort_keys=True).encode()
        ).hexdigest()


if __name__ == "__main__":
    gen_ai_app = MyGenerativeAIApplication()
    client = EGPClient()
    current_timestamp = timestamp()

    # Create a new dataset or use an existing one.
    evaluation_dataset_name = f"AI@MS Regression Test Dataset {current_timestamp}"
    if DATASET_ID:
        evaluation_dataset_id = DATASET_ID
    else:
        evaluation_dataset_id = q.text(
            f"ID of existing dataset (Leave blank to create a new one with name "
            f"'{evaluation_dataset_name}'):"
        ).ask()
    if evaluation_dataset_id:
        evaluation_dataset = client.evaluation_datasets().get(id=evaluation_dataset_id)
    else:
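        # Each line of data/golden_dataset.jsonl is one test case. For the
        # GENERATION schema it must include at least an "input" field, which
        # is read later as test_case.test_case_data["input"].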
        evaluation_dataset = client.evaluation_datasets().create_from_file(
            name=evaluation_dataset_name,
            schema_type=TestCaseSchemaType.GENERATION,
            filepath=os.path.join(os.path.dirname(__file__), "data/golden_dataset.jsonl"),
        )
        print(
            f"Created evaluation dataset:\n{dump_model(evaluation_dataset)}"
        )

    # Create a new application spec or use an existing one.
    if APP_SPEC_ID:
        application_spec_id = APP_SPEC_ID
    else:
        application_spec_id = q.text(
            f"ID of existing application spec (Leave blank to create a new one with name "
            f"'{gen_ai_app.name}'):"
        ).ask()
    if application_spec_id:
        application_spec = client.application_specs().get(id=application_spec_id)
    else:
        application_spec = client.application_specs().create(
            name=gen_ai_app.name,
            # TODO: Make application names globally unique or at least unique per account
            description=gen_ai_app.description
        )
        print(f"Created application spec:\n{dump_model(application_spec)}")

    # Create a new studio project or use an existing one.
    studio_project_name = f"{current_timestamp}"
    if STUDIO_PROJECT_ID:
        studio_project_id = STUDIO_PROJECT_ID
    else:
        studio_project_id = q.text(
            f"ID of existing studio project (Leave blank to create a new one with name "
            f"'{studio_project_name}'):"
        ).ask()
    if studio_project_id:
        studio_project = client.studio_projects().get(id=studio_project_id)
    else:
        studio_project = client.studio_projects().create(
            name=studio_project_name,
            description="Annotation project for the AI@MS project",
            studio_api_key=os.environ.get("STUDIO_API_KEY"),
        )
        studio_project_id = studio_project.id
        print(f"Created studio project:\n{dump_model(studio_project)}")

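    # Create the evaluation. With EvaluationType.STUDIO, each submitted test
    # case result is sent to human annotators in the Studio project above,
    # who answer the questions configured here.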
    evaluation = client.evaluations().create(
        application_spec_id=application_spec_id,
        name=f"AI@MS Regression Test - {current_timestamp}",
        description="Evaluation of the AI@MS project against the AI@MS regression test dataset",
        tags=gen_ai_app.tags(),
        evaluation_config=EvaluationConfig(
            evaluation_type=EvaluationType.STUDIO,
            studio_project_id=studio_project.id,
            questions=[
                # For categorical questions, the value is used as a score for
                # the answer. Higher values are better; the score is used to
                # track whether the AI is improving over time. The value can be
                # set to None if the choice should not count toward the score
                # (e.g. "Not Applicable").
                CategoricalQuestion(
                    question_id="based_on_content",
                    title="Was the answer based on the content provided?",
                    choices=[
                        CategoricalChoice(label="No", value=0),
                        CategoricalChoice(label="Yes", value=1),
                    ],
                ),
                CategoricalQuestion(
                    question_id="accurate",
                    title="Was the answer accurate?",
                    choices=[
                        CategoricalChoice(label="No", value=0),
                        CategoricalChoice(label="Yes", value=1),
                    ],
                ),
                CategoricalQuestion(
                    question_id="complete",
                    title="Was the answer complete?",
                    choices=[
                        CategoricalChoice(label="No", value=0),
                        CategoricalChoice(label="Yes", value=1),
                    ],
                ),
                CategoricalQuestion(
                    question_id="recent",
                    title="Was the information recent?",
                    choices=[
                        CategoricalChoice(label="Not Applicable", value=None),
                        CategoricalChoice(label="No", value=0),
                        CategoricalChoice(label="Yes", value=1),
                    ],
                ),
                CategoricalQuestion(
                    question_id="core_issue",
                    title="What was the core issue?",
                    choices=[
                        CategoricalChoice(label="No Issue", value=None),
                        CategoricalChoice(label="User Behavior Issue", value=None),
                        CategoricalChoice(label="Unable to Provide Response", value=None),
                        CategoricalChoice(label="Incomplete Answer", value=None),
                    ],
                ),
            ]
        ).dict(),
    )
    print(f"Created evaluation:\n{dump_model(evaluation)}")

    # Execute test cases
    # TODO: dataset.test_cases.iter() returns *all* test cases right now
    # not just the test_cases for the current dataset
    print(f"Submitting test case results for evaluation dataset:\n{evaluation_dataset.name}")
    test_case_results_batch = []
    for test_case in client.evaluation_datasets().test_cases().iter(
        evaluation_dataset_id=evaluation_dataset.id
    ):
        if test_case.evaluation_dataset_id == evaluation_dataset.id:
            output, extra_info = gen_ai_app.generate(input=test_case.test_case_data['input'])
            test_case_results_batch.append(
                TestCaseResultRequest(
                    application_spec_id=application_spec.id,
                    evaluation_dataset_id=evaluation_dataset.id,
                    test_case_version_id=test_case.version_id,
                    test_case_id=test_case.id,
                    test_case_evaluation_data_schema=TestCaseSchemaType.GENERATION,
                    test_case_evaluation_data=dict(
                        output=output,
                        output_extra_info=extra_info
                    ),
                )
            )
    test_case_results = client.evaluations().test_case_results().create_batch(
        evaluation_id=evaluation.id,
        test_case_results=test_case_results_batch,
    )
    print(f"Created {len(test_case_results)} test case results:\n{dump_model(test_case_results)}")

