Host your deep learning models easily.
Project description
Ventu
Serving the deep learning models easily.
Install
pip install ventu
Features
- Only need to implement the Model (preprocess, postprocess, inference or batch_inference)
- request & response data validation using pydantic
- API document using SpecTree (when running with run_http)
- backend service using falcon supports both JSON and msgpack
- dynamic batching with the batching service using Unix Domain Socket
- errors in one request won't affect others in the same batch
- support all the runtime
- health check
- inference warm-up
How to use
- define your request data schema and response data schema with pydantic
- add examples to schema.Config.schema_extra['examples'] for warm-up and health check (optional)
- inherit ventu.Ventu, implement the preprocess and postprocess methods
- for a standalone HTTP service, implement the inference method and run with run_http
- for a worker behind the dynamic batching service, implement the batch_inference method and run with run_socket
check the document for API details
Example
Dynamic Batching Demo
Server
Need to run the batching server first.
The demo code can be found in batching demo.
import logging
from pydantic import BaseModel
from ventu import Ventu
# request schema
class Req(BaseModel):
num: int
# request examples, used for health check and inference warm-up
class Config:
schema_extra = {
'examples': [
{'num': 23},
{'num': 0},
]
}
# response schema
class Resp(BaseModel):
square: int
# response examples, should be the true results for request examples
class Config:
schema_extra = {
'examples': [
{'square': 23 * 23},
{'square': 0},
]
}
class ModelInference(Ventu):
def __init__(self, *args, **kwargs):
# init parent class
super().__init__(*args, **kwargs)
def preprocess(self, data: Req):
return data.num
def batch_inference(self, data):
return [num ** 2 for num in data]
def postprocess(self, data):
return {'square': data}
if __name__ == "__main__":
logger = logging.getLogger()
formatter = logging.Formatter(
fmt='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
model = ModelInference(Req, Resp, use_msgpack=True)
model.run_socket('batching.socket')
Client
from concurrent import futures
import httpx
import msgpack
URL = 'http://localhost:8080'
packer = msgpack.Packer(
autoreset=True,
use_bin_type=True,
)
def request(text):
return httpx.post(URL, data=packer.pack({'num': text}))
if __name__ == "__main__":
with futures.ThreadPoolExecutor() as executor:
text = (0, 'test', -1, 233)
results = executor.map(request, text)
for i, resp in enumerate(results):
print(
f'>> {text[i]} -> [{resp.status_code}]\n'
f'{msgpack.unpackb(resp.content, raw=False)}'
)
Single Service Demo
source code can be found in single_service_demo.py
import logging
import pathlib
from typing import Tuple
import numpy
import onnxruntime
from pydantic import BaseModel
from ventu import Ventu
# define the input schema
class Input(BaseModel):
text: Tuple[(str,) * 3]
# provide an example for health check and inference warm-up
class Config:
schema_extra = {
'examples': [
{'text': ('hello', 'world', 'test')},
]
}
# define the output schema
class Output(BaseModel):
label: Tuple[(bool,) * 3]
class CustomModel(Ventu):
def __init__(self, model_path, *args, **kwargs):
super().__init__(*args, **kwargs)
# load model
self.sess = onnxruntime.InferenceSession(model_path)
self.input_name = self.sess.get_inputs()[0].name
self.output_name = self.sess.get_outputs()[0].name
def preprocess(self, data: Input):
# data format is defined in ``Input``
words = [sent.split(' ')[:4] for sent in data.text]
# padding
words = [word + [''] * (4 - len(word)) for word in words]
# build embedding
emb = [[
numpy.random.random(5) if w else [0] * 5
for w in word]
for word in words]
return numpy.array(emb, dtype=numpy.float32)
def inference(self, data):
# model inference
return self.sess.run([self.output_name], {self.input_name: data})[0]
def postprocess(self, data):
# generate the same format as defined in ``Output``
return {'label': [bool(numpy.mean(d) > 0.5) for d in data]}
if __name__ == "__main__":
logger = logging.getLogger()
formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
handler = logging.StreamHandler()
handler.setFormatter(formatter)
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)
model_path = pathlib.Path(__file__).absolute().parent / 'sigmoid.onnx'
model = CustomModel(str(model_path), Input, Output)
model.run_http(host='localhost', port=8000)
Try it with httpie:
# health check
http :8000/health
# inference
http POST :8000/inference text:='["hello", "world", "test"]'
Open localhost:8000/apidoc/redoc
in your browser to see the API document.
Run with Gunicorn
gunicorn -w 2 model.app
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
ventu-0.4.0.tar.gz
(8.9 kB
view details)
Built Distribution
ventu-0.4.0-py3-none-any.whl
(9.1 kB
view details)
File details
Details for the file ventu-0.4.0.tar.gz
.
File metadata
- Download URL: ventu-0.4.0.tar.gz
- Upload date:
- Size: 8.9 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/3.1.1 pkginfo/1.5.0.1 requests/2.22.0 setuptools/41.4.0 requests-toolbelt/0.9.1 tqdm/4.36.1 CPython/3.7.4
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 52b19bfe26f3d7325dd9f1ba172183d4b4a66be67d5a01a01e0f69cb24dcd78d |
|
MD5 | 10eead06c53b9eeb9ab17608c150985c |
|
BLAKE2b-256 | 1ade7a670f55d0e4e0f86b21d1b6c7344a08273834fc3d00da0453c2403eee08 |
File details
Details for the file ventu-0.4.0-py3-none-any.whl
.
File metadata
- Download URL: ventu-0.4.0-py3-none-any.whl
- Upload date:
- Size: 9.1 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/3.1.1 pkginfo/1.5.0.1 requests/2.22.0 setuptools/41.4.0 requests-toolbelt/0.9.1 tqdm/4.36.1 CPython/3.7.4
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 8e84fcf7c544e01ddf7f0c78ca7adbd9ab211d40e34ca5de82e4b8a7fa7e9c0c |
|
MD5 | 55a8868ad1d92e924a5c408062656d10 |
|
BLAKE2b-256 | 15bc9953163573d1ae144e825ddc48313c2e2d04acfd4d39505ac321f2753981 |