Ventu
Serving deep learning models easily.
Install
```bash
pip install ventu
```
Features
- only need to implement the model's `preprocess`, `postprocess`, and `inference` (or `batch_inference`) methods; see the skeleton sketch after this list
- request & response data validation using pydantic
- API documentation generated with SpecTree
- backend service built on falcon
- dynamic batching with a batching service over a Unix domain socket
- errors in one request won't affect others in the same batch
- supports any inference runtime
- health check
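A minimal skeleton of that interface, as referenced above. This is a sketch based on the two demos below, with `Input`, `Output`, and `MyModel` as placeholder names:

```python
from pydantic import BaseModel
from ventu import Ventu


class Input(BaseModel):
    # placeholder request schema
    value: int


class Output(BaseModel):
    # placeholder response schema
    result: int


class MyModel(Ventu):
    def preprocess(self, data: Input):
        # validated request -> model input
        return data.value

    def inference(self, data):
        # replace with real model inference
        # (or implement ``batch_inference`` for the batching mode)
        return data * 2

    def postprocess(self, data):
        # model output -> dict matching the ``Output`` schema
        return {'result': data}


model = MyModel(Input, Output)
model.run_http(host='localhost', port=8000)
```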
Example
Single Service Demo
The source code can be found in single_service_demo.py.
```python
from ventu import Ventu
from typing import Tuple
from pydantic import BaseModel
import logging
import numpy
import onnxruntime


# define the input schema
class Input(BaseModel):
    text: Tuple[(str,) * 3]


# define the output schema
class Output(BaseModel):
    label: Tuple[(bool,) * 3]


class CustomModel(Ventu):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # load the model
        self.sess = onnxruntime.InferenceSession('./sigmoid.onnx')
        self.input_name = self.sess.get_inputs()[0].name
        self.output_name = self.sess.get_outputs()[0].name

    def preprocess(self, data: Input):
        # data format is defined in ``Input``
        words = [sent.split(' ')[:4] for sent in data.text]
        # padding
        words = [word + [''] * (4 - len(word)) for word in words]
        # build embedding
        emb = [[
            numpy.random.random(5) if w else [0] * 5
            for w in word]
            for word in words]
        return numpy.array(emb, dtype=numpy.float32)

    def inference(self, data):
        # model inference
        return self.sess.run([self.output_name], {self.input_name: data})[0]

    def postprocess(self, data):
        # generate the same format as defined in ``Output``
        return {'label': [bool(numpy.mean(d) > 0.5) for d in data]}


if __name__ == "__main__":
    logger = logging.getLogger()
    formatter = logging.Formatter(fmt='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)

    model = CustomModel(Input, Output)
    model.run_http(host='localhost', port=8000)

"""
# try with `httpie`
## health check
http :8000/health
## inference
http POST :8000/inference text:='["hello", "world", "test"]'
"""
```
Dynamic Batching Demo
Server
You need to run the batching server first. To use batching, override `batch_inference` instead of `inference`.
```python
import logging

from pydantic import BaseModel
from ventu import Ventu


# request schema
class Req(BaseModel):
    num: int


# response schema
class Resp(BaseModel):
    square: int


class ModelInference(Ventu):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def preprocess(self, data: Req):
        # validated request -> raw number
        return data.num

    def batch_inference(self, data):
        # receives a batch of preprocessed items
        return [num ** 2 for num in data]

    def postprocess(self, data):
        # one item of the batch result -> response schema
        return {'square': data}


if __name__ == "__main__":
    logger = logging.getLogger()
    formatter = logging.Formatter(
        fmt='%(asctime)s - %(levelname)s - %(module)s - %(message)s')
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)

    # serve over a Unix domain socket for the batching service
    model = ModelInference(Req, Resp, use_msgpack=True)
    model.run_socket('batching.socket')
```
Client
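The client talks to the batching service's HTTP endpoint (assumed here to listen on port 8080) rather than to the Unix domain socket directly; the batching service collects concurrent requests into batches and forwards them to the workers over `batching.socket`. Payloads are msgpack-encoded because the server was started with `use_msgpack=True`.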
```python
from concurrent import futures

import httpx
import msgpack

URL = 'http://localhost:8080'
packer = msgpack.Packer(
    autoreset=True,
    use_bin_type=True,
)


def request(text):
    return httpx.post(URL, data=packer.pack({'num': text}))


if __name__ == "__main__":
    with futures.ThreadPoolExecutor() as executor:
        text = (0, 'test', -1, 233)
        results = executor.map(request, text)
        for i, resp in enumerate(results):
            print(
                f'>> {text[i]} -> [{resp.status_code}]\n'
                f'{msgpack.unpackb(resp.content, raw=False)}'
            )
```
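Note that one of the four payloads, `'test'`, deliberately violates the `Req` schema (`num` must be an `int`). That request alone should come back with a validation error, while `0`, `-1`, and `233` still return their squares, demonstrating that errors in one request won't affect others in the same batch.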
Run with Gunicorn as a single service
```bash
gunicorn -w 2 ventu.app
```