TensorFlow Model Server
Introduction
tfserver is an example of serving a TensorFlow model with the Skitai App Engine.
It can be accessed via gRPC and a JSON RESTful API.
This project is inspired by issue #176.
As of version 0.3, it is TensorFlow 2+ compatible.
Saving a TensorFlow Model
# MUST be imported at the top of the code to disable eager execution
from tfserver import saved_model
import tensorflow as tf
import numpy as np
x = tf.keras.layers.Input (3)
h = tf.keras.layers.Dense (10, activation='relu') (x)
y = tf.keras.layers.Dense (2, activation='softmax') (h)
model = tf.keras.Model (x, y)
model.compile (
    optimizer = tf.keras.optimizers.Adam (0.001),
    loss = 'categorical_crossentropy',
    metrics = ['accuracy']
)
model.summary()
train_data = np.array ([
    (0.1, 0.2, 0.6),
    (0.3, 0.6, 0.7),
    (0.2, 0.9, 0.3),
    (0.3, 0.9, 0.1),
])
labels = np.array ([
    (1.0, 0),
    (0, 1.0),
    (1.0, 0),
    (0, 1.0),
])
model.fit(train_data, labels, epochs=3, batch_size=32)
model.evaluate(train_data, labels)
model.predict(train_data)
inputs, outputs = saved_model.save ('exported', {'x': model.inputs[0]}, {'y': model.outputs[0]}, 'predict')
print ("* Saved Model")
print (" - Inputs")
for k, v in inputs.items (): print (" . {}: {}".format (k, v.name))
print (" - Outputs")
for k, v in outputs.items (): print (" . {}: {}".format (k, v.name))
Running the Server
You just set up the model path and TensorFlow configuration, and then you have gRPC and JSON API services.
An example api.py:
import tfserver
import skitai
pref = skitai.pref ()
pref.max_client_body_size = 100 * 1024 * 1024 # 100 MB
# we want to serve 2 models:
# alias and (model_dir, optional session config)
tfserver.add_model ("model1", "exported/model1/200", gpu_usage = 0.1)
tfserver.add_model ("model2", "exported/model2/100", gpu_usage = 0.2)
# to activate gRPC, tfserver must be mounted at '/'
skitai.mount ("/", tfserver, pref = pref)
skitai.run (port = 5000)
And run,
python3 api.py
Adding Custom APIs
You can create your own APIs.
For example,
# services/apis.py
import tfserver
import numpy as np
def predict (alias, signature_name, **inputs):
    result = tfserver.run (alias, signature_name, **inputs)
    pred = np.argmax (result ["y"][0])
    return dict (
        confidence = float (result ["y"][0][pred]),
        code = tfserver.tfsess [alias].labels [0].item (pred)
    )
def __mount__ (app):
    import os
    from dnn import tf
    from .helpers.unspsc import datautil

    def initialize_models (app):
        for alias, (model_dir, gpu_usage) in tfserver.added_models.items ():
            if alias == "f22":
                datautil.load_features (os.path.join (model_dir, 'features.pkl'))
    initialize_models (app)

    @app.route ("/", methods = ["GET"])
    def models (was):
        return was.API (models = list (tfserver.tfsess.keys ()))

    @app.route ("/unspsc", methods = ["POST"])
    def unspsc (was, text, signature_name = "predict"):
        x, seq_length = datautil.encode (text)
        result = predict ("unspsc", signature_name, x = [x], seq_length = [seq_length])
        return was.API (result = result)
Then mount these services and run.
# serve.py
import skitai
import tfserver
from services import apis

with skitai.preference () as pref:
    pref.mount ("/apis", apis)
    skitai.mount ("/", tfserver, pref = pref)
skitai.run (port = 5000, name = "tfapi")
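Once the server is up, the custom endpoint is reachable under the /apis prefix from pref.mount above. A minimal client sketch, assuming the server runs on port 5000 and the "unspsc" model is loaded; the sample text is made up:
import requests
# 'text' is mapped to the unspsc() handler's argument of the same name
resp = requests.post (
    "http://localhost:5000/apis/unspsc",
    json = {"text": "stainless steel bolt"}
)
print (resp.json ())  # expected shape: {"result": {"confidence": ..., "code": ...}}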
Request Examples
gRPC Client
Using the grpcio library,
from tfserver import cli
from tensorflow.python.framework import tensor_util
import numpy as np
stub = cli.Server ("http://localhost:5000")
problem = np.array ([1.0, 2.0])
resp = stub.predict (
    'model1',  # alias for model
    'predict', # signature_def_name
    x = tensor_util.make_tensor_proto (problem.astype ('float32'), shape = problem.shape)
)
# then get 'y'
resp.y
>> np.ndarray ([-1.5, 1.6])
Using aquests for async requests,
import aquests
from tfserver import cli
from tensorflow.python.framework import tensor_util
import numpy as np
def print_result (resp):
    print (cli.Response (resp.data).y)
    # >> np.ndarray ([-1.5, 1.6])
stub = aquests.grpc ("http://localhost:5000/tensorflow.serving.PredictionService", callback = print_result)
problem = np.array ([1.0, 2.0])
request = cli.build_request (
    'model1',
    'predict',
    x = problem
)
stub.Predict (request, 10.0)
aquests.fetchall ()
RESTful API
Using requests,
import json
import requests
import numpy as np
problem = np.array ([1.0, 2.0])
api = requests.session ()
resp = api.post (
    "http://localhost:5000/predict",
    json.dumps ({"x": problem.astype ("float32").tolist ()}),
    headers = {"Content-Type": "application/json"}
)
data = json.loads (resp.text)
data ["y"]
>> [-1.5, 1.6]
Alternatively, using siesta from aquests,
from aquests.lib import siesta
import numpy as np
problem = np.array ([1.0, 2.0])
api = siesta.API ("http://localhost:5000")
resp = api.predict.post ({"x": problem.astype ("float32").tolist()})
resp.data.y
>> [-1.5, 1.6]
Performance Note: Comparing Protocol Buffers and JSON
Test Environment
Input:
dtype: Float 32
shape: various, from (50, 1025) to (300, 1025), approx. average (100, 1025)
Output:
dtype: Float 32
shape: (60,)
Request Threads: 16
Requests Per Thread: 100
Total Requests: 1,600
Results
Average of 3 runs,
gRPC with Protocol Buffers (using grpcio): 11.58 seconds
RESTful API with JSON (using requests): 216.66 seconds
Protocol Buffers is about 19 times faster than JSON here (216.66 s vs. 11.58 s).
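Most of that gap is serialization cost: a float32 tensor travels as a compact binary blob in a tensor proto, but as long decimal strings in JSON. A back-of-the-envelope size comparison for the average input above (a sketch, not part of the benchmark):
import json
import numpy as np
from tensorflow.python.framework import tensor_util
# average input from the test environment: float32, shape (100, 1025)
x = np.random.rand (100, 1025).astype ('float32')
proto_size = len (tensor_util.make_tensor_proto (x).SerializeToString ())
json_size = len (json.dumps (x.tolist ()))
print (proto_size)  # about 410 KB: 100 * 1025 * 4 bytes plus small header overhead
print (json_size)   # several times larger: each float becomes a ~20-character string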
Release History
0.3 (2020. 6. 28): reactivate project; now compatible with TF2+
0.2 (2020. 6. 26): integrated with dnn 0.3
0.1b8 (2018. 4. 13): fix gRPC trailers; skitai upgrade is required
0.1b6 (2018. 3. 19): found to work only with grpcio 1.4.0
0.1b3 (2018. 2. 4): add @app.umounted decorator for clearing resources
0.1b2: remove self.tfsess.run (tf.global_variables_initializer())
0.1b1 (2018. 1. 28): Beta release
0.1a (2018. 1. 4): Alpha release