Deep AI modules developed by MOGO RTX team
Project description
rtx_deep
: Deep AI modules developed by the MOGO RTX team, aiming to accelerate distributed training, int8-aware distributed training, distributed evaluation and inference, model tracing and optimization, and TensorRT deployment.
1 Dependency
torch>=1.8.0
tensorrt>=7.0
graphviz
2 Installation
pip3 install graphviz
apt-get install graphviz
python3 setup.py install
3 Examples
3.1 Graph Tracing and Model Optimization
import torch
import torch.nn as nn
import torch.nn.functional as F
import rtx_deep
import rtx_deep_plugin
class conv3x3_bn_relu(nn.Module):
    """3x3 convolution followed by batch normalization and ReLU.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the convolution.
        dilation: Dilation of the convolution. It is also used as the
            padding, so at stride 1 the spatial size is unchanged.
        groups: Number of convolution groups.
    """

    def __init__(self, in_planes, out_planes, stride=1, dilation=1, groups=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(
                in_planes,
                out_planes,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                groups=groups,
                bias=False,
            ),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run ``x`` through conv -> batch norm -> ReLU and return the result."""
        return self.net(x)
class Model(nn.Module):
    """Example network: two 64-channel conv3x3_bn_relu blocks, then max_op."""

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            conv3x3_bn_relu(64, 64),
            conv3x3_bn_relu(64, 64),
        )

    def forward(self, x):
        """Apply both conv blocks, then ``rtx_deep_plugin.max_op`` over dim 1."""
        features = self.net(x)
        # NOTE(review): presumably a max reduction over the channel axis;
        # confirm the exact return value against rtx_deep_plugin.
        return rtx_deep_plugin.max_op(features, dim=1)
# Build the example network, switch it to eval mode and move it to the GPU.
model = Model().eval().cuda()
input_data = torch.randn(1, 64, 1024, 1024).cuda()

# Graph tracing
model_fx = rtx_deep.graph_tracer.ad_trace.graph_trace(
    model,
    function_name=None,
)

# Model optimization
# graph_optim_from_module conducts graph tracing automatically
model_fx_optim = rtx_deep.graph_tracer.graph_utils.graph_optim_from_module(
    model,
    function_name=None,
    sample_inputs=(input_data,),
)
3.2 Quantization-Aware Training
import torch
import torch.nn as nn
import torch.nn.functional as F
import rtx_deep
import rtx_deep_plugin
class conv3x3_bn_relu(nn.Module):
    """3x3 convolution followed by batch normalization and ReLU.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the convolution.
        dilation: Dilation of the convolution. It is also used as the
            padding, so at stride 1 the spatial size is unchanged.
        groups: Number of convolution groups.
    """

    def __init__(self, in_planes, out_planes, stride=1, dilation=1, groups=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(
                in_planes,
                out_planes,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                groups=groups,
                bias=False,
            ),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run ``x`` through conv -> batch norm -> ReLU and return the result."""
        return self.net(x)
class Model(nn.Module):
    """Example network: two 64-channel conv3x3_bn_relu blocks, then max_op."""

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            conv3x3_bn_relu(64, 64),
            conv3x3_bn_relu(64, 64),
        )

    def forward(self, x):
        """Apply both conv blocks, then ``rtx_deep_plugin.max_op`` over dim 1."""
        features = self.net(x)
        # NOTE(review): presumably a max reduction over the channel axis;
        # confirm the exact return value against rtx_deep_plugin.
        return rtx_deep_plugin.max_op(features, dim=1)
# Build the example network, switch it to eval mode and move it to the GPU.
model = Model().eval().cuda()
input_data = torch.randn(1, 64, 1024, 1024).cuda()

# Model optimization
# graph_optim_from_module conducts graph tracing automatically
model_fx_optim = rtx_deep.graph_tracer.graph_utils.graph_optim_from_module(
    model,
    function_name=None,
    sample_inputs=(input_data,),
)

# QAT: prepare the optimized model for quantization-aware training
model_qat = rtx_deep.quant_lib.quant_utils.prepare_qat(
    model_fx_optim,
    sample_inputs=[input_data],
    observe_config_dic={"averaging_constant": 0.05},
    quant_config_dic={
        "quant_min": -127,
        "quant_max": 127,
        "is_symmetric": True,
        "is_quant": True,
    },
    disable_prefix=[],
)

# QAT training (the training loop is left to the user)
...
3.3 TensorRT Deployment
import torch
import torch.nn as nn
import torch.nn.functional as F
import rtx_deep
import rtx_deep_plugin
from rtx_deep.deploy_lib.convert_trt import InputTensor, torch2trt
class conv3x3_bn_relu(nn.Module):
    """3x3 convolution followed by batch normalization and ReLU.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Stride of the convolution.
        dilation: Dilation of the convolution. It is also used as the
            padding, so at stride 1 the spatial size is unchanged.
        groups: Number of convolution groups.
    """

    def __init__(self, in_planes, out_planes, stride=1, dilation=1, groups=1):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(
                in_planes,
                out_planes,
                kernel_size=3,
                stride=stride,
                padding=dilation,
                dilation=dilation,
                groups=groups,
                bias=False,
            ),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run ``x`` through conv -> batch norm -> ReLU and return the result."""
        return self.net(x)
class Model(nn.Module):
    """Example network: two 64-channel conv3x3_bn_relu blocks, then max_op."""

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            conv3x3_bn_relu(64, 64),
            conv3x3_bn_relu(64, 64),
        )

    def forward(self, x):
        """Apply both conv blocks, then ``rtx_deep_plugin.max_op`` over dim 1."""
        features = self.net(x)
        # NOTE(review): presumably a max reduction over the channel axis;
        # confirm the exact return value against rtx_deep_plugin.
        return rtx_deep_plugin.max_op(features, dim=1)
# Build the example network, switch it to eval mode and move it to the GPU.
model = Model()
model.eval()
model.cuda()
input_data = torch.randn(1, 64, 1024, 1024).cuda()

# Model optimization
# graph_optim_from_module conducts graph tracing automatically
model_fx_optim = rtx_deep.graph_tracer.graph_utils.graph_optim_from_module(
    model,
    function_name=None,
    sample_inputs=(input_data,),
)

# TensorRT deployment
model_trt = torch2trt(
    # Pass the optimized module built above. The example previously passed
    # `model_fx`, a name this snippet never defines (NameError at run time).
    model=model_fx_optim,
    input_specs=[InputTensor(input_data, 'input_data')],
    # NOTE(review): two output names are given, so max_op presumably yields
    # a (value, index) pair; confirm against rtx_deep_plugin.
    output_names=['max_value', 'max_index'],
    fp16_mode=True,
    # dla_core=0,  # optional argument, left disabled in this example
    strict_type_constraints=True,
    explicit_precision=True,
)

# Visualize the TensorRT network and write it to test.png
rtx_deep.deploy_lib.tools.vis_trt.vis(model_trt.network, 'test.png')
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distributions
No source distribution files available for this release. See tutorial on generating distribution archives.
Built Distributions
rtx_deep-1.3.2-py311-none-any.whl
(116.8 kB
view hashes)
rtx_deep-1.3.2-py310-none-any.whl
(76.3 kB
view hashes)
rtx_deep-1.3.2-py39-none-any.whl
(75.6 kB
view hashes)
rtx_deep-1.3.2-py38-none-any.whl
(75.9 kB
view hashes)
rtx_deep-1.3.2-py37-none-any.whl
(75.9 kB
view hashes)
rtx_deep-1.3.2-py36-none-any.whl
(75.5 kB
view hashes)
Close
Hashes for rtx_deep-1.3.2-py311-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 2e0b69492e331485f7c348e4831446a1110c60ee2b15e10016e4791ab6053bee |
|
MD5 | 3fbd5ff07caf897b9aab3749243c5615 |
|
BLAKE2b-256 | 9ae2462fe97d967e910c9a6c030794d216fdf60ccad802a9306f466f061354c6 |
Close
Hashes for rtx_deep-1.3.2-py310-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 581970aa1712773cb4268522b6c4e808d325828c37b2105cc1c2b100cce7f4ff |
|
MD5 | cd34071de8e271d22d12300369837778 |
|
BLAKE2b-256 | 31e3f219ac345340b66599c71e083cb27cef1eca5f5db57e5dbe6bd5d6eb4e2b |
Close
Hashes for rtx_deep-1.3.2-py39-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 6ffd7bab20cc9152410935c008194f768203f4cc9d093d3a29715509e0a38689 |
|
MD5 | b09ae397770f8a05c9fbfe6091a17b70 |
|
BLAKE2b-256 | fc78b978603eaf28c92acf14b7b19be773a9eddd9dcf96915d8caf2956d1456d |
Close
Hashes for rtx_deep-1.3.2-py38-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 91c1a339aa5e49823161ce9d4292c44c647dc4a9ae709ce428e6928504320ed9 |
|
MD5 | 2a79a43873e4ba123b24fc1a7e5daccf |
|
BLAKE2b-256 | 1dc3d8fa718a7546f9d727cd5c895739f7c031fe8c5bdc35b1b519668bfb168f |
Close
Hashes for rtx_deep-1.3.2-py37-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 477ae62c5104fcd04fc19d97e11adea36181afc5dcc59b7ee2cd86824694f5c0 |
|
MD5 | 320d49682ec8b4b0f3a71bd5e4790db6 |
|
BLAKE2b-256 | beb1272a258f8a3dec3a279788b4a92180a08f32b53628e5ee7c6741abb6b1ab |
Close
Hashes for rtx_deep-1.3.2-py36-none-any.whl
Algorithm | Hash digest | |
---|---|---|
SHA256 | 68515a17fbe84c59c9c3da5bd9dc510af17dd7556e5829a07dc07ea5c4407605 |
|
MD5 | d07418aee34580d8aba578be58a1ca14 |
|
BLAKE2b-256 | 60cb6c6e5713c3ed9956ccd778569d428a84cb4214df8247dc3c125b1f440de5 |