Catalyst. PyTorch framework for DL research and development.
Project description
PyTorch framework for Deep Learning research and development.
It was developed with a focus on reproducibility,
fast experimentation and code/ideas reusing.
It lets you research/develop something new,
rather than write another regular train loop.
Break the cycle - use the Catalyst!
Project manifest. Part of PyTorch Ecosystem. Part of Catalyst Ecosystem:
- Alchemy - Experiments logging & visualization
- Catalyst - Accelerated Deep Learning Research and Development
- Reaction - Convenient Deep Learning models serving
Getting started
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst.dl import SupervisedRunner

# experiment setup
logdir = "./logdir"
num_epochs = 8

# data: random features and random regression targets
num_samples, num_features = int(1e4), int(1e1)
X = torch.rand(num_samples, num_features)
# Targets are shaped (num_samples, 1) so that they match the (batch, 1) output
# of Linear(num_features, 1). A 1-D target of shape (batch,) would be broadcast
# against the prediction inside MSELoss and yield a (batch, batch) error matrix.
y = torch.rand(num_samples, 1)
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
# the same loader object is registered under both the "train" and "valid" keys
loaders = {"train": loader, "valid": loader}

# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
# learning-rate milestones at epochs 3 and 6
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])

# model training
runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True,
)
Minimal Examples
ML - Linear Regression is my profession
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst.dl import SupervisedRunner

# experiment setup
logdir = "./logdir"
num_epochs = 8

# data: random features and random regression targets
num_samples, num_features = int(1e4), int(1e1)
X = torch.rand(num_samples, num_features)
# Targets are shaped (num_samples, 1) so that they match the (batch, 1) output
# of Linear(num_features, 1). A 1-D target of shape (batch,) would be broadcast
# against the prediction inside MSELoss and yield a (batch, batch) error matrix.
y = torch.rand(num_samples, 1)
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
# the same loader object is registered under both the "train" and "valid" keys
loaders = {"train": loader, "valid": loader}

# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
# learning-rate milestones at epochs 3 and 6
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])

# model training
runner = SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True,
)
CV - MNIST one more time
import os
import torch
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
from catalyst import dl

# a single linear layer mapping flattened 28x28 images to 10 class scores
model = torch.nn.Linear(28 * 28, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)

# NOTE(review): both loaders are built with train=False, i.e. they read the same
# MNIST split — presumably to keep the demo small; confirm this is intended.
loaders = {
    "train": DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()),
        batch_size=32,
    ),
    "valid": DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()),
        batch_size=32,
    ),
}


class CustomRunner(dl.Runner):
    """Runner with a hand-written batch step for a flat MNIST classifier."""

    def _handle_batch(self, batch):
        """Run the model on one batch, record the loss and, when training, step the optimizer.

        Args:
            batch: an ``(images, labels)`` pair as yielded by the loaders.
        """
        x, y = batch
        # flatten every image to a vector before feeding the linear model
        y_hat = self.model(x.view(x.size(0), -1))

        loss = F.cross_entropy(y_hat, y)
        self.state.batch_metrics["loss"] = loss

        # parameters are updated only while iterating the train loader
        if self.state.is_train_loader:
            loss.backward()
            self.state.optimizer.step()
            self.state.optimizer.zero_grad()


runner = CustomRunner()
runner.train(
    model=model,
    optimizer=optimizer,
    loaders=loaders,
    verbose=True,
)
CV - MNIST classification with AutoEncoder
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
from catalyst import dl


class ClassifyAE(nn.Module):
    """Autoencoder with an additional classification head on the latent code."""

    def __init__(self, in_features, hid_features, out_features):
        """Build the encoder, decoder and classifier layers.

        Args:
            in_features: size of the flattened input (and of the reconstruction).
            hid_features: size of the latent representation.
            out_features: number of classifier outputs.
        """
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(in_features, hid_features), nn.Tanh())
        self.decoder = nn.Linear(hid_features, in_features)
        self.clf = nn.Linear(hid_features, out_features)

    def forward(self, x):
        """Return ``(class_scores, reconstruction)`` for the input ``x``."""
        z = self.encoder(x)
        y_hat = self.clf(z)
        x_ = self.decoder(z)
        return y_hat, x_


model = ClassifyAE(28 * 28, 128, 10)
optimizer = torch.optim.Adam(model.parameters(), lr=0.02)

# NOTE(review): both loaders are built with train=False, i.e. they read the same
# MNIST split — presumably to keep the demo small; confirm this is intended.
loaders = {
    "train": DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()),
        batch_size=32,
    ),
    "valid": DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()),
        batch_size=32,
    ),
}


class CustomRunner(dl.Runner):
    """Runner that trains ``ClassifyAE`` on a joint classification + reconstruction loss."""

    def _handle_batch(self, batch):
        """Compute both losses for one batch, record them and, when training, step the optimizer.

        Args:
            batch: an ``(images, labels)`` pair as yielded by the loaders.
        """
        x, y = batch
        # flatten images so they can be fed to (and compared with) the linear layers
        x = x.view(x.size(0), -1)
        y_hat, x_ = self.model(x)

        loss_clf = F.cross_entropy(y_hat, y)
        loss_ae = F.mse_loss(x_, x)
        loss = loss_clf + loss_ae

        self.state.batch_metrics = {
            "loss_clf": loss_clf,
            "loss_ae": loss_ae,
            "loss": loss
        }

        # parameters are updated only while iterating the train loader
        if self.state.is_train_loader:
            loss.backward()
            self.state.optimizer.step()
            self.state.optimizer.zero_grad()


runner = CustomRunner()
runner.train(
    model=model,
    optimizer=optimizer,
    loaders=loaders,
    verbose=True,
)
GAN - MNIST, flatten version
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms
from catalyst import dl

# generator maps a 128-dim noise vector to a flattened 28x28 image;
# discriminator maps a flattened image to a single probability
generator = nn.Sequential(nn.Linear(128, 28 * 28), nn.Tanh())
discriminator = nn.Sequential(nn.Linear(28 * 28, 1), nn.Sigmoid())
model = nn.ModuleDict({"generator": generator, "discriminator": discriminator})

# one optimizer per sub-network, addressed by the same keys as the models
generator_optimizer = torch.optim.Adam(
    generator.parameters(), lr=0.0001, betas=(0.5, 0.999))
discriminator_optimizer = torch.optim.Adam(
    discriminator.parameters(), lr=0.0001, betas=(0.5, 0.999))
optimizer = {
    "generator": generator_optimizer,
    "discriminator": discriminator_optimizer,
}

# NOTE(review): both loaders are built with train=False, i.e. they read the same
# MNIST split — presumably to keep the demo small; confirm this is intended.
loaders = {
    "train": DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()),
        batch_size=32,
    ),
    "valid": DataLoader(
        MNIST(os.getcwd(), train=False, download=True, transform=transforms.ToTensor()),
        batch_size=32,
    ),
}


class CustomRunner(dl.Runner):
    """Runner that computes generator and discriminator losses for every batch."""

    def _handle_batch(self, batch):
        """Compute ``loss_generator`` and ``loss_discriminator`` and store them in batch metrics.

        No optimizer is stepped here; the losses are only written to
        ``state.batch_metrics`` under the keys the optimizer callbacks refer to.

        Args:
            batch: an ``(images, labels)`` pair; the labels are ignored.
        """
        state = self.state
        images, _ = batch
        images = images.view(images.size(0), -1)
        bs = images.shape[0]

        # sample noise and generate one fake image per real image in the batch
        z = torch.randn(bs, 128).to(self.device)
        generated_images = self.model["generator"](z)

        # generator step
        ## predictions & labels
        # the generator is scored against the "real" label (ones)
        generated_labels = torch.ones(bs, 1).to(self.device)
        generated_pred = self.model["discriminator"](generated_images)

        ## loss
        loss_generator = F.binary_cross_entropy(generated_pred, generated_labels)
        state.batch_metrics["loss_generator"] = loss_generator

        # discriminator step
        ## real
        images_labels = torch.ones(bs, 1).to(self.device)
        images_pred = self.model["discriminator"](images)
        real_loss = F.binary_cross_entropy(images_pred, images_labels)

        ## fake
        generated_labels_ = torch.zeros(bs, 1).to(self.device)
        # detach so the discriminator loss does not backpropagate into the generator
        generated_pred_ = self.model["discriminator"](generated_images.detach())
        fake_loss = F.binary_cross_entropy(generated_pred_, generated_labels_)

        ## loss
        loss_discriminator = (real_loss + fake_loss) / 2.0
        state.batch_metrics["loss_discriminator"] = loss_discriminator


runner = CustomRunner()
runner.train(
    model=model,
    optimizer=optimizer,
    loaders=loaders,
    callbacks=[
        dl.OptimizerCallback(
            optimizer_key="generator",
            loss_key="loss_generator"
        ),
        dl.OptimizerCallback(
            optimizer_key="discriminator",
            loss_key="loss_discriminator"
        ),
    ],
    main_metric="loss_generator",
    num_epochs=5,
    logdir="./logs/gan",
    verbose=True,
)
Demo with minimal examples for ML, CV, NLP, GANs and RecSys
For Catalyst.RL introduction, please follow Catalyst.RL repo.
Table of Contents
Overview
Catalyst helps you write compact but full-featured Deep Learning pipelines in a few lines of code. You get a training loop with metrics, early-stopping, model checkpointing and other features without the boilerplate.
Installation
Common installation:
pip install -U catalyst
Specific versions with additional requirements
pip install catalyst[ml] # installs DL+ML based catalyst
pip install catalyst[cv] # installs DL+CV based catalyst
pip install catalyst[nlp] # installs DL+NLP based catalyst
pip install catalyst[ecosystem] # installs Catalyst.Ecosystem
pip install catalyst[contrib] # installs DL+contrib based catalyst
pip install catalyst[all] # installs everything
# and master version installation
pip install git+https://github.com/catalyst-team/catalyst@master --upgrade
Catalyst is compatible with: Python 3.6+. PyTorch 1.0.0+.
Features
- Universal train/inference loop.
- Configuration files for model/data hyperparameters.
- Reproducibility – all source code and environment variables will be saved.
- Callbacks – reusable train/inference pipeline parts with easy customization.
- Training stages support.
- Deep Learning best practices - SWA, AdamW, Ranger optimizer, OneCycle, and more.
- Development best practices - fp16 support, distributed training, slurm.
Structure
- contrib - additional modules contributed by Catalyst users.
- core - framework core with main abstractions - Experiment, Runner, Callback and State.
- data - useful tools and scripts for data processing.
- DL – runner for training and inference, all of the classic ML and CV/NLP/RecSys metrics and a variety of callbacks for training, validation and inference of neural networks.
- utils - typical utils for Deep Learning research.
Tests
All the Catalyst code is tested rigorously with every new PR.
In fact, we train a number of different models for a variety of tasks - image classification, image segmentation, text classification, GAN training. During the tests, we compare their convergence metrics in order to verify the correctness of the training procedure and its reproducibility.
Overall, Catalyst guarantees fully tested, correct and reproducible best practices for the automated parts.
Catalyst
Tutorials
- Demo with minimal examples for ML, CV, NLP, GANs and RecSys
- Detailed classification tutorial
- Advanced segmentation tutorial
- Comprehensive classification pipeline
- Binary and semantic segmentation pipeline
- Beyond fashion: Deep Learning with Catalyst (Config API)
- Tutorial from Notebook API to Config API (RU)
API documentation and an overview of the library can be found here.
In the examples folder of the repository, you can find advanced tutorials and Catalyst best practices.
Projects
- Kaggle Quick, Draw! Doodle Recognition Challenge - 11th place solution
- Catalyst.RL - NeurIPS 2018: AI for Prosthetics Challenge – 3rd place solution
- CamVid Segmentation Example - Example of semantic segmentation for CamVid dataset
- Notebook API tutorial for segmentation in Understanding Clouds from Satellite Images Competition
- Kaggle Google Landmark 2019 - 30th place solution
- Hierarchical attention for sentiment classification with visualization
- Pediatric bone age assessment
- iMet Collection 2019 - FGVC6 - 24th place solution
- ID R&D Anti-spoofing Challenge - 14th place solution
- Implementation of paper "Tell Me Where to Look: Guided Attention Inference Network"
- Catalyst.RL - NeurIPS 2019: Learn to Move - Walk Around – starter kit
- Catalyst.RL - NeurIPS 2019: Animal-AI Olympics - starter kit
- NeurIPS 2019: Recursion Cellular Image Classification - 4th place solution
- MICCAI 2019: Automatic Structure Segmentation for Radiotherapy Planning Challenge 2019
  - 3rd place solution for Task 3: Organ-at-risk segmentation from chest CT scans
  - and 4th place solution for Task 4: Gross Target Volume segmentation of lung cancer
- Kaggle Severstal steel detection - 5th place solution
- Implementation of paper "Filter Response Normalization Layer: Eliminating Batch Dependence in the Training of Deep Neural Networks"
- RSNA Intracranial Hemorrhage Detection - 5th place solution
- Implementation of paper "Utterance-level Aggregation For Speaker Recognition In The Wild"
- APTOS 2019 Blindness Detection – 7th place solution
- Catalyst.RL - NeurIPS 2019: Learn to Move - Walk Around – 2nd place solution
- xView2 Damage Assessment Challenge - 3rd place solution
- Inria Segmentation Example - An example of training segmentation model for Inria Satellite Segmentation Challenge
- iglovikov_segmentation - Semantic segmentation pipeline using Catalyst
Tools and pipelines
- Catalyst.RL – A Distributed Framework for Reproducible RL Research by Scitator
- Catalyst.Classification - Comprehensive classification pipeline with Pseudo-Labeling by Bagxi and Pdanilov
- Catalyst.Segmentation - Segmentation pipelines - binary, semantic and instance, by Bagxi
- Catalyst.Detection - Anchor-free detection pipeline by Avi2011class and TezRomacH
- Catalyst.GAN - Reproducible GANs pipelines by Asmekal
- Catalyst.Neuro - Brain image analysis project, in collaboration with TReNDS Center
- MLComp – distributed DAG framework for machine learning with UI by Lightforever
- Pytorch toolbelt - PyTorch extensions for fast R&D prototyping and Kaggle farming by BloodAxe
- Helper functions - An unstructured set of helper functions by Ternaus
Talks and videos
- Catalyst-team YouTube channel
- Catalyst.RL – reproducible RL research framework at Stachka
- Catalyst.DL – reproducible DL research framework (rus) and slides (eng) at RIF
- Catalyst.DL – reproducible DL research framework (rus) and slides (eng) at AI-Journey
- Catalyst.DL – fast & reproducible DL at Datastart
- Catalyst.RL - NeurIPS 2019: Learn to Move - Walk Around and slides (eng) at RL reading group Meetup
- Catalyst – accelerated DL & RL (rus) and slides (eng) at Facebook Developer Circle: Moscow | ML & AI Meetup
- Catalyst.RL - Learn to Move - Walk Around 2nd place solution at NeurIPS competition track
- Open Source ML 2019 edition at Datafest.elka
Community
Contribution guide
We appreciate all contributions. If you are planning to contribute back bug-fixes, please do so without any further discussion. If you plan to contribute new features, utility functions or extensions, please first open an issue and discuss the feature with us.
- Please see the contribution guide for more information.
- By participating in this project, you agree to abide by its Code of Conduct.
User feedback
We have created catalyst.team.core@gmail.com for "user feedback".
- If you like the project and want to say thanks, this is the right place.
- If you would like to start a collaboration between your team and Catalyst team to do better Deep Learning R&D - you are always welcome.
- If you just don't like Github issues and this way suits you better - feel free to email us.
- Finally, if you do not like something, please, share it with us and we can see how to improve it.
We appreciate any type of feedback. Thank you!
Trusted by
- Awecom
- Researchers@Center for Translational Research in Neuroimaging and Data Science (TReNDS)
- Researchers@Emory University
- Evil Martians
- Researchers@Georgia Institute of Technology
- Researchers@Georgia State University
- Helios
- HPCD Lab
- iFarm
- Kinoplan
- Researchers@Moscow Institute of Physics and Technology
- Neuromation
- Poteha Labs
- Provectus
- Researchers@Skolkovo Institute of Science and Technology
- SoftConstruct
- Researchers@Tinkoff
- Researchers@Yandex.Research
Supported by
Citation
Please use this bibtex if you want to cite this repository in your publications:
@misc{catalyst,
author = {Kolesnikov, Sergey},
title = {Accelerated DL R&D},
year = {2018},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/catalyst-team/catalyst}},
}
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Hashes for catalyst-20.4-py2.py3-none-any.whl
Algorithm | Hash digest |
---|---|
SHA256 | 6be4d69762e32d6126fc99f7d1d2ba2f87f1593886fe55f69a97577189382644 |
MD5 | e1c6ab49a4e05a5735b6f0286629a534 |
BLAKE2b-256 | 8bffc58e111bb9b6bc0d64337009e37d6db7be08e777e33b6de62c3f0c28e928 |