
Reinforcement learning algorithms in Python

Project description

Reinforcement Learning Algorithms

  • On-Policy
    • A2C: Advantage Actor-Critic
    • PPO: Proximal Policy Optimization
  • Off-Policy
    • DQN: Deep Q-Network
    • DuelingDQN: Dueling DQN
    • DDPG: Deep Deterministic Policy Gradient
    • TD3: Twin Delayed DDPG
    • SAC: Soft Actor-Critic

Usage

Classic usage

import gymnasium
from tensorflow import keras

import rlearn

# define an environment
env = gymnasium.make('CartPole-v1', render_mode="human")

# set reinforcement learning trainer
trainer = rlearn.DQNTrainer()
trainer.set_replay_buffer(max_size=1000)
trainer.set_model_encoder(
  q=keras.Sequential([
    keras.layers.InputLayer(4),  # state has dimension of 4
    keras.layers.Dense(32),
    keras.layers.ReLU(),
  ]),
  action_num=env.action_space.n
)

# training loop
for _ in range(100):  # episodes
  s, _ = env.reset()
  for _ in range(200):  # cap each episode at 200 steps
    a = trainer.predict(s)  # choose an action for the current state
    s_, r, terminated, truncated, _ = env.step(a)
    done = terminated or truncated
    trainer.store_transition(s, a, r, s_, done)  # add the transition to the replay buffer
    trainer.train_batch()  # train on one sampled batch
    s = s_
    if done:
      break
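
After training, the same trainer.predict call can be reused to run the learned policy. A minimal evaluation sketch (assuming predict selects actions the same way it does during training) looks like this:

# run one evaluation episode with the trained policy
s, _ = env.reset()
done = False
ep_reward = 0.0
while not done:
  a = trainer.predict(s)
  s, r, terminated, truncated, _ = env.step(a)
  done = terminated or truncated
  ep_reward += r
print("episode reward:", ep_reward)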

Set training hyperparameters

import rlearn

trainer = rlearn.DQNTrainer()
trainer.set_params(
  learning_rate=0.01,
  batch_size=32,
  gamma=0.9,
  replace_ratio=1.,
  replace_step=0,
  min_epsilon=0.1,
  epsilon_decay=1e-3,
)
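
min_epsilon and epsilon_decay control the epsilon-greedy exploration schedule: exploration shrinks over time but never drops below the floor. The exact update rule inside the trainer is not documented here, so the standalone sketch below only illustrates a simple linear per-step decay:

# illustrative only: linear per-step epsilon decay with a lower bound
epsilon = 1.0
min_epsilon, epsilon_decay = 0.1, 1e-3
for step in range(2000):
  epsilon = max(min_epsilon, epsilon - epsilon_decay)
print(epsilon)  # hits the 0.1 floor after about 900 steps and stays there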

Parallel training

Experience parallel

Start a remote buffer:

from rlearn import distributed

distributed.experience.start_replay_buffer_server(
  port=50051,
)

Start actors:

import gymnasium

import rlearn
from rlearn import distributed


class CartPole(rlearn.EnvWrapper):
  def __init__(self, render_mode="human"):
    self.env = gymnasium.make('CartPole-v1', render_mode=render_mode)

  def reset(self):
    s, _ = self.env.reset()
    return s

  def step(self, a):
    s_, _, done, _, _ = self.env.step(a)
    r = -1 if done else 0  # reward shaping: penalize only the terminal step
    return s_, r, done


distributed.experience.start_actor_server(
  port=50052,
  remote_buffer_address="localhost:50051",
  env=CartPole(),
)
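
More actors can feed the same remote buffer by starting additional actor servers on other ports (the port below is arbitrary); each extra address then goes into the learner's actors_address list:

# a second actor pointing at the same remote buffer
distributed.experience.start_actor_server(
  port=50053,
  remote_buffer_address="localhost:50051",
  env=CartPole(),
)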

Start a learner:

import rlearn
from tensorflow import keras

trainer = rlearn.trainer.DQNTrainer()
trainer.set_model_encoder(
  q=keras.Sequential([
    keras.layers.InputLayer(4),
    keras.layers.Dense(32),
    keras.layers.ReLU(),
  ]),
  action_num=2
)
trainer.set_params(
  learning_rate=0.01,
  batch_size=32,
  replace_step=15,
)
trainer.set_action_transformer(rlearn.transformer.DiscreteAction([0, 1]))
learner = rlearn.distributed.experience.Learner(
  trainer=trainer,
  remote_buffer_address="localhost:50051",
  actors_address=["localhost:50052", ],
  actor_buffer_size=10,
  remote_buffer_size=1000,
  remote_buffer_type="RandomReplayBuffer",
)
learner.run(max_train_time=100, max_ep_step=-1)
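
When learner.run returns, the trainer passed to the learner holds the latest parameters (assuming the learner updates its trainer in place), so it can be saved with the same API shown in the Save and reload section below:

# persist the trained model once distributed training finishes
trainer.save_model("tmp_model_experience_parallel")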

Gradient parallel

Start a parameter server:

import rlearn
from tensorflow import keras

trainer = rlearn.trainer.DQNTrainer()
trainer.set_model_encoder(
  q=keras.Sequential([
    keras.layers.InputLayer(4),
    keras.layers.Dense(20),
    keras.layers.ReLU(),
  ]),
  action_num=2
)
trainer.set_params(
  learning_rate=0.001,
  batch_size=32,
  replace_step=100,
)
trainer.set_action_transformer(rlearn.transformer.DiscreteAction([0, 1]))

rlearn.distributed.gradient.start_param_server(
  port=50051,
  trainer=trainer,
  sync_step=5,
  worker_buffer_type="RandomReplayBuffer",
  worker_buffer_size=3000,
  max_train_time=60,
  # debug=True,
)

Start workers:

import gymnasium
import rlearn


class CartPole(rlearn.EnvWrapper):
  def __init__(self, render_mode="human"):
    self.env = gymnasium.make('CartPole-v1', render_mode=render_mode)

  def reset(self):
    s, _ = self.env.reset()
    return s

  def step(self, a):
    s_, _, done, _, _ = self.env.step(a)
    r = -1 if done else 0  # reward shaping: penalize only the terminal step
    return s_, r, done


rlearn.distributed.gradient.worker.run(
  env=CartPole(),
  params_server_address="localhost:50051",
  name="worker1",
  # debug=True,
)
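
Additional workers can be launched the same way; only the name needs to differ (the worker names here are arbitrary labels):

# another worker contributing gradients to the same parameter server
rlearn.distributed.gradient.worker.run(
  env=CartPole(),
  params_server_address="localhost:50051",
  name="worker2",
)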

Save and reload

Save entire model

import rlearn
from tensorflow import keras
import numpy as np

# define and save a model
trainer = rlearn.DQNTrainer()
trainer.set_model_encoder(
  keras.Sequential([
    keras.layers.InputLayer(2),
    keras.layers.Dense(32),
  ]), action_num=3)
path = "tmp_model0"
trainer.save_model(path)

# reload the model directly from the saved path
m = rlearn.load_model(path)
action = m.predict(np.random.random((2,)))

Save only the model parameters and reload them into a new trainer or a new model.

import rlearn
from tensorflow import keras
import numpy as np

# define a model and save its weights
trainer = rlearn.DQNTrainer()
trainer.set_model_encoder(
  keras.Sequential([
    keras.layers.InputLayer(2),
    keras.layers.Dense(32),
  ]), action_num=3)
path = "tmp_model_weights0"
trainer.save_model_weights(path)

# load the saved parameters into a new trainer
trainer2 = rlearn.DQNTrainer()
trainer2.set_model_encoder(
  keras.Sequential([
    keras.layers.InputLayer(2),
    keras.layers.Dense(32),
  ]), action_num=3)
trainer2.load_model_weights(path)
action = trainer2.predict(np.random.random((2,)))

# load the saved parameters into a model
m = rlearn.DQN()
m.set_encoder(encoder=keras.Sequential([
  keras.layers.InputLayer(2),
  keras.layers.Dense(32),
]), action_num=3)
m.load_weights(path)  # load the weights saved above (method name assumed, mirroring the trainer API)
action = m.predict(np.random.random((2,)))

Install

git clone https://git.woa.com/TIPE/rlearn.git
cd rlearn

# on Apple M1 silicon, install the TensorFlow dependencies with conda first:
conda install -c apple tensorflow-deps

python3 setup.py install
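
A quick sanity check after installation is simply importing the package:

python3 -c "import rlearn; print(rlearn.__name__)"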

Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

rlearn-0.0.4.tar.gz (59.8 kB)

Uploaded Source

Built Distribution

rlearn-0.0.4-py3-none-any.whl (73.2 kB)

Uploaded Python 3

File details

Details for the file rlearn-0.0.4.tar.gz.

File metadata

  • Download URL: rlearn-0.0.4.tar.gz
  • Upload date:
  • Size: 59.8 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/4.0.0 CPython/3.8.13

File hashes

Hashes for rlearn-0.0.4.tar.gz

  • SHA256: d492d71cf904e2fb8b1231af22e8440b1b320a5130add91f83514b062e79ebfa
  • MD5: fbdbc063cef474dbad1204a31172d0b0
  • BLAKE2b-256: 6f460343381e8a5665d5db8ec3bc736edd5625c9c5e933425b77910c256049de

See more details on using hashes here.

File details

Details for the file rlearn-0.0.4-py3-none-any.whl.

File metadata

  • Download URL: rlearn-0.0.4-py3-none-any.whl
  • Upload date:
  • Size: 73.2 kB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/4.0.0 CPython/3.8.13

File hashes

Hashes for rlearn-0.0.4-py3-none-any.whl

  • SHA256: e606b212c5c46b256558ba5bb1ffc982796c0fade7206c886abd258b8e534ba3
  • MD5: b8da126acf041909fe931000338c21c4
  • BLAKE2b-256: cbafa481e3e244073feade21c36255ceba4ab0ec3427e885836db6ce3192bc73

See more details on using hashes here.
