Skip to main content

Chained Workflow built on RDKit

Project description

RDWORKS

Rdworks is inspired by pandas. Many rdworks functions work like pandas and can be chained into a workflow.

Installation

$ pip install rdworks

# or build from the source
$ pip install git+https://github.com/sunghunbae/rdworks.git

# or if you use uv package/project manager
$ uv add rdworks

Usage

Process Compound Library

from rdworks import MolLibr

# initialize the library
libr = MolLibr(drug_smiles[:5], drug_names[:5])
libr = MolLibr([Chem.MolFromSmiles(_) for _ in drug_smiles[5:10]], drug_names[5:10])
libr = MolLibr([Mol(smi, name) for smi, name in zip(drug_smiles[10:15], drug_names[10:15])])

# libr will be changed by +=, -=, &= operators
libr += other_libr

# remove redundant compounds
libr_unique = libr.unique()

# select compounds satisfying ZINC druglike rules
drug_like_libr = libr.drop("~ZINC_druglike") 

# select compounds CNS compliant
cns_compliant_libr = libr.drop("~CNS") 
cns_compliant_libr = libr.drop("CNS", invert=True)

# select compounds compliant to a custom rule defined in a xml file
custom_rule_compliant_libr = libr.drop(custom_rul_xml_path, invert=True)

# select neural network potential applicable compounds
ani_2x_applicable_libr = libr.nnp_ready("ANI-2x", progress=False)

# select compounds similar to query with a threshold
sim = libr.similar(query_Mol, threshold=0.2)

Complete Stereoisomers

from rdworks import Mol, MolLibr
from rdworks.complete import complete_stereoisomers

mol = Mol(cmpd.smiles)
stereoisomers = complete_stereoisomers(mol)

Complete Tautomers

m = Mol("Oc1c(cccc3)c3nc2ccncc12", "tautomer")
libr = complete_tautomers(m)
assert libr.count() == 3
expected_names = ["tautomer.1", "tautomer.2", "tautomer.3"]
names = [_.name for _ in libr]
assert names == expected_names
expected_canonical_smiles = [
    "O=c1c2c[nH]ccc-2nc2ccccc12",
    "O=c1c2ccccc2[nH]c2ccncc12",
    "Oc1c2ccccc2nc2ccncc12",
]
canonical_smiles = [_.smiles for _ in libr]
difference = set(expected_canonical_smiles) - set(canonical_smiles)
assert len(difference) == 0

Generate Conformer

from rdworks import Mol
from batchopt import BatchOptimizer


mol = Mol(smiles, name)
mol = mol.make_confs(n=n, method='ETKDG') # optimize with MMFF94
mol = mol.optimize_confs()
# remove similar conformers (RMSD <0.3) [and stereo-flipped conformers by default]
mol = mol.drop_confs(similar=True, similar_rmsd=0.3)
mol = mol.optimize_confs(calculator=BatchOptimizer, batchsize_atoms=16384)
mol = mol.sort_confs().rename()
mol = mol.align_confs().cluster_confs(sort='energy')

Serialize/deserialize Molecule or Conformer

Mol and Conf objects have serialize() and deserialize() functions to exchange and reproduce the exact objects.

from rdworks import Conf


mol_serialized = mol.serialize()
lowest_energy_conf_serialized = mol.confs[0].serialize() 
# lowest energy conformer
# serialized conformer is regular string can be easily exchanged
conf = Conf().deserialize(lowest_energy_conf_serialized)

Profile Torsion Angle Energies

from batchopt import BatchSinglePointer


conf = conf.calculate_sp_torsion_energies(
        calculator = BatchSinglePointer,
        torsion_angle_idx = torsion_angle_idx,
        simplify = simplify,
        interval = interval,
        water = water,
        batchsize_atoms = batchsize_atoms,
    )
results = conf.serialize()

Generate Microstates

Microstates are molecular states that have defined protonation states. A molecule exists as an ensemble of microstates at a given pH condition.

import logging
import math
import numpy as np
from rdworks import State, StateNetwork


logger = logging.getLogger(__name__)
original_state = State(smiles=smiles)
num_ionizable_sites = len(original_state.sites)
logger.info(f"found {num_ionizable_sites} ionizable sites.")
state_net = StateNetwork()

if num_ionizable_sites > 6:
    state_net.build(smiles=smiles,
                    max_formal_charge=3,
                    protomer_rule='simple',
                    tautomer_rule=None)
else:
    state_net.build(smiles=smiles,
                    max_formal_charge=3,
                    protomer_rule='default',
                    tautomer_rule=None)

state_ens = state_net.get_state_ensemble()
logger.info(f'generated {state_ens.size()} microstates.')

Calculate Microstate Population with External dG Predictor

from unipka.deltaG import UniMolFreeEnergy


dG_dict = dG_predictor.predict([st.smiles for st in state_ens])

# set state energies via Uni-pKa model
# Uni-pka model specific variable for pH dependent deltaG
# Training might be conducted with a dataset in which raw pKa values
# were subtracted by the mean value (TRANSLATE_PH), 6.504894871171601.
dG = [dG_dict[st.smiles] for st in state_ens]
state_ens.set_energies(dG, ref_ph=6.504894871171601)

ph_values = np.linspace(-2, 16, 180)
p = state_ens.get_population(ph_values, C=math.log(10), beta=1.0)
state_ens = state_ens.trim(p, threshold=0.05)

# representative state(s) at pH 7.4, a subset of state_ens
ph_74 = np.array([7.4])
p_74 = state_ens.get_population(ph_74)
# p_74.shape == (self.size(), pH.shape[0] or number of pH)
ph74_pop = {state_idx: p.item() for state_idx, p in enumerate(p_74)}
dictdata = {
    'ensemble' : state_ens.serialize(), # str
    'ph74_pop' : ph74_pop, # dict
    }

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

rdworks-0.74.1.tar.gz (4.9 MB view details)

Uploaded Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

rdworks-0.74.1-py3-none-any.whl (5.0 MB view details)

Uploaded Python 3

File details

Details for the file rdworks-0.74.1.tar.gz.

File metadata

  • Download URL: rdworks-0.74.1.tar.gz
  • Upload date:
  • Size: 4.9 MB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: uv/0.9.18 {"installer":{"name":"uv","version":"0.9.18","subcommand":["publish"]},"python":null,"implementation":{"name":null,"version":null},"distro":{"name":"Ubuntu","version":"24.04","id":"noble","libc":null},"system":{"name":null,"release":null},"cpu":null,"openssl_version":null,"setuptools_version":null,"rustc_version":null,"ci":null}

File hashes

Hashes for rdworks-0.74.1.tar.gz
Algorithm Hash digest
SHA256 c1bc0ebd630bbb9131c2d7f777783e6081e53ac5790a1a4a86e1b31b878245df
MD5 0c52e7d5e3047d119cb3bd3573f0cce9
BLAKE2b-256 e570b6e835a52fc625b0cdc8cec4d16e2c823ee209d5eb16930d9086b296d0d4

See more details on using hashes here.

File details

Details for the file rdworks-0.74.1-py3-none-any.whl.

File metadata

  • Download URL: rdworks-0.74.1-py3-none-any.whl
  • Upload date:
  • Size: 5.0 MB
  • Tags: Python 3
  • Uploaded using Trusted Publishing? No
  • Uploaded via: uv/0.9.18 {"installer":{"name":"uv","version":"0.9.18","subcommand":["publish"]},"python":null,"implementation":{"name":null,"version":null},"distro":{"name":"Ubuntu","version":"24.04","id":"noble","libc":null},"system":{"name":null,"release":null},"cpu":null,"openssl_version":null,"setuptools_version":null,"rustc_version":null,"ci":null}

File hashes

Hashes for rdworks-0.74.1-py3-none-any.whl
Algorithm Hash digest
SHA256 d4a203381a1fa52c9fe5e15df1f7e9172971689ba4d36a5c4e7cea64348e0ed0
MD5 6b5a8d028ec30e2db4f59f61df58da84
BLAKE2b-256 5e87a7c47bacfd13f097ea9249710a461b192e6952fba79f5c70b398a524f477

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page