Skip to main content

This project aims to train neural networks on compound-protein interaction data and to provide interpretation of the learned model by interactively showing the transformed chemical landscape and the visualized structure-activity relationship (SAR) for chemicals of interest.

Project description

Visar Tutorial

model training

import os
from Model_training_utils import ST_model_hyperparam_screen, ST_model_training
os.environ['CUDA_VISIBLE_DEVICES']='1'
# initialize parameters
# Input table, fingerprint type and the task columns used below.
MT_dat_name = './data/MT_data_clean_Feb28.csv'
FP_type = 'Circular_2048'
task_names = [
    'T107', 'T108', 'T51', 'T106', 'T105', 'T10618',
    'T227', 'T168', 'T10624', 'T10627', 'T10209',
]

# Grid handed to ST_model_hyperparam_screen. Every entry is a list of values;
# only "dropouts" and "layer_sizes" hold more than one value here.
params_dict = dict(
    n_tasks=[1],
    n_features=[2048],  # needs to be modified to match the chosen FP type
    activation=['relu'],
    momentum=[.9],
    batch_size=[128],
    init=['glorot_uniform'],
    learning_rate=[0.01],
    decay=[1e-6],
    nb_epoch=[30],
    dropouts=[.2, .4],
    nb_layers=[1],
    batchnorm=[False],
    layer_sizes=[
        (1024, 512), (1024, 128), (512, 128),
        (512, 64), (128, 64), (64, 32),
        (1024, 512, 128), (512, 128, 64), (128, 64, 32),
    ],
    penalty=[0.1],
)
# initialize model setup
import random
import time

# Name this run after today's date plus a random suffix, so that separate
# runs on the same day normally end up in separate log directories.
random_seed = random.randint(0, 1000)
local_time = time.localtime(time.time())
log_path = './logs/'
RUN_KEY = 'ST_%d_%d_%d_%d' % (local_time.tm_year, local_time.tm_mon,
                              local_time.tm_mday, random_seed)
# os.makedirs (rather than shelling out to `mkdir`) also creates a missing
# './logs/' parent, tolerates an already existing run directory, and raises
# on a real failure instead of failing silently.
os.makedirs(log_path + RUN_KEY, exist_ok=True)
print(RUN_KEY)
# hyperparam screening using deepchem
# The screen receives the data table, the task list, the fingerprint type and
# the parameter grid; log_path points at this run's directory.
screen_log_dir = './logs/' + RUN_KEY
log_output = ST_model_hyperparam_screen(
    MT_dat_name, task_names, FP_type, params_dict,
    log_path=screen_log_dir)
# manually pick the training parameters
# One entry per task: [tuple of sizes, float].
# NOTE(review): presumably [layer sizes, dropout], chosen from the screening
# output above -- confirm against ST_model_training.
LAYERS_512_64 = (512, 64, 1)
LAYERS_512_128 = (512, 128, 1)
LAYERS_512_128_64 = (512, 128, 64, 1)

best_hyperparams = {
    'T107': [LAYERS_512_64, 0.4],
    'T108': [LAYERS_512_128, 0.2],
    'T10209': [LAYERS_512_64, 0.4],
    'T105': [LAYERS_512_128, 0.2],
    'T106': [LAYERS_512_64, 0.2],
    'T10618': [LAYERS_512_128, 0.4],
    'T10624': [LAYERS_512_128, 0.2],
    'T10627': [LAYERS_512_64, 0.2],
    'T168': [LAYERS_512_128, 0.2],
    'T227': [LAYERS_512_64, 0.4],
    'T51': [LAYERS_512_128_64, 0.2],
}
# model training
# Train with the hand-picked settings; result_path is this run's directory.
training_result_dir = './logs/' + RUN_KEY
output_df = ST_model_training(
    MT_dat_name, FP_type, best_hyperparams,
    result_path=training_result_dir)

build landscape and display interactive plot

from Model_landscape_utils import landscape_building
from Model_training_utils import prepare_dataset, extract_clean_dataset
import os
os.environ['CUDA_VISIBLE_DEVICES']='1'
import pandas as pd
from bokeh.plotting import output_notebook, show
# Send bokeh output to the notebook.
output_notebook()

# Task to analyse, with its data table and fingerprint type.
task_name = 'T107'
FP_type = 'Circular_2048'
db_name = './data/MT_data_clean_Feb28.csv'

# Previously trained model file and the layer setting passed alongside it.
n_layer = 1
prev_model = './logs/ST_2019_3_6_697/T107_rep0_50.hdf5'

# Both the SAR results and the landscape SDF are placed under log_path.
log_path = './logs/MT_2019_4_16_780/'
SAR_result_dir = log_path
output_sdf_name = log_path + 'T107_chemical_landscape.sdf'

landscape_building(
    task_name, db_name, log_path, FP_type,
    prev_model, n_layer,
    SAR_result_dir, output_sdf_name,
)
# pick clusters of interest and pack them as an sdf for pharmacophore modeling
from Model_landscape_utils import sdf2df
# NOTE(review): df2sdf was called without ever being imported. It is assumed
# to live next to sdf2df in Model_landscape_utils -- confirm.
from Model_landscape_utils import df2sdf

landscape_sdf_file = './Result/T107_baseline_landscape.sdf'
landscape_df = sdf2df(landscape_sdf_file)

# Select the rows whose 'Label' value is 7.
custom_filter = landscape_df['Label'] == 7

# TODO(review): placeholders -- replace with the actual SMILES and ID column
# names of landscape_df; the original call left both names undefined.
smiles_field = 'smiles'
id_field = 'molname'

# Written as './Result/Label_7.sdf', the file the conformer-generation step
# reads after changing into './Result/'.
cluster_sdf_name = './Result/Label_7.sdf'

# Pass the loaded frame and the filter computed above. The original call was
# the bare function signature (undefined `df`, custom_filter = None).
df2sdf(landscape_df, cluster_sdf_name, smiles_field, id_field,
       custom_filter=custom_filter)
# pharmacophore building
# Make home_dir absolute (with a trailing separator) BEFORE changing into it.
# Every path below is built as home_dir + name, and a relative './Result/'
# prefix would resolve to './Result/Result/...' once the working directory
# has changed.
home_dir = os.path.join(os.path.abspath('./Result/'), '')
os.chdir(home_dir)

# prepare ligand conformations
from rdkit import Chem
from rdkit.Chem import AllChem

raw_sdf_file = 'Label_7.sdf'
sdf_file = home_dir + 'Label7_rdkit_conf.sdf'
n_conf = 5

# SDMolSupplier yields None for records it cannot parse; skip those instead
# of crashing in AddHs.
ms = [x for x in Chem.SDMolSupplier(raw_sdf_file) if x is not None]
# Adding hydrogens does not depend on the conformer index, so do it once.
ms_addH = [Chem.AddHs(m) for m in ms]

w = Chem.SDWriter(sdf_file)
try:
    # Each pass embeds a fresh conformer per molecule (EmbedMolecule replaces
    # the previous one by default), so the output holds up to n_conf records
    # per molecule.
    for _ in range(n_conf):
        for m in ms_addH:
            # EmbedMolecule returns -1 when no conformer could be generated;
            # do not write a molecule without 3D coordinates.
            if AllChem.EmbedMolecule(m) < 0:
                continue
            AllChem.MMFFOptimizeMoleculeConfs(m)
            w.write(m)
finally:
    # Closing flushes the writer; without it the SDF may be incomplete when
    # the next step reads it.
    w.close()

# process pharmacophores
result_dir = home_dir + 'Label7_rdkit_phars/'
output_name = 'Cluster7_'
# NOTE(review): proceed_pharmacophore is not imported anywhere in this
# snippet; import it from the VISAR module that defines it before running.
proceed_pharmacophore(home_dir, sdf_file, result_dir, output_name)
# visualize the pharmacophore model in pymol

analysis of custom chemicals

from Model_landscape_utils import landscape_positioning
import os
os.environ['CUDA_VISIBLE_DEVICES']='1'
import pandas as pd
from bokeh.plotting import output_notebook
# Send bokeh output to the notebook.
output_notebook()

# set custom file
# CSV with the custom chemicals and the names of the fields passed along.
custom_file = './Result/custom_df.csv'
custom_smi_field = "smiles"
custom_id_field = 'molname'
custom_task_field = 'dummy'

# set the landscape to compare to
task_name = 'T107'
FP_type = 'Circular_2048'
db_name = './data/MT_data_clean_Feb28.csv'

# Previously trained model file and the layer setting passed alongside it.
n_layer = 1
prev_model = './logs/ST_2019_3_6_697/T107_rep0_50.hdf5'

# Both the SAR results and the output SDF are placed under log_path.
log_path = './logs/MT_2019_4_16_780/'
custom_SAR_result_dir = log_path
custom_sdf_name = log_path + 'custom_chemicals_on_T107_landscape.sdf'

landscape_positioning(
    custom_file, custom_smi_field, custom_id_field, custom_task_field,
    task_name, db_name, FP_type, log_path,
    prev_model, n_layer, custom_SAR_result_dir, custom_sdf_name,
)
# pick clusters of interest and pack them as an sdf for pharmacophore modeling
from Model_landscape_utils import sdf2df
# NOTE(review): df2sdf was called without ever being imported. It is assumed
# to live next to sdf2df in Model_landscape_utils -- confirm.
from Model_landscape_utils import df2sdf

landscape_sdf_file = './Result/T107_baseline_landscape.sdf'
landscape_df = sdf2df(landscape_sdf_file)

# Select the rows whose 'Label' value is 7.
custom_filter = landscape_df['Label'] == 7

# TODO(review): placeholders -- replace with the actual SMILES and ID column
# names of landscape_df; the original call left both names undefined.
smiles_field = 'smiles'
id_field = 'molname'

# TODO(review): output location chosen by the reviewer; the original call
# named no usable target file.
cluster_sdf_name = './Result/Label_7.sdf'

# Pass the loaded frame and the filter computed above. The original call was
# the bare function signature (undefined `df`, custom_filter = None).
df2sdf(landscape_df, cluster_sdf_name, smiles_field, id_field,
       custom_filter=custom_filter)

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

visar-0.1.3.1.tar.gz (17.6 kB view details)

Uploaded Source

File details

Details for the file visar-0.1.3.1.tar.gz.

File metadata

  • Download URL: visar-0.1.3.1.tar.gz
  • Upload date:
  • Size: 17.6 kB
  • Tags: Source
  • Uploaded using Trusted Publishing? No
  • Uploaded via: twine/1.13.0 pkginfo/1.4.2 requests/2.18.4 setuptools/41.0.0 requests-toolbelt/0.9.1 tqdm/4.28.1 CPython/3.6.5

File hashes

Hashes for visar-0.1.3.1.tar.gz
Algorithm Hash digest
SHA256 95a21e96556185fd8adf70db214731220c112d942f3ccf7a2d3c6450f873615b
MD5 4ffb324348cb42db073431b32244e625
BLAKE2b-256 e5a76f88d38a2b02f6307d9252b0479fa4047ce594b39114548662665f3a727e

See more details on using hashes here.

Supported by

AWS Cloud computing and Security Sponsor Datadog Monitoring Depot Continuous Integration Fastly CDN Google Download Analytics Pingdom Monitoring Sentry Error logging StatusPage Status page