Spaces:
Running
Running
Commit
·
8e66b23
0
Parent(s):
Duplicate from GT4SD/paccmann_gp
Browse files- .gitattributes +34 -0
- .gitignore +1 -0
- LICENSE +21 -0
- README.md +15 -0
- app.py +164 -0
- model_cards/article.md +89 -0
- model_cards/description.md +6 -0
- model_cards/examples.csv +3 -0
- requirements.txt +30 -0
- utils.py +76 -0
.gitattributes
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2022 Generative Toolkit 4 Scientific Discovery
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: PaccMann^GP
|
| 3 |
+
emoji: 💡
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 3.9.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
python_version: 3.8.13
|
| 11 |
+
pypi_version: 20.2.4
|
| 12 |
+
duplicated_from: GT4SD/paccmann_gp
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
import pathlib
|
| 3 |
+
from typing import List
|
| 4 |
+
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import pandas as pd
|
| 7 |
+
from gt4sd.algorithms.controlled_sampling.paccmann_gp import (
|
| 8 |
+
PaccMannGPGenerator,
|
| 9 |
+
PaccMannGP,
|
| 10 |
+
)
|
| 11 |
+
from gt4sd.algorithms.controlled_sampling.paccmann_gp.implementation import (
|
| 12 |
+
MINIMIZATION_FUNCTIONS,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
from gt4sd.algorithms.registry import ApplicationsRegistry
|
| 16 |
+
|
| 17 |
+
from utils import draw_grid_generate
|
| 18 |
+
|
| 19 |
+
# Module-level logger. The NullHandler is a no-op handler, so this module does
# not emit log output on its own unless the application configures logging.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


# Drop the "callable" and "molwt" entries (if present; the `None` default
# suppresses KeyError) from the imported MINIMIZATION_FUNCTIONS mapping.
# This mutates the imported object in place. Its remaining keys are what the
# "Property goals" checkbox group below offers as choices.
MINIMIZATION_FUNCTIONS.pop("callable", None)
MINIMIZATION_FUNCTIONS.pop("molwt", None)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def run_inference(
    algorithm_version: str,
    targets: List[str],
    protein_target: str,
    temperature: float,
    length: float,
    number_of_samples: int,
    limit: int,
    number_of_steps: int,
    number_of_initial_points: int,
    number_of_optimization_rounds: int,
    sampling_variance: float,
    samples_for_evaluation: int,
    maximum_number_of_sampling_steps: int,
    seed: int,
):
    """
    Configure PaccMannGP from the UI inputs, sample molecules and render them.

    Args:
        algorithm_version: Version identifier; only the part after the last
            underscore is forwarded to the generator configuration.
        targets: Names of the properties to optimize. Each becomes a key of
            the target dictionary handed to PaccMannGP.
        protein_target: Protein sequence, required when "affinity" is one of
            the targets and ignored (reset to "") otherwise.
        temperature: Forwarded as `temperature`.
        length: Forwarded as `generated_length`.
        number_of_samples: How many items are drawn from `model.sample`.
        limit: Forwarded as `limit`.
        number_of_steps: Forwarded as `number_of_steps`.
        number_of_initial_points: Forwarded as `number_of_initial_points`.
        number_of_optimization_rounds: Forwarded as
            `number_of_optimization_rounds`.
        sampling_variance: Forwarded as `sampling_variance`.
        samples_for_evaluation: Forwarded as `samples_for_evaluation`.
        maximum_number_of_sampling_steps: Forwarded as
            `maximum_number_of_sampling_steps`.
        seed: Forwarded as `seed`.

    Returns:
        The value returned by `draw_grid_generate` for the sampled molecules.

    Raises:
        ValueError: If "affinity" is requested but `protein_target` is empty
            or not a string.
    """
    # Batch size, acquisition function and initial point generator are fixed;
    # everything else comes straight from the caller.
    generator_kwargs = dict(
        algorithm_version=algorithm_version.split("_")[-1],
        batch_size=32,
        temperature=temperature,
        generated_length=length,
        limit=limit,
        acquisition_function="EI",
        number_of_steps=number_of_steps,
        number_of_initial_points=number_of_initial_points,
        initial_point_generator="random",
        number_of_optimization_rounds=number_of_optimization_rounds,
        sampling_variance=sampling_variance,
        samples_for_evaluation=samples_for_evaluation,
        maximum_number_of_sampling_steps=maximum_number_of_sampling_steps,
        seed=seed,
    )
    config = PaccMannGPGenerator(**generator_kwargs)

    # One (initially empty) parameter dictionary per requested property.
    target = {goal: {} for goal in targets}

    if "affinity" not in targets:
        # Without an affinity goal the protein is irrelevant; blank it so the
        # grid renderer does not evaluate affinity either.
        protein_target = ""
    elif protein_target == "" or not isinstance(protein_target, str):
        raise ValueError(
            f"Protein target must be specified for affinity prediction, not ={protein_target}"
        )
    else:
        target["affinity"]["protein"] = protein_target

    model = PaccMannGP(config, target=target)
    samples = list(model.sample(number_of_samples))

    return draw_grid_generate(
        samples=samples,
        n_cols=5,
        properties=set(target.keys()),
        protein_target=protein_target,
    )
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
if __name__ == "__main__":
    # Local import: only this script entry point needs it.
    import ast

    # Preparation (retrieve all available algorithms)
    all_algos = ApplicationsRegistry.list_available()
    algos = [
        algo["algorithm_version"]
        for algo in all_algos
        if "PaccMannGP" in algo["algorithm_name"]
    ]

    # Load metadata
    metadata_root = pathlib.Path(__file__).parent.joinpath("model_cards")

    examples = pd.read_csv(
        metadata_root.joinpath("examples.csv"), header=None, sep="|"
    ).fillna("")
    # Column 1 stores the property goals as a list literal, e.g. ["qed","sa"].
    # Parse it with ast.literal_eval rather than eval so that a cell can only
    # ever yield a Python literal and never execute code.
    examples[1] = examples[1].apply(ast.literal_eval)

    article = metadata_root.joinpath("article.md").read_text(encoding="utf-8")
    description = metadata_root.joinpath("description.md").read_text(
        encoding="utf-8"
    )

    # The order of `inputs` must match the positional parameters of
    # `run_inference` and the column order of examples.csv.
    demo = gr.Interface(
        fn=run_inference,
        title="PaccMannGP",
        inputs=[
            gr.Dropdown(algos, label="Algorithm version", value="v0"),
            # NOTE(review): `multiselect` is kept from the original call;
            # confirm the pinned gradio version accepts it for CheckboxGroup.
            gr.CheckboxGroup(
                choices=list(MINIMIZATION_FUNCTIONS.keys()),
                value=["qed"],
                multiselect=True,
                label="Property goals",
            ),
            gr.Textbox(
                label="Protein target",
                placeholder="MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT",
                lines=1,
            ),
            gr.Slider(minimum=0.5, maximum=2, value=1, label="Decoding temperature"),
            gr.Slider(
                minimum=5,
                maximum=400,
                value=100,
                label="Maximal sequence length",
                step=1,
            ),
            gr.Slider(
                minimum=1, maximum=50, value=10, label="Number of samples", step=1
            ),
            gr.Slider(minimum=1, maximum=8, value=4.0, label="Limit"),
            gr.Slider(minimum=1, maximum=32, value=8, label="Number of steps", step=1),
            gr.Slider(
                minimum=1, maximum=32, value=4, label="Number of initial points", step=1
            ),
            gr.Slider(
                minimum=1,
                maximum=4,
                value=1,
                label="Number of optimization rounds",
                step=1,
            ),
            gr.Slider(minimum=0.01, maximum=1, value=0.1, label="Sampling variance"),
            gr.Slider(
                minimum=1,
                maximum=10,
                value=1,
                label="Samples used for evaluation",
                step=1,
            ),
            gr.Slider(
                minimum=1,
                maximum=64,
                value=4,
                label="Maximum number of sampling steps",
                step=1,
            ),
            gr.Number(value=42, label="Seed", precision=0),
        ],
        outputs=gr.HTML(label="Output"),
        article=article,
        description=description,
        examples=examples.values.tolist(),
    )
    demo.launch(debug=True, show_error=True)
|
model_cards/article.md
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model documentation & parameters
|
| 2 |
+
|
| 3 |
+
**Algorithm Version**: Which model version to use.
|
| 4 |
+
|
| 5 |
+
**Property goals**: One or multiple properties that will be optimized.
|
| 6 |
+
|
| 7 |
+
**Protein target**: The amino acid sequence (AAS) of the protein target used for conditioning. Leave blank unless you use `affinity` as a `property goal`.
|
| 8 |
+
|
| 9 |
+
**Decoding temperature**: The temperature parameter in the SMILES/SELFIES decoder. Higher values lead to more explorative choices, smaller values culminate in mode collapse.
|
| 10 |
+
|
| 11 |
+
**Maximal sequence length**: The maximal number of SMILES tokens in the generated molecule.
|
| 12 |
+
|
| 13 |
+
**Number of samples**: How many samples should be generated (between 1 and 50).
|
| 14 |
+
|
| 15 |
+
**Limit**: Hypercube limits in the latent space.
|
| 16 |
+
|
| 17 |
+
**Number of steps**: Number of steps for a GP optimization round. The more steps, the slower the run. Has to be at least `Number of initial points`.
|
| 18 |
+
|
| 19 |
+
**Number of initial points**: Number of initial points evaluated. The more points, the slower the run.
|
| 20 |
+
|
| 21 |
+
**Number of optimization rounds**: Maximum number of optimization rounds.
|
| 22 |
+
|
| 23 |
+
**Sampling variance**: Variance of the Gaussian noise applied during sampling from the optimal point.
|
| 24 |
+
|
| 25 |
+
**Samples for evaluation**: Number of samples averaged for each minimization function evaluation.
|
| 26 |
+
|
| 27 |
+
**Max. sampling steps**: Maximum number of sampling steps in an optimization round.
|
| 28 |
+
|
| 29 |
+
**Seed**: The random seed used for initialization.
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# Model card -- PaccMannGP
|
| 34 |
+
|
| 35 |
+
**Model Details**: [PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a GaussianProcess for controlled sampling. This model systematically explores the latent space of a trained molecular VAE.
|
| 36 |
+
|
| 37 |
+
**Developers**: Jannis Born, Matteo Manica and colleagues from IBM Research.
|
| 38 |
+
|
| 39 |
+
**Distributors**: Original authors' code wrapped and distributed by GT4SD Team (2023) from IBM Research.
|
| 40 |
+
|
| 41 |
+
**Model date**: Published in 2022.
|
| 42 |
+
|
| 43 |
+
**Model version**: A molecular VAE trained on 1.5M molecules from ChEMBL.
|
| 44 |
+
|
| 45 |
+
**Model type**: A language-based molecular generative model that can be explored with Gaussian Processes to generate molecules with desired properties.
|
| 46 |
+
|
| 47 |
+
**Information about training algorithms, parameters, fairness constraints or other applied approaches, and features**:
|
| 48 |
+
Described in the [original paper](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
|
| 49 |
+
|
| 50 |
+
**Paper or other resource for more information**:
|
| 51 |
+
[Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model (2022; *Journal of Chemical Information & Modeling*)](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
|
| 52 |
+
|
| 53 |
+
**License**: MIT
|
| 54 |
+
|
| 55 |
+
**Where to send questions or comments about the model**: Open an issue on [GT4SD repository](https://github.com/GT4SD/gt4sd-core).
|
| 56 |
+
|
| 57 |
+
**Intended Use. Use cases that were envisioned during development**: Chemical research, in particular drug discovery.
|
| 58 |
+
|
| 59 |
+
**Primary intended uses/users**: Researchers and computational chemists using the model for model comparison or research exploration purposes.
|
| 60 |
+
|
| 61 |
+
**Out-of-scope use cases**: Production-level inference, producing molecules with harmful properties.
|
| 62 |
+
|
| 63 |
+
**Factors**: Not applicable.
|
| 64 |
+
|
| 65 |
+
**Metrics**: High reward on generating molecules with desired properties.
|
| 66 |
+
|
| 67 |
+
**Datasets**: ChEMBL.
|
| 68 |
+
|
| 69 |
+
**Ethical Considerations**: Unclear, please consult with original authors in case of questions.
|
| 70 |
+
|
| 71 |
+
**Caveats and Recommendations**: Unclear, please consult with original authors in case of questions.
|
| 72 |
+
|
| 73 |
+
Model card prototype inspired by [Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)
|
| 74 |
+
|
| 75 |
+
## Citation
|
| 76 |
+
```bib
|
| 77 |
+
@article{born2022active,
|
| 78 |
+
author = {Born, Jannis and Huynh, Tien and Stroobants, Astrid and Cornell, Wendy D. and Manica, Matteo},
|
| 79 |
+
title = {Active Site Sequence Representations of Human Kinases Outperform Full Sequence Representations for Affinity Prediction and Inhibitor Generation: 3D Effects in a 1D Model},
|
| 80 |
+
journal = {Journal of Chemical Information and Modeling},
|
| 81 |
+
volume = {62},
|
| 82 |
+
number = {2},
|
| 83 |
+
pages = {240-257},
|
| 84 |
+
year = {2022},
|
| 85 |
+
doi = {10.1021/acs.jcim.1c00889},
|
| 86 |
+
note ={PMID: 34905358},
|
| 87 |
+
URL = {https://doi.org/10.1021/acs.jcim.1c00889}
|
| 88 |
+
}
|
| 89 |
+
```
|
model_cards/description.md
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<img align="right" src="https://raw.githubusercontent.com/GT4SD/gt4sd-core/main/docs/_static/gt4sd_logo.png" alt="logo" width="120" >
|
| 2 |
+
|
| 3 |
+
[PaccMann<sup>GP</sup>](https://github.com/PaccMann/paccmann_gp) is a language-based Variational Autoencoder that is coupled with a GaussianProcess for controlled sampling. For details of the methodology, please see [Born et al., (2022), *Journal of Chemical Information & Modeling*](https://pubs.acs.org/doi/10.1021/acs.jcim.1c00889).
|
| 4 |
+
|
| 5 |
+
For **examples** and **documentation** of the model parameters, please see below.
|
| 6 |
+
Moreover, we provide a **model card** ([Mitchell et al. (2019)](https://dl.acm.org/doi/abs/10.1145/3287560.3287596?casa_token=XD4eHiE2cRUAAAAA:NL11gMa1hGPOUKTAbtXnbVQBDBbjxwcjGECF_i-WC_3g1aBgU1Hbz_f2b4kI_m1in-w__1ztGeHnwHs)) at the bottom of this page.
|
model_cards/examples.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
v0|["qed"]||1.2|100|10|4|8|4|1|0.1|3|4|42
|
| 2 |
+
v0|["qed","sa"]||1.2|100|10|4|8|4|1|0.1|3|4|42
|
| 3 |
+
v0|["affinity"]|MVLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTT|1.2|100|10|4|8|4|1|0.1|3|4|42
|
requirements.txt
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-f https://download.pytorch.org/whl/cpu/torch_stable.html
|
| 2 |
+
-f https://data.pyg.org/whl/torch-1.12.1+cpu.html
|
| 3 |
+
# pip==20.2.4
|
| 4 |
+
torch==1.12.1
|
| 5 |
+
torch-scatter
|
| 6 |
+
torch-spline-conv
|
| 7 |
+
torch-sparse
|
| 8 |
+
torch-geometric
|
| 9 |
+
torchvision==0.13.1
|
| 10 |
+
torchaudio==0.12.1
|
| 11 |
+
gt4sd>=1.1.6
|
| 12 |
+
diffusers==0.6.0
|
| 13 |
+
molgx>=0.22.0a1
|
| 14 |
+
molecule_generation
|
| 15 |
+
nglview
|
| 16 |
+
PyTDC==0.3.7
|
| 17 |
+
gradio==3.12.0
|
| 18 |
+
markdown-it-py>=2.1.0
|
| 19 |
+
mols2grid>=0.2.0
|
| 20 |
+
numpy==1.23.5
|
| 21 |
+
pandas>=1.0.0
|
| 22 |
+
terminator @ git+https://github.com/IBM/regression-transformer@gt4sd
|
| 23 |
+
guacamol_baselines @ git+https://github.com/GT4SD/[email protected]
|
| 24 |
+
moses @ git+https://github.com/GT4SD/[email protected]
|
| 25 |
+
paccmann_chemistry @ git+https://github.com/PaccMann/[email protected]
|
| 26 |
+
paccmann_generator @ git+https://github.com/PaccMann/[email protected]
|
| 27 |
+
paccmann_gp @ git+https://github.com/PaccMann/[email protected]
|
| 28 |
+
paccmann_omics @ git+https://github.com/PaccMann/[email protected]
|
| 29 |
+
paccmann_predictor @ git+https://github.com/PaccMann/paccmann_predictor@sarscov2
|
| 30 |
+
reinvent_models @ git+https://github.com/GT4SD/[email protected]
|
utils.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
from collections import defaultdict
|
| 3 |
+
from typing import List, Callable
|
| 4 |
+
from gt4sd.properties import PropertyPredictorRegistry
|
| 5 |
+
from gt4sd.algorithms.prediction.paccmann.core import PaccMann, AffinityPredictor
|
| 6 |
+
import torch
|
| 7 |
+
|
| 8 |
+
import mols2grid
|
| 9 |
+
import pandas as pd
|
| 10 |
+
|
| 11 |
+
# Module-level logger. The NullHandler is a no-op handler, so this module does
# not emit log output on its own unless the application configures logging.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_affinity_function(target: str) -> Callable:
    """
    Build an affinity evaluator bound to a single protein target.

    Args:
        target: Protein target that every molecule is paired with.

    Returns:
        A function that takes a collection of molecules and returns the
        stacked PaccMann affinity predictions converted with `.tolist()`.
    """

    def predict_affinities(mols):
        n_mols = len(mols)
        # The predictor takes one protein per ligand, so the single target is
        # repeated once for each molecule.
        predictor_config = AffinityPredictor(
            protein_targets=[target] * n_mols, ligands=mols
        )
        predictions = list(PaccMann(predictor_config).sample(n_mols))
        return torch.stack(predictions).tolist()

    return predict_affinities
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# Evaluators for the properties shown in the result grid, keyed by the property
# name used in this app. Note that the app's "sa" key maps to the registry's
# "sas" predictor.
EVAL_DICT = {
    "qed": PropertyPredictorRegistry.get_property_predictor("qed"),
    "sa": PropertyPredictorRegistry.get_property_predictor("sas"),
}
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def draw_grid_generate(
    samples: List[str],
    properties: List[str],
    protein_target: str,
    n_cols: int = 3,
    size=(140, 200),
) -> str:
    """
    Uses mols2grid to draw a HTML grid for the generated molecules

    Args:
        samples: The generated samples.
        properties: Names of the properties to display for each sample. Any
            name other than "affinity" must be a key of EVAL_DICT. The
            collection is only read, never modified.
        protein_target: Protein target passed to `get_affinity_function` to
            evaluate the "affinity" property. Use "" when affinity is not
            requested.
        n_cols: Number of columns in grid. Defaults to 3.
        size: Size of molecule in grid. Defaults to (140, 200).

    Returns:
        HTML to display

    Raises:
        KeyError: If a requested property has no evaluator (this includes
            "affinity" when `protein_target` is "").
    """

    # Use a per-call copy so that the affinity evaluator, which is bound to
    # this call's protein target, never leaks into the module-level EVAL_DICT
    # and thereby into later calls.
    evaluators = dict(EVAL_DICT)
    if protein_target != "":
        evaluators["affinity"] = get_affinity_function(protein_target)

    # Affinity is evaluated once for the whole batch; all other properties are
    # evaluated per sample. Filter instead of removing so the caller's
    # collection stays untouched.
    per_sample_properties = [prop for prop in properties if prop != "affinity"]

    result = defaultdict(list)
    result.update(
        {"SMILES": samples, "Name": [f"Generated_{i}" for i in range(len(samples))]},
    )
    if "affinity" in properties:
        result["affinity"] = evaluators["affinity"](samples)

    # Fill properties
    for sample in samples:
        for prop in per_sample_properties:
            value = evaluators[prop](sample)
            result[prop].append(f"{prop} = {value}")

    result_df = pd.DataFrame(result)
    obj = mols2grid.display(
        result_df,
        tooltip=list(result.keys()),
        height=1100,
        n_cols=n_cols,
        name="Results",
        size=size,
    )
    return obj.data
|