# Script extracted from vignette code chunks (the `## ----` lines are the chunk
# headers). Every modelling chunk is guarded so that it only runs when Keras
# and a Python runtime are usable; otherwise the chunk is skipped silently.

# Draw a random train/validation partition over `n` observations.
#
# @param n Number of observations (rows) to partition.
# @param validation_split Fraction of observations held out for validation.
# @return Logical vector of length `n`; TRUE marks a training observation.
make_train_idx <- function(n, validation_split) {
  shuffled <- sample(seq_len(n))
  n_train <- round(n * (1 - validation_split))
  seq_len(n) %in% shuffled[seq_len(n_train)]
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  library(DeepPINCS)
  # Only the first 500 rows are used (presumably to keep the run short).
  example_cpi <- example_cpi[1:500, ]
  validation_split <- 0.3
  train_idx <- make_train_idx(nrow(example_cpi), validation_split)
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  # Network specification shared by the fit below: a "gcn_in_out" compound
  # encoder, a "cnn_in_out" protein encoder, and a single sigmoid output unit.
  net_args <- list(
    compound = "gcn_in_out",
    compound_args = list(
      gcn_units = c(128, 64),
      gcn_activation = c("relu", "relu"),
      fc_units = 10,
      fc_activation = "relu"
    ),
    protein = "cnn_in_out",
    protein_args = list(
      cnn_filters = 32,
      cnn_kernel_size = 3,
      cnn_activation = "relu",
      fc_units = 10,
      fc_activation = "relu"
    ),
    fc_units = 1,
    fc_activation = "sigmoid",
    loss = "binary_crossentropy",
    optimizer = keras::optimizer_adam(),
    metrics = "accuracy"
  )
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  compound_max_atoms <- 50
  protein_embedding_dim <- 16
  protein_length_seq <- 100

  # Columns of example_cpi as used here: 1 = SMILES, 2 = amino-acid sequence,
  # 3 = outcome. Rows outside train_idx form the validation set.
  gcn_cnn_cpi <- fit_cpi(
    smiles = example_cpi[train_idx, 1],
    AAseq = example_cpi[train_idx, 2],
    outcome = example_cpi[train_idx, 3],
    compound_type = "graph",
    compound_max_atoms = compound_max_atoms,
    protein_length_seq = protein_length_seq,
    protein_embedding_dim = protein_embedding_dim,
    protein_ngram_max = 2,
    protein_ngram_min = 1,
    smiles_val = example_cpi[!train_idx, 1],
    AAseq_val = example_cpi[!train_idx, 2],
    outcome_val = example_cpi[!train_idx, 3],
    net_args = net_args,
    epochs = 20,
    batch_size = 64,
    callbacks = keras::callback_early_stopping(
      monitor = "val_accuracy",
      patience = 10,
      restore_best_weights = TRUE
    )
  )
  ttgsea::plot_model(gcn_cnn_cpi$model)
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  val_outcome <- example_cpi[!train_idx, 3]
  pred <- predict_cpi(
    gcn_cnn_cpi,
    smiles = example_cpi[!train_idx, 1],
    AAseq = example_cpi[!train_idx, 2],
    batch_size = 32
  )
  pred_class <- ifelse(pred$values > 0.5, 1, 0)
  # Explicit print(): inside braces only the last expression is auto-printed,
  # so without it this confusion table would be discarded.
  print(table(pred_class, val_outcome))

  # Scores of the outcome == 1 rows vs. the outcome == 0 rows feed both curves.
  scores_pos <- pred$values[val_outcome == 1]
  scores_neg <- pred$values[val_outcome == 0]

  roc <- PRROC::roc.curve(
    scores.class0 = scores_pos,
    scores.class1 = scores_neg,
    curve = TRUE
  )
  plot(roc)

  pr <- PRROC::pr.curve(
    scores.class0 = scores_pos,
    scores.class1 = scores_neg,
    curve = TRUE
  )
  plot(pr)
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  library(DeepPINCS)
  validation_split <- 0.3
  train_idx <- make_train_idx(nrow(example_cci), validation_split)

  # Pairwise model on example_cci: columns 1:2 are passed together as
  # `smiles`, column 3 is the outcome.
  mlp_mlp_cci <- fit_cpi(
    smiles = example_cci[train_idx, 1:2],
    outcome = example_cci[train_idx, 3],
    compound_type = "fingerprint",
    smiles_val = example_cci[!train_idx, 1:2],
    outcome_val = example_cci[!train_idx, 3],
    net_args = list(
      compound = "mlp_in_out",
      compound_args = list(
        fc_units = c(10, 5),
        fc_activation = c("relu", "relu")
      ),
      fc_units = 1,
      fc_activation = "sigmoid",
      loss = "binary_crossentropy",
      optimizer = keras::optimizer_adam(),
      metrics = "accuracy"
    ),
    epochs = 20,
    batch_size = 64,
    callbacks = keras::callback_early_stopping(
      monitor = "val_accuracy",
      patience = 10,
      restore_best_weights = TRUE
    )
  )
  ttgsea::plot_model(mlp_mlp_cci$model)

  pred <- predict_cpi(
    mlp_mlp_cci,
    smiles = example_cci[!train_idx, 1:2],
    batch_size = 32
  )
  pred_class <- ifelse(pred$values > 0.5, 1, 0)
  print(table(pred_class, example_cci[!train_idx, 3]))
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  validation_split <- 0.3
  train_idx <- make_train_idx(nrow(example_ppi), validation_split)
  protein_embedding_dim <- 16
  protein_length_seq <- 100

  # Pairwise model on example_ppi: columns 1:2 are passed together as
  # `AAseq`, column 3 is the outcome.
  mlp_mlp_ppi <- fit_cpi(
    AAseq = example_ppi[train_idx, 1:2],
    outcome = example_ppi[train_idx, 3],
    protein_length_seq = protein_length_seq,
    protein_embedding_dim = protein_embedding_dim,
    AAseq_val = example_ppi[!train_idx, 1:2],
    outcome_val = example_ppi[!train_idx, 3],
    net_args = list(
      protein = "mlp_in_out",
      protein_args = list(
        fc_units = c(10, 5),
        fc_activation = c("relu", "relu")
      ),
      fc_units = 1,
      fc_activation = "sigmoid",
      loss = "binary_crossentropy",
      optimizer = keras::optimizer_adam(),
      metrics = "accuracy"
    ),
    epochs = 20,
    batch_size = 64,
    callbacks = keras::callback_early_stopping(
      monitor = "val_accuracy",
      patience = 10,
      restore_best_weights = TRUE
    )
  )
  ttgsea::plot_model(mlp_mlp_ppi$model)

  pred <- predict_cpi(
    mlp_mlp_ppi,
    AAseq = example_ppi[!train_idx, 1:2],
    batch_size = 32
  )
  pred_class <- ifelse(pred$values > 0.5, 1, 0)
  print(table(pred_class, example_ppi[!train_idx, 3]))
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  validation_split <- 0.1
  train_idx <- make_train_idx(nrow(example_pd), validation_split)
  protein_embedding_dim <- 16
  protein_length_seq <- 30

  # Pairwise model on example_pd: columns 1:2 are passed together as
  # `AAseq`, column 3 is the outcome.
  mlp_mlp_pd <- fit_cpi(
    AAseq = example_pd[train_idx, 1:2],
    outcome = example_pd[train_idx, 3],
    protein_length_seq = protein_length_seq,
    protein_embedding_dim = protein_embedding_dim,
    AAseq_val = example_pd[!train_idx, 1:2],
    outcome_val = example_pd[!train_idx, 3],
    net_args = list(
      protein = "mlp_in_out",
      protein_args = list(
        fc_units = c(10, 5),
        fc_activation = c("relu", "relu")
      ),
      fc_units = 1,
      fc_activation = "sigmoid",
      loss = "binary_crossentropy",
      optimizer = keras::optimizer_adam(),
      metrics = "accuracy"
    ),
    epochs = 30,
    batch_size = 16,
    callbacks = keras::callback_early_stopping(
      monitor = "val_accuracy",
      patience = 10,
      restore_best_weights = TRUE
    )
  )

  pred <- predict_cpi(
    mlp_mlp_pd,
    AAseq = example_pd[!train_idx, 1:2],
    batch_size = 16
  )
  pred_class <- ifelse(pred$values > 0.5, 1, 0)
  print(table(pred_class, example_pd[!train_idx, 3]))
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  validation_split <- 0.3
  train_idx <- make_train_idx(nrow(example_chem), validation_split)
  compound_length_seq <- 50
  compound_embedding_dim <- 16

  # Single-input model on example_chem: column 1 = SMILES, column 2 = outcome.
  # Despite the object name, the encoder configured here is "mlp_in_out" on
  # sequence-encoded compounds.
  gcn_chem <- fit_cpi(
    smiles = example_chem[train_idx, 1],
    outcome = example_chem[train_idx, 2],
    compound_type = "sequence",
    compound_length_seq = compound_length_seq,
    compound_embedding_dim = compound_embedding_dim,
    smiles_val = example_chem[!train_idx, 1],
    outcome_val = example_chem[!train_idx, 2],
    net_args = list(
      compound = "mlp_in_out",
      compound_args = list(
        fc_units = 5,
        fc_activation = "relu"
      ),
      fc_units = 1,
      fc_activation = "sigmoid",
      loss = "binary_crossentropy",
      optimizer = keras::optimizer_adam(),
      metrics = "accuracy"
    ),
    epochs = 20,
    batch_size = 16,
    callbacks = keras::callback_early_stopping(
      monitor = "val_accuracy",
      patience = 10,
      restore_best_weights = TRUE
    )
  )
  ttgsea::plot_model(gcn_chem$model)

  pred <- predict_cpi(gcn_chem, smiles = example_chem[!train_idx, 1])
  pred_class <- ifelse(pred$values > 0.5, 1, 0)
  # The second dimension holds the observed outcome (column 2), so it must not
  # be labelled "smiles".
  print(table(pred_class, example_chem[!train_idx, 2]))
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  example_prot <- example_prot[1:500, ]

  # Recode the class column to 0-based integer codes and remember how many
  # classes there are, so the one-hot width and the output layer always agree.
  prot_class <- factor(example_prot[, 2])
  n_classes <- nlevels(prot_class)
  example_prot[, 2] <- as.numeric(prot_class) - 1

  validation_split <- 0.3
  train_idx <- make_train_idx(nrow(example_prot), validation_split)
  protein_embedding_dim <- 16
  protein_length_seq <- 100

  rnn_prot <- fit_cpi(
    AAseq = example_prot[train_idx, 1],
    # num_classes is fixed explicitly: without it the width is derived from
    # each subset, which can differ between the training and validation rows.
    outcome = keras::to_categorical(
      example_prot[train_idx, 2],
      num_classes = n_classes
    ),
    protein_length_seq = protein_length_seq,
    protein_embedding_dim = protein_embedding_dim,
    AAseq_val = example_prot[!train_idx, 1],
    outcome_val = keras::to_categorical(
      example_prot[!train_idx, 2],
      num_classes = n_classes
    ),
    net_args = list(
      protein = "rnn_in_out",
      protein_args = list(
        rnn_type = "gru",
        rnn_bidirectional = TRUE,
        rnn_units = 50,
        rnn_activation = "relu",
        fc_units = 10,
        fc_activation = "relu"
      ),
      fc_units = n_classes,
      fc_activation = "softmax",
      loss = "categorical_crossentropy",
      optimizer = keras::optimizer_adam(clipvalue = 0.5),
      metrics = "accuracy"
    ),
    epochs = 20,
    batch_size = 64,
    callbacks = keras::callback_early_stopping(
      monitor = "val_accuracy",
      patience = 10,
      restore_best_weights = TRUE
    )
  )
  ttgsea::plot_model(rnn_prot$model)

  # Row numbers of the validation observations.
  val_index <- which(!train_idx)

  # The fit result may carry `preprocessing$removed_AAseq_val`; its first
  # element is used as positions within the validation subset to exclude
  # (NOTE(review): confirm against fit_cpi's documentation). An empty or
  # missing set must leave val_index untouched: x[-integer(0)] would select
  # nothing at all.
  removed_val <- rnn_prot$preprocessing$removed_AAseq_val
  drop_pos <- if (length(removed_val) > 0) removed_val[[1]] else NULL
  kept_index <- if (length(drop_pos) > 0) val_index[-drop_pos] else val_index

  pred <- predict_cpi(rnn_prot, AAseq = example_prot[kept_index, 1])
  # Column with the highest score, shifted to the 0-based class coding.
  pred_class <- apply(pred$values, 1, which.max) - 1
  print(table(pred_class, example_prot[kept_index, 2]))
}

## -----------------------------------------------------------------------------
if (keras::is_keras_available() && reticulate::py_available()) {
  compound_length_seq <- 50
  protein_length_seq <- 500
  compound_embedding_dim <- 16
  protein_embedding_dim <- 16

  # Fit on all of example_bioassay (1 = SMILES, 2 = amino-acid sequence,
  # 3 = outcome); the hold-out is requested through validation_split instead
  # of explicit *_val arguments.
  mlp_mlp <- fit_cpi(
    smiles = example_bioassay[, 1],
    AAseq = example_bioassay[, 2],
    outcome = example_bioassay[, 3],
    compound_type = "sequence",
    compound_length_seq = compound_length_seq,
    protein_length_seq = protein_length_seq,
    compound_embedding_dim = compound_embedding_dim,
    protein_embedding_dim = protein_embedding_dim,
    net_args = list(
      compound = "mlp_in_out",
      compound_args = list(
        fc_units = c(10, 5),
        fc_activation = c("relu", "relu")
      ),
      protein = "mlp_in_out",
      protein_args = list(
        fc_units = c(10, 5),
        fc_activation = c("relu", "relu")
      ),
      fc_units = 1,
      fc_activation = "sigmoid",
      loss = "binary_crossentropy",
      optimizer = keras::optimizer_adam(),
      metrics = "accuracy"
    ),
    epochs = 20,
    batch_size = 64,
    validation_split = 0.3,
    verbose = 0,
    callbacks = keras::callback_early_stopping(
      monitor = "val_accuracy",
      patience = 5,
      restore_best_weights = TRUE
    )
  )
  ttgsea::plot_model(mlp_mlp$model)

  # Score every row of antiviral_drug (column 2) against the same protein
  # sequence, repeated once per drug. Arguments are kept positional as in the
  # original call (presumably smiles, then AAseq - confirm against
  # predict_cpi's signature).
  pred <- predict_cpi(
    mlp_mlp,
    antiviral_drug[, 2],
    rep(SARS_CoV2_3CL_Protease, nrow(antiviral_drug))
  )

  Result <- data.frame(antiviral_drug[, 1], pred$values)
  colnames(Result) <- c("drug", "probability")
  # Highest predicted probability first.
  Result[order(Result[, 2], decreasing = TRUE), ]
}

## ----eval=TRUE----------------------------------------------------------------
sessionInfo()