## ----loading_ChIPanalyser, eval = TRUE, echo = TRUE--------------------------- library(ChIPanalyser) # Input data data(ChIPanalyserData) # PFM Matrix PFM <- file.path(system.file("extdata",package="ChIPanalyser"),"BEAF-32.pfm") ## ----bsgenome, eval = TRUE, echo = TRUE--------------------------------------- library(BSgenome.Dmelanogaster.UCSC.dm6) DNASequenceSet <- getSeq(BSgenome.Dmelanogaster.UCSC.dm6) ## ----GA_params, eval = TRUE, echo = TRUE-------------------------------------- # Number of individuals per generation pop <- 10 # Number of generations gen <- 2 # Mutation Probability mut <- 0.3 # Children - Number of ofspring passed to the next generation child <- 2 # Method - Goodness of fit metric used to optimise the Genetic algorithm method <- "MSE" ## ----GA_params_recom, class.source = 'fold-hide' ,eval = FALSE, echo = TRUE---- # # Number of individuals per generation # pop <- 100 # # # Number of generations # gen <- 50 # # # Mutation Probability # mut <- 0.3 # # # Children - Number of ofspring passed to the next generation # child <- 10 # # # Method - Goodness of fit metric used to optimise the Genetic algorithm # method <- "MSE" # ## ----opti_params, eval = FALSE, echo = TRUE----------------------------------- # # Parameters to optimised # params <- c("N","lambda","PWMThreshold", paste0("CS",seq(1:11))) ## ----opti_params2,eval = TRUE,echo= TRUE-------------------------------------- params_custom <- vector("list", 14) names(params_custom) <- c("N","lambda","PWMThreshold", paste0("CS",seq(1:11))) # vector in the format of min value, max value and number of values params_custom$N <- c(1,1000000,5) params_custom$lambda <- c(1,5,5) # Bound between 0 and 1 params_custom$PWMThreshold <- c(0.1,0.9,5) # Bound between 0 and 1 CS <- c(0,1,5) CS_loc <- grep("CS",names(params_custom)) for(i in CS_loc){ params_custom[[i]] <- CS } ## ----gpp, eval = TRUE, echo = TRUE-------------------------------------------- GPP <- genomicProfiles(PFM=PFM,PFMFormat="JASPAR", BPFrequency=DNASequenceSet) ## ----generateStartingPopulation, class.source = 'fold-hide', eval = TRUE, echo = TRUE---- start_pop <- generateStartingPopulation(pop, params_custom) ## ----preprocess, eval = TRUE, echo = TRUE------------------------------------- chipProfile <- processingChIP(chip,loci = top) # Splitting data into training and testing # We recommend setting dist to 20/80. However, here we only have 4 loci. splitdata <- splitData(chipProfile,dist = c(50,50),as.proportion = TRUE) trainingSet <- splitdata$trainingSet testingSet <- splitdata$testingSet ## ----evolve, eval = TRUE, echo = TRUE----------------------------------------- evo <- evolve(population = pop, DNASequenceSet = DNASequenceSet, ChIPScore = trainingSet, genomicProfiles = GPP, parameters = params_custom, mutationProbability = mut, generations = gen, offsprings = child, chromatinState = cs, method = method, filename = "This_TF_is_Best_TF", checkpoint = FALSE, cores= 1) ## ----getFit, echo = TRUE, eval = TRUE----------------------------------------- SuperFit <- getHighestFitnessSolutions(evo$population, child = 1, method = method) single<-evo[["population"]][SuperFit] ## ----singleRun, eval = TRUE, echo = TRUE-------------------------------------- # Set chromatin states for single run - create CS Granges with affinity scores cs_single <- setChromatinStates(single,cs)[[1]] superFit <- singleRun(indiv = single, DNAAffinity = cs_single, genomicProfiles = GPP, DNASequenceSet = DNASequenceSet, ChIPScore = testingSet, fitness = "all") ## ----plotOccup, eval = TRUE, echo = TRUE,fig.height= 8, fig.width=15,fig.show=TRUE---- par(mfrow = c(2,1)) plotOccupancyProfile(predictedProfile = superFit$ChIP, ChIPScore = testingSet, chromatinState = cs_single, occupancy = superFit$occupancy, goodnessOfFit = superFit$gof, geneRef = geneRef, addLegend = TRUE)