## ----eval=TRUE, warnings = FALSE, echo=TRUE,message=FALSE--------------------- library(ChIPanalyser) #Load data data(ChIPanalyserData) # Loading DNASequenceSet from BSgenome object # We recommend using the latest version of the genome # Please ensure that all your data is aligned to the same version of the genome library(BSgenome.Dmelanogaster.UCSC.dm6) DNASequenceSet <- getSeq(BSgenome.Dmelanogaster.UCSC.dm6) #Loading Position Frequency Matrix PFM <- file.path(system.file("extdata",package="ChIPanalyser"),"BEAF-32.pfm") #Checking if correctly loaded ls() ## ----eval=TRUE, warnings = FALSE---------------------------------------------- chip <- processingChIP(profile = chip, loci = top, cores = 1) chip ## ----eval =TRUE--------------------------------------------------------------- # PFMs are automatically converted to PWM when build genomicProfiles GP <- genomicProfiles(PFM = PFM, PFMFormat = "JASPAR") GP ## ----eval=FALSE--------------------------------------------------------------- # GP <- genomicProfiles(PWM=PositionWeightMatrix) ## ----eval=TRUE,warnings=FALSE------------------------------------------------- ## surpress dependency warnings optimal <- suppressWarnings(computeOptimal(genomicProfiles = GP, DNASequenceSet = DNASequenceSet, ChIPScore = chip, chromatinState = Access)) ## ----eval=TRUE, echo=TRUE----------------------------------------------------- ## Lambda Values seq(0.25,5,by=0.25) ## Bound Molecule Values c(1, 10, 20, 50, 100, 200, 500,1000,2000, 5000,10000,20000,50000, 100000, 200000, 500000, 1000000) ## ----eval =T------------------------------------------------------------------ optimalParam <- optimal$Optimal optimalParam$OptimalParameters ## ----eval=TRUE, warnings = FALSE, fig.width=10, fig.height=8------------------ # Plotting Optimal heat maps par(oma=c(0,0,3,0)) layout(matrix(1:8,ncol=4, byrow=T),width=c(6,1.5,6,1.5),height=c(1,1)) plotOptimalHeatMaps(optimalParam,layout=FALSE) ## ----eval=TRUE---------------------------------------------------------------- optimalParam <- searchSites(optimal,lambdaPWM = 1.25,BoundMolecules = 10000) ## ----eval=TRUE,fig.width=15, fig.height=8------------------------------------- plotOccupancyProfile(predictedProfile = optimalParam$ChIPProfiles, ChIPScore = chip, chromatinState = Access, occupancy = optimalParam$Occupancy, goodnessOfFit = optimalParam$goodnessOfFit, geneRef = geneRef) ## ----eval =TRUE--------------------------------------------------------------- ## Suggested Parameters to start with. parameterOptions() ## Changing parameters PO <- parameterOptions(noiseFilter="sigmoid",chipSd=150,chipMean=150,lociWidth=30000) ## ----eval=FALSE--------------------------------------------------------------- # ## Top 50 loci based on ChIP score # processingChIP(profile = "/path/to/ChIP", # loci = NULL, # reduce = 50, # parameterOptions = PO) # # ## Top 50 loci ALSO containing peaks # processingChIP(profile = "/path/to/ChIP", # loci = NULL, # reduce = 50, # peaks = "/path/to/peaks", # parameterOptions=PO) # # ## Top 50 loci containing BOTH peaks and Accessible DNA # processingChIP(profile = "/path/to/ChIP", # loci = NULL, # reduce = 50, # peaks = "/path/to/peaks", # chromatinState = "/path/to/chromatinState", # parameterOptions = PO) # ## ----eval=TRUE---------------------------------------------------------------- str(genomicProfiles()) GP <- genomicProfiles(PFM=PFM, PFMFormat="JASPAR", BPFrequency=DNASequenceSet) GP ## ----eval=FALSE--------------------------------------------------------------- # ## Parsing pre computed parameters (processingChIP function) # GP <- genomicProfiles(PFM = PFM, # PFMFormat = "JASPAR", # BPFrequency = DNASequenceSet, # ChIPScore = ChIPScore) # # ## Parsing pre assigned function (parameterOptions) # parameterOptions <- parameterOptions(lambdaPWM = c(1,2,3), # boundMolecules = c(5,50,500)) # GP <- genomicProfiles(PFM = PFM, # PFMFormat = "JASPAR", # BPFrequency = DNASequenceSet, # parameterOptions = parameterOptions) # # ## Direct parameter assignement # # GP <- genomicProfiles(PFM = PFM, # PFMFormat = "JASPAR", # BPFrequency = DNASequenceSet, # lambdaPWM = c(1,2,3), # boundMolecules = c(4,500,8000)) ## ----eval=FALSE--------------------------------------------------------------- # ## Setting custom parameters # OP <- parameterOptions(lambdaPWM = seq(1,10,by=0.5), # boundMolecules = seq(1,100000, length.out=20)) # # ## Computing ONLY Optimal Parameters and MSE as goodness Of Fit metric # optimal <- computeOptimal(genomicProfiles = GP, # DNASequenceSet = DNASequenceSet, # ChIPScore = chip, # chromatinState = Access, # parameterOptions = OP, # optimalMethod = "MSE", # returnAll = FALSE) # # ### Computing ONLY Optimal Parameters and using Rank slection method # optimal <- computeOptimal(genomicProfiles = GP, # DNASequenceSet = DNASequenceSet, # ChIPScore = chip, # chromatinState = Access, # parameterOptions = OP, # optimalMethod = "all", # rank=TRUE) # ## ----eval=FALSE--------------------------------------------------------------- # ## Extracted Optimal Parameters # optimalParam <- optimal$Optimal # # ## Plotting heat maps # plotOptimalHeatMaps(optimalParam,overlay=TRUE) ## ----eval=TRUE,warnings=FALSE------------------------------------------------- ## Creating genomic Profiles object with PFMs and associated parameters GP <- genomicProfiles(PFM = PFM, PFMFormat = "JASPAR", BPFrequency = DNASequenceSet, lambdaPWM = 1, boundMolecules = 58794) ## Computing Genome Wide Score required GW <- computeGenomeWideScores(genomicProfiles = GP, DNASequenceSet = DNASequenceSet, chromatinState = Access) GW ## Computing PWM score above threshold pwm <- computePWMScore(genomicProfiles = GW, DNASequenceSet = DNASequenceSet, loci = chip, chromatinState = Access) pwm ## Computing Occupancy of sites above threshold occup <- computeOccupancy(genomicProfiles = pwm) occup ## Compute ChIP seq like profiles pred <- computeChIPProfile(genomicProfiles = occup, loci = chip) pred ## Compute goodness Of Fit of model accu <- profileAccuracyEstimate(genomicProfiles = pred, ChIPScore = chip) accu ## ----eval=TRUE,fig.width=12, fig.height=4.5----------------------------------- plotOccupancyProfile(predictedProfile=pred, ChIPScore=chip, chromatinState=Access, occupancy=occup, goodnessOfFit=accu, geneRef=geneRef) ## ----eval=TRUE,echo=TRUE------------------------------------------------------ parameterOptions() ## ----eval=T, echo=T----------------------------------------------------------- str(genomicProfiles()) ## ----eval=F, echo=T----------------------------------------------------------- # ## Accessors and Setters for parameterOptions and genomicProfiles # avrageExpPWMScore(obj) # backgroundSignal(obj) # backgroundSignal(obj)<-value # boundMolecules(obj) # boundMolecules(obj)<-value # BPFrequency(obj) # BPFrequency(obj)<-value # chipMean(obj) # chipMean(obj)<-value # chipSd(obj) # chipSd(obj)<-value # chipSmooth(obj) # chipSmooth(obj)<-value # DNASequenceLength(obj) # drop(obj) # lambdaPWM(obj) # lambdaPWM(obj)<-value # lociWidth(obj) # lociWidth(obj)<-value # maxPWMScore(obj) # maxSignal(obj) # maxSignal(obj)<-value # minPWMScore(obj) # naturalLog(obj) # naturalLog(obj)<-value # noiseFilter(obj) # noiseFilter(obj)<-value # noOfSites(obj) # noOfSites(obj)<-value # PFMFormat(obj) # PFMFormat(obj)<-value # ploidy(obj) # ploidy(obj)<-value # PositionFrequencyMatrix(obj) # PositionFrequencyMatrix(obj)<-value # PositionWeightMatrix(obj) # PositionWeightMatrix(obj)<-value # profiles(obj) # PWMpseudocount(obj) # PWMpseudocount(obj)<-value # PWMThreshold(obj) # PWMThreshold(obj)<-value # removeBackground(obj) # removeBackground(obj)<-value # stepSize(obj) # stepSize(obj)<-value # strandRule(obj) # strandRule(obj)<-value # whichstrand(obj) # whichstrand(obj)<-value # ## ChIPScore slots accessors # loci(obj) # scores(obj) ## ----eval=T------------------------------------------------------------------- sessionInfo()