## ----message=FALSE, warning=FALSE, echo=FALSE, fig.width=8.5, fig.height=6.5, eval=FALSE---- # ## Define colors # colors <- c("#e19995", "#adaf64", "#4fbe9b", "#6eb3d9", "#d098d7") # ## Create artificial GInteractions # library(InteractionSet) # set.seed(5) # pool <- GInteractions( # anchor1 = GRanges(seqnames = "chr1", # ranges = IRanges(start = sample(1:990, 120, replace = TRUE), # width = 10)), # anchor2 = GRanges(seqnames = "chr1", # ranges = IRanges(start = sample(1:990, 120, replace = TRUE), # width = 10)), # mode = "strict", # color = sample(1:5, 120, replace = TRUE) # ) # focal <- GInteractions( # anchor1 = GRanges(seqnames = "chr1", # ranges = IRanges(start = sample(1:990, 16, replace = TRUE), # width = 10)), # anchor2 = GRanges(seqnames = "chr1", # ranges = IRanges(start = sample(1:990, 16, replace = TRUE), # width = 10)), # mode = "strict", # color = sample(1:5, 16, replace = TRUE) # ) # ## Add distance to metadata # pool$distance <- pairdist(pool) # focal$distance <- pairdist(focal) # ## Match ranges # library(nullranges) # set.seed(123) # x <- matchRanges(focal = focal, # pool = pool, # covar = ~color + distance, # method = 'n', replace = TRUE) # ## Visualize sets # library(plotgardener) # library(grid) # set.seed(123) # pageCreate(width = 8.5, height = 6.5, showGuides = FALSE, xgrid = 0, ygrid = 0) # ## Define common parameters # p <- pgParams(chrom = "chr1", chromstart = 1, chromend = 1000) # ## Pool set # poolSet <- plotPairs(data = pool, # params = p, # fill = colors[pool$color], # x = 1, y = 1.25, width = 2.5, height = 2.25) # annoGenomeLabel(plot = poolSet, x = 1, y = 3.55) # plotText(label = "Pool Set", # x = 2.25, y = 0.9, # just = c("center", "bottom"), # fontcolor = "#33A02C", # fontface = "bold", # fontfamily = 'mono') # ## Focal set # focalSet <- plotPairs(data = focal, # params = p, # fill = colors[focal$color], # x = 5, y = 1.1, width = 2.5, height = 0.9) # annoGenomeLabel(plot = focalSet, x = 5, y = 2.05) # plotText(label = "Focal Set", # x = 6.25, y = 0.9, # just = c("center", "bottom"), # fontcolor = "#1F78B4", # fontface = "bold", # fontfamily = 'mono') # ## Matched set # matchedSet <- plotPairs(data = x, # params = p, # fill = colors[x$color], # x = 5, y = 2.6, width = 2.5, height = 0.9) # annoGenomeLabel(plot = matchedSet, x = 5, y = 3.55) # plotText(label = "Matched Set", # x = 6.25, y = 2.50, # just = c("center", "bottom"), # fontcolor = "#A6CEE3", # fontface = "bold", # fontfamily = 'mono') # ## Arrow and matchRanges label # plotSegments(x0 = 3.5, y0 = 3, # x1 = 5, y1 = 3, # arrow = arrow(type = "closed", length = unit(0.1, "inches")), # fill = "black", lwd = 2) # plotText(label = "matchRanges()", fontfamily = 'mono', # x = 4.25, y = 2.9, just = c("center", "bottom")) # ## Matching plots # library(ggplot2) # smallText <- theme(legend.title = element_text(size=8), # legend.text=element_text(size=8), # title = element_text(size=8), # axis.title.x = element_text(size=8), # axis.title.y = element_text(size=8)) # plot1 <- # plotPropensity(x, sets=c('f','m','p')) + # smallText + # theme(legend.key.size = unit(0.5, 'lines'), # title = element_blank()) # plot2 <- # plotCovariate(x=x, covar=covariates(x)[1], sets=c('f','m','p')) + # smallText + # theme(legend.text = element_blank(), # legend.position = 'none') # plot3 <- # plotCovariate(x=x, covar=covariates(x)[2], sets=c('f','m','p'))+ # smallText + # theme(legend.key.size = unit(0.5, 'lines')) # ## Propensity scores # plotText(label = "plotPropensity()", # x = 1.10, y = 4.24, # just = c("left", "bottom"), # fontface = "bold", # fontfamily = 'mono') # plotText(label = "~color + distance", # x = 1.25, y = 4.5, # just = c("left", "bottom"), # fontsize = 10, # fontfamily = "mono") # plotGG(plot = plot1, # x = 1, y = 4.5, width = 2.5, height = 1.5, # just = c("left", "top")) # ## Covariate balance # plotText(label = "plotCovariate()", # x = 3.75, y = 4.24, # just = c("left", "bottom"), # fontface = "bold", # fontfamily = "mono") # plotText(label = covariates(x), # x = c(4, 5.9), y = 4.5, # just = c("left", "bottom"), # fontsize = 10, # fontfamily = "mono") # plotGG(plot = plot2, # x = 3.50, y = 4.5, width = 1.8, height = 1.5, # just = c("left", "top")) # plotGG(plot = plot3, # x = 5.30, y = 4.5, width = 2.75, height = 1.5, # just = c("left", "top")) ## ----message=FALSE, warning=FALSE--------------------------------------------- library(nullrangesData) ## Load example data binPairs <- hg19_10kb_ctcfBoundBinPairs() binPairs ## ----message=FALSE, warning=FALSE--------------------------------------------- library(nullranges) set.seed(123) mgi <- matchRanges(focal = binPairs[binPairs$looped], pool = binPairs[!binPairs$looped], covar = ~ctcfSignal + distance + n_sites, method = 'stratified') mgi ## ----message=FALSE, warning=FALSE--------------------------------------------- library(plyranges) library(ggplot2) ## Summarize ctcfSignal by n_sites mgi %>% regions() %>% group_by(n_sites) %>% summarize(ctcfSignal = mean(ctcfSignal)) %>% as.data.frame() %>% ggplot(aes(x = n_sites, y = ctcfSignal)) + geom_line() + geom_point(shape = 21, stroke = 1, fill = 'white') + theme_minimal() + theme(panel.border = element_rect(color = 'black', fill = NA)) ## ----------------------------------------------------------------------------- ov <- overview(mgi) ov ## ----------------------------------------------------------------------------- ov$quality ## ----message=FALSE------------------------------------------------------------ plotPropensity(mgi, sets = c('f', 'p', 'm')) ## ----------------------------------------------------------------------------- plotPropensity(mgi, sets = c('f', 'p', 'm'), log = 'x') ## ----message=FALSE, warning=FALSE, fig.height=8, fig.width=5------------------ library(patchwork) plots <- lapply(covariates(mgi), plotCovariate, x=mgi, sets = c('f', 'm', 'p')) Reduce('/', plots) ## ----fig.width=6, fig.height=5------------------------------------------------ ## Generate a randomly selected set from all binPairs all <- c(focal(mgi), pool(mgi)) set.seed(123) random <- all[sample(1:length(all), length(mgi), replace = FALSE)] ## Calculate the percent of convergent CTCF sites for each group g1 <- (sum(focal(mgi)$convergent) / length(focal(mgi))) * 100 g2 <- (sum(pool(mgi)$convergent) / length(pool(mgi))) * 100 g3 <- (sum(random$convergent) / length(random)) * 100 g4 <- (sum(mgi$convergent) / length(mgi)) * 100 ## Visualize barplot(height = c(g1, g2, g3, g4), names.arg = c('looped\n(focal)', 'unlooped\n(pool)', 'all pairs\n(random)', 'unlooped\n(matched)'), col = c('#1F78B4', '#33A02C', 'orange2', '#A6CEE3'), ylab = "Convergent CTCF Sites (%)", main = "Testing the Convergence Rule", border = NA, las = 1) ## ----------------------------------------------------------------------------- sessionInfo()