## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
library(purrr)
library(dplyr)
library(tidyr)
library(ggplot2)
library(CellBench)

## -----------------------------------------------------------------------------
library(CellBench)
library(purrr)

# function to raise number to a power
pow <- function(x, n) {
    x^n
}

pow2 <- partial(pow, n = 2)
pow3 <- partial(pow, n = 3)

pow2(2)
pow3(2)

## ----eval = FALSE-------------------------------------------------------------
#  pow2 <- function(x) {
#      x^2
#  }
#  
#  pow3 <- function(x) {
#      x^3
#  }

## ----eval = FALSE-------------------------------------------------------------
#  pow <- function(x, n) {
#      x^n
#  }
#  
#  pow2 <- function(x) {
#      pow(x, 2)
#  }
#  
#  pow3 <- function(x) {
#      pow(x, 3)
#  }

## -----------------------------------------------------------------------------
# find the maximum absolute value
max_absolute <- compose(max, abs)

max_absolute(rnorm(100))

## ----eval = FALSE-------------------------------------------------------------
#  method1 <- function(x) {
#      x <- normalise(x)
#      method_func1(x)
#  }
#  
#  method2 <- function(x) {
#      method_func2(x)
#  }
#  
#  method3 <- function(x) {
#      x <- normalise(x)
#      method_func3(x)
#  }

## ----eval = FALSE-------------------------------------------------------------
#  # identity simply returns its argument, useful here for code consistency
#  method1 <- compose(method_func1, normalise)
#  method2 <- compose(method_func2, identity)
#  method3 <- compose(method_func3, normalise)

## -----------------------------------------------------------------------------
x <- list(1, 2, 3)

map(x, function(x) { x * 2 })

## -----------------------------------------------------------------------------
# list of random values from different distributions
x <- list(
    rpois(100, lambda = 5),
    rpois(100, lambda = 5),
    rgamma(100, shape = 5),
    rgamma(100, shape = 5)
)

# list of additional parameters
y <- list(
    "mean",
    "median",
    "mean",
    "median"
)

# function that takes values and a mode argument
centrality <- function(x, mode = c("mean", "median")) {
    mode <- match.arg(mode)
    
    if (mode == "mean") {
       mean = mean(x)
    } else if (mode == "median") {
       median = median(x)
    }
}

# using map2 to apply function to two lists
map2(x, y, centrality)

## -----------------------------------------------------------------------------
library(dplyr)
# list of data
datasets <- list(
    set1 = rnorm(500, mean = 2, sd = 1),
    set2 = rnorm(500, mean = 1, sd = 2)
)

# list of functions
add_noise <- list(
    none = identity,
    add_bias = function(x) { x + 1 }
)

res <- apply_methods(datasets, add_noise)
class(res)
res

## -----------------------------------------------------------------------------
# filtering rows to only data from set 1
res %>%
    filter(data == "set1")

# filtering rows to only add_bias method
res %>%
    filter(add_noise == "add_bias")

# mutating data column to prepend "data" to data set names
res %>%
    mutate(data = paste0("data", data))

## -----------------------------------------------------------------------------
metric <- list(
    mean = mean,
    median = median
)

# simply applying the metrics results in a single column
res %>%
    apply_methods(metric)

# spread metrics across columns
res %>%
    apply_methods(metric) %>%
    spread(metric, result)

## -----------------------------------------------------------------------------
library(tidyr)
library(ggplot2)

# I prefer my own theme for ggplot2, following theme code is optional
theme_set(theme_bw() + theme(
    plot.title = element_text(face = "plain", size = rel(20/12),
                              hjust = 1/2, margin = margin(t = 10, b = 20)),
    axis.text = element_text(size = rel(14/12)),
    strip.text.x = element_text(size = rel(16/12)),
    axis.title = element_text(size = rel(16/12))
))

scale_colour_discrete <- function(...) scale_colour_brewer(..., palette="Set1")
scale_fill_discrete <- function(...) scale_fill_brewer(... , palette="Set1")

## -----------------------------------------------------------------------------
# pipeline collapse constructs a single string from the pipeline steps,
# unnest expands the list-column of results, transforming the result 
# into a flat table.
collapsed_res <- pipeline_collapse(res) %>%
    unnest()

ggplot(collapsed_res, aes(x = pipeline, y = result)) +
    geom_boxplot()

## -----------------------------------------------------------------------------
# remember that we have to unnest the data before it's appropriate
# for plotting
ggplot(unnest(res), aes(x = add_noise, y = result)) +
    geom_boxplot() +
    facet_grid(~data)