## ----setup, include=FALSE------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----loadPackages, message=FALSE, warning=FALSE--------------------------
library(ggplot2)
library(reshape2)
library(mixOmics)
library(RColorBrewer)
library(gridExtra)
library(edgeR)
library(VennDiagram)
library(devtools)

## ----importData, message=FALSE-------------------------------------------
raw_counts <- read.table("../data/D2-counts.txt", header = TRUE, row.names = 1)
raw_counts <- as.matrix(raw_counts)
design <- read.table("../data/D2-targets.txt", header = TRUE, 
                     stringsAsFactors = FALSE)

## ----basicCountDesc------------------------------------------------------
dim(raw_counts)

## ----headCounts----------------------------------------------------------
head(raw_counts)

## ----basicDesignDesc-----------------------------------------------------
design

## ----filterCounts--------------------------------------------------------
raw_counts_wn <- raw_counts[rowSums(raw_counts) > 0, ]
dim(raw_counts_wn)

## ----pseudoCounts--------------------------------------------------------
pseudo_counts <- log2(raw_counts_wn + 1)

df_raw <- melt(pseudo_counts, id = rownames(raw_counts_wn))
names(df_raw)[1:2] <- c("id", "sample")
df_raw$group <- substr(as.character(df_raw$sample), 1,
                       nchar(as.character(df_raw$sample)) - 5)
df_raw$method <- rep("Raw counts", nrow(df_raw))  

p <- ggplot(data=df_raw, aes(x=sample, y=value, fill=group))
p <- p + geom_boxplot()
p <- p + scale_fill_brewer(type = "qual", palette = 7)
p <- p + theme_bw()
p <- p + ggtitle("Boxplots of raw pseudo counts")
p <- p + ylab(expression(log[2] ~ (raw ~ count + 1))) + xlab("")
p <- p + theme(title = element_text(size=10), axis.text.x = element_blank(), 
               axis.ticks.x = element_blank())
print(p)

## ----PCA, cache = TRUE---------------------------------------------------
resPCA <- pca(t(pseudo_counts), ncomp = 12)
plot(resPCA)

## ----plotIndPCA, warning = FALSE-----------------------------------------
plotIndiv(resPCA, group = design$group, col.per.group = brewer.pal(8, "Dark2"),
          title = "1st and 2nd PCs")

## ----plotIndPCA2, warning = FALSE----------------------------------------
plotIndiv(resPCA, group = design$group, col.per.group = brewer.pal(8, "Dark2"),
          comp = c(2,3), title = "2nd and 3rd PCs")

## ----prepareDataEdgeR----------------------------------------------------
dge <- DGEList(raw_counts_wn)
dge <- calcNormFactors(dge, method = "TMM")

## ----pseudoTMM-----------------------------------------------------------
pseudo_TMM <- log2(cpm(dge) + 1)

df_TMM <- melt(pseudo_TMM, id = rownames(raw_counts_wn))
names(df_TMM)[1:2] <- c ("id", "sample")
df_TMM$group <- substr(as.character(df_TMM$sample), 1,
                       nchar(as.character(df_TMM$sample)) - 5)
df_TMM$method <- rep("TMM", nrow(df_TMM))

## ----boxCompareNorm, fig.height=10---------------------------------------
df_allnorm <- rbind(df_raw, df_TMM)
df_allnorm$method <- factor(df_allnorm$method,
                            levels = c("Raw counts", "TMM"))

p <- ggplot(data=df_allnorm, aes(x=sample, y=value, fill=group))
p <- p + geom_boxplot()  
p <- p + scale_fill_brewer(type = "qual", palette = 7)
p <- p + theme_bw()
p <- p + ggtitle("Boxplots of normalized pseudo counts\n
for all samples by normalization methods")
p <- p + facet_grid(method ~ .) 
p <- p + ylab(expression(log[2] ~ (normalized ~ count + 1))) + xlab("")
p <- p + theme(title = element_text(size=10), axis.text.x = element_blank(), 
               axis.ticks.x = element_blank())
print(p)

## ----normPCA, cache = TRUE-----------------------------------------------
resPCA <- pca(t(pseudo_TMM), ncomp = 24)
plot(resPCA)

## ----plotIndNormPCA, warning = FALSE-------------------------------------
plotIndiv(resPCA, group = design$group, col.per.group = brewer.pal(8, "Dark2"),
          title = "1st and 2nd PCs")

## ----plotIndNormPCA2, warning = FALSE------------------------------------
plotIndiv(resPCA, group = design$group, col.per.group = brewer.pal(8, "Dark2"),
          comp = c(2,3), title = "2nd and 3rd PCs")

## ----GLM1----------------------------------------------------------------
# create design matrix
group <- relevel(as.factor(design$group), ref = "WT_control")
design_matrix <- model.matrix(~ design$replicat + group)
design_matrix

## ----GLM1Disp, cache = TRUE----------------------------------------------
dge <- estimateGLMCommonDisp(dge, design_matrix)
dge <- estimateGLMTrendedDisp(dge, design_matrix)
dge <- estimateGLMTagwiseDisp(dge, design_matrix)
plotBCV(dge, main = paste0("BCV plot"))

## ----GLM1Fit, cache = TRUE-----------------------------------------------
# GLM fit
fit <- glmFit(dge, design_matrix)
contrasts <- rep(0, ncol(design_matrix))
contrasts[4] <- -1
contrasts[5] <- 1
contrasts; colnames(design_matrix)[as.logical(contrasts)]

## ----GLM1LRT, cache = TRUE-----------------------------------------------
res_GLM1 <- glmLRT(fit, contrast = contrasts)
# p-values
pvals_GLM1 <- data.frame("pvalue" = c(res_GLM1$table$PValue, 
                                      p.adjust(res_GLM1$table$PValue, "BH")),
                         "type" = rep(c("raw", "adjusted"),
                                      each = nrow(raw_counts_wn)))
topTab <- topTags(res_GLM1, n = nrow(raw_counts_wn))
# statistics (LR)
LR_GLM1 <- topTab$table$LR
names(LR_GLM1) <- rownames(topTab)
# DEG
DEGs <- decideTestsDGE(res_GLM1, adjust.method = "BH", p.value = 0.01)
sel_deg <- which(DEGs[ ,1] != 0)
DEG_GLM1 <- data.frame("name" = rownames(raw_counts_wn)[sel_deg], 
                       "UD" = DEGs[sel_deg,1])

## ----histGLM1, message = FALSE-------------------------------------------
p <- ggplot(data = pvals_GLM1, aes(x = pvalue, fill = type))
p <- p + geom_histogram()
p <- p + theme_bw()
p <- p + ggtitle("Histogram of raw/adjusted p-values for exact test")
p <- p + facet_grid(type ~ .) 
p <- p + theme(title = element_text(size=10), axis.text.x = element_blank(), 
               axis.ticks.x = element_blank())
print(p)

## ----histStatGLM1, message = FALSE---------------------------------------
df <- data.frame("statistics" = log2(LR_GLM1 + 1))
p <- ggplot(data = df, aes(x = statistics))
p <- p + geom_histogram()
p <- p + theme_bw()
p <- p + ggtitle(expression("Histogram of" ~ log[2](~"statistics" + 1)))
p <- p + theme(title = element_text(size=10), axis.text.x = element_blank(), 
               axis.ticks.x = element_blank())
print(p)

## ----resultsGLM1---------------------------------------------------------
nrow(DEG_GLM1)
table(DEG_GLM1$UD) # 1 means 'up-regulated'

## ----GLM2----------------------------------------------------------------
# create design matrix
factor1 <- relevel(as.factor(design$factor1), ref = "WT")
factor2 <- relevel(as.factor(design$factor2), ref = "control")
design_matrix <- model.matrix(~ design$replicat + factor1 + factor2 +
                                factor1:factor2)
design_matrix

## ----GLM2Disp, cache = TRUE----------------------------------------------
dge <- estimateGLMCommonDisp(dge, design_matrix)
dge <- estimateGLMTrendedDisp(dge, design_matrix)
dge <- estimateGLMTagwiseDisp(dge, design_matrix)
plotBCV(dge, main = paste0("BCV plot"))

## ----GLM2LRT, cache = TRUE-----------------------------------------------
res_GLM2 <- glmLRT(fit, coef = 8)
# p-values
pvals_GLM2 <- data.frame("pvalue" = c(res_GLM2$table$PValue, 
                                      p.adjust(res_GLM2$table$PValue, "BH")),
                         "type" = rep(c("raw", "adjusted"),
                                      each = nrow(raw_counts_wn)))
topTab <- topTags(res_GLM2, n = nrow(raw_counts_wn))
# statistics (LR)
LR_GLM2 <- topTab$table$LR
names(LR_GLM2) <- rownames(topTab)
# DEG
DEGs <- decideTestsDGE(res_GLM2, adjust.method = "BH", p.value = 0.01)
sel_deg <- which(DEGs[ ,1] != 0)
DEG_GLM2 <- data.frame("name" = rownames(raw_counts_wn)[sel_deg], 
                       "UD" = DEGs[sel_deg,1])

## ----histGLM2, message = FALSE-------------------------------------------
p <- ggplot(data = pvals_GLM2, aes(x = pvalue, fill = type))
p <- p + geom_histogram()
p <- p + theme_bw()
p <- p + ggtitle("Histogram of raw/adjusted p-values for exact test")
p <- p + facet_grid(type ~ .) 
p <- p + theme(title = element_text(size=10), axis.text.x = element_blank(), 
               axis.ticks.x = element_blank())
print(p)

## ----histStatGLM2, message = FALSE---------------------------------------
df <- data.frame("statistics" = log2(LR_GLM2 + 1))
p <- ggplot(data = df, aes(x = statistics))
p <- p + geom_histogram()
p <- p + theme_bw()
p <- p + ggtitle(expression("Histogram of" ~ log[2](~"statistics" + 1)))
p <- p + theme(title = element_text(size=10), axis.text.x = element_blank(), 
               axis.ticks.x = element_blank())
print(p)

## ----resultsGLM2---------------------------------------------------------
nrow(DEG_GLM2)
table(DEG_GLM2$UD) # 1 means 'up-regulated'

## ----vennDiagram, warning = FALSE----------------------------------------
vd <- venn.diagram(x=list("Overall group effect" = DEG_GLM1$name,
                          "Separate effects" = DEG_GLM2$name), 
                   fill = brewer.pal(3, "Set3")[1:2], 
                   cat.col = c("darkgreen", "darkred"),
                   cat.cex = 1.5, fontface="bold", filename=NULL, 
                   mar = rep(0.1,4))
grid.draw(vd)

## ----cleanVD, echo=FALSE, fig.show='hide', message=FALSE, results='hide'----
system("rm VennDiagram*.log")

## ----relationsPVal-------------------------------------------------------
order_2in1 <- match(names(LR_GLM1), names(LR_GLM2))
df <- data.frame("statistics1" = log2(LR_GLM1 + 1),
                 "statistics2" = log2(LR_GLM2[order_2in1] + 1))
p1 <- ggplot(data = df, aes(x = statistics1, y = statistics2))
p1 <- p1 + geom_point(colour = "red", alpha = 0.01)
p1 <- p1 + theme_bw()
p1 <- p1 + ggtitle("Statistics for both models")
p1 <- p1 + xlab("Overall group effect") + ylab("Separate effects")
p1 <- p1 + theme(title = element_text(size=10), axis.text.x = element_blank(), 
               axis.ticks.x = element_blank())

pvals1 <- pvals_GLM1$pvalue[pvals_GLM1$type == "adjusted"]
pvals2 <- pvals_GLM2$pvalue[pvals_GLM2$type == "adjusted"]
sel_pvals <- (pvals1 < 0.01 | pvals2 < 0.01)
df <- data.frame("pvals1" = pvals1[sel_pvals], "pvals2" = pvals2[sel_pvals])
p2 <- ggplot(data = df, aes(x = pvals1, y = pvals2))
p2 <- p2 + geom_point(colour = "red", alpha = 0.01)
p2 <- p2 + theme_bw()
p2 <- p2 + ggtitle("Statistics for both models")
p2 <- p2 + xlab("Overall group effect") + ylab("Separate effects")
p2 <- p2 + theme(title = element_text(size=10), axis.text.x = element_blank(), 
                 axis.ticks.x = element_blank())

grid.arrange(p1, p2, ncol = 2)

## ----sessionInfo---------------------------------------------------------
session_info()

