This R Markdown report contains instructions on how to make plots in R. Set your working directory as first thing:
setwd(“your/path/to/working/directory”)
#Load required library
suppressWarnings(library(metaMA))
suppressMessages(library(lattice))
suppressMessages(suppressWarnings(library(genefilter)))
suppressMessages(suppressWarnings(library(edgeR)))
suppressMessages(library(ggplot2))
suppressMessages(library(RColorBrewer))
suppressWarnings(library(cluster))
suppressMessages(suppressWarnings(library(WGCNA)))
suppressMessages(suppressWarnings(library(matrixStats)))
suppressMessages(suppressWarnings(library(dplyr)))
suppressWarnings(library(devtools))
suppressMessages(library(gplots))
suppressMessages(suppressWarnings(library(pathview)))
#Set the working directory where you have downloaded the count table
#setwd("Documents/Conferences/UCDavis")
d <- read.table(file="all_counts.txt", sep="\t", header=T, stringsAsFactors=F)
m <- read.table(file="metafile.txt", sep="\t", header=T, stringsAsFactors=F)
#Inspect the count table and check its dimensions
head(d)
## C61 C62 C63 C64 C91 C92 C93 C94 I561 I562 I563 I564 I591 I592
## AT1G01010 341 371 275 419 400 542 377 372 677 522 455 508 821 466
## AT1G01020 164 94 176 155 200 183 166 115 172 157 122 152 189 171
## AT1G03987 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## AT1G01030 20 34 40 27 28 36 22 40 20 7 57 38 25 10
## AT1G01040 738 487 610 690 945 1033 836 908 857 821 770 751 848 607
## AT1G03993 1 0 0 0 0 0 0 1 3 0 1 1 1 0
## I593 I594 I861 I862 I863 I864 I891 I892 I893 I894
## AT1G01010 691 500 157 473 459 228 590 491 565 496
## AT1G01020 163 185 46 162 119 53 172 212 169 157
## AT1G03987 0 0 0 0 0 0 0 0 0 0
## AT1G01030 17 26 49 17 24 48 27 28 47 32
## AT1G01040 871 756 361 618 641 439 783 692 768 625
## AT1G03993 0 0 0 1 0 1 3 2 1 1
dim(d)
## [1] 32833 24
How many genes and samples does the count table contain?
keep <- rowSums(cpm(d) > 1) > 1 ##What does this command do?
counts <- d[keep,]
norm.counts <- cpm(counts)
#Inspect the normalized count table and check its dimensions
head(norm.counts)
## C61 C62 C63 C64 C91 C92
## AT1G01010 23.778291 29.959781 22.560571 30.08792 30.13728 36.203972
## AT1G01020 11.435894 7.590888 14.438765 11.13038 15.06864 12.223850
## AT1G01030 1.394621 2.745640 3.281538 1.93884 2.10961 2.404692
## AT1G01040 51.461521 39.327260 50.043448 49.54813 71.19933 69.001297
## AT1G01050 105.642554 88.102753 93.195666 92.27442 111.88467 105.606051
## AT1G01060 199.918945 216.098045 252.596353 204.72714 415.36712 443.932839
## C93 C94 I561 I562 I563
## AT1G01010 28.56118 31.961613 46.189096 37.5523602 32.580994
## AT1G01020 12.57601 9.880606 11.734896 11.2944838 8.736003
## AT1G01030 1.66670 3.436733 1.364523 0.5035757 4.081575
## AT1G01040 63.33460 78.013830 58.469801 59.0622370 55.137067
## AT1G01050 91.28971 85.402805 90.467860 114.7433228 74.900483
## AT1G01060 575.46610 628.750227 166.403553 139.9940477 175.006482
## I564 I591 I592 I593 I594
## AT1G01010 36.510032 58.048466 36.5329508 47.177734 35.590816
## AT1G01020 10.924262 13.363167 13.4058682 11.128756 13.168602
## AT1G01030 2.731065 1.767615 0.7839689 1.160668 1.850722
## AT1G01040 53.974476 59.957490 47.5869124 59.467158 53.813314
## AT1G01050 78.841545 102.733765 111.6371717 103.913908 118.588599
## AT1G01060 178.812912 359.037892 264.0407262 322.119461 285.153619
## I861 I862 I863 I864 I891 I892
## AT1G01010 18.371591 34.025482 36.542401 22.393049 40.941044 31.60514
## AT1G01020 5.382759 11.653548 9.473956 5.205402 11.935355 13.64621
## AT1G01030 5.733809 1.222903 1.910714 4.714326 1.873573 1.80233
## AT1G01040 42.242958 44.456127 51.031980 43.116441 54.333623 44.54329
## AT1G01050 51.721295 102.364188 88.927804 65.705920 104.920100 105.95124
## AT1G01060 182.428731 117.110962 124.116783 153.117382 287.697577 324.41935
## I893 I894
## AT1G01010 44.181417 37.844782
## AT1G01020 13.215326 11.979094
## AT1G01030 3.675268 2.441599
## AT1G01040 60.055448 47.687478
## AT1G01050 102.672921 95.298656
## AT1G01060 384.886609 292.076264
dim(norm.counts)
## [1] 20646 24
What are the main differences between the raw count table and the normilized one?
rv <- rowVars(norm.counts)
select <- order(rv, decreasing=TRUE)[seq_len(100)]
pca <- prcomp(t(norm.counts[select,]))
fac <- factor(apply(m[,c("Cultivar", "TimePoint")], 1, paste, collapse=":"))
colours <- brewer.pal(nlevels(fac), "Paired")
pcafig <- xyplot(PC2 ~ PC1, groups=fac, data=as.data.frame(pca$x), pch=16, cex=1, aspect="iso",
col=colours, main=draw.key(key=list(rect=list(col=colours), text=list(levels(fac)),
rep=FALSE)))
print(pcafig)
#We can play around with visualization optsions:
pcafig <- xyplot(PC2 ~ PC1, groups=fac, data=as.data.frame(pca$x), pch=18, cex=1, aspect="iso",
col=colours, main=draw.key(key=list(rect=list(col=colours), text=list(levels(fac)),
rep=FALSE)))
print(pcafig)
pcafig <- xyplot(PC2 ~ PC1, groups=fac, data=as.data.frame(pca$x), pch=6, cex=1, aspect="iso",
col=colours, main=draw.key(key=list(rect=list(col=colours), text=list(levels(fac)),
rep=FALSE)))
print(pcafig)
#We can draw colour keys in a non-default way
pcafig <- xyplot(PC2 ~ PC1, groups=fac, data=as.data.frame(pca$x), pch=16, cex=1, aspect="iso",
col=colours, main=draw.key(key=list(rect=list(col=colours), text=list(levels(fac)),
rep=FALSE, columns=3)))
print(pcafig)
dist.matrix <- dist(t(norm.counts))
sampleTree <- hclust(dist.matrix)
colours <- data.frame(Cultivar=labels2colors(m$Cultivar), TimePoint=labels2colors(m$TimePoint))
plotDendroAndColors(sampleTree, colors=colours, groupLabels=c("Cultivar", "TimePoint"), colorHeight=0.1, autoColorHeight=FALSE)
#We need to import a new table with logFC values.
fc <- read.table(file="I5_v_C_time6.txt", sep="\t", header=T, stringsAsFactors=F)
logFDR <- -log10(fc$adj.P.Val)
plot(fc$logFC, logFDR, xlab="log2(Fold-Change)", ylab="-log10FDR")
#Label significant genes
fc <- mutate(fc, sig=ifelse(fc$adj.P.Val<0.05, "FDR<0.05", "Not Significant")) #What does the command do?
## Warning: package 'bindrcpp' was built under R version 3.4.4
#Make plot
p <- ggplot(fc, aes(logFC, -log10(adj.P.Val))) + geom_point(aes(col=sig)) + scale_color_manual(values=c("red", "black"))
p + geom_text(data=filter(fc, adj.P.Val<0.001), aes(label=Gene))
p + geom_text(data=fc[order(fc$adj.P.Val, decreasing=FALSE)[1:10],], aes(label=Gene))
p + geom_text(data=fc[order(fc$adj.P.Val, decreasing=FALSE)[1:10],], aes(label=Gene)) + geom_vline(xintercept=c(-2,2), colour="black") + geom_hline(yintercept=1.3, colour="black")
slt <- order(rv, decreasing=TRUE)[seq_len(20)]
heatmap.2(norm.counts[slt,], col=heat.colors, trace="none", margin=c(3,7))
heatmap.2(norm.counts[slt,], col=heat.colors, trace="none", margin=c(3,6), cexRow=0.8, cexCol=0.8)
heatmap.2(norm.counts[slt,], col=heat.colors, trace="none", margin=c(3,7), cexRow=0.8, cexCol=0.8, ColSideColors=labels2colors(m$Cultivar))
rowcols <- rep(brewer.pal(4, 'Set1'), each=5)
names(rowcols) <- rownames(norm.counts[slt,])
heatmap.2(norm.counts[slt,], col=heat.colors, trace="none", margin=c(3,7), cexRow=0.8, cexCol=0.8, ColSideColors=labels2colors(m$Cultivar), RowSideColors=rowcols)
#Heatmaps with multiple side bars using heatmap.3()
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## Warning in strptime(x, fmt, tz = "GMT"): unknown timezone 'zone/tz/2018c.
## 1.0/zoneinfo/Europe/Stockholm'
## SHA-1 hash of file is 015fc0457e61e3e93a903e69a24d96d2dac7b9fb
## SHA-1 hash of file is 015fc0457e61e3e93a903e69a24d96d2dac7b9fb
rlab <- t(rowcols)
rownames(rlab) <- "GeneType"
clab <- cbind(labels2colors(m$Cultivar), labels2colors(m$TimePoint))
colnames(clab) <- c("Cultivar", "TimePoint")
# The plot will be saved to a pdf file because of the size of the figure
pdf("test_heatmap3.pdf")
heatmap.3(norm.counts[slt,], col=heat.colors, trace="none", cexRow=0.8, cexCol=0.8, ColSideColors=clab, RowSideColors=rlab, ColSideColorsSize=2, RowSideColorsSize=2, margin=c(5,5))
dev.off()
## quartz_off_screen
## 2
#select genes from the differential expression analysis results
# first, load in your differential expression analysis results
sel.genes <- fc$Gene[1:10]
# then, match the names of your selected genes to the rownames of your counts table
index <- match(sel.genes, rownames(norm.counts))
# then, follow some steps from above to generate necessary colors and labels
rowcols <- rep(brewer.pal(5, 'Set1'), each=2)
names(rowcols) <- rownames(norm.counts[index,])
rlab <- t(rowcols)
rownames(rlab) <- "GeneType"
clab <- cbind(labels2colors(m$Cultivar), labels2colors(m$TimePoint))
colnames(clab) <- c("Cultivar", "TimePoint")
#Using log transformed data.
log.counts <- cpm(counts, log=TRUE)
rv <- rowVars(log.counts)
slt <- order(rv, decreasing=TRUE)[seq_len(20)]
# use non-default color scheme
mypalette <- brewer.pal(11, "RdYlBu")
morecols <- colorRampPalette(mypalette)
heatmap.2(log.counts[slt,], col=morecols, trace="none", margin=c(3,7))
#Visulize pathway enrichment results using bioconductor package "pathview"
DE.paths <- read.table(file="I5_v_C_time6_KEGG.txt", sep="\t", header=T, stringsAsFactors=F)
head(DE.paths, 1)
## pathway.code pathway.name p.value
## 1 ath03010 Ribosome - Arabidopsis thaliana (thale cress) 5.378589e-35
## Annotated
## 1 301
pid <- DE.paths$pathway.code[3]
head(fc, 2)
## Gene logFC AveExpr P.Value adj.P.Val sig
## 1 AT4G12520 -10.254556 0.3581132 2.206726e-10 4.651998e-06 FDR<0.05
## 2 AT3G30720 5.817438 3.3950689 9.108689e-10 9.601014e-06 FDR<0.05
rownames(fc) <- fc$Gene
colnames(fc)
## [1] "Gene" "logFC" "AveExpr" "P.Value" "adj.P.Val" "sig"
## [1] "Gene" "logFC" "AveExpr" "P.Value" "adj.P.Val"
gene.data <- subset(fc, select="logFC")
head(gene.data)
## logFC
## AT4G12520 -10.254556
## AT3G30720 5.817438
## AT5G26270 2.421030
## AT3G33528 -4.780814
## AT1G64795 -4.872595
## AT3G05955 -4.158939
pv.out <- pathview(gene.data=gene.data, pathway.id=pid, species="ath", gene.idtype="KEGG", kegg.native=T)
## Info: Working in directory /Users/stefaniagiacomello/Documents/Conferences/UCDavis
## Info: Writing image file ath04141.pathview.png
#By default, running pathview() will create an image file named by the pathway id (for example, in this case there should be a file named "ath04141.pathview.png" in the current directory).
#Another package for ploting is "piano". It generates network style graphs. "Cytoscape" is another possible software to generate graphs for enrichment analysis results.
#Visulize GO enrichment results use revigo.irb.hr web application. In a web browser (Safari, explorer, chrome, firefox), open revigo.irb.hr.