Load libraries

library(Seurat)

Load the Seurat object

load(file="pre_sample_corrected.RData")
experiment.aggregate
## An object of class Seurat 
## 12811 features across 2681 samples within 1 assay 
## Active assay: RNA (12811 features)
experiment.test <- experiment.aggregate
set.seed(12345)
rand.genes <- sample(1:nrow(experiment.test), 500,replace = F)
mat <- as.matrix(GetAssayData(experiment.test, slot="data"))
mat[rand.genes,experiment.test$batchid=="Batch2"] <- mat[rand.genes,experiment.test$batchid=="Batch2"] + 0.18
experiment.test = SetAssayData(experiment.test, slot="data", new.data= mat )

Exploring Batch effects 3 ways, none, Seurat [vars.to.regress] and COMBAT

First lets view the data without any corrections

PCA in prep for tSNE

ScaleData - Scales and centers genes in the dataset.

?ScaleData
experiment.test.noc <- ScaleData(object = experiment.test)
## Centering and scaling data matrix

Run PCA

experiment.test.noc <- RunPCA(object = experiment.test.noc)
## PC_ 1 
## Positive:  Txn1, Sncg, Fez1, S100a10, Atp6v0b, Lxn, Dctn3, Tppp3, Sh3bgrl3, Rabac1 
## 	   Cisd1, Ppia, Fxyd2, Atp6v1f, Stmn3, Ndufa11, Atp5g1, Bex2, Atpif1, Uchl1 
## 	   Ndufa4, Psmb6, Ubb, Hagh, Anxa2, Gabarapl2, Rgs10, Nme1, Prdx2, Psmb3 
## Negative:  Mt1, Malat1, Adcyap1, Ptn, Apoe, Zeb2, Mt2, Timp3, Fabp7, Gpm6b 
## 	   Gal, Kit, Qk, Plp1, Atp2b4, Ifitm3, 6330403K07Rik, Sparc, Id3, Gap43 
## 	   Selenop, Gpx3, Zfp36l1, Rgcc, Scg2, Cbfb, Zfp36, Igfbp7, Marcksl1, Phlda1 
## PC_ 2 
## Positive:  Nefh, Cntn1, Thy1, S100b, Sv2b, Cplx1, Slc17a7, Vamp1, Nefm, Lynx1 
## 	   Endod1, Scn1a, Atp1b1, Vsnl1, Nat8l, Ntrk3, Sh3gl2, Fam19a2, Eno2, Scn1b 
## 	   Spock1, Scn8a, Glrb, Syt2, Scn4b, Lrrn1, Lgi3, Snap25, Atp2b2, Cpne6 
## Negative:  Malat1, Cd24a, Tmem233, Cd9, Dusp26, Mal2, Carhsp1, Tmem158, Fxyd2, Ctxn3 
## 	   Prkca, Ubb, Crip2, Arpc1b, Gna14, S100a6, Cd44, Tmem45b, Klf5, Tceal9 
## 	   Cd82, Hs6st2, Bex3, Emp3, Ift122, Fam89a, Pfn1, Dynll1, Acpp, Smim5 
## PC_ 3 
## Positive:  P2ry1, Fam19a4, Gm7271, Rarres1, Th, Zfp521, Wfdc2, Tox3, Gfra2, D130079A08Rik 
## 	   Iqsec2, Pou4f2, Rgs5, Kcnd3, Id4, Rasgrp1, Slc17a8, Casz1, Cdkn1a, Piezo2 
## 	   Dpp10, Gm11549, Fxyd6, Spink2, Rgs10, Zfhx3, C1ql4, Cd34, Gabra1, Cckar 
## Negative:  Calca, Basp1, Gap43, Ppp3ca, Map1b, Scg2, Cystm1, Tmem233, Calm1, Map7d2 
## 	   Ift122, Tubb3, Ncdn, Resp18, Prkca, Etv1, Nmb, Skp1a, Crip2, Camk2a 
## 	   Epb41l3, Tspan8, Ntrk1, Deptor, Gna14, Adk, Jak1, Tmem255a, Etl4, Camk2g 
## PC_ 4 
## Positive:  Id3, Timp3, Selenop, Pvalb, Ifitm3, Sparc, Igfbp7, Adk, Sgk1, Tm4sf1 
## 	   Ly6c1, Id1, Etv1, Nsg1, Mt2, Slc17a7, Zfp36l1, Cldn5, Spp1, Ier2 
## 	   Aldoc, Shox2, Cxcl12, Ptn, Itm2a, Qk, Stxbp6, Sparcl1, Jak1, Slit2 
## Negative:  Gap43, Calca, Stmn1, Tac1, Ppp3ca, 6330403K07Rik, Arhgdig, Alcam, Adcyap1, Prune2 
## 	   Kit, Ngfr, Ywhag, Gal, Fxyd6, Atp1a1, Smpd3, Ntrk1, Tmem100, Cd24a 
## 	   Atp2b4, Mt3, Cnih2, Tppp3, Gpx3, S100a11, Scn7a, Snap25, Cbfb, Gnb1 
## PC_ 5 
## Positive:  Cpne3, Klf5, Acpp, Fxyd2, Jak1, Nppb, Rgs4, Osmr, Zfhx3, Nbl1 
## 	   Etv1, Htr1f, Gm525, Sst, Adk, Tspan8, Cysltr2, Parm1, Tmem233, Prkca 
## 	   Cd24a, Npy2r, Prune2, Nts, Socs2, Dgkz, Gnb1, Phf24, Il31ra, Plxnc1 
## Negative:  Mt1, Ptn, B2m, Prdx1, Dbi, Mt3, Ifitm3, Fxyd7, Id3, Mt2 
## 	   Calca, S100a16, Sparc, Pcp4l1, Ifitm2, Selenop, Igfbp7, Rgcc, Hspb1, Selenom 
## 	   Abcg2, S100a13, Apoe, Tm4sf1, Timp3, Itm2a, Ubb, Ly6c1, Cryab, Cebpd
DimPlot(object = experiment.test.noc, group.by = "batchid", reduction = "pca")

DimPlot(object = experiment.test.noc, group.by = "batchid", dims = c(2,3), reduction = "pca")

PCA Elbow plot to determine how many principal components to use in downstream analyses. Components after the “elbow” in the plot generally explain little additional variability in the data.

ElbowPlot(experiment.test.noc)

We use 10 components in downstream analyses. Using more components more closely approximates the full data set but increases run time.

TSNE Plot

pcs.use <- 10
experiment.test.noc <- RunTSNE(object = experiment.test.noc, dims = 1:pcs.use)
DimPlot(object = experiment.test.noc,  group.by = "batchid")

Correct for sample to sample differences (seurat)

Use vars.to.regress to correct for the sample to sample differences and percent mitochondria

experiment.test.regress <- ScaleData(object = experiment.test, 
                    vars.to.regress = c("batchid"), model.use = "linear")
## Regressing out batchid
## Centering and scaling data matrix
experiment.test.regress <- RunPCA(object =experiment.test.regress)
## PC_ 1 
## Positive:  Txn1, Sncg, Fez1, S100a10, Atp6v0b, Lxn, Dctn3, Tppp3, Sh3bgrl3, Rabac1 
## 	   Cisd1, Ppia, Fxyd2, Atp6v1f, Stmn3, Ndufa11, Atp5g1, Bex2, Atpif1, Uchl1 
## 	   Ndufa4, Psmb6, Ubb, Hagh, Anxa2, Gabarapl2, Rgs10, Nme1, Prdx2, Psmb3 
## Negative:  Mt1, Malat1, Adcyap1, Ptn, Apoe, Zeb2, Mt2, Timp3, Fabp7, Gpm6b 
## 	   Gal, Kit, Qk, Atp2b4, Plp1, Ifitm3, 6330403K07Rik, Sparc, Id3, Gap43 
## 	   Gpx3, Selenop, Zfp36l1, Rgcc, Scg2, Cbfb, Zfp36, Igfbp7, Marcksl1, Phlda1 
## PC_ 2 
## Positive:  Nefh, Cntn1, Thy1, S100b, Sv2b, Cplx1, Slc17a7, Vamp1, Nefm, Lynx1 
## 	   Endod1, Atp1b1, Scn1a, Nat8l, Vsnl1, Ntrk3, Sh3gl2, Fam19a2, Eno2, Scn1b 
## 	   Spock1, Scn8a, Glrb, Syt2, Scn4b, Lrrn1, Atp2b2, Lgi3, Cpne6, Snap25 
## Negative:  Malat1, Cd24a, Tmem233, Cd9, Dusp26, Mal2, Tmem158, Carhsp1, Fxyd2, Ctxn3 
## 	   Ubb, Prkca, Arpc1b, Crip2, S100a6, Gna14, Cd44, Tmem45b, Klf5, Tceal9 
## 	   Bex3, Hs6st2, Cd82, Emp3, Pfn1, Ift122, Tubb2b, Fam89a, Dynll1, Gadd45g 
## PC_ 3 
## Positive:  P2ry1, Fam19a4, Gm7271, Rarres1, Th, Zfp521, Wfdc2, Tox3, Gfra2, D130079A08Rik 
## 	   Iqsec2, Pou4f2, Rgs5, Kcnd3, Id4, Rasgrp1, Slc17a8, Casz1, Cdkn1a, Piezo2 
## 	   Dpp10, Gm11549, Fxyd6, Rgs10, Spink2, Zfhx3, C1ql4, Cd34, Gabra1, Cckar 
## Negative:  Calca, Basp1, Gap43, Ppp3ca, Map1b, Scg2, Cystm1, Tmem233, Map7d2, Calm1 
## 	   Ift122, Tubb3, Ncdn, Resp18, Prkca, Etv1, Nmb, Skp1a, Crip2, Epb41l3 
## 	   Camk2a, Ntrk1, Tspan8, Deptor, Gna14, Adk, Jak1, Tmem255a, Etl4, Camk2g 
## PC_ 4 
## Positive:  Id3, Timp3, Selenop, Pvalb, Ifitm3, Sparc, Igfbp7, Adk, Tm4sf1, Ly6c1 
## 	   Sgk1, Id1, Etv1, Nsg1, Mt2, Cldn5, Zfp36l1, Ier2, Itm2a, Spp1 
## 	   Slc17a7, Aldoc, Ptn, Cxcl12, Shox2, Qk, Stxbp6, Sparcl1, Slit2, Jak1 
## Negative:  Gap43, Calca, Stmn1, Tac1, Ppp3ca, Arhgdig, 6330403K07Rik, Alcam, Prune2, Adcyap1 
## 	   Kit, Ngfr, Ywhag, Atp1a1, Fxyd6, Gal, Smpd3, Ntrk1, Tmem100, Cd24a 
## 	   Atp2b4, Mt3, Cnih2, Tppp3, Scn7a, Gpx3, S100a11, Snap25, Gnb1, Cbfb 
## PC_ 5 
## Positive:  Cpne3, Klf5, Jak1, Acpp, Fxyd2, Nppb, Osmr, Gm525, Htr1f, Sst 
## 	   Etv1, Nbl1, Rgs4, Zfhx3, Cysltr2, Adk, Tspan8, Npy2r, Nts, Parm1 
## 	   Tmem233, Prkca, Cd24a, Socs2, Prune2, Il31ra, Dgkz, Ptafr, Gnb1, Ptprk 
## Negative:  Mt1, Ptn, B2m, Dbi, Prdx1, Mt3, Fxyd7, Ifitm3, S100a16, Calca 
## 	   Id3, Mt2, Pcp4l1, Sparc, Selenop, Ifitm2, Rgcc, Igfbp7, Abcg2, Tm4sf1 
## 	   Apoe, Selenom, S100a13, Cryab, Hspb1, Timp3, Gap43, Ubb, Ly6c1, Phlda1
DimPlot(object = experiment.test.regress, group.by = "batchid", reduction.use = "pca")

Corrected TSNE Plot

experiment.test.regress <- RunTSNE(object = experiment.test.regress, dims.use = 1:pcs.use)
DimPlot(object = experiment.test.regress, group.by = "batchid", reduction = "tsne")

COMBAT corrected, https://academic.oup.com/biostatistics/article-lookup/doi/10.1093/biostatistics/kxj037

library(sva)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-28. For overview type 'help("mgcv-package")'.
## Loading required package: genefilter
## Loading required package: BiocParallel
?ComBat
m = as.matrix(GetAssayData(experiment.test))
com = ComBat(dat=m, batch=as.numeric(as.factor(experiment.test$orig.ident)), prior.plots=FALSE, par.prior=TRUE)
## Found3batches
## Adjusting for0covariate(s) or covariate level(s)
## Standardizing Data across genes
## Fitting L/S model and finding priors
## Finding parametric adjustments
## Adjusting the Data
experiment.test.combat <- experiment.test
experiment.test.combat <- SetAssayData(experiment.test.combat, new.data = as.matrix(com))
experiment.test.combat = ScaleData(experiment.test.combat)
## Centering and scaling data matrix

Principal components on ComBat adjusted data

experiment.test.combat <- RunPCA(object = experiment.test.combat)
## PC_ 1 
## Positive:  Txn1, Sncg, Fez1, S100a10, Atp6v0b, Lxn, Dctn3, Tppp3, Sh3bgrl3, Rabac1 
## 	   Cisd1, Ppia, Stmn3, Fxyd2, Ndufa11, Atp6v1f, Atp5g1, Bex2, Atpif1, Uchl1 
## 	   Ndufa4, Psmb6, Ubb, Anxa2, Gabarapl2, Hagh, Nme1, Psmb3, Rgs10, Prdx2 
## Negative:  Mt1, Malat1, Adcyap1, Ptn, Apoe, Zeb2, Mt2, Timp3, Fabp7, Gal 
## 	   Gpm6b, Kit, Qk, Atp2b4, Plp1, Ifitm3, 6330403K07Rik, Sparc, Id3, Gpx3 
## 	   Selenop, Gap43, Zfp36l1, Rgcc, Scg2, Cbfb, Zfp36, Igfbp7, Marcksl1, Phlda1 
## PC_ 2 
## Positive:  Nefh, Cntn1, Thy1, Sv2b, S100b, Slc17a7, Cplx1, Vamp1, Nefm, Lynx1 
## 	   Endod1, Atp1b1, Scn1a, Nat8l, Vsnl1, Ntrk3, Scn8a, Sh3gl2, Fam19a2, Eno2 
## 	   Scn1b, Spock1, Glrb, Syt2, Scn4b, Cpne6, Lgi3, Lrrn1, Atp2b2, Clec2l 
## Negative:  Malat1, Cd24a, Tmem233, Cd9, Dusp26, Mal2, Tmem158, Carhsp1, Fxyd2, Ubb 
## 	   Ctxn3, Crip2, Arpc1b, Prkca, S100a6, Gna14, Cd44, Tmem45b, Klf5, Tceal9 
## 	   Bex3, Hs6st2, Cd82, Emp3, Pfn1, Tubb2b, Dynll1, Ift122, Fam89a, Acpp 
## PC_ 3 
## Positive:  P2ry1, Fam19a4, Gm7271, Rarres1, Th, Zfp521, Wfdc2, Tox3, Gfra2, D130079A08Rik 
## 	   Iqsec2, Pou4f2, Rgs5, Kcnd3, Rasgrp1, Id4, Slc17a8, Casz1, Cdkn1a, Piezo2 
## 	   Dpp10, Gm11549, Fxyd6, Rgs10, Zfhx3, Spink2, C1ql4, Gabra1, Cd34, Synpr 
## Negative:  Calca, Basp1, Gap43, Ppp3ca, Map1b, Scg2, Cystm1, Tmem233, Map7d2, Ift122 
## 	   Calm1, Ncdn, Tubb3, Prkca, Resp18, Etv1, Skp1a, Nmb, Crip2, Camk2a 
## 	   Tspan8, Epb41l3, Ntrk1, Gna14, Deptor, Adk, Jak1, Tmem255a, Etl4, Dusp26 
## PC_ 4 
## Positive:  Id3, Timp3, Pvalb, Selenop, Sparc, Adk, Ifitm3, Igfbp7, Etv1, Nsg1 
## 	   Sgk1, Tm4sf1, Id1, Ly6c1, Mt2, Spp1, Slc17a7, Zfp36l1, Shox2, Ier2 
## 	   Aldoc, Cldn5, Itm2a, Ptn, Jak1, Qk, Cxcl12, Stxbp6, Tspan8, Slit2 
## Negative:  Gap43, Calca, Stmn1, Tac1, Arhgdig, Ppp3ca, 6330403K07Rik, Alcam, Adcyap1, Prune2 
## 	   Ngfr, Kit, Ywhag, Atp1a1, Gal, Fxyd6, Smpd3, Ntrk1, Tmem100, Mt3 
## 	   Atp2b4, Cd24a, Cnih2, Tppp3, S100a11, Gpx3, Scn7a, Cbfb, Snap25, Gnb1 
## PC_ 5 
## Positive:  Cpne3, Klf5, Acpp, Fxyd2, Jak1, Nppb, Osmr, Nbl1, Gm525, Htr1f 
## 	   Rgs4, Sst, Etv1, Zfhx3, Cysltr2, Tspan8, Adk, Npy2r, Parm1, Tmem233 
## 	   Cd24a, Nts, Prkca, Prune2, Socs2, Il31ra, Gnb1, Dgkz, Phf24, Ptafr 
## Negative:  Mt1, Ptn, B2m, Dbi, Prdx1, Mt3, Id3, Ifitm3, S100a16, Fxyd7 
## 	   Mt2, Calca, Sparc, Selenop, Pcp4l1, Ifitm2, Rgcc, Apoe, Cryab, Igfbp7 
## 	   Selenom, Ubb, Abcg2, Tm4sf1, Timp3, S100a13, Hspb1, Phlda1, Hspa1a, Dad1
DimPlot(object = experiment.test.combat, group.by = "batchid", reduction = "pca")

PCA Plot, Combat adjusted

TSNE plot on ComBat adjusted data

experiment.test.combat <- RunTSNE(object = experiment.test.combat, dims.use = 1:pcs.use)
DimPlot(object = experiment.test.combat, group.by = "batchid", reduction = "tsne")

TSNE plot, ComBat adjusted

Question(s)

  1. Try a couple of PCA cutoffs (low and high) and compare the TSNE plots from the different methods. Do they look meaningfully different?

Get the next Rmd file

download.file("https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2019-single-cell-RNA-sequencing-Workshop-UCD_UCSF/master/scrnaseq_analysis/scRNA_Workshop-PART4.Rmd", "scRNA_Workshop-PART4.Rmd")

Session Information

sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] sva_3.32.1          BiocParallel_1.18.0 genefilter_1.66.0  
## [4] mgcv_1.8-28         nlme_3.1-140        Seurat_3.0.2       
## 
## loaded via a namespace (and not attached):
##   [1] tsne_0.1-3           matrixStats_0.54.0   bitops_1.0-6        
##   [4] bit64_0.9-7          RColorBrewer_1.1-2   httr_1.4.0          
##   [7] sctransform_0.2.0    tools_3.6.0          R6_2.4.0            
##  [10] irlba_2.3.3          KernSmooth_2.23-15   DBI_1.0.0           
##  [13] BiocGenerics_0.30.0  lazyeval_0.2.2       colorspace_1.4-1    
##  [16] npsurv_0.4-0         tidyselect_0.2.5     gridExtra_2.3       
##  [19] bit_1.1-14           compiler_3.6.0       Biobase_2.44.0      
##  [22] plotly_4.9.0         labeling_0.3         caTools_1.17.1.2    
##  [25] scales_1.0.0         lmtest_0.9-37        ggridges_0.5.1      
##  [28] pbapply_1.4-0        stringr_1.4.0        digest_0.6.19       
##  [31] rmarkdown_1.13       R.utils_2.9.0        pkgconfig_2.0.2     
##  [34] htmltools_0.3.6      bibtex_0.4.2         highr_0.8           
##  [37] limma_3.40.2         htmlwidgets_1.3      rlang_0.3.4         
##  [40] RSQLite_2.1.1        zoo_1.8-6            jsonlite_1.6        
##  [43] ica_1.0-2            gtools_3.8.1         dplyr_0.8.1         
##  [46] R.oo_1.22.0          RCurl_1.95-4.12      magrittr_1.5        
##  [49] Matrix_1.2-17        S4Vectors_0.22.0     Rcpp_1.0.1          
##  [52] munsell_0.5.0        ape_5.3              reticulate_1.12     
##  [55] R.methodsS3_1.7.1    stringi_1.4.3        yaml_2.2.0          
##  [58] gbRd_0.4-11          MASS_7.3-51.4        gplots_3.0.1.1      
##  [61] Rtsne_0.15           plyr_1.8.4           blob_1.1.1          
##  [64] grid_3.6.0           parallel_3.6.0       gdata_2.18.0        
##  [67] listenv_0.7.0        ggrepel_0.8.1        crayon_1.3.4        
##  [70] lattice_0.20-38      cowplot_0.9.4        splines_3.6.0       
##  [73] annotate_1.62.0      SDMTools_1.1-221.1   knitr_1.23          
##  [76] pillar_1.4.1         igraph_1.2.4.1       stats4_3.6.0        
##  [79] future.apply_1.3.0   reshape2_1.4.3       codetools_0.2-16    
##  [82] XML_3.98-1.20        glue_1.3.1           evaluate_0.14       
##  [85] lsei_1.2-0           metap_1.1            data.table_1.12.2   
##  [88] png_0.1-7            Rdpack_0.11-0        gtable_0.3.0        
##  [91] RANN_2.6.1           purrr_0.3.2          tidyr_0.8.3         
##  [94] future_1.13.0        assertthat_0.2.1     ggplot2_3.2.0       
##  [97] xfun_0.7             rsvd_1.0.1           xtable_1.8-4        
## [100] survival_2.44-1.1    viridisLite_0.3.0    tibble_2.1.3        
## [103] IRanges_2.18.1       memoise_1.1.0        AnnotationDbi_1.46.0
## [106] cluster_2.1.0        globals_0.12.4       fitdistrplus_1.0-14 
## [109] ROCR_1.0-7