Last Updated June 27, 2023

Single Cell V(D)J Analysis

Libraries

if (!requireNamespace("BiocManager", quietly = TRUE)){
    install.packages("BiocManager")
}

if (!any(rownames(installed.packages()) == "ggplot2")){
  BiocManager::install("ggplot2")
}

if (!any(rownames(installed.packages()) == "viridis")){
  BiocManager::install("virids")
}

if (!any(rownames(installed.packages()) == "knitr")){
  BiocManager::install("knitr")
}

if (!any(rownames(installed.packages()) == "kableExtra")){
  BiocManager::install("kableExtra")
}

if (!any(rownames(installed.packages()) == "dplyr")){
  BiocManager::install("dplyr")
}

if (!any(rownames(installed.packages()) == "tidyr")){
  BiocManager::install("tidyr")
}

if (!any(rownames(installed.packages()) == "magrittr")){
  BiocManager::install("magrittr")
}

if (!any(rownames(installed.packages()) == "scRepertoire")){
  BiocManager::install("scRepertoire")
}

if (!any(rownames(installed.packages()) == "circlize")){
  BiocManager::install("circlize")
}

library(ggplot2)
library(viridis)
library(tidyr)
library(magrittr)
library(knitr)
library(kableExtra)
library(dplyr)
library(scRepertoire)
library(Seurat)
library(circlize)

set.seed(1234) # arbitrary

Download Cell Ranger results

The following code downloads the output from a cellranger vdj run that has been stored in the github repository. The system call does not work on all operating systems. If the system call fails, you may need to unzip the file manually.

options(timeout=1200)
download.file("https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2023-June-Advanced-Topics-in-Single-Cell-RNA-Seq-VDJ/main/data_analysis/cellranger_vdj_results.zip", "cellranger_vdj_results.zip")
system("unzip cellranger_vdj_results.zip")

We will also be using the gene expression data from this experiment, which is contained in a Seurat object. Download the Seurat object from the computing cluster. Don’t forget to replace “username” with your username.

scp username@tadpole.genomecenter.ucdavis.edu:/share/workshop/vdj_workshop/R_objects/seurat_object.rds .

Set up

vdj.location <- "./cellranger_vdj_results"
vdj.ids <- c("Pool1")

Sequencing metrics

metrics <- paste(vdj.location, vdj.ids, "metrics_summary.csv", sep = "/")
metrics.table <- do.call("cbind", lapply(metrics, function(x) {
  as.data.frame(t(read.csv(x)))
  }))
colnames(metrics.table) <- vdj.ids
rownames(metrics.table) <- gsub(".", " ", rownames(metrics.table), fixed = TRUE)
metrics.table  %>%
  kable(caption = 'Cell Ranger Results') %>%
  pack_rows("Overview", 1, 3, label_row_css = "background-color: #666; color: #fff;") %>%
  pack_rows("Sequencing Characteristics", 4, 8, label_row_css = "background-color: #666; color: #fff;") %>%
  pack_rows("Mapping Characteristics", 9, 26, label_row_css = "background-color: #666; color: #fff;") %>%
  kable_styling("striped", fixed_thead = TRUE)
Cell Ranger Results
Pool1
Overview
Estimated Number of Cells 5,703
Mean Read Pairs per Cell 49,067
Number of Cells With Productive V J Spanning Pair 4,392
Sequencing Characteristics
Number of Read Pairs 279,829,164
Valid Barcodes 94.4%
Q30 Bases in Barcode 93.8%
Q30 Bases in RNA Read 1 91.6%
Q30 Bases in UMI 93.5%
Mapping Characteristics
Reads Mapped to Any V D J Gene 85.8%
Reads Mapped to TRA 37.3%
Reads Mapped to TRB 48.3%
Mean Used Read Pairs per Cell 30,419
Fraction Reads in Cells 75.0%
Median TRA UMIs per Cell 6
Median TRB UMIs per Cell 14
Cells With Productive V J Spanning Pair 77.0%
Cells With Productive V J Spanning TRA TRB Pair 77.0%
Paired Clonotype Diversity 1162.31
Cells With TRA Contig 90.0%
Cells With TRB Contig 96.1%
Cells With CDR3 annotated TRA Contig 87.1%
Cells With CDR3 annotated TRB Contig 94.4%
Cells With V J Spanning TRA Contig 88.8%
Cells With V J Spanning TRB Contig 94.7%
Cells With Productive TRA Contig 84.5%
Cells With Productive TRB Contig 92.5%

The majority of the following functions and figures come from scRepertoire. We will be exploring and making changes to the code as we go, so please take notes and don’t be afraid to experiment and ask questions!

Read in Cell Ranger VDJ Data

contig.list <- lapply(paste(vdj.location, vdj.ids, "filtered_contig_annotations.csv", sep = "/"), read.csv)
# read in Seurat object and add a shortened sample identifier
expression <- readRDS("seurat_object.rds")
expression$Sample_Name_short <- gsub("batch1.", "", expression$Sample_Name)

For multiplexed data from a single contig annotations CSV:

contigs <- contig.list[[1]]
contigs$barcode <- paste(sapply(strsplit(contigs$barcode, split = "-"), "[[", 1), vdj.ids, sep = "-")
contig.list <- createHTOContigList(contigs,
                           expression,
                           group.by = "Sample_Name_short")
vdj <- combineTCR(contig.list,
                  samples = names(contig.list),
                  ID = names(contig.list),
                  cells = "T-AB",
                  removeMulti = TRUE,
                  removeNA = TRUE)
# rename barcodes to match the format oligo-sample
vdj <- lapply(vdj, stripBarcode)
vdj <- lapply(vdj, function(df){
  df$barcode = paste(sapply(strsplit(df$barcode, split = "-"), "[[", 1), df$sample, sep = "-")
  df
})
# structure of the vdj object
class(vdj)
## [1] "list"
length(vdj)
## [1] 4
names(vdj)
## [1] "conv.COVID_conv.COVID" "conv.Tdap_conv.Tdap"   "conv.MMR_conv.MMR"    
## [4] "norm.COVID_norm.COVID"
class(vdj[[1]])
## [1] "data.frame"
vdj[[1]] %>%
  slice(1:5) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
barcode sample ID TCR1 cdr3_aa1 cdr3_nt1 TCR2 cdr3_aa2 cdr3_nt2 CTgene CTnt CTaa CTstrict cellType
AAACCTGAGCTACCTA-conv.COVID conv.COVID conv.COVID TRAV17.TRAJ9.TRAC CATDARAGGFKTIF TGTGCTACGGACGCGCGGGCTGGAGGCTTCAAAACTATCTTT TRBV20-1.None.TRBJ1-4.TRBC1 CSARDLGQREKLFF TGCAGTGCTAGAGATCTGGGACAGCGTGAAAAACTGTTTTTT TRAV17.TRAJ9.TRAC_TRBV20-1.None.TRBJ1-4.TRBC1 TGTGCTACGGACGCGCGGGCTGGAGGCTTCAAAACTATCTTT_TGCAGTGCTAGAGATCTGGGACAGCGTGAAAAACTGTTTTTT CATDARAGGFKTIF_CSARDLGQREKLFF TRAV17.TRAJ9.TRAC_TGTGCTACGGACGCGCGGGCTGGAGGCTTCAAAACTATCTTT_TRBV20-1.None.TRBJ1-4.TRBC1_TGCAGTGCTAGAGATCTGGGACAGCGTGAAAAACTGTTTTTT T-AB
AAACGGGCACAAGACG-conv.COVID conv.COVID conv.COVID TRAV17.TRAJ10.TRAC CATGPTGGGNKLTF TGTGCTACGGGACCCACGGGAGGAGGAAACAAACTCACCTTT TRBV5-4.None.TRBJ2-2.TRBC2 CASSLLTGFANTGELFF TGTGCCAGCAGCCTCCTGACAGGGTTCGCGAACACCGGGGAGCTGTTTTTT TRAV17.TRAJ10.TRAC_TRBV5-4.None.TRBJ2-2.TRBC2 TGTGCTACGGGACCCACGGGAGGAGGAAACAAACTCACCTTT_TGTGCCAGCAGCCTCCTGACAGGGTTCGCGAACACCGGGGAGCTGTTTTTT CATGPTGGGNKLTF_CASSLLTGFANTGELFF TRAV17.TRAJ10.TRAC_TGTGCTACGGGACCCACGGGAGGAGGAAACAAACTCACCTTT_TRBV5-4.None.TRBJ2-2.TRBC2_TGTGCCAGCAGCCTCCTGACAGGGTTCGCGAACACCGGGGAGCTGTTTTTT T-AB
AAACGGGCACTTAACG-conv.COVID conv.COVID conv.COVID TRAV8-1.TRAJ4.TRAC CAVNVAFSGGYNKLIF TGTGCCGTGAATGTGGCATTTTCTGGTGGCTACAATAAGCTGATTTTT TRBV7-9.None.TRBJ2-5.TRBC2 CASSLATSGGQETQYF TGTGCCAGCAGCTTAGCGACTAGCGGGGGACAAGAGACCCAGTACTTC TRAV8-1.TRAJ4.TRAC_TRBV7-9.None.TRBJ2-5.TRBC2 TGTGCCGTGAATGTGGCATTTTCTGGTGGCTACAATAAGCTGATTTTT_TGTGCCAGCAGCTTAGCGACTAGCGGGGGACAAGAGACCCAGTACTTC CAVNVAFSGGYNKLIF_CASSLATSGGQETQYF TRAV8-1.TRAJ4.TRAC_TGTGCCGTGAATGTGGCATTTTCTGGTGGCTACAATAAGCTGATTTTT_TRBV7-9.None.TRBJ2-5.TRBC2_TGTGCCAGCAGCTTAGCGACTAGCGGGGGACAAGAGACCCAGTACTTC T-AB
AAACGGGCACTTGGAT-conv.COVID conv.COVID conv.COVID TRAV29/DV5.TRAJ45.TRAC CAAWPGGGADGLTF TGTGCAGCATGGCCAGGAGGAGGTGCTGACGGACTCACCTTT TRBV6-2.None.TRBJ2-7.TRBC2 CASSYSEVEQYF TGTGCCAGCAGTTACTCTGAGGTCGAGCAGTACTTC TRAV29/DV5.TRAJ45.TRAC_TRBV6-2.None.TRBJ2-7.TRBC2 TGTGCAGCATGGCCAGGAGGAGGTGCTGACGGACTCACCTTT_TGTGCCAGCAGTTACTCTGAGGTCGAGCAGTACTTC CAAWPGGGADGLTF_CASSYSEVEQYF TRAV29/DV5.TRAJ45.TRAC_TGTGCAGCATGGCCAGGAGGAGGTGCTGACGGACTCACCTTT_TRBV6-2.None.TRBJ2-7.TRBC2_TGTGCCAGCAGTTACTCTGAGGTCGAGCAGTACTTC T-AB
AAACGGGTCTTAGAGC-conv.COVID conv.COVID conv.COVID TRAV5.TRAJ29.TRAC CAEKGETPLVF TGTGCAGAGAAAGGGGAAACACCTCTTGTCTTT TRBV7-2.None.TRBJ1-6.TRBC1 CASSLAGEGNNSPLHF TGTGCCAGCAGCTTAGCGGGAGAGGGTAATAATTCACCCCTCCACTTT TRAV5.TRAJ29.TRAC_TRBV7-2.None.TRBJ1-6.TRBC1 TGTGCAGAGAAAGGGGAAACACCTCTTGTCTTT_TGTGCCAGCAGCTTAGCGGGAGAGGGTAATAATTCACCCCTCCACTTT CAEKGETPLVF_CASSLAGEGNNSPLHF TRAV5.TRAJ29.TRAC_TGTGCAGAGAAAGGGGAAACACCTCTTGTCTTT_TRBV7-2.None.TRBJ1-6.TRBC1_TGTGCCAGCAGCTTAGCGGGAGAGGGTAATAATTCACCCCTCCACTTT T-AB

For non-multiplexed experiments:

vdj <- combineTCR(contig.list,
                  samples = vdj.ids,
                  ID = vdj.ids,
                  cells = "T-AB",
                  removeMulti = TRUE,
                  removeNA = TRUE)
# rename barcodes to match the format oligo-sample
vdj <- lapply(vdj, stripBarcode)
vdj <- lapply(vdj, function(df){
  df$barcode = paste(sapply(strsplit(df$barcode, split = "-"), "[[", 1), df$sample, sep = "-")
  df
})
# structure of the vdj object
class(vdj)
length(vdj)
names(vdj)
class(vdj[[1]])
vdj[[1]] %>%
  slice(1:5) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))

Basic repertoire analysis

Number of unique clonotypes

quantContig(vdj, cloneCall="aa", group = "sample", scale = FALSE, exportTable = TRUE) %>%
  select(sample, contigs, total) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
sample contigs total
conv.COVID 734 744
conv.Tdap 815 817
conv.MMR 845 847
norm.COVID 303 437
quantContig(vdj, cloneCall="aa", group = "sample", scale = FALSE) +
  scale_fill_viridis_d()

Distribution of clonotypes by abundance

abundanceContig(vdj, cloneCall = "gene", group = "sample", scale = FALSE) +
  scale_color_viridis_d()

abundanceContig(vdj, cloneCall = "gene", group = "sample", scale = FALSE, exportTable = TRUE) %>%
  group_by(sample) %>%
  arrange(desc(Abundance)) %>%
  filter(!is.na(CTgene)) %>%
  select(CTgene, sample, Abundance) %>%
  slice(1:5) %>%
  kable(caption = "Most abundant clonotype (gene calls) by sample") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"), fixed_thead = TRUE)
Most abundant clonotype (gene calls) by sample
CTgene sample Abundance
TRAV17.TRAJ49.TRAC_TRBV7-9.None.TRBJ1-1.TRBC1 conv.COVID 3
TRAV38-1.TRAJ40.TRAC_TRBV3-1.None.TRBJ2-7.TRBC2 conv.COVID 3
TRAV13-1.TRAJ13.TRAC_TRBV20-1.None.TRBJ2-1.TRBC2 conv.COVID 2
TRAV13-2.TRAJ17.TRAC_TRBV2.None.TRBJ1-1.TRBC1 conv.COVID 2
TRAV14/DV4.TRAJ45.TRAC_TRBV10-3.None.TRBJ1-1.TRBC1 conv.COVID 2
TRAV17.TRAJ49.TRAC_TRBV7-9.None.TRBJ1-1.TRBC1 conv.MMR 2
TRAV20.TRAJ18.TRAC_TRBV7-8.None.TRBJ1-4.TRBC1 conv.MMR 2
TRAV1-1.TRAJ10.TRAC_TRBV3-1.None.TRBJ2-1.TRBC2 conv.MMR 1
TRAV1-1.TRAJ12.TRAC_TRBV4-3.None.TRBJ1-1.TRBC1 conv.MMR 1
TRAV1-1.TRAJ17.TRAC_TRBV12-3.None.TRBJ2-3.TRBC2 conv.MMR 1
TRAV12-3.TRAJ52.TRAC_TRBV7-2.None.TRBJ2-1.TRBC2 conv.Tdap 2
TRAV13-1.TRAJ11.TRAC_TRBV9.None.TRBJ2-5.TRBC2 conv.Tdap 2
TRAV17.TRAJ53.TRAC_TRBV5-1.None.TRBJ2-5.TRBC2 conv.Tdap 2
TRAV26-1.TRAJ43.TRAC_TRBV24-1.None.TRBJ2-1.TRBC2 conv.Tdap 2
TRAV8-4.TRAJ11.TRAC_TRBV20-1.None.TRBJ2-7.TRBC2 conv.Tdap 2
TRAV25.TRAJ37.TRAC_TRBV6-5.None.TRBJ1-1.TRBC1 norm.COVID 63
TRAV9-2.TRAJ7.TRAC_TRBV7-2.None.TRBJ2-3.TRBC2 norm.COVID 18
TRAV12-2.TRAJ31.TRAC_TRBV9.None.TRBJ2-5.TRBC2 norm.COVID 11
TRAV12-2.TRAJ33.TRAC_TRBV4-1.None.TRBJ1-5.TRBC1 norm.COVID 11
TRAV1-2.TRAJ15.TRAC_TRBV7-9.None.TRBJ1-2.TRBC1 norm.COVID 7

Relative abundance of clones by frequency

clonalHomeostasis(vdj, cloneCall = "aa") +
  scale_fill_viridis_d(option = "plasma") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank(),
        legend.title = element_blank())

Relative abundance of clones by index

Clonal index 1 represents the most frequent clone in a given sample, while index 1000 represents the 1000th most frequent clone.

clonalProportion(vdj, cloneCall = "aa", split = c(10, 50, 100, 500, 1000)) +
  scale_fill_viridis_d(option = "rocket", direction = -1) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        axis.title.x = element_blank(),
        legend.title = element_blank())

Contig length distribution

lengthContig(vdj, cloneCall="nt", scale=TRUE, chain = "both", group="sample") +
  scale_fill_viridis_d() +
  facet_wrap(~sample)

# re-create plot from previous version of scRepertoire
do.call("rbind", vdj) %>%
  mutate(TRA = nchar(cdr3_aa1),
         TRB = nchar(cdr3_aa2)) %>%
  pivot_longer(cols = c(TRA, TRB),
               names_to = "chain",
               values_to = "chain_aa_length") %>%
  ggplot(aes(x = chain_aa_length, fill = sample)) +
  geom_histogram(binwidth = 1) +
  scale_fill_viridis_d() +
  facet_grid(sample~chain) +
  theme_classic()

Gene usage

The scRepertoire function vizGenes will plot the frequency distribution for one gene at a time.

vizGenes(vdj,
         gene = "V",
         chain = "TRA",
         plot = "bar",
         scale = TRUE)

vizGenes(vdj,
         gene = "J",
         chain = "TRA",
         plot = "bar",
         scale = TRUE)

To visualize pairings between genes, we need to write some custom code.

lapply(vdj, function(sample){
  tmp = sample %>%
    filter(!is.na(CTgene)) %>%
    separate(CTgene, into = c("CTgene1", "CTgene2"), sep = "_") %>%
    separate(CTgene1, into = c("v1", "j1", "c1"), sep = "\\.") %>%
    count(v1, j1) %>%
    filter(!is.na(v1) & !is.na(j1) & n > 2)
  grid.cols = turbo(length(unique(tmp$v1)))
  names(grid.cols) = unique(tmp$v1)
  chordDiagram(tmp,
               grid.col = grid.cols,
               annotationTrack = "grid",
               preAllocateTracks = list(track.height = max(strwidth(unlist(dimnames(tmp))))))
  circos.track(track.index = 1,
               panel.fun = function(x, y){
                 circos.text(CELL_META$xcenter,
                             CELL_META$ylim[1],
                             CELL_META$sector.index,
                             facing = "clockwise",
                             niceFacing = TRUE,
                             adj = c(0, 0.5))
               },
               bg.border = NA)
  circos.clear()
})

## $conv.COVID_conv.COVID
## NULL
## 
## $conv.Tdap_conv.Tdap
## NULL
## 
## $conv.MMR_conv.MMR
## NULL
## 
## $norm.COVID_norm.COVID
## NULL

Compare abundance of clonotypes across samples

compareClonotypes(vdj, numbers = 5, cloneCall = "aa", graph = "alluvial") +
  scale_fill_viridis_d(option = "turbo") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(caption = "Results of compareClonotypes() with numbers = 5.")

# list shared clones
compare.clones <- compareClonotypes(vdj,
                                    numbers = 100,
                                    cloneCall = "aa",
                                    exportTable = TRUE) %>%
  pivot_wider(names_from = Sample,
              values_from = Proportion,
              names_repair = "universal")
compare.clones$shared.by <- apply(compare.clones, 1, function(x){
  length(which(!sapply(x[2:5], is.na)))
})
# display 10 clones
compare.clones %>%
  filter(shared.by > 1) %>%
  arrange(desc(shared.by)) %>%
  select(-shared.by) %>%
  slice(1:10) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
Clonotypes conv.COVID_conv.COVID conv.Tdap_conv.Tdap conv.MMR_conv.MMR norm.COVID_norm.COVID
CAMKGGTSYGKLTF_CASSLHVNEQFF 0.0013441 0.002448 0.0011806 NA
CATDTGNQFYF_CASSLVPGVTEAFF 0.0040323 0.001224 0.0023613 NA
CAASANSGYALNF_CASSLDGYEQYF 0.0013441 0.001224 NA NA
CIVRVQYNNNDMRF_CATSDFRGGWVDEQFF 0.0013441 0.002448 NA NA
CAALNSGYSTLTF_CSARDPTSYEQYF NA 0.001224 0.0011806 NA

Calculate clonal overlap between samples

clonalOverlap(vdj, cloneCall = "aa", method = "overlap") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Calculate clonal diversity

clonalDiversity(vdj, cloneCall = "aa") +
  scale_color_viridis_d() +
  theme(legend.title = element_blank())

Combined V(D)J and gene expression data

We can get more from the dataset by combining the gene expression data with the clonotype information.

Add scRepertoire data to the Seurat object

For the multiplexed data, reformat the barcodes.

vdj <- lapply(vdj, function(x){
  x$barcode <- paste(sapply(strsplit(x$barcode, split = "-"), "[[", 1), vdj.ids, sep = "-")
  x
})
# add scRepertoire info to expression object
expression <- combineExpression(vdj, expression, cloneCall = "gene", chain = "both")
head(expression@meta.data) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
orig.ident nCount_RNA nFeature_RNA poolid antibody_call sampleid Sample_Name Convelescent Antigen percent.mito S.Score G2M.Score Phase RNA_snn_res.0.25 Sample_Name_short barcode CTgene CTnt CTaa CTstrict Frequency cloneType
AAACCTGAGACTAGAT-Pool1 Pool1 17521 3471 Pool1 HTO-1ConAPCTcellag_TotalC Pool1.HTO-1ConAPCTcellag_TotalC batch1.conv.COVID Covid convelescent COVID 0.5878660 -0.6721786 -0.2657356 G1 4 conv.COVID NA NA NA NA NA NA NA
AAACCTGAGCAAATCA-Pool1 Pool1 16648 3690 Pool1 HTO-3ConAPCTcellag_TotalC Pool1.HTO-3ConAPCTcellag_TotalC batch1.conv.Tdap Covid convelescent Tdap 1.6518501 -0.2915326 -0.0625301 G1 0 conv.Tdap AAACCTGAGCAAATCA-Pool1 TRAV12-1.TRAJ10.TRAC_TRBV2.None.TRBJ2-2.TRBC2 TGTGTGGTGAACACGGGAGGAGGAAACAAACTCACCTTT_TGTGCCAGCAGCGCCGGGACCGGGGAGCTGTTTTTT CVVNTGGGNKLTF_CASSAGTGELFF TRAV12-1.TRAJ10.TRAC_TGTGTGGTGAACACGGGAGGAGGAAACAAACTCACCTTT_TRBV2.None.TRBJ2-2.TRBC2_TGTGCCAGCAGCGCCGGGACCGGGGAGCTGTTTTTT 0.0012240 Medium (0.001 < X <= 0.01)
AAACCTGAGCTACCTA-Pool1 Pool1 16250 3664 Pool1 HTO-1ConAPCTcellag_TotalC Pool1.HTO-1ConAPCTcellag_TotalC batch1.conv.COVID Covid convelescent COVID 0.4307692 -0.2961077 -0.2611317 G1 0 conv.COVID AAACCTGAGCTACCTA-Pool1 TRAV17.TRAJ9.TRAC_TRBV20-1.None.TRBJ1-4.TRBC1 TGTGCTACGGACGCGCGGGCTGGAGGCTTCAAAACTATCTTT_TGCAGTGCTAGAGATCTGGGACAGCGTGAAAAACTGTTTTTT CATDARAGGFKTIF_CSARDLGQREKLFF TRAV17.TRAJ9.TRAC_TGTGCTACGGACGCGCGGGCTGGAGGCTTCAAAACTATCTTT_TRBV20-1.None.TRBJ1-4.TRBC1_TGCAGTGCTAGAGATCTGGGACAGCGTGAAAAACTGTTTTTT 0.0013441 Medium (0.001 < X <= 0.01)
AAACCTGAGTACGCGA-Pool1 Pool1 10911 2630 Pool1 HTO-2ConAPCTcellag_TotalC Pool1.HTO-2ConAPCTcellag_TotalC batch1.conv.MMR Covid convelescent MMR 0.5499038 -0.4055037 -0.2255377 G1 4 conv.MMR NA NA NA NA NA NA NA
AAACCTGCAGACTCGC-Pool1 Pool1 7321 2560 Pool1 HTO-1ConAPCTcellag_TotalC Pool1.HTO-1ConAPCTcellag_TotalC batch1.conv.COVID Covid convelescent COVID 0.3824614 -0.4477389 -0.2590875 G1 1 conv.COVID NA NA NA NA NA NA NA
AAACCTGCATCCCATC-Pool1 Pool1 2539 1100 Pool1 HTO-1NorAPCTcellag_TotalC Pool1.HTO-1NorAPCTcellag_TotalC batch1.norm.COVID Uninfected Control COVID 6.7349350 -0.1436077 -0.0747784 G1 9 norm.COVID AAACCTGCATCCCATC-Pool1 TRAV13-2.TRAJ30.TRAC_TRBV19.None.TRBJ1-1.TRBC1 TGTGCAGAGAACAGAGATGACAAGATCATCTTT_TGTGCCAGTACTTTCTCTGACTCGGGCGGCACTGAAGCTTTCTTT CAENRDDKIIF_CASTFSDSGGTEAFF TRAV13-2.TRAJ30.TRAC_TGTGCAGAGAACAGAGATGACAAGATCATCTTT_TRBV19.None.TRBJ1-1.TRBC1_TGTGCCAGTACTTTCTCTGACTCGGGCGGCACTGAAGCTTTCTTT 0.0137300 Large (0.01 < X <= 0.1)

Find markers for large clones

DimPlot(expression, group.by = "cloneType") +
  scale_color_viridis_d(option = "plasma", direction = -1, end = 0.8)

Idents(expression) <- "cloneType"
large.markers <- FindMarkers(expression, ident.1 = "Large (0.01 < X <= 0.1)")
large.markers$gene <- rownames(large.markers)
view.markers <- large.markers %>%
  filter(p_val_adj < 0.05) %>%
  filter(!grepl("^TR[AB]", gene)) %>%
  slice(1:5)
# FeaturePlots
lapply(view.markers$gene, function(marker){
  FeaturePlot(expression,
              features = marker)
})  
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

# VlnPlots
lapply(view.markers$gene, function(marker){
  VlnPlot(expression,
          features = marker,
          group.by = "RNA_snn_res.0.25") +
    scale_fill_viridis_d(option = "turbo")
})
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

# view table
kable(view.markers) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
p_val avg_log2FC pct.1 pct.2 p_val_adj gene
ZNF683 0 3.147812 0.717 0.028 0 ZNF683
GZMH 0 2.919273 0.850 0.069 0 GZMH
KLRG1 0 1.924535 0.533 0.031 0 KLRG1
CTSW 0 2.015164 0.583 0.041 0 CTSW
KLRC4 0 1.608025 0.350 0.013 0 KLRC4

clonalOverlay

In our data set, I wasn’t able to find a set of parameters to get the contour to plot using the clonalOverlay function as shown in the documentation here. If you figure out how to make it work, please let me know!

# uses active identity
Idents(expression) <- expression$Sample_Name_short
clonalOverlay(expression,
              reduction = "umap",
              freq.cutpoint = 30,
              bins = 25,
              facet = "Antigen") +
  scale_color_viridis_d()

Visualize selected contig sequences on UMAP

contig <- abundanceContig(vdj, cloneCall = "aa", exportTable = TRUE) %>%
  arrange(desc(Abundance)) %>%
  slice(1:5)
Idents(expression) <- expression$CTaa
DimPlot(expression,
        reduction = "umap",
        cells.highlight = CellsByIdentities(expression, idents = contig$CTaa),
        cols.highlight = turbo(length(contig$CTaa)))

TCR clustering

TCR clustering groups clonotypes together on sequence similarity (either nucleotide or amino acid, controlled by the “sequence” argument). This may be useful in order to identify cells that are producing receptors that may have similar properties, even though the clone differs.

A new column called “TRA_cluster” (or “TRB_cluster” depending on the chain used), is added to the output by the clustering function. Cluster names containing “LD” represent multi-clonal groups created by the function.

tcr.clusters <- clusterTCR(vdj, chain = "TRA", sequence = "aa", threshold = 0.9)
# three most abundant aa sequence in three clusters
do.call("rbind", tcr.clusters) %>%
  count(cdr3_aa1, TRA_cluster) %>%
  filter(TRA_cluster %in% c("TRA:LD.31", "TRA:LD.23", "TRA:LD.55")) %>%
  group_by(TRA_cluster) %>%
  arrange(desc(n)) %>%
  slice(1:3) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
cdr3_aa1 TRA_cluster n
CAVSGGSNYKLTF TRA:LD.55 3
CAGSGGSNYKLTF TRA:LD.55 1
CALGGGGSNYKLTF TRA:LD.55 1
# all aa sequences in cluster TRA:LD.55
do.call("rbind", tcr.clusters) %>%
  filter(TRA_cluster == "TRA:LD.55") %>%
  count(cdr3_aa1) %>%
  arrange(desc(n)) %>%
  kable %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive"))
cdr3_aa1 n
CAVSGGSNYKLTF 3
CAGSGGSNYKLTF 1
CALGGGGSNYKLTF 1
CALGGGSNYKLTF 1
CALSGGGSNYKLTF 1
CAVGGGSNYKLTF 1
CAVKGGSNYKLTF 1
CAVSEGGSNYKLTF 1
CAVYSGGSNYKLTF 1
CIVSGGSNYKLTF 1

Session Information

sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Monterey 12.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] circlize_0.4.15     SeuratObject_4.1.3  Seurat_4.3.0.1     
##  [4] scRepertoire_1.10.0 dplyr_1.1.2         kableExtra_1.3.4   
##  [7] knitr_1.43          magrittr_2.0.3      tidyr_1.3.0        
## [10] viridis_0.6.3       viridisLite_0.4.2   ggplot2_3.4.2      
## 
## loaded via a namespace (and not attached):
##   [1] cubature_2.1.0              RcppAnnoy_0.0.20           
##   [3] splines_4.3.1               later_1.3.1                
##   [5] bitops_1.0-7                tibble_3.2.1               
##   [7] polyclip_1.10-4             lifecycle_1.0.3            
##   [9] doParallel_1.0.17           globals_0.16.2             
##  [11] lattice_0.21-8              MASS_7.3-60                
##  [13] plotly_4.10.2               sass_0.4.6                 
##  [15] rmarkdown_2.22              jquerylib_0.1.4            
##  [17] yaml_2.3.7                  httpuv_1.6.11              
##  [19] sctransform_0.3.5           sp_2.0-0                   
##  [21] spatstat.sparse_3.0-2       reticulate_1.30            
##  [23] cowplot_1.1.1               pbapply_1.7-0              
##  [25] RColorBrewer_1.1-3          abind_1.4-5                
##  [27] zlibbioc_1.46.0             rvest_1.0.3                
##  [29] Rtsne_0.16                  GenomicRanges_1.52.0       
##  [31] purrr_1.0.1                 ggraph_2.1.0               
##  [33] BiocGenerics_0.46.0         RCurl_1.98-1.12            
##  [35] tweenr_2.0.2                evmix_2.12                 
##  [37] GenomeInfoDbData_1.2.10     IRanges_2.34.1             
##  [39] S4Vectors_0.38.1            ggrepel_0.9.3              
##  [41] irlba_2.3.5.1               listenv_0.9.0              
##  [43] spatstat.utils_3.0-3        vegan_2.6-4                
##  [45] goftest_1.2-3               spatstat.random_3.1-5      
##  [47] fitdistrplus_1.1-11         parallelly_1.36.0          
##  [49] svglite_2.1.1               permute_0.9-7              
##  [51] leiden_0.4.3                codetools_0.2-19           
##  [53] DelayedArray_0.26.3         xml2_1.3.4                 
##  [55] ggforce_0.4.1               shape_1.4.6                
##  [57] tidyselect_1.2.0            farver_2.1.1               
##  [59] matrixStats_1.0.0           stats4_4.3.1               
##  [61] spatstat.explore_3.2-1      webshot_0.5.4              
##  [63] jsonlite_1.8.5              ellipsis_0.3.2             
##  [65] tidygraph_1.2.3             progressr_0.13.0           
##  [67] ggridges_0.5.4              ggalluvial_0.12.5          
##  [69] survival_3.5-5              iterators_1.0.14           
##  [71] systemfonts_1.0.4           foreach_1.5.2              
##  [73] tools_4.3.1                 stringdist_0.9.10          
##  [75] ica_1.0-3                   Rcpp_1.0.10                
##  [77] glue_1.6.2                  gridExtra_2.3              
##  [79] xfun_0.39                   mgcv_1.8-42                
##  [81] MatrixGenerics_1.12.2       GenomeInfoDb_1.36.1        
##  [83] withr_2.5.0                 BiocManager_1.30.21        
##  [85] fastmap_1.1.1               fansi_1.0.4                
##  [87] SparseM_1.81                digest_0.6.31              
##  [89] R6_2.5.1                    mime_0.12                  
##  [91] colorspace_2.1-0            scattermore_1.2            
##  [93] tensor_1.5                  spatstat.data_3.0-1        
##  [95] utf8_1.2.3                  generics_0.1.3             
##  [97] data.table_1.14.8           graphlayouts_1.0.0         
##  [99] httr_1.4.6                  htmlwidgets_1.6.2          
## [101] S4Arrays_1.0.4              uwot_0.1.15                
## [103] pkgconfig_2.0.3             gtable_0.3.3               
## [105] lmtest_0.9-40               SingleCellExperiment_1.22.0
## [107] XVector_0.40.0              powerTCR_1.20.0            
## [109] htmltools_0.5.5             scales_1.2.1               
## [111] Biobase_2.60.0              png_0.1-8                  
## [113] rstudioapi_0.14             reshape2_1.4.4             
## [115] nlme_3.1-162                GlobalOptions_0.1.2        
## [117] cachem_1.0.8                zoo_1.8-12                 
## [119] stringr_1.5.0               KernSmooth_2.23-21         
## [121] parallel_4.3.1              miniUI_0.1.1.1             
## [123] pillar_1.9.0                grid_4.3.1                 
## [125] vctrs_0.6.3                 RANN_2.6.1                 
## [127] VGAM_1.1-8                  promises_1.2.0.1           
## [129] xtable_1.8-4                cluster_2.1.4              
## [131] evaluate_0.21               truncdist_1.0-2            
## [133] cli_3.6.1                   compiler_4.3.1             
## [135] rlang_1.1.1                 crayon_1.5.2               
## [137] future.apply_1.11.0         labeling_0.4.2             
## [139] plyr_1.8.8                  stringi_1.7.12             
## [141] deldir_1.0-9                munsell_0.5.0              
## [143] gsl_2.1-8                   lazyeval_0.2.2             
## [145] spatstat.geom_3.2-1         Matrix_1.5-4.1             
## [147] patchwork_1.1.2             future_1.32.0              
## [149] shiny_1.7.4                 highr_0.10                 
## [151] SummarizedExperiment_1.30.2 evd_2.3-6.1                
## [153] ROCR_1.0-11                 igraph_1.5.0               
## [155] bslib_0.5.0