Session #2: High-dimensional analysis of immune responses to COVID-19 in peripheral blood. Tue 26-Oct, 1:00 pm – 2:30 pm AEDT. Lead instructors: Felix Marsh-Wakefield, Jennifer Habel. In this session we will be investigating the immune response to COVID-19 in peripheral blood. We will compare immune responses in healthy controls, convalescent patients, and patients with mild COVID-19 or severe COVID-19.
|
We will be using R & RStudio to analyse cells from the blood or respiratory tract of COVID-19 patients, measured by flow cytometry. We will be using the R package ‘Spectre’ to do this. The instructors will guide attendees through the analysis process in R, stopping regularly to discuss what has been done, and what we can observe about the COVID-19 immune response. The COVID-19 datasets used in this workshop originate from two studies from the Kedzierska Laboratory: Koutsakos et al. 2021 (Cell Reports Medicine) and Zhang et al. 2021 (medRxiv). To prepare for the workshop we selected representative samples and performed some initial cleanup and batch alignment, which we will discuss during the workshop.
To get started, find the relevant R script and open it in RStudio.
#######################################################################################################
#### 1. Load packages, and set working directory
#######################################################################################################

### Load libraries

library(Spectre)
Spectre::package.check() # Check that all required packages are installed
Spectre::package.load() # Load required packages

### Set PrimaryDirectory

# Work from the folder that contains this script. The RStudio document context
# may be unavailable (e.g. the script is not run inside RStudio) or may report
# an empty path (e.g. the script has not been saved yet). In those cases the
# current working directory is kept instead of failing on setwd().
script.dir <- tryCatch(
  dirname(rstudioapi::getActiveDocumentContext()$path),
  error = function(e) ""
)
script.dir

if (length(script.dir) == 1L && nzchar(script.dir) && dir.exists(script.dir)) {
  setwd(script.dir)
} else {
  message("Could not locate this script via RStudio; using the current working directory instead.")
}

getwd()
PrimaryDirectory <- getwd()
PrimaryDirectory

### Set 'input' directory

# setwd() stops with an error if the folder is missing, so a wrong project
# layout is reported here rather than later during import.
setwd(file.path(PrimaryDirectory, "data"))
InputDirectory <- getwd()

### Set 'metadata' directory

setwd(file.path(PrimaryDirectory, "metadata"))
MetaDirectory <- getwd()

### Create output directory

# showWarnings = FALSE keeps re-runs quiet when the folder already exists.
setwd(PrimaryDirectory)
dir.create("Output_Spectre", showWarnings = FALSE)
setwd("Output_Spectre")
OutputDirectory <- getwd()

# Finish in the primary directory, as the later sections expect.
setwd(PrimaryDirectory)
#######################################################################################################
#### 2. Import and prep data
#######################################################################################################

### Import data

# List the files in the input folder that match ".csv", then read them in.
# The file names are embedded in the imported data (do.embed.file.names = TRUE).
setwd(InputDirectory)
list.files(path = InputDirectory, pattern = ".csv")

data.list <- Spectre::read.files(
  file.loc = InputDirectory,
  file.type = ".csv",
  do.embed.file.names = TRUE
)

### Check the data

# Summarise the imported list and review each summary table in turn.
check <- do.list.summary(data.list)

# Column names and their subsequent values
check$name.table
# Number of columns (features, markers) in each sample
check$ncol.check
# Number of rows (cells) in each sample
check$nrow.check

# Preview the first imported sample
data.list[[1L]]

### Merge data

# Combine all samples into a single table of cells.
cell.dat <- Spectre::do.merge.files(dat = data.list)
cell.dat

### Read in metadata

# The sample details file is read from the metadata folder.
setwd(MetaDirectory)
meta.dat <- fread("sample.details.csv")
meta.dat
#######################################################################################################
#### 3. Data transformation
#######################################################################################################

# Plots from this section are written into their own output sub-folder.
# showWarnings = FALSE matches the 'Output_Spectre' call and keeps re-runs
# quiet when the folder already exists.
setwd(OutputDirectory)
dir.create("Output 1 - transformed plots", showWarnings = FALSE)
setwd("Output 1 - transformed plots")

### Arcsinh transformation

# Print the column names with their positions, so the index-based selections
# below can be checked against the data.
as.matrix(names(cell.dat))

# Columns transformed with the higher cofactor
high.asinh <- names(cell.dat)[c(1:2, 4, 7:8, 11:13)]
high.asinh

# Columns transformed with the lower cofactor
low.asinh <- names(cell.dat)[c(3, 5, 10, 14, 15)]
low.asinh

high.cofactor <- 5000
low.cofactor <- 1000

# Transformed column that every other transformed column is plotted against
plot.against <- "CD3_asinh"

cell.dat <- do.asinh(cell.dat, high.asinh, cofactor = high.cofactor)
cell.dat <- do.asinh(cell.dat, low.asinh, cofactor = low.cofactor)

# The transformed columns are referred to by the original name plus "_asinh".
transformed.cols <- c(paste0(high.asinh, "_asinh"), paste0(low.asinh, "_asinh"))
transformed.cols

# Plot a 20,000-cell subsample rather than the full dataset.
sub <- do.subsample(cell.dat, 20000)

for (i in transformed.cols) {
  make.colour.plot(sub, i, plot.against)
}

### Re-scale FSC SSC

# Bring the scatter parameters onto a 0-4 range.
cell.dat <- do.rescale(cell.dat, c("FSC-A", "SSC-A"), new.min = 0, new.max = 4)
cell.dat
#######################################################################################################
#### 4. Add metadata and set some preferences
#######################################################################################################

### Add metadata to data.table

# Take the first three columns of the sample details and attach them to the
# cell data by matching on "FileName".
meta.dat
sample.info <- meta.dat[, c(1:3)]
sample.info

cell.dat <- do.add.cols(
  cell.dat,
  "FileName",
  sample.info,
  "FileName",
  rmv.ext = TRUE
)
cell.dat

### Columns

# Column positions are printed before each selection so the indices can be checked.
as.matrix(names(cell.dat))

# Columns treated as cellular features
cellular.cols <- names(cell.dat)[18:32]
as.matrix(cellular.cols)

as.matrix(names(cell.dat))

# Subset of columns passed to clustering and dimensionality reduction
cluster.cols <- names(cell.dat)[c(18, 20, 22:23, 25:32)]
as.matrix(cluster.cols)

# Experiment name, and the columns that identify samples and groups
exp.name <- "COVID19 blood"
sample.col <- "Sample"
group.col <- "Group"

### Re-order data

# Show the groups present, then order the data by group in the sequence given.
as.matrix(unique(cell.dat[[group.col]]))

cell.dat <- do.reorder(
  cell.dat,
  group.col,
  c("Healthy", "COVID-Mild", "COVID-Severe", "COVID-Recovered")
)
cell.dat
#######################################################################################################
#### 5. Clustering and dimensionality reduction
#######################################################################################################

# Outputs from this section are written into their own sub-folder.
# showWarnings = FALSE matches the 'Output_Spectre' call and keeps re-runs
# quiet when the folder already exists.
setwd(OutputDirectory)
dir.create("Output 2 - clustering", showWarnings = FALSE)
setwd("Output 2 - clustering")

### Clustering

# Cluster on the selected columns, requesting 40 metaclusters. The printed
# table gains 'FlowSOM_cluster' and 'FlowSOM_metacluster' columns.
cell.dat <- run.flowsom(cell.dat, cluster.cols, meta.k = 40)
cell.dat
## CD4 HLA-DR CD14 CD16 CD45RA SSC-A
## 1: 385.9918 1184.3592 56.281924 24943.57611 139.6720 140276.99
## 2: 31404.5996 7557.4725 -27.541306 -86.14949 440.5692 11443.51
## 3: 140.1873 1433.2540 15.818474 16758.88381 108.1146 86944.36
## 4: 153.6085 1160.9915 -171.031998 22152.86316 111.5081 71077.65
## 5: 295.5135 1138.0511 -23.322274 18890.04307 159.4709 115441.59
## ---
## 19996: 3865.7216 22844.6618 7056.586375 420.02435 170.8769 53738.25
## 19997: 10850.9241 62866.6470 1742.570206 448.74208 1301.4540 36420.57
## 19998: 476.9634 648.1186 -8.895763 25166.58920 117.3604 78812.61
## 19999: 23832.6975 2613.9864 -88.975621 -15.28408 553.7296 15705.53
## 20000: 475.2911 851.7723 52.978616 17771.81718 183.1115 116260.53
## CD71 CD3 FSC-A CD19 CD27 CD38
## 1: 139.35008 6.94315 192506.97 929.8541 -70.55481 715.59114
## 2: 165.66003 49372.93109 68268.86 -159.7931 10608.70122 2788.36947
## 3: 71.50105 10.27204 150818.92 339.9177 -67.35428 507.87712
## 4: 197.00084 171.76961 116294.77 329.9222 -122.52182 87.81398
## 5: 126.95319 42.60100 199155.55 715.5343 -56.14074 556.84786
## ---
## 19996: 211.17297 531.39957 84792.46 1617.6456 289.16246 2244.79740
## 19997: 23.68836 3378.17135 101832.80 3779.5089 1823.80966 2638.54999
## 19998: 142.90912 197.36048 113980.21 565.3703 -36.45874 646.89287
## 19999: 133.09405 77852.20031 72824.60 906.8760 9820.58498 2089.57701
## 20000: 171.15769 174.33443 146211.70 770.4627 -203.12146 605.22127
## CD8 CD56 TCRgd FileName FileNo CD4_asinh
## 1: 76.29158 45.24299 245.25104 Healthy_01 25 0.07712188
## 2: 246.38225 -47.53602 -11.39122 Healthy_01 25 2.53694137
## 3: 14.36750 -31.70981 109.99090 Healthy_01 25 0.02803379
## 4: -200.08706 50.30267 112.58608 Healthy_01 25 0.03071687
## 5: -66.98346 -166.66228 180.21414 Healthy_01 25 0.05906835
## ---
## 19996: 164.38672 576.38362 29.96486 COVID-Recovered_08 16 0.71155988
## 19997: 124.04399 177.33431 95.98247 COVID-Recovered_08 16 1.51725313
## 19998: -68.26690 -101.83571 106.98462 COVID-Recovered_08 16 0.09524859
## 19999: 996.79923 534.47559 -56.80312 COVID-Recovered_08 16 2.26559402
## 20000: -23.52617 -107.46807 186.96952 COVID-Recovered_08 16 0.09491564
## HLA-DR_asinh CD16_asinh CD71_asinh CD3_asinh CD27_asinh CD38_asinh
## 1: 0.2347109 2.31022281 0.027866410 0.001388630 -0.014110494 0.14263410
## 2: 1.2011224 -0.01722905 0.033125947 2.985665690 1.496790455 0.53219353
## 3: 0.2828636 1.92418274 0.014299722 0.002054406 -0.013470448 0.10140156
## 4: 0.2301608 2.19417501 0.039389981 0.034347169 -0.024501912 0.01756189
## 5: 0.2256894 2.03941635 0.025387910 0.008520098 -0.011227913 0.11114062
## ---
## 19996: 2.2241931 0.08390638 0.042222047 0.106080845 0.057800302 0.43510061
## 19997: 3.2263027 0.08962837 0.004737654 0.632591168 0.357122362 0.50585807
## 19998: 0.1292634 2.31895166 0.028577934 0.039461853 -0.007291684 0.12902033
## 19999: 0.5015088 -0.00305681 0.026615668 3.439551027 1.427471971 0.40661753
## 20000: 0.1695411 1.98054900 0.034224855 0.034859825 -0.040613127 0.12075060
## CD8_asinh CD14_asinh CD45RA_asinh CD19_asinh CD56_asinh
## 1: 0.015257723 0.056252253 0.1392218 0.8308933 0.04522757
## 2: 0.049256530 -0.027537826 0.4274344 -0.1591208 -0.04751814
## 3: 0.002873496 0.015817814 0.1079051 0.3336904 -0.03170450
## 4: -0.040006739 -0.170208950 0.1112783 0.3242124 0.05028148
## 5: -0.013396291 -0.023320160 0.1588026 0.6653463 -0.16590022
## ---
## 19996: 0.032871424 2.652091749 0.1700561 1.2582986 0.54846883
## 19997: 0.024806254 1.322205647 1.0793373 2.0398002 0.17641777
## 19998: -0.013652955 -0.008895646 0.1170926 0.5389044 -0.10166052
## 19999: 0.198062349 -0.088858639 0.5287458 0.8139691 0.51183325
## 20000: -0.004705217 0.052953864 0.1821033 0.7094370 -0.10726227
## TCRgd_asinh FSC-A_rescaled SSC-A_rescaled Sample
## 1: 0.24285673 2.8873413 2.3204424 Healthy_01
## 2: -0.01139097 0.6944737 0.4746132 Healthy_01
## 3: 0.10977032 2.1515255 1.5563327 Healthy_01
## 4: 0.11234957 1.5421561 1.3290063 Healthy_01
## 5: 0.17925266 3.0046922 1.9646196 Healthy_01
## ---
## 19996: 0.02996038 0.9861239 1.0805804 COVID-Recovered_08
## 19997: 0.09583570 1.2868949 0.8324657 COVID-Recovered_08
## 19998: 0.10678158 1.5013029 1.4398271 COVID-Recovered_08
## 19999: -0.05677262 0.7748850 0.5356762 COVID-Recovered_08
## 20000: 0.18589697 2.0702056 1.9763526 COVID-Recovered_08
## Group FlowSOM_cluster FlowSOM_metacluster
## 1: Healthy 113 18
## 2: Healthy 52 4
## 3: Healthy 105 9
## 4: Healthy 47 1
## 5: Healthy 128 9
## ---
## 19996: COVID-Recovered 195 39
## 19997: COVID-Recovered 165 31
## 19998: COVID-Recovered 4 1
## 19999: COVID-Recovered 54 4
## 20000: COVID-Recovered 73 9
### Dimensionality reduction

# Run FIt-SNE on the same columns that were used for clustering. The plots
# below use the resulting 'FItSNE_X' and 'FItSNE_Y' columns.
cell.dat <- run.fitsne(cell.dat, cluster.cols)
cell.dat

### DR plots

# Each plot is generated once.

# Metaclusters, coloured as a factor and labelled
make.colour.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", col.type = "factor", add.label = TRUE)

# One panel per cellular column
make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", cellular.cols, figure.title = "Cellular cols")

# One panel per clustering column
make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", cluster.cols, figure.title = "Cluster cols")

# Metaclusters, with the group column passed as the fifth argument
make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", group.col, col.type = "factor")