ASI Masterclass - Workshop Session 2

SESSION 2 - blood (1:00 pm)

Session #2: High-dimensional analysis of immune responses to COVID-19 in perhiperhal blood

Tue 26-Oct, 1:00 pm – 2:30 pm AEDT

Lead instructors: Felix Marsh-Wakefield, Jennifer Habel

In this session we will be investigating the immune response to COVID-19 in perihperal blood. We will compare immune responses in healthy controls, convalescent patients, and patients with mild COVID-19 or severe COVID-19.

ZOOM

Dataset

We will be using R & RStudio to analyse cells from the blood or respiratory tract of COVID-19 patients, measured by flow cytometry. We will be using the R package ‘Spectre’ to do this. The instructors will guide attendees through the analysis process in R, stopping regularly to discuss what has been done, and what we can observe about the COVID-19 immune response. The COVID-19 datasets used in this workshop originate from these two studies from the Kedzierska Laboratory: Koutsakos et al 2021. Cell Reports Medicine and Zhang et al 2021. medRxiv. To prepare for the workshop we selectively chose representative samples and performed some initial cleanup and batch alignment, which we will discuss during the workshop.

Open R script

To get started, find the relevant R script and open it in RStudio.

1. Load packages

To get started, find the relevant R script and open it in RStudio.

#######################################################################################################
#### 1. Load packages, and set working directory
#######################################################################################################

    ### Load libraries

        library(Spectre)
        Spectre::package.check()    # Check that all required packages are installed
        Spectre::package.load()     # Load required packages

    ### Set PrimaryDirectory

        dirname(rstudioapi::getActiveDocumentContext()$path)
        setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
        getwd()
        PrimaryDirectory <- getwd()
        PrimaryDirectory

    ### Set 'input' directory

        setwd(PrimaryDirectory)
        setwd("data/")
        InputDirectory <- getwd()
        setwd(PrimaryDirectory)

    ### Set 'metadata' directory

        setwd(PrimaryDirectory)
        setwd("metadata/")
        MetaDirectory <- getwd()
        setwd(PrimaryDirectory)

    ### Create output directory

        dir.create("Output_Spectre", showWarnings = FALSE)
        setwd("Output_Spectre")
        OutputDirectory <- getwd()
        setwd(PrimaryDirectory)

2. Import data

#######################################################################################################
#### 2. Import and prep data
#######################################################################################################

    ### Import data

        setwd(InputDirectory)
        list.files(InputDirectory, ".csv")

        data.list <- Spectre::read.files(file.loc = InputDirectory,
                                         file.type = ".csv",
                                         do.embed.file.names = TRUE)

    ### Check the data

        check <- do.list.summary(data.list)

        check$name.table # Review column names and their subsequent values
        check$ncol.check # Review number of columns (features, markers) in each sample
        check$nrow.check # Review number of rows (cells) in each sample

        data.list[[1]]

    ### Merge data

        cell.dat <- Spectre::do.merge.files(dat = data.list)
        cell.dat

    ### Read in metadata

        setwd(MetaDirectory)

        meta.dat <- fread("sample.details.csv")
        meta.dat

3. Data transformation

#######################################################################################################
#### 3. Data transformation
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 1 - transformed plots")
    setwd("Output 1 - transformed plots")

    ### Arcsinh transformation

        as.matrix(names(cell.dat))

        high.asinh <- names(cell.dat)[c(1:2,4,7:8,11:13)]
        high.asinh

        low.asinh <- names(cell.dat)[c(3,5,10,14,15)]
        low.asinh

        high.cofactor <- 5000
        low.cofactor <- 1000

        plot.against <- "CD3_asinh"

        cell.dat <- do.asinh(cell.dat, high.asinh, cofactor = high.cofactor)
        cell.dat <- do.asinh(cell.dat, low.asinh, cofactor = low.cofactor)


        transformed.cols <- c(paste0(high.asinh, "_asinh"), paste0(low.asinh, "_asinh"))
        transformed.cols

        sub <- do.subsample(cell.dat, 20000)

        for(i in transformed.cols){
          make.colour.plot(sub, i, plot.against)
        }

    ### Re-scale FSC SSC

        cell.dat <- do.rescale(cell.dat, c('FSC-A', 'SSC-A'), new.min = 0, new.max = 4)
        cell.dat

4. Add metadata

#######################################################################################################
#### 4. Add metadata and set some preferences
#######################################################################################################

    ### Add metadata to data.table

        meta.dat
        sample.info <- meta.dat[,c(1:3)]
        sample.info

        cell.dat <- do.add.cols(cell.dat, "FileName", sample.info, "FileName", rmv.ext = TRUE)
        cell.dat

    ### Columns

        as.matrix(names(cell.dat))

        cellular.cols <- names(cell.dat)[c(18:32)]
        as.matrix(cellular.cols)

        as.matrix(names(cell.dat))

        cluster.cols <- names(cell.dat)[c(18,20,22:23,25:32)]
        as.matrix(cluster.cols)

        exp.name <- "COVID19 blood"
        sample.col <- "Sample"
        group.col <- "Group"

    ### Re-order data

        as.matrix(unique(cell.dat[[group.col]]))

        cell.dat <- do.reorder(cell.dat, group.col, c('Healthy', 'COVID-Mild', 'COVID-Severe', 'COVID-Recovered'))
        cell.dat

5. Clustering

#######################################################################################################
#### 5. Clustering and dimensionality reduction
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 2 - clustering")
    setwd("Output 2 - clustering")

    ### Clustering

        cell.dat <- run.flowsom(cell.dat, cluster.cols, meta.k = 40)
        cell.dat

##               CD4     HLA-DR        CD14        CD16    CD45RA     SSC-A
##     1:   385.9918  1184.3592   56.281924 24943.57611  139.6720 140276.99
##     2: 31404.5996  7557.4725  -27.541306   -86.14949  440.5692  11443.51
##     3:   140.1873  1433.2540   15.818474 16758.88381  108.1146  86944.36
##     4:   153.6085  1160.9915 -171.031998 22152.86316  111.5081  71077.65
##     5:   295.5135  1138.0511  -23.322274 18890.04307  159.4709 115441.59
##    ---
## 19996:  3865.7216 22844.6618 7056.586375   420.02435  170.8769  53738.25
## 19997: 10850.9241 62866.6470 1742.570206   448.74208 1301.4540  36420.57
## 19998:   476.9634   648.1186   -8.895763 25166.58920  117.3604  78812.61
## 19999: 23832.6975  2613.9864  -88.975621   -15.28408  553.7296  15705.53
## 20000:   475.2911   851.7723   52.978616 17771.81718  183.1115 116260.53
##             CD71         CD3     FSC-A      CD19        CD27       CD38
##     1: 139.35008     6.94315 192506.97  929.8541   -70.55481  715.59114
##     2: 165.66003 49372.93109  68268.86 -159.7931 10608.70122 2788.36947
##     3:  71.50105    10.27204 150818.92  339.9177   -67.35428  507.87712
##     4: 197.00084   171.76961 116294.77  329.9222  -122.52182   87.81398
##     5: 126.95319    42.60100 199155.55  715.5343   -56.14074  556.84786
##    ---
## 19996: 211.17297   531.39957  84792.46 1617.6456   289.16246 2244.79740
## 19997:  23.68836  3378.17135 101832.80 3779.5089  1823.80966 2638.54999
## 19998: 142.90912   197.36048 113980.21  565.3703   -36.45874  646.89287
## 19999: 133.09405 77852.20031  72824.60  906.8760  9820.58498 2089.57701
## 20000: 171.15769   174.33443 146211.70  770.4627  -203.12146  605.22127
##               CD8       CD56     TCRgd           FileName FileNo  CD4_asinh
##     1:   76.29158   45.24299 245.25104         Healthy_01     25 0.07712188
##     2:  246.38225  -47.53602 -11.39122         Healthy_01     25 2.53694137
##     3:   14.36750  -31.70981 109.99090         Healthy_01     25 0.02803379
##     4: -200.08706   50.30267 112.58608         Healthy_01     25 0.03071687
##     5:  -66.98346 -166.66228 180.21414         Healthy_01     25 0.05906835
##    ---
## 19996:  164.38672  576.38362  29.96486 COVID-Recovered_08     16 0.71155988
## 19997:  124.04399  177.33431  95.98247 COVID-Recovered_08     16 1.51725313
## 19998:  -68.26690 -101.83571 106.98462 COVID-Recovered_08     16 0.09524859
## 19999:  996.79923  534.47559 -56.80312 COVID-Recovered_08     16 2.26559402
## 20000:  -23.52617 -107.46807 186.96952 COVID-Recovered_08     16 0.09491564
##        HLA-DR_asinh  CD16_asinh  CD71_asinh   CD3_asinh   CD27_asinh CD38_asinh
##     1:    0.2347109  2.31022281 0.027866410 0.001388630 -0.014110494 0.14263410
##     2:    1.2011224 -0.01722905 0.033125947 2.985665690  1.496790455 0.53219353
##     3:    0.2828636  1.92418274 0.014299722 0.002054406 -0.013470448 0.10140156
##     4:    0.2301608  2.19417501 0.039389981 0.034347169 -0.024501912 0.01756189
##     5:    0.2256894  2.03941635 0.025387910 0.008520098 -0.011227913 0.11114062
##    ---
## 19996:    2.2241931  0.08390638 0.042222047 0.106080845  0.057800302 0.43510061
## 19997:    3.2263027  0.08962837 0.004737654 0.632591168  0.357122362 0.50585807
## 19998:    0.1292634  2.31895166 0.028577934 0.039461853 -0.007291684 0.12902033
## 19999:    0.5015088 -0.00305681 0.026615668 3.439551027  1.427471971 0.40661753
## 20000:    0.1695411  1.98054900 0.034224855 0.034859825 -0.040613127 0.12075060
##           CD8_asinh   CD14_asinh CD45RA_asinh CD19_asinh  CD56_asinh
##     1:  0.015257723  0.056252253    0.1392218  0.8308933  0.04522757
##     2:  0.049256530 -0.027537826    0.4274344 -0.1591208 -0.04751814
##     3:  0.002873496  0.015817814    0.1079051  0.3336904 -0.03170450
##     4: -0.040006739 -0.170208950    0.1112783  0.3242124  0.05028148
##     5: -0.013396291 -0.023320160    0.1588026  0.6653463 -0.16590022
##    ---
## 19996:  0.032871424  2.652091749    0.1700561  1.2582986  0.54846883
## 19997:  0.024806254  1.322205647    1.0793373  2.0398002  0.17641777
## 19998: -0.013652955 -0.008895646    0.1170926  0.5389044 -0.10166052
## 19999:  0.198062349 -0.088858639    0.5287458  0.8139691  0.51183325
## 20000: -0.004705217  0.052953864    0.1821033  0.7094370 -0.10726227
##        TCRgd_asinh FSC-A_rescaled SSC-A_rescaled             Sample
##     1:  0.24285673      2.8873413      2.3204424         Healthy_01
##     2: -0.01139097      0.6944737      0.4746132         Healthy_01
##     3:  0.10977032      2.1515255      1.5563327         Healthy_01
##     4:  0.11234957      1.5421561      1.3290063         Healthy_01
##     5:  0.17925266      3.0046922      1.9646196         Healthy_01
##    ---
## 19996:  0.02996038      0.9861239      1.0805804 COVID-Recovered_08
## 19997:  0.09583570      1.2868949      0.8324657 COVID-Recovered_08
## 19998:  0.10678158      1.5013029      1.4398271 COVID-Recovered_08
## 19999: -0.05677262      0.7748850      0.5356762 COVID-Recovered_08
## 20000:  0.18589697      2.0702056      1.9763526 COVID-Recovered_08
##                  Group FlowSOM_cluster FlowSOM_metacluster
##     1:         Healthy             113                  18
##     2:         Healthy              52                   4
##     3:         Healthy             105                   9
##     4:         Healthy              47                   1
##     5:         Healthy             128                   9
##    ---
## 19996: COVID-Recovered             195                  39
## 19997: COVID-Recovered             165                  31
## 19998: COVID-Recovered               4                   1
## 19999: COVID-Recovered              54                   4
## 20000: COVID-Recovered              73                   9

    ### Dimensionality reduction

        cell.dat <- run.fitsne(cell.dat, cluster.cols)
        cell.dat

    ### DR plots

        make.colour.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", col.type = 'factor', add.label = TRUE)

        make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", cellular.cols, figure.title = 'Cellular cols')

        make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", cluster.cols, figure.title = 'Cluster cols')

        make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", group.col, col.type = 'factor')

    ### DR plots

        make.colour.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", col.type = 'factor', add.label = TRUE)

        make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", cellular.cols, figure.title = 'Cellular cols')

        make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", group.col, col.type = 'factor')

    ### Expression heatmap

        exp <- do.aggregate(cell.dat, cellular.cols, by = "FlowSOM_metacluster")
        make.pheatmap(exp, "FlowSOM_metacluster", cellular.cols)

6. Annotate

#######################################################################################################
#### 6. Annotate clusters
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 3 - annotation")
    setwd("Output 3 - annotation")

    ### Annotate

        annots <- list("CD4 T cells" = c(7,4,8,3,2,5),
                       "CD8 T cells" = c(21,20,27,23,28,33,19),
                       'gd T cells' = c(13,14,16,10),
                       "NK cells" = c(11,17,15,36,35),
                       'B cells' = c(38,37,26,25,32,31),
                       "Neutrophils" = c(1,6,9,12,18),
                       "Monocytes" = c(40,34,39),
                       "Eosinophils" = c(29),
                       'Unknown' = c(30,22,24)
        )

        annots <- do.list.switch(annots)
        names(annots) <- c("Values", "Population")
        setorderv(annots, 'Values')
        annots

        any(duplicated(annots$Values))

    ### Add annotations

        cell.dat <- do.add.cols(cell.dat, "FlowSOM_metacluster", annots, "Values")
        cell.dat

        make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "Population", group.col, col.type = 'factor')

        make.multi.plot(cell.dat, "FItSNE_X", "FItSNE_Y", "Population", group.col, col.type = 'factor')

    ### Expression heatmap

        rm(exp)
        exp <- do.aggregate(cell.dat, cellular.cols, by = "Population")
        make.pheatmap(exp, "Population", cellular.cols)

7. Summary data

#######################################################################################################
#### 7. Summary data and statistical analysis
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 4 - summary data")
    setwd("Output 4 - summary data")

    ### Setup

        variance.test <- 'kruskal.test'
        pairwise.test <- "wilcox.test"

        as.matrix(unique(cell.dat$Group))

        grp.order <- c("Healthy", "COVID-Mild", "COVID-Severe", "COVID-Recovered")
        grp.order

        comparisons <- list(c("Healthy", "COVID-Mild"),
                            c('Healthy', "COVID-Severe"),
                            c('Healthy', "COVID-Recovered"),
                            c('COVID-Mild', "COVID-Severe"),
                            c('COVID-Mild', 'COVID-Recovered'),
                            c('COVID-Severe', 'COVID-Recovered'))
        comparisons

    ### Select columns to measure MFI

        as.matrix(cellular.cols)
        dyn.cols <- cellular.cols[c(7,2)]
        dyn.cols

        for(i in dyn.cols){
            make.multi.plot(cell.dat, i, 'CD3_asinh', i, group.col, figure.title = i)
        }

        cutoff <- data.table('Markers' = c('CD38_asinh', 'HLA-DR_asinh'),
                             'Values' = c(0.75, 1.5))
        cutoff

        for(i in dyn.cols){

            val <- cutoff[cutoff[['Markers']] == i,'Values', with = FALSE][[1]]
            x <- cell.dat[cell.dat[[i]] > val,]
            x$EXPRESSION <- 'POSITIVE'

            y <- cell.dat[cell.dat[[i]] < val,]
            y$EXPRESSION <- 'NEGATIVE'

            z <- rbind(x, y)

            make.multi.plot(z, i, 'CD3_asinh', 'EXPRESSION', group.col, figure.title = paste0(i, 'EXPRESSION'))
        }

    ### Create summary tables

        sum.dat <- create.sumtable(dat = cell.dat,
                                   sample.col = sample.col,
                                   pop.col = "Population",
                                   use.cols = dyn.cols,
                                   perc.pos = cutoff,
                                   double.pos = list(c('CD38_asinh', 'HLA-DR_asinh')),
                                   annot.cols = c(group.col))

    ### Review summary data

        sum.dat
        as.matrix(names(sum.dat))

        plot.cols <- names(sum.dat)[c(3:11,49:50)]
        plot.cols

    ### Reorder summary data and SAVE

        sum.dat <- do.reorder(sum.dat, group.col, grp.order)
        sum.dat[,c(1:3)]

        fwrite(sum.dat, 'sum.dat.csv')

### Autographs

        for(i in plot.cols){

            measure <- gsub("\\ --.*", "", i)
            measure

            pop <- gsub("^[^--]*.-- ", "", i)
            pop

            make.autograph(sum.dat,
                           x.axis = group.col,
                           y.axis = i,
                           y.axis.label = measure,
                           max.y = 1.7,

                           grp.order = grp.order,
                           my_comparisons = comparisons,

                           Variance_test = variance.test,
                           Pairwise_test = pairwise.test,

                           title = pop, width = 6, height = 8,
                           subtitle = measure,
                           filename = paste0(i, '.pdf'))

        }

    ### Create a fold change heatmap

        ## Z-score calculation
        sum.dat.z <- do.zscore(sum.dat, plot.cols)

        ## Group
        t.first <- match(grp.order, sum.dat.z[[group.col]])
        t.first <- t.first -1
        t.first

        ## Make heatmap
        make.pheatmap(sum.dat.z,
                      sample.col = sample.col,
                      plot.cols = paste0(plot.cols, '_zscore'),
                      is.fold = TRUE,
                      plot.title = 'Z-score',
                      annot.cols = group.col,
                      dendrograms = 'column',
                      row.sep = t.first,
                      cutree_cols = 3)

8. Session data & info

#######################################################################################################
#### 8. Output data and session info
#######################################################################################################

    ### Save data

        setwd(OutputDirectory)
        dir.create("Output - data", showWarnings = FALSE)
        setwd("Output - data")

        fwrite(cell.dat, "cell.dat.csv")

        write.files(cell.dat,
                    file.prefix = exp.name,
                    divide.by = sample.col,
                    write.csv = FALSE,
                    write.fcs = TRUE)

    ### Session info and metadata

        setwd(OutputDirectory)
        dir.create("Output - info", showWarnings = FALSE)
        setwd("Output - info")

        sink(file = "session_info.txt", append=TRUE, split=FALSE, type = c("output", "message"))
        session_info()
        sink()

        write(cellular.cols, "cellular.cols.txt")
        write(cluster.cols, "cluster.cols.txt")