ASI Masterclass - Workshop Session 3

SESSION 3 - respiratory (3:00 pm)

Session #3: High-dimensional analysis of immune responses to COVID-19 in the respiratory tract

Tue 26-Oct, 3:00 pm – 4:30 pm

Lead instructors: Givanna Putri, Wuji Zhang

In this session we will be investigating the immune response to COVID-19 in the respiratory tract, from samples acquired via an endotracheal tube. In particular we will explore a variety of myeloid cells that infiltrate the respiratory tract via the blood, including macrophages and neutrophils, and how they compare to similar cells in the blood.

ZOOM

Dataset

We will be using R & RStudio to analyse cells from the blood or respiratory tract of COVID-19 patients, measured by flow cytometry. We will be using the R package ‘Spectre’ to do this. The instructors will guide attendees through the analysis process in R, stopping regularly to discuss what has been done, and what we can observe about the COVID-19 immune response. The COVID-19 datasets used in this workshop originate from these two studies from the Kedzierska Laboratory: Koutsakos et al 2021. Cell Reports Medicine and Zhang et al 2021. medRxiv. To prepare for the workshop we selectively chose representative samples and performed some initial cleanup and batch alignment, which we will discuss during the workshop.

Open R script

1. Load packages

#######################################################################################################
#### 1. Load packages, and set working directory
#######################################################################################################

    ### Load libraries

        library(Spectre)
        Spectre::package.check()    # Check that all required packages are installed
        Spectre::package.load()     # Load required packages

    ### Set PrimaryDirectory

        dirname(rstudioapi::getActiveDocumentContext()$path)            # Finds the directory where this script is located
        setwd(dirname(rstudioapi::getActiveDocumentContext()$path))     # Sets the working directory to where the script is located
        getwd()
        PrimaryDirectory <- getwd()
        PrimaryDirectory

    ### Set 'input' directory

        setwd(PrimaryDirectory)
        setwd("data/")
        InputDirectory <- getwd()
        setwd(PrimaryDirectory)

    ### Set 'metadata' directory

        setwd(PrimaryDirectory)
        setwd("metadata/")
        MetaDirectory <- getwd()
        setwd(PrimaryDirectory)

    ### Create output directory

        dir.create("Output_Spectre", showWarnings = FALSE)
        setwd("Output_Spectre")
        OutputDirectory <- getwd()
        setwd(PrimaryDirectory)

2. Import data

#######################################################################################################
#### 2. Import and prep data
#######################################################################################################

    ### Import data

        setwd(InputDirectory)
        list.files(InputDirectory, ".csv")

        data.list <- Spectre::read.files(file.loc = InputDirectory,
                                         file.type = ".csv",
                                         do.embed.file.names = TRUE)

    ### Check the data

        check <- do.list.summary(data.list)

        check$name.table # Review column names and their subsequent values
        check$ncol.check # Review number of columns (features, markers) in each sample
        check$nrow.check # Review number of rows (cells) in each sample

        data.list[[1]]

    ### Merge data

        cell.dat <- Spectre::do.merge.files(dat = data.list)
        cell.dat

    ### Read in metadata

        setwd(MetaDirectory)

        meta.dat <- fread("sample.details.csv")
        meta.dat

3. Data transformation

#######################################################################################################
#### 3. Data transformation
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 1 - transformed plots")
    setwd("Output 1 - transformed plots")

    ### Arcsinh transformation

        as.matrix(names(cell.dat))

        low.asinh <- names(cell.dat)[c(3:19)]
        low.asinh

        high.cofactor <- 5000
        low.cofactor <- 1000

        plot.against <- "CD66b_asinh"

        cell.dat <- do.asinh(cell.dat, low.asinh, cofactor = low.cofactor)

        transformed.cols <- c(paste0(low.asinh, "_asinh"))
        transformed.cols

        sub <- do.subsample(cell.dat, 20000)

        for(i in transformed.cols){
          make.colour.plot(sub, i, plot.against)
        }

    ### Re-scale FSC SSC

        cell.dat <- do.rescale(cell.dat, c('FSC-A', 'SSC-A'), new.min = 0, new.max = 4)
        cell.dat

4. Add metadata

#######################################################################################################
#### 4. Add metadata and set some preferences
#######################################################################################################

    ### Add metadata to data.table

        meta.dat
        sample.info <- meta.dat[,c(1:3)]
        sample.info

        cell.dat <- do.add.cols(cell.dat, "FileName", sample.info, "FileName", rmv.ext = TRUE)
        cell.dat

    ### Columns

        as.matrix(names(cell.dat))

        cellular.cols <- names(cell.dat)[c(22:40)]
        as.matrix(cellular.cols)

        as.matrix(names(cell.dat))

        cluster.cols <- names(cell.dat)[c(22:40)]
        as.matrix(cluster.cols)

        exp.name <- "COVID19 respiratory"
        sample.col <- "Sample"
        group.col <- "Group"

    ### Re-order data

        as.matrix(unique(cell.dat[[group.col]]))

        cell.dat <- do.reorder(cell.dat, sample.col, c('Respiratory_1', 'Respiratory_2', 'Respiratory_3'))
        cell.dat

5. Clustering

#######################################################################################################
#### 5. Clustering and dimensionality reduction
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 2 - clustering")
    setwd("Output 2 - clustering")

    ### Clustering

        cell.dat <- run.flowsom(cell.dat, cluster.cols, meta.k = 40)
        cell.dat

    ### Dimensionality reduction

        cell.sub <- do.subsample(cell.dat, c(2000, 10000, 20000), sample.col)
        cell.sub

        cell.sub <- run.fitsne(cell.sub, cluster.cols, perplexity = 200)
        cell.sub

    ### DR plots

        make.colour.plot(cell.sub, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", col.type = 'factor', add.label = TRUE)

        make.multi.plot(cell.sub, "FItSNE_X", "FItSNE_Y", cellular.cols, figure.title = 'Cellular cols')

make.multi.plot(cell.sub, "FItSNE_X", "FItSNE_Y", cluster.cols, figure.title = 'Cluster cols')

        make.multi.plot(cell.sub, "FItSNE_X", "FItSNE_Y", "FlowSOM_metacluster", sample.col, col.type = 'factor')

        make.multi.plot(cell.sub, "CD16_asinh", "CD66b_asinh", "FlowSOM_metacluster", sample.col, col.type = 'factor', figure.title = 'CD66b x CD16')

    ### Expression heatmap

        exp <- do.aggregate(cell.sub, cellular.cols, by = "FlowSOM_metacluster")
        make.pheatmap(exp, "FlowSOM_metacluster", cellular.cols)

6. Annotate

#######################################################################################################
#### 6. Annotate clusters
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 3 - annotation")
    setwd("Output 3 - annotation")

    ### Annotate

        annots <- list("PD1+" = c(40,35,19),
                       "HLADR+" = c(34,39),
                       "CD8" = c(15),
                       'Macrophage' = c(38,33,27,16,21),
                       'Neutrophil CD66b+CD16-' = c(11,7,13,18,4,26),
                       'Neutrophil CD66b+CD16+' = c(10,9,14),
                       'Neutrophil CD66b(int)CD16+' = c(12,3,20,6,2,1,8,5,17),
                       'Neutrophil dead' = c(32,22,29,37,30,31,24,23,28)
        )

        annots <- do.list.switch(annots)
        names(annots) <- c("Values", "Population")
        setorderv(annots, 'Values')
        annots

        any(duplicated(annots$Values))

    ### Add annotations

        cell.dat <- do.add.cols(cell.dat, "FlowSOM_metacluster", annots, "Values")
        cell.dat

        cell.dat[is.na(cell.dat[['Population']]),'Population'] <- 'Other'
        cell.dat

        cell.sub <- do.add.cols(cell.sub, "FlowSOM_metacluster", annots, "Values")
        cell.sub

        cell.sub[is.na(cell.sub[['Population']]),'Population'] <- 'Other'
        cell.sub

    ### Plots

        make.colour.plot(cell.sub, "FItSNE_X", "FItSNE_Y", "Population", col.type = 'factor', add.label = TRUE)

        make.multi.plot(cell.sub, "FItSNE_X", "FItSNE_Y", "Population", sample.col, col.type = 'factor')

        make.multi.plot(cell.sub, "CD16_asinh", "CD66b_asinh", "Population", sample.col, col.type = 'factor', figure.title = 'CD66b x CD16')

    ### Expression heatmap

        rm(exp)
        exp <- do.aggregate(cell.dat, cellular.cols, by = "Population")
        make.pheatmap(exp, "Population", cellular.cols)

7. Summary

#######################################################################################################
#### 7. Summary data and statistical analysis
#######################################################################################################

    setwd(OutputDirectory)
    dir.create("Output 4 - summary data")
    setwd("Output 4 - summary data")

    ### Setup

        variance.test <- 'kruskal.test'
        pairwise.test <- "wilcox.test"

        as.matrix(unique(cell.dat[[sample.col]]))

        grp.order <- c("Respiratory_1", "Respiratory_2", "Respiratory_3")
        grp.order

    ### Create summary tables

        sum.dat <- create.sumtable(dat = cell.dat,
                                   sample.col = sample.col,
                                   pop.col = "Population",
                                   annot.cols = c(group.col))

    ### Review summary data

        sum.dat
        as.matrix(names(sum.dat))

        plot.cols <- names(sum.dat)[c(3:11)]
        plot.cols

    ### Reorder summary data and SAVE

        sum.dat <- do.reorder(sum.dat, sample.col, grp.order)
        sum.dat[,c(1:3)]

        fwrite(sum.dat, 'sum.dat.csv')

    ### Autographs

        for(i in plot.cols){

            measure <- gsub("\\ --.*", "", i)
            measure

            pop <- gsub("^[^--]*.-- ", "", i)
            pop

            make.autograph(sum.dat,
                           x.axis = sample.col,
                           y.axis = i,
                           y.axis.label = measure,
                           max.y = 1.7,

                           grp.order = grp.order,

                           Variance_test = variance.test,
                           Pairwise_test = pairwise.test,

                           title = pop, width = 6, height = 8,
                           subtitle = measure,
                           filename = paste0(i, '.png'))

        }

    ### Create a fold change heatmap

        ## Z-score calculation
        sum.dat.z <- do.zscore(sum.dat, plot.cols)

        ## Make heatmap
        make.pheatmap(sum.dat.z,
                      sample.col = sample.col,
                      plot.cols = paste0(plot.cols, '_zscore'),
                      is.fold = TRUE,
                      plot.title = 'Z-score',
                      annot.cols = group.col,
                      dendrograms = 'column',
                      cutree_cols = 3)

#######################################################################################################
#### 8. Output data and session info
#######################################################################################################

    ### Save data

        setwd(OutputDirectory)
        dir.create("Output - data", showWarnings = FALSE)
        setwd("Output - data")

        fwrite(cell.dat, "cell.dat.csv")

        write.files(cell.dat,
                    file.prefix = exp.name,
                    divide.by = sample.col,
                    write.csv = FALSE,
                    write.fcs = TRUE)

    ### Session info and metadata

        setwd(OutputDirectory)
        dir.create("Output - info", showWarnings = FALSE)
        setwd("Output - info")

        sink(file = "session_info.txt", append=TRUE, split=FALSE, type = c("output", "message"))
        session_info()
        sink()

        write(cellular.cols, "cellular.cols.txt")
        write(cluster.cols, "cluster.cols.txt")