osmFISH Mouse SS Cortex#

Date

2022-09-16

Dataset explanation#

Codeluppi et al. created a cyclic single-molecule fluorescence in situ hybridization (osmFISH) technology and define the cellular organization of the somatosensory cortex with the expression of 33 genes in 5,328 cells.

../../_images/osmfish_image_demo.png

Set up Giotto environment#

# Ensure Giotto Suite is installed.
if(!"Giotto" %in% installed.packages()) {
  devtools::install_github("drieslab/Giotto@Suite")
}

# Ensure GiottoData, a small, helper module for tutorials, is installed.
if(!"GiottoData" %in% installed.packages()) {
  devtools::install_github("drieslab/GiottoData")
}
library(Giotto)
# Ensure the Python environment for Giotto has been installed.
genv_exists = checkGiottoEnvironment()
if(!genv_exists){
  # The following command need only be run once to install the Giotto environment.
  installGiottoEnvironment()
}
library(Giotto)
library(GiottoData)

# 1. set working directory
results_folder = 'path/to/result'

# Optional: Specify a path to a Python executable within a conda or miniconda
# environment. If set to NULL (default), the Python executable within the previously
# installed Giotto environment will be used.
my_python_path = NULL # alternatively, "/local/python/path/python" if desired.

Dataset download#

The osmFISH data to run this tutorial can be found here. Alternatively you can use the getSpatialDataset to automatically download this dataset like we do in this example; to download the data used to create the Giotto Object below, please ensure that wget is installed locally.

# download data to working directory ####
# if wget is installed, set method = 'wget'
# if you run into authentication issues with wget, then add " extra = '--no-check-certificate' "
getSpatialDataset(dataset = 'osmfish_SS_cortex', directory = results_folder, method = 'wget')

Part 1: Giotto global instructions and preparations#

## instructions allow us to automatically save all plots into a chosen results folder
instrs = createGiottoInstructions(save_plot = TRUE,
                                  show_plot = FALSE,
                                  save_dir = results_folder,
                                  python_path = python_path)

expr_path = paste0(results_folder, "osmFISH_prep_expression.txt")
loc_path = paste0(results_folder, "osmFISH_prep_cell_coordinates.txt")
meta_path = paste0(results_folder, "osmFISH_prep_cell_metadata.txt")

Part 2: Create Giotto object & process data#

## create
osm_test <- createGiottoObject(expression = expr_path,
                              spatial_locs = loc_path,
                              instructions = instrs)

## add field annotation
metadata = data.table::fread(file = meta_path)
osm_test = addCellMetadata(osm_test, new_metadata = metadata,
                           by_column = T, column_cell_ID = 'CellID')
## filter
osm_test <- filterGiotto(gobject = osm_test,
                         expression_threshold = 1,
                         feat_det_in_min_cells = 10,
                         min_det_feats_per_cell = 10,
                         expression_values = c('raw'),
                         verbose = T)

## normalize Giotto
## there are two ways for osmFISH object

# 1. standard z-score way
osm_test <- normalizeGiotto(gobject = osm_test)

# 2. osmFISH way
raw_expr_matrix = get_expression_values(osm_test, values = "raw")
norm_genes = (raw_expr_matrix/Giotto:::rowSums_flex(raw_expr_matrix)) * nrow(raw_expr_matrix)

norm_genes_cells = Giotto:::t_flex((Giotto:::t_flex(norm_genes)/Giotto:::colSums_flex(norm_genes)) * ncol(raw_expr_matrix))
osm_test = set_expression_values(osm_test, values = norm_genes_cells , name = "custom")

## add gene & cell statistics
osm_test <- addStatistics(gobject = osm_test)

# save according to giotto instructions
spatPlot2D(gobject = osm_test, cell_color = 'ClusterName', point_size = 1.5,
         save_param = list(save_name = '2_a_original_clusters'))
../../_images/2_a_original_clusters.png
spatPlot2D(gobject = osm_test, cell_color = 'Region',
         save_param = list(save_name = '2_b_original_regions'))
../../_images/2_b_original_regions.png
spatPlot2D(gobject = osm_test, cell_color = 'ClusterID',
         save_param = list(save_name = '2_c_clusterID'))
../../_images/2_c_clusterID.png
spatPlot2D(gobject = osm_test, cell_color = 'total_expr', color_as_factor = F, gradient_midpoint = 160,
         gradient_limits = c(120,220),
         save_param = list(save_name = '2_d_total_expr_limits'))
../../_images/2_d_total_expr_limits.png

Part 3: Dimension reduction#

## highly variable genes (HVG)
# only 33 genes so use all genes

## run PCA on expression values (default)
osm_test <- runPCA(gobject = osm_test, expression_values = 'custom', scale_unit = F, center = F)
screePlot(osm_test, ncp = 30,
          save_param = list(save_name = '3_a_screeplot'))
../../_images/3_a_screeplot1.png
plotPCA(osm_test,
        save_param = list(save_name = '3_b_PCA_reduction'))
../../_images/3_b_PCA_reduction.png
## run UMAP and tSNE on PCA space (default)
osm_test <- runUMAP(osm_test, dimensions_to_use = 1:31, n_threads = 4)
plotUMAP(gobject = osm_test,
         save_param = list(save_name = '3_c_UMAP_reduction.png'))
../../_images/3_c_UMAP_reduction.png.png
plotUMAP(gobject = osm_test,
         cell_color = 'total_expr', color_as_factor = F, gradient_midpoint = 180, gradient_limits = c(120, 220),
         save_param = list(save_name = '3_d_UMAP_reduction_expression.png'))
../../_images/3_d_UMAP_reduction_expression.png.png
osm_test <- runtSNE(osm_test, dimensions_to_use = 1:31, perplexity = 70, check_duplicates = F)
plotTSNE(gobject = osm_test,  save_param = list(save_name = '3_e_tSNE_reduction'))
../../_images/3_e_tSNE_reduction.png

Part 4: Cluster#

## hierarchical clustering
osm_test = doHclust(gobject = osm_test, expression_values = 'custom', k = 36)
plotUMAP(gobject = osm_test, cell_color = 'hclust', point_size = 2.5,
         show_NN_network = F, edge_alpha = 0.05,
         save_param = list(save_name = '4_a_UMAP_hclust'))
../../_images/4_a_UMAP_hclust.png
## kmeans clustering
osm_test = doKmeans(gobject = osm_test, expression_values = 'normalized', dim_reduction_to_use = 'pca', dimensions_to_use = 1:20, centers = 36, nstart = 2000)
plotUMAP(gobject = osm_test, cell_color = 'kmeans',
         point_size = 2.5, show_NN_network = F, edge_alpha = 0.05,
         save_param =  list(save_name = '4_b_UMAP_kmeans'))
../../_images/4_b_UMAP_kmeans.png
## Leiden clustering strategy:
# 1. overcluster
# 2. merge small clusters that are highly similar

# sNN network (default)
osm_test <- createNearestNetwork(gobject = osm_test, dimensions_to_use = 1:31, k = 12)

osm_test <- doLeidenCluster(gobject = osm_test, resolution = 0.09, n_iterations = 1000)
plotUMAP(gobject = osm_test, cell_color = 'leiden_clus', point_size = 2.5,
         show_NN_network = F, edge_alpha = 0.05,
         save_param = list(save_name = '4_c_UMAP_leiden'))
../../_images/4_c_UMAP_leiden.png
# merge small groups based on similarity
leiden_similarities = getClusterSimilarity(osm_test,
                                           expression_values = 'custom',
                                           cluster_column = 'leiden_clus')

osm_test = mergeClusters(osm_test,
                         expression_values = 'custom',
                         cluster_column = 'leiden_clus',
                         new_cluster_name = 'leiden_clus_m',
                         max_group_size = 30,
                         force_min_group_size = 25,
                         max_sim_clusters = 10,
                         min_cor_score = 0.7)

plotUMAP(gobject = osm_test, cell_color = 'leiden_clus_m', point_size = 2.5,
         show_NN_network = F, edge_alpha = 0.05,
         save_param = list(save_name = '4_d_UMAP_leiden_merged'))
../../_images/4_d_UMAP_leiden_merged.png
## show cluster relationships
showClusterHeatmap(gobject = osm_test, expression_values = 'custom', cluster_column = 'leiden_clus_m',
                   save_param = list(save_name = '4_e_heatmap', units = 'cm'),
                   row_names_gp = grid::gpar(fontsize = 6), column_names_gp = grid::gpar(fontsize = 6))
../../_images/4_e_heatmap.png
showClusterDendrogram(osm_test, cluster_column = 'leiden_clus_m', h = 1, rotate = T,
                      save_param = list(save_name = '4_f_dendro', units = 'cm'))
../../_images/4_f_dendro.png

Part 5: Co-visualize#

# expression and spatial
spatDimPlot2D(gobject = osm_test, cell_color = 'leiden_clus', spat_point_size = 2,
              save_param = list(save_name = '5_a_covis_leiden'))
../../_images/5_a_covis_leiden.png
spatDimPlot2D(gobject = osm_test, cell_color = 'leiden_clus_m', spat_point_size = 2,
              save_param = list(save_name = '5_b_covis_leiden_m'))
../../_images/5_b_covis_leiden_m.png
spatDimPlot2D(gobject = osm_test, cell_color = 'leiden_clus_m',
              dim_point_size = 2, spat_point_size = 2, select_cell_groups = 'm_8',
              save_param = list(save_name = '5_c_covis_leiden_merged_selected'))
../../_images/5_c_covis_leiden_merged_selected.png
spatDimPlot2D(gobject = osm_test, cell_color = 'total_expr', color_as_factor = F,
              gradient_midpoint = 160, gradient_limits = c(120,220),
              save_param = list(save_name = '5_d_total_expr'))
../../_images/5_d_total_expr.png

Part 6: Differential expression#

## split dendrogram nodes ##
dendsplits = getDendrogramSplits(gobject = osm_test,
                                 expression_values = 'custom',
                                 cluster_column = 'leiden_clus_m')
split_3_markers = findMarkers(gobject = osm_test,
                                         method = 'gini',
                                         expression_values = 'custom',
                                         cluster_column = 'leiden_clus_m',
group_1 = unlist(dendsplits[3]$tree_1), group_2 = unlist(dendsplits[3]$tree_2))
../../_images/6_a_dendrogram.png
## Individual populations ##
markers = findMarkers_one_vs_all(gobject = osm_test,
                                 method = 'scran',
                                 expression_values = 'custom',
                                 cluster_column = 'leiden_clus_m',
                                 min_feats = 2, rank_score = 2)
## violinplot
topgenes = markers[, head(.SD, 1), by = 'cluster']$feats
violinPlot(osm_test, feats = unique(topgenes), cluster_column = 'leiden_clus_m', expression_values = 'custom',
           strip_text = 5, strip_position = 'right',
           save_param = c(save_name = '6_a_violinplot'))
../../_images/6_a_violinplot1.png
plotMetaDataHeatmap(osm_test, expression_values = 'custom',
                    metadata_cols = c('leiden_clus_m'),
                    save_param = c(save_name = '6_b_metaheatmap'))
../../_images/6_b_metaheatmap.png
plotMetaDataHeatmap(osm_test, expression_values = 'custom',
                    metadata_cols = c('leiden_clus_m'),
                    save_param = c(save_name = '6_e_metaheatmap_all_genes'))
../../_images/6_e_metaheatmap_all_genes.png
plotMetaDataHeatmap(osm_test, expression_values = 'custom',
                    metadata_cols = c('ClusterName'),
                    save_param = c(save_name = '6_f_metaheatmap_all_genes_names'))
../../_images/6_f_metaheatmap_all_genes_names.png

Part 7: Cell type annotation#

Use annotateGiotto() to annotate the clusters. For this dataset, we have ClusterName in the metadata.

Part 8: Spatial grid#

osm_test <- createSpatialGrid(gobject = osm_test,
                              sdimx_stepsize = 2000,
                              sdimy_stepsize = 2000,
                              minimum_padding = 0)
spatPlot2D(osm_test, cell_color = 'ClusterName', show_grid = T,

           grid_color = 'lightblue', spatial_grid_name = 'spatial_grid',
           point_size = 1.5,
           save_param = c(save_name = '8_grid_det_cell_types'))
../../_images/8_grid_det_cell_types.png

Part 9: Spatial network#

osm_test <- createSpatialNetwork(gobject = osm_test)
spatPlot2D(gobject = osm_test, show_network = T,
           network_color = 'blue',
           point_size = 1.5, cell_color = 'ClusterName', legend_symbol_size = 2,
           save_param = c(save_name = '9_spatial_network_k10'))
../../_images/9_spatial_network_k10.png

Part 10: Spatial genes#

# km binarization
kmtest = binSpect(osm_test, calc_hub = T, hub_min_int = 5,
                  bin_method = 'kmeans')

spatDimFeatPlot2D(osm_test, expression_values = 'scaled',
               feats = kmtest$feats[1:3], plot_alignment = 'horizontal',
               cow_n_col = 1,
               save_param = c(save_name = '10_a_spatial_genes_km'))
../../_images/10_a_spatial_genes_km.png

Part 12. cell-cell preferential proximity#

## calculate frequently seen proximities
cell_proximities = cellProximityEnrichment(gobject = osm_test,
                                           cluster_column = 'ClusterName',
                                           number_of_simulations = 1000)
## barplot
cellProximityBarplot(gobject = osm_test, CPscore = cell_proximities, min_orig_ints = 25, min_sim_ints = 25,
                     save_param = c(save_name = '12_a_barplot_cell_cell_enrichment'))
../../_images/12_a_barplot_cell_cell_enrichment.png
## heatmap
cellProximityHeatmap(gobject = osm_test, CPscore = cell_proximities, order_cell_types = T, scale = T,
                     color_breaks = c(-1.5, 0, 1.5), color_names = c('blue', 'white', 'red'),
                     save_param = c(save_name = '12_b_heatmap_cell_cell_enrichment', unit = 'in'))
../../_images/12_b_heatmap_cell_cell_enrichment.png
## network
cellProximityNetwork(gobject = osm_test, CPscore = cell_proximities, remove_self_edges = F, only_show_enrichment_edges = T,
                     save_param = c(save_name = '12_c_network_cell_cell_enrichment'))
../../_images/12_c_network_cell_cell_enrichment.png
## visualization
spec_interaction = "Astrocyte_Mfge8--Oligodendrocyte_Precursor_cells"
cellProximitySpatPlot(gobject = osm_test,
                      interaction_name = spec_interaction,
                      cluster_column = 'ClusterName',
                      cell_color = 'ClusterName', cell_color_code = c('Astrocyte_Mfge8' = 'blue', 'Oligodendrocyte_Precursor_cells' = 'red'),
                      coord_fix_ratio = 0.5,  point_size_select = 3, point_size_other = 1.5,
                      save_param = c(save_name = '12_d_cell_cell_enrichment_selected'))
../../_images/12_d_cell_cell_enrichment_selected.png