{ "cells": [ { "cell_type": "markdown", "id": "15e71d29", "metadata": {}, "source": [ "# Applying `RCTD` and `MCube` to the 10x Visium CRC dataset" ] }, { "cell_type": "code", "execution_count": 1, "id": "833e8ca8-b82a-46ca-9df1-3c7c0723dad8", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [], "source": [ "set.seed(20250502)\n", "\n", "library(Matrix)\n", "library(ggplot2)\n", "\n", "library(spacexr)\n", "library(MCube)" ] }, { "cell_type": "code", "execution_count": 2, "id": "82892b33", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [], "source": [ "RAW_DATA_PATH <- \"/import/home/share/zw/data/CRC\"\n", "DATA_PATH <- \"/import/home/share/zw/pql/data/CRC\"\n", "RESULT_PATH <- \"/import/home/share/zw/pql/results/CRC/Visium\"\n", "\n", "if (!dir.exists(file.path(RESULT_PATH))) {\n", " dir.create(file.path(RESULT_PATH), recursive = TRUE)\n", "}" ] }, { "cell_type": "markdown", "id": "0918904d", "metadata": {}, "source": [ "## Cell type deconvolution using `RCTD`" ] }, { "cell_type": "code", "execution_count": 3, "id": "4560c24a", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: SeuratObject\n", "\n", "Loading required package: sp\n", "\n", "\n", "Attaching package: ‘SeuratObject’\n", "\n", "\n", "The following objects are masked from ‘package:base’:\n", "\n", " intersect, t\n", "\n", "\n", "Warning message in check_UMI(nUMI, \"Reference\", require_2d = T, require_int = require_int, :\n", "“Reference: some nUMI values are less than min_UMI = 100, and these cells will be removed. Optionally, you may lower the min_UMI parameter.”\n", "Warning message in Reference(FlexRef, CTRef, colSums(FlexRef)):\n", "“Reference: number of cells per cell type is 34844, larger than maximum allowable of 10000. Downsampling number of cells to: 10000”\n" ] } ], "source": [ "library(Seurat)\n", "\n", "FlexRef <- Read10X_h5(file.path(\n", " RAW_DATA_PATH, \"sc\",\n", " \"HumanColonCancer_Flex_Multiplex_count_filtered_feature_bc_matrix.h5\"\n", "))\n", "# MetaData <- readRDS(file.path(\n", "# RAW_DATA_PATH, \"sc\", \"FlexSeuratV5_MetaData.rds\"\n", "# )) # See FlexSingleCell.R if not generated.\n", "\n", "meta <- read.csv(file.path(\n", " RAW_DATA_PATH, \"HumanColonCancer_VisiumHD/MetaData/SingleCell_MetaData.csv.gz\"\n", "))\n", "\n", "KpIdents <- names(which(table(meta$Level2) > 25))\n", "meta <- meta[meta$Level2 %in% KpIdents, ]\n", "FlexRef <- FlexRef[, meta$Barcode]\n", "\n", "CTRef <- meta$Level2\n", "CTRef <- gsub(\"/\", \"_\", CTRef)\n", "CTRef <- as.factor(CTRef)\n", "names(CTRef) <- meta$Barcode\n", "\n", "reference <- Reference(FlexRef, CTRef, colSums(FlexRef))" ] }, { "cell_type": "code", "execution_count": 4, "id": "24e01a2c", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "\u001b[1mRows: \u001b[22m\u001b[34m14336\u001b[39m \u001b[1mColumns: \u001b[22m\u001b[34m6\u001b[39m\n", "\u001b[36m──\u001b[39m \u001b[1mColumn specification\u001b[22m \u001b[36m────────────────────────────────────────────────────────\u001b[39m\n", "\u001b[1mDelimiter:\u001b[22m \",\"\n", "\u001b[31mchr\u001b[39m (1): barcode\n", "\u001b[32mdbl\u001b[39m (5): in_tissue, array_row, array_col, pxl_row_in_fullres, pxl_col_in_ful...\n", "\n", "\u001b[36mℹ\u001b[39m Use `spec()` to retrieve the full column specification for this data.\n", "\u001b[36mℹ\u001b[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.\n" ] } ], "source": [ "counts <- Read10X_h5(file.path(\n", " RAW_DATA_PATH, \"visium\",\n", " \"Visium_V2_Human_Colon_Cancer_P2_filtered_feature_bc_matrix.h5\"\n", "))\n", "\n", "coordinates <- as.data.frame(readr::read_csv(file.path(\n", " RAW_DATA_PATH, \"visium\",\n", " \"spatial/tissue_positions.csv\"\n", ")))\n", "rownames(coordinates)<-coordinates$barcode\n", "coordinates<-coordinates[colnames(counts), ]\n", "coordinates <- coordinates[, c(6, 5)]\n", "\n", "puck <- SpatialRNA(coordinates, counts, colSums(counts))" ] }, { "cell_type": "code", "execution_count": 5, "id": "19c0c13e", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Begin: process_cell_type_info\n", "\n", "process_cell_type_info: number of cells in reference: 186667\n", "\n", "process_cell_type_info: number of genes in reference: 18082\n", "\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", " Adipocyte CAF CD4 T cell \n", " 83 10000 10000 \n", " CD8 Cytotoxic T cell Endothelial Enteric Glial \n", " 10000 6940 3680 \n", " Enterocyte Epithelial Fibroblast \n", " 1739 7432 10000 \n", " Goblet Lymphatic Endothelial Macrophage \n", " 10000 1095 10000 \n", " Mast Mature B mRegDC \n", " 1925 8318 404 \n", " Myofibroblast Neuroendocrine Neutrophil \n", " 972 529 3138 \n", " pDC Pericytes Plasma \n", " 119 3046 10000 \n", "Proliferating Immune II QC_Filtered SM Stress Response \n", " 2560 10000 10000 \n", " Smooth Muscle Tuft Tumor I \n", " 10000 101 10000 \n", " Tumor II Tumor III Tumor V \n", " 8292 10000 7893 \n", " Unknown III (SM) vSM \n", " 1784 6617 \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.0 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.1 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.1 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.3 GiB”\n", "Warning message in asMethod(object):\n", "“sparse->dense coercion: allocating vector of size 1.1 GiB”\n", "End: process_cell_type_info\n", "\n", "create.RCTD: getting regression differentially expressed genes: \n", "\n", "get_de_genes: Adipocyte found DE genes: 360\n", "\n", "get_de_genes: CAF found DE genes: 217\n", "\n", "get_de_genes: CD4 T cell found DE genes: 387\n", "\n", "get_de_genes: CD8 Cytotoxic T cell found DE genes: 441\n", "\n", "get_de_genes: Endothelial found DE genes: 335\n", "\n", "get_de_genes: Enteric Glial found DE genes: 329\n", "\n", "get_de_genes: Enterocyte found DE genes: 203\n", "\n", "get_de_genes: Epithelial found DE genes: 71\n", "\n", "get_de_genes: Fibroblast found DE genes: 287\n", "\n", "get_de_genes: Goblet found DE genes: 311\n", "\n", "get_de_genes: Lymphatic Endothelial found DE genes: 370\n", "\n", "get_de_genes: Macrophage found DE genes: 355\n", "\n", "get_de_genes: Mast found DE genes: 351\n", "\n", "get_de_genes: Mature B found DE genes: 415\n", "\n", "get_de_genes: mRegDC found DE genes: 463\n", "\n", "get_de_genes: Myofibroblast found DE genes: 348\n", "\n", "get_de_genes: Neuroendocrine found DE genes: 178\n", "\n", "get_de_genes: Neutrophil found DE genes: 406\n", "\n", "get_de_genes: pDC found DE genes: 494\n", "\n", "get_de_genes: Pericytes found DE genes: 311\n", "\n", "get_de_genes: Plasma found DE genes: 108\n", "\n", "get_de_genes: Proliferating Immune II found DE genes: 311\n", "\n", "get_de_genes: QC_Filtered found DE genes: 55\n", "\n", "get_de_genes: SM Stress Response found DE genes: 400\n", "\n", "get_de_genes: Smooth Muscle found DE genes: 469\n", "\n", "get_de_genes: Tuft found DE genes: 392\n", "\n", "get_de_genes: Tumor I found DE genes: 185\n", "\n", "get_de_genes: Tumor II found DE genes: 245\n", "\n", "get_de_genes: Tumor III found DE genes: 355\n", "\n", "get_de_genes: Tumor V found DE genes: 392\n", "\n", "get_de_genes: Unknown III (SM) found DE genes: 350\n", "\n", "get_de_genes: vSM found DE genes: 259\n", "\n", "get_de_genes: total DE genes: 4567\n", "\n", "create.RCTD: getting platform effect normalization differentially expressed genes: \n", "\n", "get_de_genes: Adipocyte found DE genes: 720\n", "\n", "get_de_genes: CAF found DE genes: 375\n", "\n", "get_de_genes: CD4 T cell found DE genes: 867\n", "\n", "get_de_genes: CD8 Cytotoxic T cell found DE genes: 923\n", "\n", "get_de_genes: Endothelial found DE genes: 654\n", "\n", "get_de_genes: Enteric Glial found DE genes: 714\n", "\n", "get_de_genes: Enterocyte found DE genes: 423\n", "\n", "get_de_genes: Epithelial found DE genes: 193\n", "\n", "get_de_genes: Fibroblast found DE genes: 549\n", "\n", "get_de_genes: Goblet found DE genes: 688\n", "\n", "get_de_genes: Lymphatic Endothelial found DE genes: 758\n", "\n", "get_de_genes: Macrophage found DE genes: 673\n", "\n", "get_de_genes: Mast found DE genes: 706\n", "\n", "get_de_genes: Mature B found DE genes: 972\n", "\n", "get_de_genes: mRegDC found DE genes: 821\n", "\n", "get_de_genes: Myofibroblast found DE genes: 667\n", "\n", "get_de_genes: Neuroendocrine found DE genes: 452\n", "\n", "get_de_genes: Neutrophil found DE genes: 685\n", "\n", "get_de_genes: pDC found DE genes: 1003\n", "\n", "get_de_genes: Pericytes found DE genes: 587\n", "\n", "get_de_genes: Plasma found DE genes: 190\n", "\n", "get_de_genes: Proliferating Immune II found DE genes: 788\n", "\n", "get_de_genes: QC_Filtered found DE genes: 187\n", "\n", "get_de_genes: SM Stress Response found DE genes: 810\n", "\n", "get_de_genes: Smooth Muscle found DE genes: 965\n", "\n", "get_de_genes: Tuft found DE genes: 906\n", "\n", "get_de_genes: Tumor I found DE genes: 561\n", "\n", "get_de_genes: Tumor II found DE genes: 643\n", "\n", "get_de_genes: Tumor III found DE genes: 983\n", "\n", "get_de_genes: Tumor V found DE genes: 957\n", "\n", "get_de_genes: Unknown III (SM) found DE genes: 851\n", "\n", "get_de_genes: vSM found DE genes: 499\n", "\n", "get_de_genes: total DE genes: 7329\n", "\n", "fitBulk: decomposing bulk\n", "\n", "chooseSigma: using initial Q_mat with sigma = 1\n", "\n", "Likelihood value: 10788253.6984735\n", "\n", "Sigma value: 0.84\n", "\n", "Likelihood value: 10489224.530827\n", "\n", "Sigma value: 0.69\n", "\n", "Likelihood value: 10212884.670123\n", "\n", "Sigma value: 0.61\n", "\n", "Likelihood value: 10073157.488242\n", "\n", "Sigma value: 0.53\n", "\n", "Likelihood value: 9944652.72074451\n", "\n", "Sigma value: 0.45\n", "\n", "Likelihood value: 9834772.22367206\n", "\n", "Sigma value: 0.37\n", "\n", "Likelihood value: 9754837.1924562\n", "\n", "Sigma value: 0.29\n", "\n", "Likelihood value: 9722265.94164512\n", "\n", "Sigma value: 0.29\n", "\n" ] } ], "source": [ "myRCTD <- create.RCTD(puck, reference, max_cores = 12)\n", "myRCTD <- run.RCTD(myRCTD, doublet_mode = \"full\")\n", "saveRDS(myRCTD, file.path(RESULT_PATH, \"myRCTD.rds\"))\n", "# myRCTD <- readRDS(file.path(RESULT_PATH, \"myRCTD.rds\"))" ] }, { "cell_type": "markdown", "id": "cac90bf5", "metadata": {}, "source": [ "## Cell-type-specific SVG identification using `MCube`" ] }, { "cell_type": "code", "execution_count": 6, "id": "77c8fe30", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [], "source": [ "myRCTD <- readRDS(file.path(RESULT_PATH, \"myRCTD.rds\"))\n", "spot_names <- colnames(myRCTD@spatialRNA@counts)\n", "weights_RCTD <- as.matrix(myRCTD@results$weights)\n", "proportions_RCTD <- weights_RCTD / rowSums(weights_RCTD)\n", "spot_effects_RCTD <- log(rowSums(weights_RCTD))\n", "names(spot_effects_RCTD) <- rownames(weights_RCTD)" ] }, { "cell_type": "code", "execution_count": 7, "id": "c65d686d", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The batch_id is not provided!\n", "All spots are assumed to be from the same batch and share the same gene platform effects.\n", "\n", "Select high-abundance cell types to analyze with proportion_threshold = 0.1 and celltype_threshold = 50.\n", "\n", "mcubeFilterCellTypes: Cell type(s) CAF, Endothelial, Goblet, Macrophage, Plasma, Tumor III, vSM pass the threshold.\n", "\n", "Cell type(s) Adipocyte, CD4 T cell, CD8 Cytotoxic T cell, Enteric Glial, Enterocyte, Epithelial, Fibroblast, Lymphatic Endothelial, Mast, Mature B, mRegDC, Myofibroblast, Neuroendocrine, Neutrophil, pDC, Pericytes, Proliferating Immune II, QC_Filtered, SM Stress Response, Smooth Muscle, Tuft, Tumor I, Tumor II, Tumor V, Unknown III (SM) has/have less than the minimum celltype_threshold = 50 with proportion_threshold = 0.1.\n", "To include the above cell type(s), please reduce celltype_threshold or proportion_threshold.\n", "\n", "Cell type(s) CAF, Endothelial, Goblet, Macrophage, Plasma, Tumor III, vSM will be analyzed.\n", "\n", "Filter out lowly-expressed genes with gene_threshold = 5e-05.\n", "\n", "mcubeFilterGenes: 5774 genes pass the threshold.\n", "\n", "The platform effects are not provided and need to be estimated from data!\n", "\n", "Select highly-expressed genes to analyze for each specific cell type with reference_threshold = 0.5.\n", "\n", "mcubeFilterGenesCellType: Select 2511 genes to analyze for CAF.\n", "\n", "mcubeFilterGenesCellType: Select 3439 genes to analyze for Endothelial.\n", "\n", "mcubeFilterGenesCellType: Select 3593 genes to analyze for Goblet.\n", "\n", "mcubeFilterGenesCellType: Select 2941 genes to analyze for Macrophage.\n", "\n", "mcubeFilterGenesCellType: Select 1197 genes to analyze for Plasma.\n", "\n", "mcubeFilterGenesCellType: Select 4222 genes to analyze for Tumor III.\n", "\n", "mcubeFilterGenesCellType: Select 2740 genes to analyze for vSM.\n", "\n", "Preprocessed data description: 4269 spots and 32 cell types in total. 4244 spots, 5774 genes, and 7 cell type(s) to analyze.\n", "\n", "Number of physical cores: 72.\n", "\n", "Number of workers: 70.\n", "\n", "Number of thread(s) on BLAS per worker: 1.\n", "\n", "mcubeKernel: length_scale is set as 0.100936633966367 for the Gaussian kernel.\n", "\n", "mcubeKernel: length_scale is set as 0.142745956695525 for the Gaussian kernel.\n", "\n", "mcubeKernel: length_scale is set as 0.100936633966367 for the Gaussian_transformed kernel.\n", "\n", "mcubeKernel: length_scale is set as 0.142745956695525 for the Gaussian_transformed kernel.\n", "\n", "Number of physical cores: 72.\n", "\n", "Number of workers: 70.\n", "\n", "Number of thread(s) on BLAS per worker: 1.\n", "\n" ] } ], "source": [ "mcube_object <- createMCube(\n", " counts = t(as.matrix(myRCTD@originalSpatialRNA@counts[, spot_names])),\n", " coordinates = as.matrix(myRCTD@spatialRNA@coords),\n", " proportions = weights_RCTD / rowSums(weights_RCTD),\n", " library_sizes = myRCTD@spatialRNA@nUMI,\n", " reference = t(myRCTD@cell_type_info$info[[1]]),\n", " used_for_deconvolution = rownames(myRCTD@spatialRNA@counts),\n", " spot_effects = spot_effects_RCTD,\n", " celltype_threshold = 50\n", ")\n", "mcube_object <- mcubeFitNull(\n", " mcube_object,\n", " num_workers = 70, num_threads = 1\n", ")\n", "mcube_object <- mcubeTest(\n", " mcube_object,\n", " num_workers = 70, num_threads = 1, shared_memory = TRUE\n", ")\n", "\n", "saveRDS(\n", " mcube_object,\n", " file = file.path(\n", " RESULT_PATH, \n", " paste0(\"mcube\", \".rds\")\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "a92ff116", "metadata": { "vscode": { "languageId": "r" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "4.3.2" } }, "nbformat": 4, "nbformat_minor": 5 }