Applying `SPARK` to the ST datasets segmented by different methods

[1]:

set.seed(20240709)

library(SPARK)
library(ggplot2)
library(tidyverse)

max_cores <- 36

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.4     ✔ tibble    3.2.1
✔ purrr     1.0.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

[2]:

DATA_PATH <- "/import/home/share/zw/pql/data/breast_cancer"
RESULT_PATH <- "/import/home/share/zw/pql/results/breast_cancer"

if (!dir.exists(RESULT_PATH)) {
    dir.create(RESULT_PATH, recursive = TRUE)
}

Cells segmented by the `10x` method

[3]:

# seg_method <- "Cell_10X"

# counts <- as.data.frame(readr::read_csv(
#     file.path(DATA_PATH, "rawdata", seg_method, "counts.csv")
# ))
# rownames(counts) <- counts[, 1]
# counts[, 1] <- NULL
# # head(counts)

# coordinates <- as.data.frame(readr::read_csv(
#     file.path(DATA_PATH, "rawdata", seg_method, "coordinates.csv")
# ))
# rownames(coordinates) <- coordinates[, 1]
# coordinates[, 1] <- NULL
# # head(coordinates)

# scvi_labels <- as.data.frame(readr::read_csv(
#     file.path(DATA_PATH, "rawdata", seg_method, "scvi_labels.csv")
# ))
# rownames(scvi_labels) <- scvi_labels[, 1]
# scvi_labels <- scvi_labels[, 2]
# # head(scvi_labels)
# all_celltypes <- unique(scvi_labels)

[4]:

seg_method <- "Cell_10X"
myRCTD <- readRDS(file.path(RESULT_PATH, seg_method, "myRCTD.rds"))

spot_names <- colnames(myRCTD@spatialRNA@counts)
counts <- t(as.matrix(myRCTD@originalSpatialRNA@counts[, spot_names]))
# head(counts)
coordinates <- myRCTD@spatialRNA@coords
# head(coordinates)

celltypes_RCTD <- myRCTD@results$results_df$first_type
names(celltypes_RCTD) <- spot_names
# head(celltypes_RCTD)
all_celltypes <- levels(celltypes_RCTD)

[5]:

pvalues_10x <- data.frame()
for(celltype_k in all_celltypes) {
    celltype_idx <- which(celltypes_RCTD == celltype_k)

    counts_k <- counts[celltype_idx, ]
    coordinates_k <- coordinates[celltype_idx, ]

    spark_obj <- CreateSPARKObject(
        counts = t(counts_k), location = coordinates_k
    )
    spark_obj@lib_size <- apply(spark_obj@counts, 2, sum)

    spark_obj <- spark.vc(
        spark_obj,
        covariates = NULL, lib_size = spark_obj@lib_size,
        num_core = max_cores, verbose = FALSE
    )
    spark_obj <- spark.test(spark_obj, check_positive = TRUE, verbose = FALSE)

    pvalues_10x <- rbind(
        pvalues_10x,
        data.frame(
            celltype = celltype_k,
            gene = rownames(spark_obj@res_mtest),
            pvalue = spark_obj@res_mtest[, "combined_pvalue"]
        )
    )
}

saveRDS(pvalues_10x, file.path(RESULT_PATH, seg_method, "pvalues_SPARK.rds"))
# pvalues_10X <- readRDS(file.path(RESULT_PATH, seg_method, "pvalues_SPARK.rds"))

## ===== SPARK INPUT INFORMATION ====
## number of total samples:  5405
## number of total features:  194
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1291
## number of total features:  177
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  523
## number of total features:  181
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1515
## number of total features:  148
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  985
## number of total features:  138
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1442
## number of total features:  123
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  862
## number of total features:  123
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  2062
## number of total features:  124
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  693
## number of total features:  160
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  959
## number of total features:  183
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  288
## number of total features:  164
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1275
## number of total features:  124
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  487
## number of total features:  166
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  66
## number of total features:  171
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  24
## number of total features:  157
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  40
## number of total features:  160
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  395
## number of total features:  153
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...

Cells segmented by `UCS`

[6]:

# seg_method <- "UCS_10X"

# counts <- as.data.frame(readr::read_csv(
#     file.path(DATA_PATH, "rawdata", seg_method, "counts.csv")
# ))
# rownames(counts) <- counts[, 1]
# counts[, 1] <- NULL
# # head(counts)

# coordinates <- as.data.frame(readr::read_csv(
#     file.path(DATA_PATH, "rawdata", seg_method, "coordinates.csv")
# ))
# rownames(coordinates) <- coordinates[, 1]
# coordinates[, 1] <- NULL
# # head(coordinates)

# scvi_labels <- as.data.frame(readr::read_csv(
#     file.path(DATA_PATH, "rawdata", seg_method, "scvi_labels.csv")
# ))
# rownames(scvi_labels) <- scvi_labels[, 1]
# scvi_labels <- scvi_labels[, 2]
# # head(scvi_labels)
# all_celltypes <- unique(scvi_labels)

[7]:

seg_method <- "UCS_10X"
myRCTD <- readRDS(file.path(RESULT_PATH, seg_method, "myRCTD.rds"))

spot_names <- colnames(myRCTD@spatialRNA@counts)
counts <- t(as.matrix(myRCTD@originalSpatialRNA@counts[, spot_names]))
# head(counts)
coordinates <- myRCTD@spatialRNA@coords
# head(coordinates)

celltypes_RCTD <- myRCTD@results$results_df$first_type
names(celltypes_RCTD) <- spot_names
# head(celltypes_RCTD)
all_celltypes <- levels(celltypes_RCTD)

[8]:

pvalues_UCS <- data.frame()
for(celltype_k in all_celltypes) {
    celltype_idx <- which(celltypes_RCTD == celltype_k)

    counts_k <- counts[celltype_idx, ]
    coordinates_k <- coordinates[celltype_idx, ]

    spark_obj <- CreateSPARKObject(
        counts = t(counts_k), location = coordinates_k
    )
    spark_obj@lib_size <- apply(spark_obj@counts, 2, sum)

    spark_obj <- spark.vc(
        spark_obj,
        covariates = NULL, lib_size = spark_obj@lib_size,
        num_core = max_cores, verbose = FALSE
    )
    spark_obj <- spark.test(spark_obj, check_positive = TRUE, verbose = FALSE)

    pvalues_UCS <- rbind(
        pvalues_UCS,
        data.frame(
            celltype = celltype_k,
            gene = rownames(spark_obj@res_mtest),
            pvalue = spark_obj@res_mtest[, "combined_pvalue"]
        )
    )
}

saveRDS(pvalues_UCS, file.path(RESULT_PATH, seg_method, "pvalues_SPARK.rds"))
# pvalues_UCS <- readRDS(file.path(RESULT_PATH, seg_method, "pvalues_SPARK.rds"))

## ===== SPARK INPUT INFORMATION ====
## number of total samples:  3601
## number of total features:  169
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1141
## number of total features:  163
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  564
## number of total features:  170
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1669
## number of total features:  145
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1184
## number of total features:  126
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1498
## number of total features:  123
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  870
## number of total features:  120
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  2096
## number of total features:  119
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  773
## number of total features:  141
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  953
## number of total features:  164
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  378
## number of total features:  126
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  1251
## number of total features:  123
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  656
## number of total features:  144
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  71
## number of total features:  147
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  27
## number of total features:  166
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  48
## number of total features:  143
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...
## ===== SPARK INPUT INFORMATION ====
## number of total samples:  421
## number of total features:  150
## number of adjusted covariates:  0
# fitting count-based spatial model under the null hypothesis ...
## testing Gaussian kernel: 1...
## testing Periodic kernel: 1...
## testing Gaussian kernel: 2...
## testing Periodic kernel: 2...
## testing Gaussian kernel: 3...
## testing Periodic kernel: 3...
## testing Gaussian kernel: 4...
## testing Periodic kernel: 4...
## testing Gaussian kernel: 5...
## testing Periodic kernel: 5...

Comparsion of the cell-type-specific SVG identification results

[9]:

# pvalues_10x <- readRDS(
#     file = file.path(
#         RESULT_PATH, "Cell_10X", "pvalues_SPARK.rds"
#     )
# )
# pvalues_UCS <- readRDS(
#     file = file.path(
#         RESULT_PATH, "UCS_10X", "pvalues_SPARK.rds"
#     )
# )

[10]:

log10p_lim <- 17 # 5.55e-17

pvalues_10x$minus_log10_pvalue <- -log10(pvalues_10x$pvalue)
pvalues_10x$minus_log10_pvalue[pvalues_10x$minus_log10_pvalue > log10p_lim] <- log10p_lim

pvalues_UCS$minus_log10_pvalue <- -log10(pvalues_UCS$pvalue)
pvalues_UCS$minus_log10_pvalue[pvalues_UCS$minus_log10_pvalue > log10p_lim] <- log10p_lim

pvalues_all <- merge(
    pvalues_10x,
    pvalues_UCS,
    by = c("celltype", "gene"),
    suffixes = c("_10x", "_UCS")
)
dim(pvalues_all)
head(pvalues_all)

2387
6

A data.frame: 6 × 6
	celltype	gene	pvalue_10x	minus_log10_pvalue_10x	pvalue_UCS	minus_log10_pvalue_UCS
	<chr>	<chr>	<dbl>	<dbl>	<dbl>	<dbl>
1	B Cells	ACTA2	3.488987e-01	0.4573006	0.005374671	2.26964811
2	B Cells	ACTG2	9.802785e-05	4.0086505	0.003115137	2.50652282
3	B Cells	ADAM9	1.921063e-01	0.7164585	0.961432818	0.01708106
4	B Cells	ADGRE5	1.607047e-01	0.7939714	0.156413878	0.80572472
5	B Cells	AIF1	6.393287e-01	0.1942758	0.101539719	0.99336404
6	B Cells	ANKRD28	7.604660e-01	0.1189202	0.087858510	1.05621617

[ ]:

correlations <- pvalues_all %>%
    group_by(celltype) %>%
    summarize(
        correlation = cor(minus_log10_pvalue_10x, minus_log10_pvalue_UCS, method = "pearson")
    )

[ ]:

major_celltypes <- c(
    "Invasive Tumor", "Stromal", "DCIS 1", "DCIS 2",
    "Prolif Invasive Tumor", "Macrophages 1"
)
pvalues_all_major_celltype <- pvalues_all[pvalues_all$celltype %in% major_celltypes, ]
pvalues_all_major_celltype$celltype <- factor(
    pvalues_all_major_celltype$celltype,
    levels = sort(major_celltypes)
)
correlations_major_celltype <- correlations[correlations$celltype %in% major_celltypes, ]

p <- ggplot(
    # pvalues_all,
    pvalues_all_major_celltype,
    aes(x = minus_log10_pvalue_10x, y = minus_log10_pvalue_UCS)
) + geom_point(aes(color = celltype), size = 5, alpha = 0.8) +
    geom_abline(
        intercept = 0, slope = 1,
        linetype = "longdash", linewidth = 1.5
    ) + geom_text(
        data = correlations_major_celltype,
        mapping = aes(
            x = 4, y = 15,
            label = paste("r =", round(correlation, 2))
        ),
        size = 5
    ) +
    scale_x_continuous(limits = c(0, log10p_lim), oob = scales::oob_squish) +
    scale_y_continuous(limits = c(0, log10p_lim), oob = scales::oob_squish) +
    coord_fixed(ratio = 1) +
    facet_wrap(~celltype, nrow = 1) +
    theme_classic() +
    labs(
        x = "10x", y = "UCS",
        title = expression(paste("-log"[10], plain(P), " from SPARK"))
    )  +
    guides(color = guide_legend(nrow = 1)) +
    theme(
        plot.title = element_text(size = 20),
        axis.title = element_text(size = 20),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 20),
        strip.text = element_blank(),
        legend.title = element_blank(),
        legend.text = element_text(size = 18),
        legend.position = "bottom"
    )
ggsave(
    filename = file.path(
        RESULT_PATH,
        paste0("SPARK_seg_comparison_oob_squish_", log10p_lim, ".pdf")),
    plot = p, width = 12, height = 4
)
p

../../_images/analysis_breast_cancer_breast_cancer_seg_method_comparison_SPARK_15_0.png

[ ]:

major_celltypes <- c("Invasive Tumor", "Stromal", "DCIS 1", "DCIS 2", "Prolif Invasive Tumor")
p <- ggplot(
    pvalues_all[pvalues_all$celltype %in% major_celltypes, ],
    aes(x = -log10(pvalue_10x), y = -log10(pvalue_UCS))
) +
    geom_point(aes(color = celltype), size = 5, alpha = 0.5) +
    geom_abline(
        intercept = 0, slope = 1,
        linetype = "longdash", linewidth = 2
    ) +
    coord_fixed(ratio = 1) +
    theme_classic() +
    labs(
        x = "10x", y = "UCS",
        title = expression(paste("-log"[10], plain(P), " comparison"))
    ) +
    theme(
        plot.title = element_text(size = 20),
        axis.title = element_text(size = 20),
        axis.text.x = element_text(size = 16),
        axis.text.y = element_text(size = 16),
        legend.title = element_blank(),
        legend.text = element_text(size = 16)
    )
ggsave(
    filename = file.path(RESULT_PATH, "SPARK_seg_comparison_together.pdf"),
    plot = p, width = 8, height = 8
)
p

../../_images/analysis_breast_cancer_breast_cancer_seg_method_comparison_SPARK_16_0.png

[ ]: