Skip to content

Commit ae2e698

Browse files
committed
Complete dictionary function and adapt function to replicate clusters in independent datasets
1 parent fb8c2b4 commit ae2e698

10 files changed

+201
-308
lines changed

R/cell_deconvolution.R

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1978,6 +1978,16 @@ replicate_deconvolution_subgroups = function(deconv_res, deconvolution_test){
19781978
return(deconvolution_test)
19791979
}
19801980

1981+
## Extract the deconv feature without the cluster type
1982+
features_with_clusters <- colnames(deconv_res[["Deconvolution matrix"]])
1983+
1984+
# Extract the base name and cluster suffix from the original names
1985+
base_names <- gsub("_Cluster_\\d+$", "", features_with_clusters)
1986+
cluster_suffixes <- sub(".*(_Cluster_\\d+$)", "\\1", features_with_clusters)
1987+
1988+
# Create df to map the features with their corresponding clusters
1989+
map <- data.frame(base = base_names, suffix = cluster_suffixes, stringsAsFactors = FALSE)
1990+
19811991
# Create same groups composition
19821992
for (m in 1:iterations) {
19831993
base_groups = list()
@@ -1999,6 +2009,9 @@ replicate_deconvolution_subgroups = function(deconv_res, deconvolution_test){
19992009

20002010
}
20012011

2012+
## Paste the corresponding clusters to the deconvolution features
2013+
colnames(deconvolution_test) <- paste0(colnames(deconvolution_test), map$suffix[match(colnames(deconvolution_test), map$base)])
2014+
20022015
deconvolution_test = deconvolution_test[,colnames(deconvolution_test)%in%colnames(deconv_res[["Deconvolution matrix"]])]
20032016

20042017
return(data.frame(deconvolution_test))
@@ -2549,22 +2562,23 @@ prepare_multideconv_folds <- function(data, folds, cells_extra = NULL) {
25492562
#'
25502563
#' @description
25512564
#' The `deconvolution_dictionary()` function integrates cell-type–specific
2552-
#' deconvolution features with pathway activity information (e.g., PROGENy pathways).
2553-
#' It identifies globally consistent pathway clusters and reannotates each
2554-
#' deconvolution feature according to its association with those pathway clusters.
2565+
#' deconvolution features with a pathway activity matrix. It identifies globally
2566+
#' consistent pathway clusters and reannotates each deconvolution feature
2567+
#' according to its association with those pathway clusters.
25552568
#'
25562569
#' The function first computes a global correlation matrix between the full
25572570
#' deconvolution matrix and the provided pathway activity matrix, performs
2558-
#' hierarchical clustering on pathways, and automatically determines the optimal
2559-
#' number of pathway clusters using the silhouette method. Each deconvolution
2560-
#' feature within each cell type is then correlated with pathways, scored against
2561-
#' the global clusters, and classified into the cluster with which it is most
2562-
#' strongly associated.
2571+
#' hierarchical clustering on the pathways, and automatically determines the
2572+
#' optimal number of pathway clusters using the silhouette method. Each
2573+
#' deconvolution feature within each cell type is then correlated with the
2574+
#' pathways, scored against the global clusters, and classified into the cluster
2575+
#' with which it is most strongly associated.
25632576
#'
25642577
#' @param deconv_subgroups Output of `compute.deconvolution.analysis()`
25652578
#'
2566-
#' @param progeny A matrix or data frame of pathway activities (e.g., PROGENy output),
2567-
#' with the same rows (samples) as in the deconvolution matrices.
2579+
#' @param pathway_matrix A numeric matrix or data frame of pathway activities
2580+
#' (rows = samples, columns = pathways), with the same row names as the
2581+
#' deconvolution matrices. Can be any pathway or feature activity matrix.
25682582
#'
25692583
#' @return
25702584
#' An updated version of \code{deconv_subgroups} containing:
@@ -2600,7 +2614,7 @@ prepare_multideconv_folds <- function(data, folds, cells_extra = NULL) {
26002614
#' @importFrom factoextra fviz_nbclust hcut
26012615
#'
26022616
#' @export
2603-
deconvolution_dictionary = function(deconv_subgroups, progeny){
2617+
deconvolution_dictionary = function(deconv_subgroups, pathway_matrix){
26042618

26052619
cell_subgroups = deconv_subgroups[["Deconvolution subgroups per cell types"]]
26062620
cell_clusters = list()
@@ -2609,7 +2623,7 @@ deconvolution_dictionary = function(deconv_subgroups, progeny){
26092623
# Compute global module correlation using the full deconvolution matrix
26102624
global_x = CellTFusion::compute.modules.relationship(
26112625
deconv_subgroups[["Deconvolution matrix"]],
2612-
progeny,
2626+
pathway_matrix,
26132627
return = TRUE,
26142628
plot = FALSE
26152629
)
@@ -2625,13 +2639,29 @@ deconvolution_dictionary = function(deconv_subgroups, progeny){
26252639
clusters_global <- split(names(clusters_global), clusters_global)
26262640
names(clusters_global) <- paste0("Cluster_", seq_along(clusters_global))
26272641

2642+
# Calculate mean correlation for each cluster dynamically
2643+
corr_matrix_global <- data.frame(global_x[[1]])
2644+
for (k in seq_along(clusters_global)) {
2645+
cluster_name <- names(clusters_global)[k]
2646+
corr_matrix_global[[paste0(cluster_name, "_Score")]] <- rowMeans(
2647+
corr_matrix_global[, clusters_global[[k]]], na.rm = TRUE
2648+
)
2649+
}
2650+
2651+
# Classify features based on the highest mean correlation across all clusters
2652+
cluster_scores <- corr_matrix_global[, grepl("_Score$", colnames(corr_matrix_global))]
2653+
corr_matrix_global$Classification <- apply(cluster_scores, 1, function(row) {
2654+
cluster_name <- names(which.max(row))
2655+
gsub("_Score", "", cluster_name)
2656+
})
2657+
26282658
#Compute correlation within each cell type to see the cluster classification in its own cell-type context (avoid domination of strong correlations from abundant or variable cell types)
2629-
for (cell in seq_along(cell_subgroups)) {
2659+
for (cell in names(cell_subgroups)) {
26302660
if (ncol(cell_subgroups[[cell]]) >= 2) {
26312661
rownames(cell_subgroups[[cell]]) <- rownames(deconv_subgroups[["Deconvolution matrix"]])
26322662

26332663
#Compute module correlation between cell deconvolution features and PROGENy pathways
2634-
x <- CellTFusion::compute.modules.relationship(cell_subgroups[[cell]], progeny, return = TRUE, plot = FALSE)
2664+
x <- CellTFusion::compute.modules.relationship(cell_subgroups[[cell]], pathway_matrix, return = TRUE, plot = FALSE)
26352665
corr_matrix <- data.frame(x[[1]])
26362666

26372667
#Calculate mean correlation for each cluster dynamically
@@ -2653,7 +2683,18 @@ deconvolution_dictionary = function(deconv_subgroups, progeny){
26532683
deconv_names <- paste0(rownames(corr_matrix), "_", corr_matrix$Classification)
26542684
colnames(cell_subgroups[[cell]]) <- deconv_names
26552685

2656-
i <- i + 1
2686+
}else if (ncol(cell_subgroups[[cell]]) > 0) { # Subgroup has <2 features → assign cluster based on global classification
2687+
feature_names <- colnames(cell_subgroups[[cell]])
2688+
# Use global classification computed earlier
2689+
global_feature_class <- corr_matrix_global$Classification
2690+
names(global_feature_class) <- rownames(corr_matrix_global)
2691+
# Assign the cluster to the feature (if feature exists in global_x)
2692+
feature_class <- ifelse(feature_names %in% names(global_feature_class),
2693+
global_feature_class[feature_names],
2694+
"Unclassified")
2695+
2696+
# Rename the feature with the assigned cluster
2697+
colnames(cell_subgroups[[cell]]) <- paste0(feature_names, "_", feature_class)
26572698
}
26582699
}
26592700

Results/Cell_subgroups_Tutorial.csv

Lines changed: 0 additions & 117 deletions
This file was deleted.

0 commit comments

Comments
 (0)