Skip to content

Commit 5c553dc

Browse files
authored
Merge pull request #87 from stemangiola/fix-unnest-with-new-purrr
Fix unnest with new purrr
2 parents 8d99c96 + 4625ba2 commit 5c553dc

File tree

3 files changed

+134
-60
lines changed

3 files changed

+134
-60
lines changed

R/dplyr_methods.R

Lines changed: 86 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -62,29 +62,82 @@ bind_rows.SummarizedExperiment <- function(..., .id=NULL, add.cell.ids=NULL) {
6262
bind_cols_internal <- function(..., .id=NULL, column_belonging=NULL) {
6363
tts <- tts <- flatten_if(dots_values(...), is_spliced)
6464

65-
tts[[1]] |>
66-
as_tibble(skip_GRanges = TRUE) |>
67-
dplyr::bind_cols(tts[[2]], .id=.id) %>%
68-
when(
69-
70-
# If the column added are not sample-wise
71-
# or feature-wise, return tibble
72-
(colnames(tts[[2]]) %in% c(
73-
get_subset_columns(., !!s_(tts[[1]])$symbol),
74-
get_subset_columns(., !!f_(tts[[1]])$symbol)
75-
)
76-
) |> all() ~ update_SE_from_tibble(., tts[[1]],
77-
column_belonging=column_belonging),
7865

79-
# Return tiblle
80-
~ {
66+
# If I have column corresponding bind directly
67+
# Without tranformation to tibble
68+
if(!is.null(column_belonging)){
69+
70+
# For colData
71+
colData_additions = column_belonging[column_belonging==s_(tts[[1]] )$name] |> names()
72+
73+
data_frame_to_attach =
74+
tts[[1]] |>
75+
select(!!s_(tts[[1]] )$symbol) |>
76+
suppressMessages() |>
77+
bind_cols(tts[[2]] |> select(all_of(colData_additions))) |>
78+
distinct()
79+
80+
# Set row names
81+
data_frame_to_attach =
82+
data_frame_to_attach |>
83+
select(-1) |>
84+
DataFrame(row.names = data_frame_to_attach |> pull(1))
85+
86+
# Reorder
87+
data_frame_to_attach = data_frame_to_attach[match(rownames(data_frame_to_attach), colnames(tts[[1]])), , drop=FALSE]
88+
89+
# Attach
90+
colData(tts[[1]]) = cbind(colData(tts[[1]]), data_frame_to_attach)
91+
92+
# For rowData
93+
rowData_additions = column_belonging[column_belonging==f_(tts[[1]] )$name] |> names()
94+
95+
data_frame_to_attach =
96+
tts[[1]] |>
97+
select(!!f_(tts[[1]] )$symbol) |>
98+
suppressMessages() |>
99+
bind_cols(tts[[2]] |> select(all_of(rowData_additions))) |>
100+
distinct()
101+
102+
# Set row names
103+
data_frame_to_attach =
104+
data_frame_to_attach |>
105+
select(-1) |>
106+
DataFrame(row.names = data_frame_to_attach |> pull(1))
107+
108+
# Reorder
109+
data_frame_to_attach = data_frame_to_attach[match(rownames(data_frame_to_attach), rownames(tts[[1]])), , drop=FALSE]
110+
111+
# Attach
112+
rowData(tts[[1]]) = cbind(rowData(tts[[1]]), data_frame_to_attach)
113+
114+
tts[[1]]
115+
}
116+
117+
# If I DON'T have column corresponding go through tibble
118+
else
119+
tts[[1]] |>
120+
as_tibble(skip_GRanges = TRUE) |>
121+
dplyr::bind_cols(tts[[2]], .id=.id) %>%
122+
when(
123+
124+
# If the column added are not sample-wise or feature-wise return tibble
125+
(colnames(tts[[2]]) %in% c(
126+
get_subset_columns(., !!s_(tts[[1]])$symbol),
127+
get_subset_columns(., !!f_(tts[[1]])$symbol)
128+
)
129+
) |> all() ~ update_SE_from_tibble(., tts[[1]], column_belonging = column_belonging),
130+
131+
# Return tiblle
132+
~ {
81133
warning("tidySummarizedExperiment says:",
82-
" The new columns do not include pure sample-wise",
83-
" or feature-wise. A data frame is returned for",
84-
" independent data analysis.")
134+
" The new columns do not include pure sample-wise",
135+
" or feature-wise. A data frame is returned for",
136+
" independent data analysis.")
85137
(.)
86-
}
87-
)
138+
}
139+
)
140+
88141
}
89142

90143
bind_cols_ <- function(..., .id=NULL) { bind_cols_internal(..., .id=NULL) }
@@ -630,19 +683,28 @@ select.SummarizedExperiment <- function(.data, ...) {
630683
data.frame(row.names=pull(., !!f_(.data)$symbol)) |>
631684
select(-!!f_(.data)$symbol) |>
632685
DataFrame()
633-
686+
687+
# If SE does not have rownames,
688+
# I have to take them our of here, otherwise count integration,
689+
# which is a matrix and behaved differently from DataFrame fails
690+
if(rownames(.data) |> is.null()) rownames(row_data_DF) = NULL
691+
634692
col_data_tibble <-
635693
colData(.data) |>
636694
as_tibble(rownames = s_(.data)$name)
637695

638696
col_data_DF <-
639697
col_data_tibble |>
640-
select(one_of(columns_query), !!s_(.data)$symbol) |>
641-
suppressWarnings() %>%
642-
data.frame(row.names=pull(., !!s_(.data)$symbol)) |>
698+
select(any_of(columns_query), !!s_(.data)$symbol) |>
699+
data.frame(row.names=pull(col_data_tibble, !!s_(.data)$symbol)) |>
643700
select(-!!s_(.data)$symbol) |>
644701
DataFrame()
645702

703+
# If SE does not have rownames,
704+
# I have to take them our of here, otherwise count integration,
705+
# which is a matrix and behaved differently from DataFrame fails
706+
if(colnames(.data) |> is.null()) rownames(col_data_DF) = NULL
707+
646708
count_data <-
647709
assays(.data)@listData %>%
648710
.[names(assays(.data)@listData) %in% columns_query]

R/tidyr_methods.R

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ unnest.tidySummarizedExperiment_nested <- function(data, cols, ...,
4141
unnest_summarized_experiment <- function(data, cols, ...,
4242
keep_empty=FALSE, ptype=NULL, names_sep=NULL,
4343
names_repair="check_unique", .drop, .id, .sep, .preserve) {
44-
4544
. <- NULL
4645

4746
# Need this otherwise crashes map
@@ -71,6 +70,7 @@ unnest_summarized_experiment <- function(data, cols, ...,
7170
)
7271
}
7372

73+
7474
# If both nested by transcript and sample
7575
if (s_(se)$name %in% colnames(data) &
7676
f_(se)$name %in% colnames(data) ) {
@@ -88,11 +88,11 @@ unnest_summarized_experiment <- function(data, cols, ...,
8888

8989

9090
# Mark if columns belong to feature or sample
91-
my_unnested_tibble <-
92-
mutate(data, !!cols := map(!!cols, ~ as_tibble(.x))) %>%
93-
select(-suppressWarnings(one_of(s_(my_se)$name,
94-
f_(my_se)$name))) %>%
95-
unnest(!!cols)
91+
my_unnested_tibble =
92+
data |>
93+
mutate(!!cols := map(!!cols, ~ as_tibble(.x))) |>
94+
select(-any_of(c(s_(my_se)$name, f_(my_se)$name))) |>
95+
unnest(!!cols)
9696

9797
# Get which column is relative to feature or sample
9898
sample_columns <- my_unnested_tibble %>%
@@ -130,14 +130,14 @@ unnest_summarized_experiment <- function(data, cols, ...,
130130

131131
# Attach back the columns used for nesting
132132
.data_ %>%
133-
select(-!!cols, - any_of(c(s_(my_se)$name, f_(my_se)$name))) %>%
133+
select(-!!cols, -any_of(c(s_(my_se)$name, f_(my_se)$name))) %>%
134134
slice(rep(as.integer(.y), ncol(.x) * nrow(.x))),
135135

136136
# Column sample-wise or feature-wise
137137
column_belonging =
138138
source_column[
139139
.data_ %>%
140-
select(-!!cols, - any_of(c(s_(my_se)$name, f_(my_se)$name))) %>%
140+
select(-!!cols, -any_of(c(s_(my_se)$name, f_(my_se)$name))) %>%
141141
colnames()
142142
]
143143
)
@@ -219,6 +219,14 @@ nest.SummarizedExperiment <- function(.data, ..., .names_sep=NULL) {
219219
.data <- ping_old_special_column_into_metadata(.data)
220220
}
221221

222+
# if the data does not have raw, names or clumn names
223+
# we have to add them, otherwise the nesting and a nesting will get confused with
224+
# the link between Sample wise, columns, and Sample IDs
225+
if(rownames(.data) |> is.null() | colnames(.data) |> is.null() )
226+
warning("tidySummarizedExperiment says: the nesting and unnesting operations require row names and column names to avoid side-effects. Therefore, doors will be added as \"1\", \"2\", \"3\".")
227+
if(rownames(.data) |> is.null() ) rownames(.data) = .data |> nrow() |> seq_len() |> as.character()
228+
if(colnames(.data) |> is.null() ) colnames(.data) = .data |> ncol() |> seq_len() |> as.character()
229+
222230
my_data__ <- .data
223231

224232
# Names
@@ -256,12 +264,6 @@ nest.SummarizedExperiment <- function(.data, ..., .names_sep=NULL) {
256264
" We are working for optimising a generalised solution of nest().")
257265
}
258266

259-
# my_data__nested <-
260-
# my_data__ %>%
261-
# # This is needed otherwise nest goes into loop and fails
262-
# as_tibble() %>%
263-
# tidyr::nest(...)
264-
265267
# If I nest only for .feature -> THIS WORKS ONLY WITH THE CHECK ABOVE
266268
if (feature_name %in% colnames(my_test_nest)) {
267269
return(
@@ -280,7 +282,7 @@ nest.SummarizedExperiment <- function(.data, ..., .names_sep=NULL) {
280282

281283

282284
my_data__ %>%
283-
select(!!sample_symbol, !!feature_symbol, my_nesting_column) |>
285+
select(!!sample_symbol, !!feature_symbol, all_of(my_nesting_column)) |>
284286
as_tibble() %>%
285287
tidyr::nest(...) |>
286288

@@ -311,15 +313,18 @@ nest.SummarizedExperiment <- function(.data, ..., .names_sep=NULL) {
311313
# Here I am filtering because if I have 0 features this leads to failure
312314
else my_transcripts= ..1 |> filter(!is.na(!!feature_symbol)) |> pull(!!feature_symbol)
313315

314-
###
315-
316+
# if the summarised experiment does not have feature ID or Sample ID,
317+
# convert back those to integers
318+
# Note to self: this is convoluted, because if feature ID and sample ID
319+
# were to stay integers all along, these will not be needed
320+
if(rownames(my_data__) |> is.null()) my_transcripts = as.integer(my_transcripts)
321+
if(colnames(my_data__) |> is.null()) my_samples = as.integer(my_samples)
322+
316323
my_data__[unique(my_transcripts),unique(my_samples)] |>
317-
select(-one_of(
324+
select(-any_of(
318325
my_nesting_column |>
319326
setdiff(c(sample_name, feature_name))
320-
)) |>
321-
suppressWarnings()
322-
327+
))
323328

324329
}
325330
)

R/utilities.R

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,8 @@ get_count_datasets <- function(se) {
749749
se <- check_se_dimnames(se)
750750

751751
# Join assays
752-
map2(
752+
list_assays =
753+
map2(
753754
assays(se, withDimnames = FALSE) %>% as.list(),
754755
names(assays(se)),
755756
~ {
@@ -807,21 +808,27 @@ get_count_datasets <- function(se) {
807808
f_(se)$name %in% colnames(.x) %>% not ~ mutate(.x, !!f_(se)$symbol := as.character(NA)),
808809
s_(se)$name %in% colnames(.x) %>% not ~ mutate(.x, !!s_(se)$symbol := as.character(NA)),
809810
~ .x
810-
)) |>
811-
812-
when(
813-
length(.)>0 ~
814-
815-
reduce(., full_join, by = c(f_(se)$name, s_(se)$name)),
816-
# reduce(., left_join, by = c(f_(se)$name, s_(se)$name)),
817-
# bind_cols(., .name_repair = c("minimal")) %>% .[!duplicated(colnames(.))],
818-
~ expand.grid(
819-
rownames(se), colnames(se)
820-
) %>%
821-
setNames(c(f_(se)$name, s_(se)$name)) %>%
822-
tibble::as_tibble()
823-
)
811+
))
812+
813+
# If assays is non empty
814+
if(list_assays |> length() > 0)
815+
list_assays |>
816+
reduce(full_join, by = c(f_(se)$name, s_(se)$name))
824817

818+
# If assays is empty
819+
else {
820+
821+
# If I don't have row column names
822+
if(se |> rownames() |> is.null()) rn = nrow(se) |> seq_len() |> as.character()
823+
else rn = rownames(se)
824+
if(se |> colnames() |> is.null()) cn = ncol(se) |> seq_len() |> as.character()
825+
else cn = colnames(se)
826+
827+
expand.grid( rn, cn ) |>
828+
setNames(c(f_(se)$name, s_(se)$name)) |>
829+
tibble::as_tibble()
830+
}
831+
825832

826833
}
827834

0 commit comments

Comments
 (0)