Skip to content

Commit d0bc3f2

Browse files
authored
Merge pull request #77 from stemangiola/unnest_SE
Unnest se
2 parents c798f5b + 7ac1906 commit d0bc3f2

File tree

5 files changed

+315
-85
lines changed

5 files changed

+315
-85
lines changed

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ export(tbl_format_header)
6767
export(tidy)
6868
export(unite)
6969
export(unnest)
70+
export(unnest_summarized_experiment)
7071
importFrom(S4Vectors,"metadata<-")
7172
importFrom(S4Vectors,DataFrame)
7273
importFrom(S4Vectors,SimpleList)
@@ -78,6 +79,7 @@ importFrom(SummarizedExperiment,"rowData<-")
7879
importFrom(SummarizedExperiment,assays)
7980
importFrom(SummarizedExperiment,cbind)
8081
importFrom(SummarizedExperiment,colData)
82+
importFrom(SummarizedExperiment,rbind)
8183
importFrom(SummarizedExperiment,rowData)
8284
importFrom(SummarizedExperiment,rowRanges)
8385
importFrom(cli,cat_line)
@@ -127,6 +129,7 @@ importFrom(purrr,map)
127129
importFrom(purrr,map2)
128130
importFrom(purrr,map_chr)
129131
importFrom(purrr,map_int)
132+
importFrom(purrr,map_lgl)
130133
importFrom(purrr,pmap)
131134
importFrom(purrr,reduce)
132135
importFrom(purrr,when)

R/tidyr_methods.R

Lines changed: 178 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -60,101 +60,194 @@ NULL
6060
unnest.tidySummarizedExperiment_nested <-
6161
function(data, cols, ..., keep_empty=FALSE, ptype=NULL, names_sep=NULL, names_repair="check_unique", .drop, .id, .sep, .preserve) {
6262

63-
64-
# Need this otherwise crashes map
65-
.data_ <- data
66-
6763
cols <- enquo(cols)
6864

69-
# If the column is not SE do normal stuff
70-
if(
71-
data %>%
72-
pull(!!cols) %>%
73-
.[[1]] %>%
74-
class() %>%
75-
as.character() %in%
76-
c("SummarizedExperiment", "RangedSummarizedExperiment") %>%
77-
all() %>%
78-
not()
65+
unnest_summarized_experiment(data, !!cols, ..., keep_empty=keep_empty, ptype=ptype,
66+
names_sep=names_sep, names_repair=names_repair)
67+
68+
69+
}
70+
71+
72+
#' unnest_summarized_experiment
73+
#'
74+
#' @importFrom tidyr unnest
75+
#' @importFrom purrr when
76+
#' @importFrom rlang quo_name
77+
#' @importFrom purrr imap
78+
#' @importFrom purrr map_lgl
79+
#'
80+
#' @param data A tbl. (See tidyr)
81+
#' @param cols <[`tidy-select`][tidyr_tidy_select]> Columns to unnest.
82+
#' If you `unnest()` multiple columns, parallel entries must be of
83+
#' compatible sizes, i.e. they're either equal or length 1 (following the
84+
#' standard tidyverse recycling rules).
85+
#' @param ... <[`tidy-select`][tidyr_tidy_select]> Columns to nest, specified
86+
#' using name-variable pairs of the form `new_col=c(col1, col2, col3)`.
87+
#' The right hand side can be any valid tidy select expression.
88+
#'
89+
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("deprecated")}:
90+
#' previously you could write `df %>% nest(x, y, z)` and `df %>%
91+
#' unnest(x, y, z)`. Convert to `df %>% nest(data=c(x, y, z))`.
92+
#' and `df %>% unnest(c(x, y, z))`.
93+
#'
94+
#' If you previously created new variable in `unnest()` you'll now need to
95+
#' do it explicitly with `mutate()`. Convert `df %>% unnest(y=fun(x, y, z))`
96+
#' to `df %>% mutate(y=fun(x, y, z)) %>% unnest(y)`.
97+
#' @param names_sep If `NULL`, the default, the names will be left
98+
#' as is. In `nest()`, inner names will come from the former outer names;
99+
#' in `unnest()`, the new outer names will come from the inner names.
100+
#'
101+
#' If a string, the inner and outer names will be used together. In `nest()`,
102+
#' the names of the new outer columns will be formed by pasting together the
103+
#' outer and the inner column names, separated by `names_sep`. In `unnest()`,
104+
#' the new inner names will have the outer names (+ `names_sep`) automatically
105+
#' stripped. This makes `names_sep` roughly symmetric between nesting and unnesting.
106+
#' @param keep_empty See tidyr::unnest
107+
#' @param names_repair See tidyr::unnest
108+
#' @param ptype See tidyr::unnest
109+
#' @param .drop See tidyr::unnest
110+
#' @param .id tidyr::unnest
111+
#' @param .sep tidyr::unnest
112+
#' @param .preserve See tidyr::unnest
113+
#'
114+
#' @return A tidySingleCellExperiment objector a tibble depending on input
115+
#'
116+
#' @examples
117+
#'
118+
#' tidySummarizedExperiment::pasilla |>
119+
#' nest(data=-condition) |>
120+
#' unnest_summarized_experiment(data)
121+
#'
122+
#' @rdname unnest-methods
123+
#' @name unnest_summarized_experiment
124+
#'
125+
#'
126+
#'
127+
#' @export
128+
unnest_summarized_experiment <- function(data, cols, ..., keep_empty=FALSE, ptype=NULL,
129+
names_sep=NULL, names_repair="check_unique", .drop, .id, .sep, .preserve) {
130+
# Need this otherwise crashes map
131+
.data_ <- data
132+
133+
cols <- enquo(cols)
134+
135+
# If the column is not SE do normal stuff
136+
if(
137+
data %>%
138+
pull(!!cols) %>%
139+
.[[1]] %>%
140+
class() %>%
141+
as.character() %in%
142+
c("SummarizedExperiment", "RangedSummarizedExperiment") %>%
143+
all() %>%
144+
not()
145+
)
146+
return(
147+
data %>%
148+
drop_class("tidySummarizedExperiment_nested") %>%
149+
tidyr::unnest(!!cols, ..., keep_empty=keep_empty, ptype=ptype, names_sep=names_sep, names_repair=names_repair) %>%
150+
add_class("tidySummarizedExperiment_nested")
79151
)
80-
return(
81-
data %>%
82-
drop_class("tidySummarizedExperiment_nested") %>%
83-
tidyr::unnest(!!cols, ..., keep_empty=keep_empty, ptype=ptype, names_sep=names_sep, names_repair=names_repair) %>%
84-
add_class("tidySummarizedExperiment_nested")
85-
)
152+
153+
# If both nested by transcript and sample
154+
if( s_(se)$name %in% colnames(data) & f_(se)$name %in% colnames(data) ){
155+
stop("tidySummarizedExperiment says: for the moment nesting both by sample- and feature-wise information is not possible. Please ask this feature to github/stemangiola/tidySummarizedExperiment")
156+
}
157+
158+
# If both nested not by transcript nor sample
159+
if(! s_(se)$name %in% colnames(data) & !f_(se)$name %in% colnames(data) ){
86160

87-
# If both nested by transcript and sample
88-
if( s_(se)$name %in% colnames(data) & f_(se)$name %in% colnames(data) ){
89-
stop("tidySummarizedExperiment says: for the moment nesting both by sample- and feature-wise information is not possible. Please ask this feature to github/stemangiola/tidySummarizedExperiment")
90-
}
161+
my_se = pull(.data_, !!cols) %>% .[[1]]
162+
91163

92-
# If both nested not by transcript nor sample
93-
if(! s_(se)$name %in% colnames(data) & !f_(se)$name %in% colnames(data) ){
94-
95-
se = pull(data, !!cols) %>% .[[1]]
96-
97-
# Mark if columns belong to feature or sample
98-
my_unnested_tibble =
99-
mutate(data, !!cols := map(!!cols, ~ as_tibble(.x))) %>%
100-
select(-suppressWarnings( one_of(s_(se)$name, f_(se)$name))) %>%
101-
unnest(!!cols)
102-
103-
# Get which column is relative to feature or sample
104-
sample_columns = my_unnested_tibble %>% get_subset_columns(!!s_(se)$symbol)
105-
transcript_columns = my_unnested_tibble %>% get_subset_columns(!!f_(se)$symbol)
106-
107-
source_column =
108-
c(
109-
rep(s_(se)$name, length(sample_columns)) %>% setNames(sample_columns),
110-
rep(f_(se)$name, length(transcript_columns)) %>% setNames(transcript_columns)
111-
)
112-
113-
# Do my trick to unnest
114-
return(
115-
mutate(data, !!cols := imap(
116-
!!cols, ~ .x %>%
117-
bind_cols_internal(
118-
119-
# Attach back the columns used for nesting
120-
.data_ %>%
121-
select(-!!cols, -suppressWarnings( one_of(s_(se)$name, f_(se)$name))) %>%
122-
slice(rep(.y, ncol(.x) * nrow(.x))),
123-
124-
# Column sample-wise or feature-wise
125-
column_belonging =
126-
source_column[
127-
.data_ %>%
128-
select(-!!cols, -suppressWarnings( one_of(s_(se)$name, f_(se)$name))) %>%
129-
colnames()
130-
]
131-
)
132-
)) %>%
133-
pull(!!cols) %>%
134-
reduce(bind_rows)
164+
# Mark if columns belong to feature or sample
165+
my_unnested_tibble =
166+
mutate(data, !!cols := map(!!cols, ~ as_tibble(.x))) %>%
167+
168+
select(-suppressWarnings( one_of(s_(my_se)$name, f_(my_se)$name))) %>%
169+
unnest(!!cols)
170+
171+
# Get which column is relative to feature or sample
172+
sample_columns = my_unnested_tibble %>% get_subset_columns(!!s_(my_se)$symbol)
173+
transcript_columns = my_unnested_tibble %>% get_subset_columns(!!f_(my_se)$symbol)
174+
175+
source_column =
176+
c(
177+
rep(s_(my_se)$name, length(sample_columns)) %>% setNames(sample_columns),
178+
rep(f_(my_se)$name, length(transcript_columns)) %>% setNames(transcript_columns)
179+
135180
)
136-
137-
}
138181

139-
# If column is SE nd only feature
140-
if(f_(se)$name %in% colnames(data)){
141-
142-
se = do.call(SummarizedExperiment::rbind, pull(data, !!cols))
143-
rowData(se) = cbind( rowData(se), data %>% select(-!!cols, -!!f_(se)$symbol))
144-
145-
return(se)
182+
# Drop if SE is null
183+
if(data |> filter(map_lgl(!!cols, is.null)) |> nrow() > 0){
184+
warning("tidySummarizedcExperiment says: some SummarizedExperiment objects to unnest were <NULL>, and were elminated")
185+
data = data |> filter(!map_lgl(!!cols, is.null))
146186
}
147187

148-
# If column is SE nd only sample
149-
if(s_(se)$name %in% colnames(data)){
150-
151-
se = do.call(SummarizedExperiment::cbind, pull(data, !!cols))
152-
colData(se) = cbind( colData(se), data %>% select(-!!cols, -!!s_(se)$symbol))
153-
154-
return(se)
155-
156-
}
188+
# Do my trick to unnest
189+
data =
190+
data |>
191+
mutate(!!cols := imap(
192+
!!cols, ~ .x %>%
193+
bind_cols_internal(
194+
195+
# Attach back the columns used for nesting
196+
.data_ %>%
197+
select(-!!cols, -suppressWarnings( one_of(s_(my_se)$name, f_(my_se)$name))) %>%
198+
slice(rep(.y, ncol(.x) * nrow(.x))),
199+
200+
# Column sample-wise or feature-wise
201+
column_belonging =
202+
source_column[
203+
.data_ %>%
204+
select(-!!cols, -suppressWarnings( one_of(s_(my_se)$name, f_(my_se)$name))) %>%
205+
colnames()
206+
]
207+
)
208+
))
209+
210+
# Understand if split was done feature
211+
if(identical(
212+
data |> pull(!!cols) |> magrittr::extract2(1) |> colnames() |> sort(),
213+
data |> pull(!!cols) |> map(colnames) |> reduce(intersect) |> sort()
214+
))
215+
return(data |> pull(!!cols) |> reduce_rbind_se())
216+
217+
# Understand if split was done sample
218+
else if(identical(
219+
data |> pull(!!cols) |> magrittr::extract2(1) |> rownames() |> sort(),
220+
data |> pull(!!cols) |> map(rownames) |> reduce(intersect) |> sort()
221+
))
222+
return(data |> pull(!!cols) |> reduce_cbind_se())
223+
224+
# If neither there is something wrong
225+
else
226+
stop("tidybulk says: not the sample names nor the feature names overlap through your nesting. The nesting (due to the underlying SummarizedExperiment::cbind and SummarizedExperiment::rbind requirements) needs to be rectangular.)")
227+
157228
}
229+
230+
# If column is SE nd only feature
231+
if(f_(se)$name %in% colnames(data)){
232+
233+
se = do.call(SummarizedExperiment::rbind, pull(data, !!cols))
234+
rowData(se) = cbind( rowData(se), data %>% select(-!!cols, -!!f_(se)$symbol))
235+
236+
return(se)
237+
}
238+
239+
# If column is SE nd only sample
240+
if(s_(se)$name %in% colnames(data)){
241+
242+
se = data |> pull(!!cols) |> reduce_cbind_se()
243+
colData(se) = cbind( colData(se), data %>% select(-!!cols, -!!s_(se)$symbol))
244+
245+
return(se)
246+
247+
}
248+
}
249+
250+
158251

159252
#' nest
160253
#'

R/utilities.R

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1326,3 +1326,34 @@ order_assays_internally_to_be_consistent <- function(se) {
13261326

13271327
se
13281328
}
1329+
1330+
#' @importFrom SummarizedExperiment cbind
1331+
reduce_cbind_se = function(se_list){
1332+
1333+
do.call(cbind, se_list)
1334+
}
1335+
1336+
#' @importFrom purrr reduce
1337+
#' @importFrom purrr map
1338+
#' @importFrom SummarizedExperiment rbind
1339+
reduce_rbind_se = function(se_list){
1340+
1341+
# rbind does not accept elementMetadata so I merge and take it off
1342+
element_metadata = se_list %>% map(elementMetadata) |> reduce(rbind)
1343+
1344+
# Drop elementMetadata
1345+
se_list = se_list |> map(~{
1346+
elementMetadata(.x) = NULL
1347+
.x
1348+
})
1349+
1350+
# Bind
1351+
se = do.call(rbind, se_list)
1352+
rm(se_list)
1353+
1354+
# Put elementMetadata back in - THE (safe) ASSUMPTION IS THAT THE ORDER DOES NOT CHANGE
1355+
elementMetadata(se) = element_metadata
1356+
1357+
# Return
1358+
se
1359+
}

0 commit comments

Comments
 (0)