Merge branch 'master' into update-documentation

stemangiola · web-flow · commit 65c8bfdaf31c · 2023-09-07T16:27:53.000+09:30
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: tidySummarizedExperiment
 Title: Brings SummarizedExperiment to the Tidyverse 
-Version: 1.11.3
+Version: 1.11.4
 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com",
                   role = c("aut", "cre")) )
 Description: The tidySummarizedExperiment package provides a set of tools for creating and
diff --git a/R/print_method.R b/R/print_method.R
@@ -64,16 +64,30 @@ tbl_format_header.tidySummarizedExperiment <- function(x, setup, ...) {
 print.SummarizedExperiment <- function(x, ..., n=NULL,
     width=NULL, n_extra=NULL) {
 
-    # Fix NOTEs
-    . <- NULL
 
-    # Stop if column names of assays do not overlap
-    if (check_if_assays_are_NOT_overlapped(x))
-        stop( 
-            "tidySummarizedExperiment says:",
-            " the assays in your SummarizedExperiment have column names, ",
-            "but their order is not the same, and they not completely overlap." 
-        )
+  # Fix NOTEs
+  . <- NULL
+  
+  # Stop if any column or row names are duplicated
+  if (check_if_any_dimnames_duplicated(x, dim = "cols")) {
+      stop("tidySummarizedExperiment says: some column names are duplicated")
+  }
+  if (check_if_any_dimnames_duplicated(x, dim = "rows")) {
+      stop("tidySummarizedExperiment says: some row names are duplicated")
+  }
+  # Stop if column names of assays do not overlap
+  if (check_if_assays_are_NOT_overlapped(x, dim = "cols")) { 
+      stop( 
+          "tidySummarizedExperiment says: the assays in your SummarizedExperiment have column names, 
+but they do not completely overlap." 
+      )
+  }
+  if (check_if_assays_are_NOT_overlapped(x, dim = "rows")) { 
+      stop( 
+          "tidySummarizedExperiment says: the assays in your SummarizedExperiment have row names, 
+but they do not completely overlap." 
+      )
+  }
   
     # reorder assay colnames before printing
     # Rearrange if assays has colnames and rownames
diff --git a/R/utilities.R b/R/utilities.R
@@ -623,6 +623,14 @@ get_special_datasets <- function(se) {
 }
 
 check_se_dimnames <- function(se) {
+    # Stop if any column or row names are duplicated
+    if (check_if_any_dimnames_duplicated(se, dim = "cols")) {
+        stop("tidySummarizedExperiment says: some column names are duplicated")
+    }
+    if (check_if_any_dimnames_duplicated(se, dim = "rows")) {
+        stop("tidySummarizedExperiment says: some row names are duplicated")
+    }
+
     # Stop if column names of assays do not overlap, or if some assays have 
     # column names and others don't
     if (check_if_assays_are_NOT_overlapped(se, dim = "cols")) { 
@@ -1275,6 +1283,51 @@ check_if_assays_are_NOT_consistently_ordered <- function(se) {
         not()
 }
 
+check_if_any_dimnames_duplicated <- function(se, dim = "cols") {
+    stopifnot(dim %in% c("rows", "cols"))
+    if (dim == "rows") {
+        dimnames_function <- rownames
+        nbr_unique_dimnames_function <- function(x) length(unique(rownames(x)))
+        length_function <- nrow
+    } else {
+        dimnames_function <- colnames
+        nbr_unique_dimnames_function <- function(x) length(unique(colnames(x)))
+        length_function <- ncol
+    }
+    # TRUE when names along `dim` are duplicated in an assay or in `se`.
+    # Check assays: three conditions joined by `&&`, all must hold.
+    # (1) there is at least one assay
+    assays_check <- assays(se) |> length() |> gt(0) &&
+        # (2) at least one assay in assays(se, withDimnames = FALSE) has
+        # non-NULL names along `dim`
+        Filter(
+            Negate(is.null),
+            assays(se, withDimnames = FALSE) |>  
+                as.list() |> 
+                map(dimnames_function)
+        ) |> 
+        length() |>
+        gt(0) &&
+        # (3) among the named assays, the smallest number of unique names
+        # differs from length_function(se), i.e. nrow(se) or ncol(se)
+        assays(se, withDimnames = FALSE) |>  
+        as.list() |> 
+        map(dimnames_function) |>
+        Filter(Negate(is.null), x = _) |>
+        map(unique) |> 
+        map(length) |>
+        reduce(min) |> 
+        equals(length_function(se)) |> 
+        not()
+    # Check SE object: it has names along `dim`, and the number of unique
+    # names differs from nrow(se) or ncol(se)
+    se_check <- !is.null(dimnames_function(se)) &&
+        nbr_unique_dimnames_function(se) != length_function(se)
+    
+    # Return TRUE if either of the two checks returns TRUE
+    assays_check || se_check
+}
+
 check_if_assays_are_NOT_overlapped <- function(se, dim = "cols") {
     stopifnot(dim %in% c("rows", "cols"))
     if (dim == "rows") {
@@ -1284,6 +1337,7 @@ check_if_assays_are_NOT_overlapped <- function(se, dim = "cols") {
         dimnames_function <- colnames
         length_function <- ncol
     }
+    is_identical_for_reduce <- function(x,y) if (identical(x,y)) x else FALSE
     
     # If I have any assay at all
     assays(se) |> length() |> gt(0) &&
@@ -1300,13 +1354,16 @@ check_if_assays_are_NOT_overlapped <- function(se, dim = "cols") {
         
         # If I have lack of consistency
         # This will be TRUE also if some assays have dimnames and other don't
+        # For each assay, sort the dimnames, then check that they are all the 
+        # same. Can't check for unique length, since some names may be repeated
+        # If they're not all the same, the reduce() step will return FALSE; 
+        # otherwise, returns the (shared) dimnames
         assays(se, withDimnames = FALSE) |>  
         as.list() |> 
         map(dimnames_function) |> 
-        reduce(intersect) |> 
-        length() |> 
-        equals(length_function(se)) |> 
-        not()
+        map(sort) |>
+        reduce(is_identical_for_reduce) |> 
+        is.logical()
 }
 
 
diff --git a/tests/testthat/test-utilities.R b/tests/testthat/test-utilities.R
@@ -247,6 +247,91 @@ test_that("get_count_datasets works", {
     expect_equal(cds$mat2, seq(10, 18))
     expect_equal(cds$mat3, seq(19, 27))
     
+    # SE does not have dimnames, one assay has duplicated colnames, one has no colnames
+    se1 <- se
+    rownames(se1) <- colnames(se1) <- NULL
+    colnames(assay(se1, "mat1", withDimnames = FALSE))[2] <- 
+        colnames(assay(se1, "mat1", withDimnames = FALSE))[1]
+    colnames(assay(se1, "mat2", withDimnames = FALSE)) <- NULL
+    expect_equal(colnames(assay(se1, "mat1", withDimnames = FALSE)), paste0("S", c(1, 1, 3)))
+    expect_equal(rownames(assay(se1, "mat1", withDimnames = FALSE)), paste0("G", seq_len(3)))
+    expect_null(colnames(assay(se1, "mat2", withDimnames = FALSE)))
+    expect_equal(colnames(assay(se1, "mat3", withDimnames = FALSE)), paste0("S", seq_len(3)))
+    expect_null(colnames(se1))
+    expect_null(rownames(se1))
+    expect_error(cds <- get_count_datasets(se1), "some column names are duplicated")
+    
+    # SE does not have dimnames, one assay has duplicated rownames, one has no rownames
+    se1 <- se
+    rownames(se1) <- colnames(se1) <- NULL
+    rownames(assay(se1, "mat1", withDimnames = FALSE))[2:3] <- 
+        rownames(assay(se1, "mat1", withDimnames = FALSE))[1]
+    rownames(assay(se1, "mat2", withDimnames = FALSE)) <- NULL
+    expect_equal(rownames(assay(se1, "mat1", withDimnames = FALSE)), paste0("G", c(1, 1, 1)))
+    expect_equal(colnames(assay(se1, "mat1", withDimnames = FALSE)), paste0("S", seq_len(3)))
+    expect_null(rownames(assay(se1, "mat2", withDimnames = FALSE)))
+    expect_equal(rownames(assay(se1, "mat3", withDimnames = FALSE)), paste0("G", seq_len(3)))
+    expect_null(colnames(se1))
+    expect_null(rownames(se1))
+    expect_error(cds <- get_count_datasets(se1), "some row names are duplicated")
+    
+    # SE has duplicated colnames
+    se1 <- se
+    colnames(se1) <- paste0("S", c(1, 1, 1))
+    expect_error(cds <- get_count_datasets(se1), "some column names are duplicated")
+    expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
+    expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
+    
+    # SE has duplicated rownames
+    se1 <- se
+    rownames(se1) <- paste0("G", c(1, 2, 1))
+    expect_error(cds <- get_count_datasets(se1), "some row names are duplicated")
+    expect_false(check_if_any_dimnames_duplicated(se1, dim = "cols"))
+    expect_true(check_if_any_dimnames_duplicated(se1, dim = "rows"))
+    
+    # All assays + SE have duplicated colnames
+    se1 <- se
+    colnames(se1)[2] <- 
+        colnames(assay(se1, "mat1", withDimnames = FALSE))[2] <- 
+        colnames(assay(se1, "mat2", withDimnames = FALSE))[2] <- 
+        colnames(assay(se1, "mat3", withDimnames = FALSE))[2] <- "S1"
+    expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
+    expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
+    expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "cols"))
+    expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "rows"))
+    
+    # Two assays + SE have duplicated colnames
+    se1 <- se
+    colnames(se1)[2] <- 
+        colnames(assay(se1, "mat1", withDimnames = FALSE))[2] <- 
+        colnames(assay(se1, "mat3", withDimnames = FALSE))[2] <- "S1"
+    expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
+    expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
+    expect_true(check_if_assays_are_NOT_overlapped(se1, dim = "cols"))
+    expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "rows"))
+    
+    # Assays have duplicated colnames in different ways
+    se1 <- se
+    assay(se1, "mat2") <- NULL
+    colnames(assay(se1, "mat1", withDimnames = FALSE)) <- c("S1", "S1", "S2")
+    colnames(assay(se1, "mat3", withDimnames = FALSE)) <- c("S1", "S2", "S2")
+    expect_true(check_if_any_dimnames_duplicated(se1, dim = "cols"))
+    expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
+    expect_true(check_if_assays_are_NOT_overlapped(se1, dim = "cols"))
+    expect_false(check_if_assays_are_NOT_overlapped(se1, dim = "rows"))
+
+    # All dimnames are NULL - not duplicated
+    se1 <- se
+    rownames(se1) <- colnames(se1) <- NULL
+    rownames(assay(se1, "mat1", withDimnames = FALSE)) <- 
+        colnames(assay(se1, "mat1", withDimnames = FALSE)) <- 
+        rownames(assay(se1, "mat2", withDimnames = FALSE)) <- 
+        colnames(assay(se1, "mat2", withDimnames = FALSE)) <- 
+        rownames(assay(se1, "mat3", withDimnames = FALSE)) <- 
+        colnames(assay(se1, "mat3", withDimnames = FALSE)) <- NULL
+    expect_false(check_if_any_dimnames_duplicated(se1, dim = "cols"))
+    expect_false(check_if_any_dimnames_duplicated(se1, dim = "rows"))
+    
     # Unnamed assay(s)
     # se1 <- SummarizedExperiment::SummarizedExperiment(
     #     assays = list(