Skip to content

Commit 8146849

Browse files
add deseq2 branch
1 parent 334b3ce commit 8146849

15 files changed

+284
-0
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ export(arrange_obs)
118118
export(arrange_taxa)
119119
export(as_phyloseq)
120120
export(branches)
121+
export(calc_diff_abund_deseq2)
121122
export(calc_group_mean)
122123
export(calc_group_median)
123124
export(calc_group_rsd)
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#' Differential abundance with DESeq2
2+
#'
3+
#' EXPERIMENTAL: This function is still being tested and developed; use with caution. Uses the
4+
#' \code{\link{DESeq2}} package to conduct differential abundance analysis of count data. Counts can
5+
#' be of OTUs/ASVs or taxa. The plotting function \code{\link{heat_tree_matrix}} is useful for
6+
#' visualizing these results. See details section below for considerations on preparing data for
7+
#' this analysis.
8+
#'
9+
#' Data should be raw read counts, not rarefied, converted to proportions, or modified with any
10+
#' other technique designed to correct for sample size since \code{\link{DESeq2}} is designed to be
11+
#' used with count data and takes into accoutn unequal sample size when determining differential
12+
#' abundance. Warnings will be given if the data is not integers or all sample sizes are equal.
13+
#'
14+
#' @param obj A \code{\link[taxa]{taxmap}} object
15+
#' @param data The name of a table in \code{obj} that contains data for each sample in columns.
16+
#' @param cols The names/indexes of columns in \code{data} to use. By default, all numeric columns
17+
#' are used. Takes one of the following inputs: \describe{ \item{TRUE/FALSE:}{All/No columns will
18+
#' used.} \item{Character vector:}{The names of columns to use} \item{Numeric vector:}{The indexes
19+
#' of columns to use} \item{Vector of TRUE/FALSE of length equal to the number of columns:}{Use
20+
#' the columns corresponding to \code{TRUE} values.} }
21+
#' @param groups A vector defining how samples are grouped into "treatments". Must be the same order
22+
#' and length as \code{cols}.
23+
#' @param other_cols If \code{TRUE}, preserve all columns not in \code{cols} in the output. If
24+
#' \code{FALSE}, dont keep other columns. If a column names or indexes are supplied, only preserve
25+
#' those columns.
26+
#' @param lfc_shrinkage What technique to use to adjust the log fold change results for low counts.
27+
#' Useful for ranking and visualizing log fold changes. Must be one of the following:
28+
#' \describe{
29+
#' \item{'none'}{No log fold change adjustments.}
30+
#' \item{'normal'}{The original DESeq2 shrinkage estimator}
31+
#' \item{'ashr'}{Adaptive shrinkage estimator from the \code{\link{ashr}} package, using a fitted mixture of normals prior.}
32+
#' }
33+
#' @param ... Passed to \code{\link[DESeq2]{results}} if the \code{lfc_shrinkage} option is "none"
34+
#' and to \code{\link[DESeq2]{lfcShrink}} otherwise.
35+
#'
36+
#' @return A tibble with at least the taxon ID of the thing tested, the groups compared, and the
37+
#' DESeq2 results. The \code{log2FoldChange} values will be positive if \code{treatment_1} is more
38+
#' abundant and \code{treatment_2}.
39+
#'
40+
#' @family calculations
41+
#'
42+
#' @examples
43+
#' \dontrun{
44+
#' # Parse data for plotting
45+
#' x = parse_tax_data(hmp_otus, class_cols = "lineage", class_sep = ";",
46+
#' class_key = c(tax_rank = "taxon_rank", tax_name = "taxon_name"),
47+
#' class_regex = "^(.+)__(.+)$")
48+
#'
49+
#' # Get per-taxon counts
50+
#' x$data$tax_table <- calc_taxon_abund(x, data = "tax_data", cols = hmp_samples$sample_id)
51+
#'
52+
#' # Calculate difference between groups
53+
#' x$data$diff_table <- calc_diff_abund_deseq2(x, data = "tax_table",
54+
#' cols = hmp_samples$sample_id,
55+
#' groups = hmp_samples$body_site)
56+
#'
57+
#' # Plot results (might take a few minutes)
58+
#' heat_tree_matrix(x,
59+
#' data = "diff_table",
60+
#' node_size = n_obs,
61+
#' node_label = taxon_names,
62+
#' node_color = ifelse(is.na(padj) | padj > 0.05, 0, log2FoldChange),
63+
#' node_color_range = diverging_palette(),
64+
#' node_color_trans = "linear",
65+
#' node_color_interval = c(-3, 3),
66+
#' edge_color_interval = c(-3, 3),
67+
#' node_size_axis_label = "Number of OTUs",
68+
#' node_color_axis_label = "Log2 fold change")
69+
#'
70+
#' }
71+
#'
72+
#' @export
73+
calc_diff_abund_deseq2 <- function(obj, data, cols, groups, other_cols = FALSE,
74+
lfc_shrinkage = c('none', 'normal', 'ashr'), ...) {
75+
76+
# Check that DESeq2 is intalled
77+
if (! requireNamespace("DESeq2", quietly = TRUE)) {
78+
stop('The "DESeq2" package needs to be installed for this function to work.',
79+
call. = FALSE)
80+
}
81+
82+
# Parse options
83+
lfc_shrinkage <- match.arg(lfc_shrinkage)
84+
85+
# Get abundance by sample data
86+
abund_data <- get_taxmap_table(obj, data)
87+
88+
# Parse columns to use
89+
cols <- get_numeric_cols(obj, data, cols)
90+
91+
# Check that columns contain integers only
92+
col_is_int <- vapply(cols, FUN.VALUE = logical(1), function(c) {
93+
all(abund_data[[c]] %% 1 == 0)
94+
})
95+
non_int_cols <- cols[! col_is_int]
96+
if (length(non_int_cols) > 0) {
97+
stop(call. = FALSE,
98+
'All data given to DESeq2 should be untransformed counts. The following columns contain non-integers:\n',
99+
limited_print(type = 'silent', non_int_cols, prefix = ' '))
100+
}
101+
102+
# Check for equal sample sizes
103+
if (length(unique(colSums(abund_data[cols]))) == 1) {
104+
warning(call. = FALSE,
105+
'All columns have equal counts, suggesting counts were normalized (e.g. rarefied) to correct for sample size variation.',
106+
' DESeq2 is designed to be used with unnormalized data.',
107+
' Use untransformed counts if available.')
108+
}
109+
110+
# Check groups option
111+
groups <- check_option_groups(groups, cols)
112+
113+
# Find other columns
114+
# These might be added back to the output later
115+
other_cols <- get_taxmap_other_cols(obj, data, cols, other_cols)
116+
117+
# Get every combination of groups to compare
118+
combinations <- t(utils::combn(unique(groups), 2))
119+
combinations <- lapply(seq_len(nrow(combinations)), function(i) combinations[i, ])
120+
121+
# Format data for DESeq2
122+
metadata <- data.frame(group = groups)
123+
deseq_data <- DESeq2::DESeqDataSetFromMatrix(countData = abund_data[cols],
124+
colData = metadata,
125+
design = ~ group)
126+
127+
# Run DESeq2
128+
deseq_result <- DESeq2::DESeq(deseq_data)
129+
130+
# Make function to compare one pair of groups
131+
one_comparison <- function(treat_1, treat_2) {
132+
133+
# Extract results for a single pair
134+
if (lfc_shrinkage == 'none') {
135+
pair_result <- DESeq2::results(deseq_result, contrast = c('group', treat_1, treat_2), ...)
136+
} else {
137+
pair_result <- DESeq2::lfcShrink(deseq_result, contrast = c('group', treat_1, treat_2),
138+
type = lfc_shrinkage, ...)
139+
}
140+
141+
# Add treatments compared
142+
output <- cbind(data.frame(stringsAsFactors = FALSE,
143+
treatment_1 = treat_1,
144+
treatment_2 = treat_2),
145+
as.data.frame(pair_result))
146+
147+
# Add in other columns if specified
148+
output <- cbind(abund_data[, other_cols], output)
149+
150+
return(output)
151+
}
152+
153+
# Compare all pairs of treatments and combine
154+
output <- lapply(seq_len(length(combinations)), function(i) {
155+
one_comparison(combinations[[i]][1], combinations[[i]][2])
156+
})
157+
output <- do.call(rbind, output)
158+
159+
# Convert to tibble and return
160+
dplyr::as_tibble(output)
161+
}

man/calc_diff_abund_deseq2.Rd

Lines changed: 110 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/calc_group_mean.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/calc_group_median.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/calc_group_rsd.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/calc_group_stat.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/calc_n_samples.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/calc_obs_props.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/calc_prop_samples.Rd

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)