Skip to content

Commit 23cd526

Browse files
authored
Merge pull request #17 from gallardoalba/Add_Tsallis_metric
Add Tsallis metric
2 parents 3ca559c + 844197d commit 23cd526

14 files changed

Lines changed: 763 additions & 29 deletions

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Description: The SplicingFactory R package uses transcript-level expression
1313
transcript isoform diversity within samples or between conditions.
1414
Additionally, the package analyzes the isoform diversity data, looking for
1515
significant changes between conditions.
16-
RoxygenNote: 7.1.1
16+
RoxygenNote: 7.3.3
1717
Imports: SummarizedExperiment, methods, stats
1818
Suggests:
1919
testthat,

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ export(calculate_entropy)
66
export(calculate_gini)
77
export(calculate_inverse_simpson)
88
export(calculate_simpson)
9+
export(calculate_tsallis_entropy)
910
import(methods)
1011
import(stats)
1112
importFrom(SummarizedExperiment,SummarizedExperiment)

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
# SplicingFactory 1.3.2 (dev)
2+
3+
* Added Tsallis entropy as a diversity metric, with full documentation and examples.
4+
* Users can now set the `q` parameter for Tsallis entropy in `calculate_diversity()` and `calculate_method()`.
5+
* Documentation and vignette updated to reflect this new feature.
6+
17
# SplicingFactory 1.3.1 (dev)
28

39
* Citation update

R/calculate_diversity.R

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
#' input dataset with transcript-level expression values. The values in
77
#' \code{x} are grouped into genes based on this vector.
88
#' @param method Method to use for splicing diversity calculation, including
9-
#' naive entropy (\code{naive}), Laplace entropy (\code{laplace}), Gini index
10-
#' (\code{gini}), Simpson index (\code{simpson}) and inverse Simpson index
9+
#' naive entropy (\code{naive}), Laplace entropy (\code{laplace}), Tsallis entropy (\code{tsallis}),
10+
#' Gini index (\code{gini}), Simpson index (\code{simpson}) and inverse Simpson index
1111
#' (\code{invsimpson}). The default method is Laplace entropy.
1212
#' @param norm If \code{TRUE}, the entropy values are normalized to the number
1313
#' of transcripts for each gene. The normalized entropy values are always
@@ -21,6 +21,11 @@
2121
#' to use for diversity calculations.
2222
#' @param verbose If \code{TRUE}, the function will print additional diagnostic
2323
#' messages, besides the warnings and errors.
24+
#' @param q Tsallis entropy parameter (q ≥ 0). Only used if method = "tsallis".
25+
#' Default is 2. Must be a single scalar value.
26+
#' Tsallis entropy is a generalization that encompasses multiple diversity measures:
27+
#' q = 0 gives species richness, q = 1 gives Shannon entropy, and other q values
28+
#' give related diversity indices (e.g., Simpson index at q=2).
2429
#' @return Gene-level splicing diversity values in a \code{SummarizedExperiment}
2530
#' object.
2631
#' @import methods
@@ -35,7 +40,7 @@
3540
#' diversity values for each gene in each sample. These diversity values can be
3641
#' used to investigate the dominance of a specific transcript for a gene,
3742
#' the diversity of transcripts in a gene, and analyze changes in diversity.
38-
#'
43+
#'
3944
#' There are a number of diversity values implemented in the package. These
4045
#' include the following:
4146
#' \itemize{
@@ -44,6 +49,9 @@
4449
#' values mean a more diverse set of transcripts for a gene.
4550
#' \item Laplace entropy: Shannon entropy where the transcript frequencies are
4651
#' replaced by a Bayesian estimate, using Laplace's prior.
52+
#' \item Tsallis entropy: A generalization of Shannon entropy, parameterized by q (q ≥ 0).
53+
#' q = 0 gives species richness, the limit q → 1 recovers Shannon entropy, and any other q gives the general Tsallis form.
54+
#' The default q is 2.
4755
#' \item Gini index: a measure of statistical dispersion originally used in
4856
#' economy. This measurement ranges from 0 (complete equality) to 1
4957
#' (complete inequality). A value of 1 (complete inequality) means a single
@@ -73,7 +81,7 @@
7381
#' # calculating normalized Laplace entropy
7482
#' result <- calculate_diversity(x, gene, method = "laplace", norm = TRUE)
7583
calculate_diversity <- function(x, genes = NULL, method = "laplace", norm = TRUE,
76-
tpm = FALSE, assayno = 1, verbose = FALSE) {
84+
tpm = FALSE, assayno = 1, verbose = FALSE, q = 2) {
7785
if (!(is.matrix(x) || is.data.frame(x) || is.list(x) || is(x, "DGEList") ||
7886
is(x, "RangedSummarizedExperiment") || is(x, "SummarizedExperiment"))) {
7987
stop("Input data type is not supported! Please use `?calculate_diversity`
@@ -143,7 +151,7 @@ calculate_diversity <- function(x, genes = NULL, method = "laplace", norm = TRUE
143151
stop("The number of rows is not equal to the given gene set.", call. = FALSE)
144152
}
145153

146-
if (!(method %in% c("naive", "laplace", "gini", "simpson", "invsimpson"))) {
154+
if (!(method %in% c("naive", "laplace", "tsallis", "gini", "simpson", "invsimpson"))) {
147155
stop("Invalid method. Please use `?calculate_diversity` to see the possible
148156
arguments and details.",
149157
call. = FALSE
@@ -168,18 +176,28 @@ calculate_diversity <- function(x, genes = NULL, method = "laplace", norm = TRUE
168176
have any effect on the calculation.", call. = FALSE)
169177
}
170178

171-
result <- calculate_method(x, genes, method, norm, verbose = verbose)
179+
result <- calculate_method(x, genes, method, norm, verbose = verbose, q = q)
172180

181+
# Prepare assay and row/col data
173182
result_assay <- result[, -1, drop = FALSE]
174-
rownames(result_assay) <- result[, 1]
175183
result_rowData <- data.frame(genes = result[, 1], row.names = result[, 1])
176-
result_colData <- data.frame(samples = colnames(x), row.names = colnames(x))
184+
185+
# Columns correspond to samples only, for every method (Tsallis takes a single scalar q, so it adds no per-q columns)
186+
col_ids <- colnames(x)
187+
row_ids <- as.character(result[, 1])
188+
result_colData <- data.frame(samples = col_ids, row.names = col_ids)
189+
colnames(result_assay) <- col_ids
190+
rownames(result_assay) <- row_ids
191+
177192
result_metadata <- list(method = method, norm = norm)
193+
if (method == "tsallis") result_metadata$q <- q
178194

179-
result <- SummarizedExperiment(assays = list(diversity = result_assay),
180-
rowData = result_rowData,
181-
colData = result_colData,
182-
metadata = result_metadata)
195+
result <- SummarizedExperiment(
196+
assays = list(diversity = result_assay),
197+
rowData = result_rowData,
198+
colData = result_colData,
199+
metadata = result_metadata
200+
)
183201

184202
return(result)
185203
}

R/calculate_method.R

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,18 @@
66
#' input dataset with transcript-level expression values. The values in
77
#' \code{x} are grouped into genes based on this vector.
88
#' @param method Method to use for splicing diversity calculation, including
9-
#' naive entropy (\code{naive}), Laplace entropy (\code{laplace}), Gini index
10-
#' (\code{gini}), Simpson index (\code{simpson}) and inverse Simpson index
9+
#' naive entropy (\code{naive}), Laplace entropy (\code{laplace}), Tsallis entropy (\code{tsallis}),
10+
#' Gini index (\code{gini}), Simpson index (\code{simpson}) and inverse Simpson index
1111
#' (\code{invsimpson}). The default method is Laplace entropy.
1212
#' @param norm If \code{TRUE}, the entropy values are normalized to the number
1313
#' of transcripts for each gene. The normalized entropy values are always
1414
#' between 0 and 1. If \code{FALSE}, genes cannot be compared to each other,
1515
#' due to possibly different maximum entropy values.
16+
#' @param q Tsallis entropy parameter (q ≥ 0). Only used if method = "tsallis".
17+
#' Default is 2. Must be a single scalar value.
18+
#' Tsallis entropy is a generalization that encompasses multiple diversity measures:
19+
#' q = 0 gives species richness, q = 1 gives Shannon entropy, and other q values
20+
#' give related diversity indices (e.g., Simpson index at q=2).
1621
#' @param verbose If \code{TRUE}, the function will print additional diagnostic
1722
#' messages, besides the warnings and errors.
1823
#' @return Gene-level splicing diversity values in a \code{data.frame}, where
@@ -23,7 +28,8 @@
2328
#' transcript-level expression values, aggregated by the genes defined in the
2429
#' \code{genes} parameter.
2530
#' @import stats
26-
calculate_method <- function(x, genes, method, norm = TRUE, verbose = FALSE) {
31+
calculate_method <- function(x, genes, method, norm = TRUE, verbose = FALSE, q = 2) {
32+
2733
if (method == "naive") {
2834
x <- aggregate(x, by = list(genes), calculate_entropy, norm = norm)
2935
}
@@ -33,6 +39,31 @@ calculate_method <- function(x, genes, method, norm = TRUE, verbose = FALSE) {
3339
pseudocount = 1)
3440
}
3541

42+
if (method == "tsallis") {
43+
# Note: q must be a scalar value (required for statistical testing)
44+
# calculate_tsallis_entropy enforces length(q) == 1
45+
gene_levels <- unique(genes)
46+
coln <- colnames(x)
47+
rown <- gene_levels
48+
tsallis_row <- function(gene) {
49+
idx <- which(genes == gene)
50+
sapply(seq_len(ncol(x)), function(j) {
51+
calculate_tsallis_entropy(x[idx, j], q = q, norm = norm)
52+
})
53+
}
54+
result_mat <- t(vapply(gene_levels, tsallis_row, FUN.VALUE = numeric(ncol(x))))
55+
colnames(result_mat) <- coln
56+
rownames(result_mat) <- rown
57+
out_df <- data.frame(Gene = rown, result_mat, check.names = FALSE)
58+
if (all(rowSums(!is.na(result_mat)) == 0)) {
59+
out_df <- data.frame(Gene=character(0))
60+
for (nm in coln) out_df[[nm]] <- numeric(0)
61+
x <- out_df
62+
return(x)
63+
}
64+
x <- out_df
65+
}
66+
3667
if (method == "gini") {
3768
x <- aggregate(x, by = list(genes), calculate_gini)
3869
}

0 commit comments

Comments
 (0)