66# ' input dataset with transcript-level expression values. The values in
77# ' \code{x} are grouped into genes based on this vector.
88# ' @param method Method to use for splicing diversity calculation, including
9- # ' naive entropy (\code{naive}), Laplace entropy (\code{laplace}), Gini index
10- # ' (\code{gini}), Simpson index (\code{simpson}) and inverse Simpson index
9+ # ' naive entropy (\code{naive}), Laplace entropy (\code{laplace}), Tsallis entropy (\code{tsallis}),
10+ # ' Gini index (\code{gini}), Simpson index (\code{simpson}) and inverse Simpson index
1111# ' (\code{invsimpson}). The default method is Laplace entropy.
1212# ' @param norm If \code{TRUE}, the entropy values are normalized to the number
1313# ' of transcripts for each gene. The normalized entropy values are always
2121# ' to use for diversity calculations.
2222# ' @param verbose If \code{TRUE}, the function will print additional diagnostic
2323# ' messages, besides the warnings and errors.
24+ # ' @param q Tsallis entropy parameter (q >= 0). Only used if \code{method = "tsallis"}.
25+ # ' Default is 2. Must be a single scalar value.
26+ # ' Tsallis entropy is a generalization that encompasses multiple diversity measures:
27+ # ' q = 0 gives species richness minus one, the limit q -> 1 gives Shannon entropy, and
28+ # ' other q values give related diversity indices (e.g., the Simpson index, 1 - sum(p^2), at q = 2).
2429# ' @return Gene-level splicing diversity values in a \code{SummarizedExperiment}
2530# ' object.
2631# ' @import methods
3540# ' diversity values for each gene in each sample. These diversity values can be
3641# ' used to investigate the dominance of a specific transcript for a gene,
3742# ' the diversity of transcripts in a gene, and analyze changes in diversity.
38- # '
43+ # '
3944# ' There are a number of diversity values implemented in the package. These
4045# ' include the following:
4146# ' \itemize{
4449# ' values mean a more diverse set of transcripts for a gene.
4550# ' \item Laplace entropy: Shannon entropy where the transcript frequencies are
4651# ' replaced by a Bayesian estimate, using Laplace's prior.
52+ # ' \item Tsallis entropy: a generalization of Shannon entropy, parameterized by q (q >= 0).
53+ # ' q = 0 gives species richness minus one, the limit q -> 1 recovers Shannon entropy, and
54+ # ' q = 2 gives one minus the sum of squared transcript frequencies. The default q is 2.
4755# ' \item Gini index: a measure of statistical dispersion originally used in
4856# ' economy. This measurement ranges from 0 (complete equality) to 1
4957# ' (complete inequality). A value of 1 (complete inequality) means a single
7381# ' # calculating normalized Laplace entropy
7482# ' result <- calculate_diversity(x, gene, method = "laplace", norm = TRUE)
7583calculate_diversity <- function (x , genes = NULL , method = " laplace" , norm = TRUE ,
76- tpm = FALSE , assayno = 1 , verbose = FALSE ) {
84+ tpm = FALSE , assayno = 1 , verbose = FALSE , q = 2 ) {
7785 if (! (is.matrix(x ) || is.data.frame(x ) || is.list(x ) || is(x , " DGEList" ) ||
7886 is(x , " RangedSummarizedExperiment" ) || is(x , " SummarizedExperiment" ))) {
7987 stop(" Input data type is not supported! Please use `?calculate_diversity`
@@ -143,7 +151,7 @@ calculate_diversity <- function(x, genes = NULL, method = "laplace", norm = TRUE
143151 stop(" The number of rows is not equal to the given gene set." , call. = FALSE )
144152 }
145153
146- if (! (method %in% c(" naive" , " laplace" , " gini" , " simpson" , " invsimpson" ))) {
154+ if (! (method %in% c(" naive" , " laplace" , " tsallis " , " gini" , " simpson" , " invsimpson" ))) {
147155 stop(" Invalid method. Please use `?calculate_diversity` to see the possible
148156 arguments and details." ,
149157 call. = FALSE
@@ -168,18 +176,28 @@ calculate_diversity <- function(x, genes = NULL, method = "laplace", norm = TRUE
168176 have any effect on the calculation." , call. = FALSE )
169177 }
170178
171- result <- calculate_method(x , genes , method , norm , verbose = verbose )
179+ result <- calculate_method(x , genes , method , norm , verbose = verbose , q = q )
172180
181+ # Prepare assay and row/col data
173182 result_assay <- result [, - 1 , drop = FALSE ]
174- rownames(result_assay ) <- result [, 1 ]
175183 result_rowData <- data.frame (genes = result [, 1 ], row.names = result [, 1 ])
176- result_colData <- data.frame (samples = colnames(x ), row.names = colnames(x ))
184+
185+ # Columns correspond to samples for every method (q is a single scalar, so Tsallis adds no extra columns)
186+ col_ids <- colnames(x )
187+ row_ids <- as.character(result [, 1 ])
188+ result_colData <- data.frame (samples = col_ids , row.names = col_ids )
189+ colnames(result_assay ) <- col_ids
190+ rownames(result_assay ) <- row_ids
191+
177192 result_metadata <- list (method = method , norm = norm )
193+ if (method == " tsallis" ) result_metadata $ q <- q
178194
179- result <- SummarizedExperiment(assays = list (diversity = result_assay ),
180- rowData = result_rowData ,
181- colData = result_colData ,
182- metadata = result_metadata )
195+ result <- SummarizedExperiment(
196+ assays = list (diversity = result_assay ),
197+ rowData = result_rowData ,
198+ colData = result_colData ,
199+ metadata = result_metadata
200+ )
183201
184202 return (result )
185203}
0 commit comments