
Compute Pseudo-Correlation Using Beta-Binomial Model
Source:R/general_tools.R
get_pseudo_correlation.RdThis function calculates a pseudo R²-like correlation metric using a beta-binomial model
implemented in C++. It takes in a data matrix ZDB_matrix and two model matrices
for inclusion and exclusion, respectively. The function now supports both sparse and dense
matrices for m1 and m2, and allows selection between Cox-Snell and Nagelkerke R² metrics.
Usage
get_pseudo_correlation(
ZDB_matrix,
m1_inclusion = NULL,
m2_exclusion = NULL,
metric = "CoxSnell",
suppress_warnings = TRUE
)Arguments
- ZDB_matrix
A numeric dense matrix of shape (events x samples). Should have rownames representing events.
- m1_inclusion
A numeric matrix (dense or sparse) of the same number of rows as
ZDB_matrix, representing inclusion features.- m2_exclusion
A numeric matrix (dense or sparse) of the same number of rows as
ZDB_matrix, representing exclusion features.- metric
Character string specifying which R² metric to compute. Options are "CoxSnell" (default) or "Nagelkerke".
- suppress_warnings
Logical. If
TRUE(default), suppresses warnings during any warnings triggered during computation (e.g., due to ill-conditioned inputs)
Value
A data.table with the following columns:
- event
The event names from
ZDB_matrixrownames.- pseudo_correlation
The computed pseudo R² correlation values using the specified metric.
- null_distribution
Null correlation values from a permuted version of
ZDB_matrix.
Examples
set.seed(42)
# get the m1 object
junction_abundance_object <- load_toy_SJ_object()
m1_obj <- make_m1(junction_ab_object = junction_abundance_object)
#> Starting M1 matrix creation...
#> Combined eventdata from 1 samples
#> Found 915 unique junctions
#> Creating coordinate groups...
#> Start coordinate alternative events: 25
#> End coordinate alternative events: 31
#> Combined eventdata has 56 alternative splicing events
#> Processing junction abundance matrices...
#> Applying count threshold filtering...
#> Filtered from 56 to 23 events
#> Events removed: 33
#> Finished processing M1.
#>
#> Summary:
#> Input junctions: 915
#> Alternative splicing events: 56
#> Events passing threshold: 23
#> Total cells: 4120
# obtaining the m1 and eventdata
m1_inclusion <- m1_obj$m1_inclusion_matrix
eventdata <- m1_obj$event_data
m2_exclusion <- make_m2(m1_inclusion, eventdata)
#> Starting M2 matrix creation...
#> +-- Using C++ implementation for faster computation
#> | |-- Events: 23
#> | |-- Cells: 4120
#> | +-- Groups: 16
#> Finished M2 matrix creation.
# creating a dummy ZDB
ZDB_matrix <- matrix(rnorm(n = (nrow(m1_inclusion) * ncol(m1_inclusion)), sd = 7),
nrow = nrow(m1_inclusion),
ncol = ncol(m1_inclusion))
rownames(ZDB_matrix) <- rownames(m1_inclusion)
# m1 and m2 can now be either sparse or dense matrices
# Example with dense matrices (backward compatible)
m1_dense <- as.matrix(m1_inclusion)
m2_dense <- as.matrix(m2_exclusion)
pseudo_r_square_cox <- get_pseudo_correlation(ZDB_matrix, m1_dense, m2_dense)
#> Input dimension check passed. Proceeding with computation.
#> Computation completed successfully.
print(pseudo_r_square_cox)
#> event pseudo_correlation null_distribution
#> <char> <num> <num>
#> 1: chr10:3690620-3691056_S 0.866025398 0.866025398
#> 2: chr10:3690620-3695363_S -0.866025398 -0.866025398
#> 3: chr16:96295068-96326589_S -0.023446044 -0.010431211
#> 4: chr16:96295068-96385740_S -0.010570969 -0.003707927
#> 5: chr12:102934101-103029363_E -0.032077419 0.056644992
#> 6: chr12:103001385-103029363_E -0.007059557 -0.021802424
#> 7: chr12:103027776-103029363_E -0.013384216 -0.018713745
#> 8: chr19:27376501-27377078_E -0.013745616 0.020151950
#> 9: chr19:27376505-27377078_E 0.026667266 -0.007784455
#> 10: chr8:96323677-96435749_E -0.024944763 -0.336453467
#> 11: chr8:96373048-96435749_E 0.396903841 -0.462346154
#> 12: chrX:73349048-73350184_E 0.003094836 0.013572848
#> 13: chrX:73349704-73350184_E 0.012095621 -0.020866152
# Example with sparse matrices (more memory efficient)
pseudo_r_square_sparse <- get_pseudo_correlation(ZDB_matrix, m1_inclusion, m2_exclusion)
#> Input dimension check passed. Proceeding with computation.
#> Computation completed successfully.
# Example using Nagelkerke R² instead of Cox-Snell
pseudo_r_square_nagel <- get_pseudo_correlation(ZDB_matrix, m1_inclusion, m2_exclusion,
metric = "Nagelkerke")
#> Input dimension check passed. Proceeding with computation.
#> Computation completed successfully.
print(pseudo_r_square_nagel)
#> event pseudo_correlation null_distribution
#> <char> <num> <num>
#> 1: chr10:3690620-3691056_S 1.00000000 1.000000000
#> 2: chr10:3690620-3695363_S -1.00000000 -1.000000000
#> 3: chr16:96295068-96326589_S -0.37414684 -0.409813926
#> 4: chr16:96295068-96385740_S -0.16868921 0.064619929
#> 5: chr12:102934101-103029363_E -0.33430698 -0.140232733
#> 6: chr12:103001385-103029363_E -0.02546534 0.002229504
#> 7: chr12:103027776-103029363_E -0.04706796 -0.066274574
#> 8: chr19:27376501-27377078_E -0.13379882 -0.206035428
#> 9: chr19:27376505-27377078_E 0.25957722 -0.272184398
#> 10: chr8:96323677-96435749_E -0.02880373 -0.556840749
#> 11: chr8:96373048-96435749_E 0.45830508 -0.488146237
#> 12: chrX:73349048-73350184_E 0.03666344 0.212132338
#> 13: chrX:73349704-73350184_E 0.14329260 0.243972825