This vignette demonstrates bootstrap stability-selection for GAMLSS models.
sb_gamlss() with correlated-selectBoost
bootstraps for μ and σ.selection_table(),
plot_sb_gamlss(), and effect_plot()
outputs.engine,
engine_sigma, engine_tau) and keep
factor/spline terms grouped.sb_gamlss_c0_grid(), autoboost_gamlss(), and
fastboost_gamlss().Conference highlight. SelectBoost for GAMLSS was presented at the Joint Statistics Meetings 2024 (Portland, OR) in the talk “An Improvement for Variable Selection for Generalized Additive Models for Location, Shape and Scale and Quantile Regression” by Frédéric Bertrand and M. Maumy. The presentation stressed how correlation-aware resampling sharpens variable selection for GAMLSS and quantile regression problems even when predictors are numerous and strongly correlated.
set.seed(1)
library(gamlss)
library(SelectBoost.gamlss)
n <- 400
x1 <- rnorm(n); x2 <- rnorm(n); x3 <- rnorm(n)
mu <- 1 + 1.5*x1 - 1.2*x3
y <- gamlss.dist::rNO(n, mu = mu, sigma = 1)
dat <- data.frame(y, x1, x2, x3)
fit <- sb_gamlss(
y ~ 1,
mu_scope = ~ x1 + x2 + x3,
sigma_scope = ~ x1 + x2,
family = gamlss.dist::NO(),
data = dat,
B = 50,
sample_fraction = 0.7,
pi_thr = 0.6,
k = 2,
pre_standardize = TRUE,
trace = FALSE
)
fit$final_formula
sel <- selection_table(fit)
sel <- sel[order(sel$parameter, -sel$prop), , drop = FALSE]
head(sel, n = min(8L, nrow(sel)))
stable <- sel[sel$prop >= fit$pi_thr, , drop = FALSE]
if (nrow(stable)) {
stable
} else {
cat("No terms reached the stability threshold of", fit$pi_thr, "for this run.\n")
}
plot_sb_gamlss(fit, top = 10)
if (requireNamespace("ggplot2", quietly = TRUE)) {
print(effect_plot(fit, "x1", dat, what = "mu"))
}selection_table() ranks stable terms for each parameter;
the chunk above prints the eight strongest effects (sorted by stability)
alongside the terms exceeding the stability threshold.
plot_sb_gamlss() overlays selection frequency
vs. coefficient magnitude, and effect_plot() visualizes
partial effects (using ggplot2 if available, otherwise
base R).
SelectBoost.gamlss lets you mix-and-match engines across parameters.
Grouped penalties (via grpreg and SGL)
keep whole factors/spline bases/interactions together, while
engine, engine_sigma, engine_nu,
and engine_tau control the selection backend
individually.
set.seed(2)
f <- factor(sample(letters[1:3], n, TRUE))
dat2 <- transform(dat, f = f, x4 = rnorm(n))
fit_grouped <- sb_gamlss(
y ~ 1,
mu_scope = ~ f + x1 + pb(x2) + x3 + x1:f,
sigma_scope = ~ f + x1 + pb(x2),
family = gamlss.dist::NO(),
data = dat2,
engine = "grpreg", # μ: grouped penalty
engine_sigma = "sgl", # σ: sparse group lasso
engine_tau = "glmnet", # τ (if present) would fall back automatically
grpreg_penalty = "grLasso",
sgl_alpha = 0.7,
B = 40,
pi_thr = 0.6,
pre_standardize = TRUE,
trace = FALSE
)
sel_grouped <- selection_table(fit_grouped)
sel_grouped <- sel_grouped[order(sel_grouped$parameter, -sel_grouped$prop), , drop = FALSE]
head(sel_grouped, n = min(8L, nrow(sel_grouped)))
stable_grouped <- sel_grouped[sel_grouped$prop >= fit_grouped$pi_thr, , drop = FALSE]
if (nrow(stable_grouped)) {
stable_grouped
} else {
cat("No terms reached the stability threshold of", fit_grouped$pi_thr, "for this run.\n")
}Grouped engines build design matrices internally (safely handling
factors/splines) and retain original formulas for the final
gamlss refit.
The SelectBoost wrappers expose correlation-aware grouping and automated c0 choice.
grid_obj <- sb_gamlss_c0_grid(
y ~ 1,
data = dat,
family = gamlss.dist::NO(),
mu_scope = ~ x1 + x2 + x3,
sigma_scope = ~ x1 + x2,
c0_grid = seq(0.2, 0.8, by = 0.2),
B = 30,
pi_thr = 0.6,
pre_standardize = TRUE,
trace = FALSE,
progress = TRUE
)
plot(grid_obj)
confidence_table(grid_obj)
auto_obj <- autoboost_gamlss(
y ~ 1,
data = dat,
family = gamlss.dist::NO(),
mu_scope = ~ x1 + x2 + x3,
sigma_scope = ~ x1 + x2,
c0_grid = seq(0.2, 0.8, by = 0.2),
B = 30,
pi_thr = 0.6,
pre_standardize = TRUE,
trace = FALSE
)
attr(auto_obj, "chosen_c0")
fast_obj <- fastboost_gamlss(
y ~ 1,
data = dat,
family = gamlss.dist::NO(),
mu_scope = ~ x1 + x2 + x3,
sigma_scope = ~ x1 + x2,
B = 30,
sample_fraction = 0.6,
pi_thr = 0.6,
pre_standardize = TRUE,
trace = FALSE
)
plot_sb_gamlss(fast_obj)Use progress = TRUE on any of these helpers to monitor
grid/config iteration.
confidence_functionals() collapses stability curves
across the c0 grid into scalar summaries (AUSC, coverage, quantiles,
weighted, conservative). The output ranks terms and integrates
seamlessly with plot() and
plot_stability_curves().
cf <- confidence_functionals(
grid_obj,
pi_thr = 0.6,
q = c(0.5, 0.8, 0.9),
conservative = TRUE,
B = 30
)
head(cf)
plot(cf, top = 8)
plot_stability_curves(grid_obj, terms = c("x1", "x3"), parameter = "mu")For model interpretation, pair the stability summaries with
partial-effect diagnostics via effect_plot() (see the
quick-start example above) or use selection_table() to
inspect the final refit.
tune_sb_gamlss() evaluates different engine/penalty
configurations using either stability or deviance metrics (with optional
K-fold CV). Supply a list of configurations and a shared baseline of
arguments.
configs <- list(
list(engine = "stepGAIC"),
list(engine = "glmnet", glmnet_alpha = 1),
list(engine = "grpreg", grpreg_penalty = "grLasso", engine_sigma = "sgl", sgl_alpha = 0.8)
)
baseline <- list(
formula = y ~ 1,
data = dat2,
family = gamlss.dist::NO(),
mu_scope = ~ f + x1 + pb(x2) + x3 + x1:f,
sigma_scope = ~ f + x1 + pb(x2),
pi_thr = 0.6,
pre_standardize = TRUE,
sample_fraction = 0.7,
trace = FALSE,
parallel = "auto"
)
tuned <- tune_sb_gamlss(configs, base_args = baseline, B_small = 25, metric = "stability", progress = TRUE)
tuned$best_configSwitch metric = "deviance" and set K to
engage fast deviance CV with the optimized density routes.
Deviance-based tuning reuses
loglik_gamlss_newdata_fast() under the hood. Use
fast_vs_generic_ll() to benchmark the speedup relative to
the generic density evaluator, and check_fast_vs_generic()
to verify numerical agreement (with per-family tolerances).
# Example: compare deviance routes on a fitted model
fit_ga <- gamlss::gamlss(y ~ x1 + x2, sigma.formula = ~ x1, data = dat, family = gamlss.dist::NO())
fast_vs_generic_ll(fit_ga, newdata = dat, reps = 30)
check_fast_vs_generic(fit_ga, newdata = dat, tol = 1e-6)See the dedicated fast-deviance vignettes for expanded results across
many gamlss.dist families.
All bootstrap helpers accept parallel = "auto" to
leverage future.apply (or any plan you set via
future::plan()). Progress bars are enabled via
progress = TRUE on sb_gamlss(), grid/autoboost
helpers, and tune_sb_gamlss().
future::plan(multisession, workers = 4)
fit_parallel <- sb_gamlss(
y ~ 1,
mu_scope = ~ x1 + x2 + x3,
sigma_scope = ~ x1 + x2,
family = gamlss.dist::NO(),
data = dat,
B = 200,
pi_thr = 0.6,
pre_standardize = TRUE,
parallel = "auto",
progress = TRUE,
trace = FALSE
)To opt into the full suite of equality/benchmark tests locally, set
options(SelectBoost.gamlss.run_long_tests = TRUE) or
Sys.setenv(RUN_LONG_TESTS = "true") before running
devtools::test().