| Title: | Supervised Variational Autoencoder Regression via 'reticulate' |
|---|---|
| Description: | Supervised latent-variable regression for high-dimensional predictors such as soil reflectance spectra. The model uses an encoder-decoder neural network with a stochastic Gaussian latent representation regularized by a Kullback-Leibler term, and a supervised prediction head trained jointly with the reconstruction objective. The implementation interfaces R with a 'Python' deep-learning backend and provides utilities for training, tuning, and prediction. |
| Authors: | Hugo Rodrigues [aut, cre] |
| Maintainer: | Hugo Rodrigues <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.1.9 |
| Built: | 2026-05-18 05:53:20 UTC |
| Source: | https://github.com/hugomachadorodrigues/soilvae |
A small soil spectroscopy dataset packaged with soilVAE for demonstrating
typical spectral preprocessing (reflectance to absorbance conversion,
resampling, SNV, smoothing) and for comparing a classic PLS baseline model
against supervised VAE regression via soilVAE.
A data.frame or list containing at minimum:
spc: Numeric matrix/data.frame of reflectance spectra (samples × wavelengths).
TotalCarbon: Numeric vector of total carbon values.
The object datsoilspc contains:
spc: a numeric matrix (or data.frame) of reflectance spectra, with rows as
samples and columns as wavelengths (nm). Column names should be interpretable
as numeric wavelengths.
TotalCarbon: a numeric vector with the soil total carbon content for each sample.
Depending on the original source, additional columns may be present (e.g., sample identifiers or other soil properties).
The dataset is intended for examples and unit-sized demonstrations. It is not meant to be a comprehensive soil spectral library.
```r
data("datsoilspc", package = "soilVAE")
str(datsoilspc)

# basic plot of reflectance spectra
spc <- as.matrix(datsoilspc$spc)
wav <- as.numeric(colnames(spc))
matplot(wav, t(spc), type = "l", lty = 1,
        xlab = "Wavelength / nm", ylab = "Reflectance")
```
Select the best configuration from a tuning table
```r
select_best_from_grid(
  tuning_df,
  selection_metric = c("euclid", "rmse", "r2", "rpiq")
)
```
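tuning_df |
Data frame containing one row per configuration with its validation metrics (columns such as RMSE_val, R2_val and RPIQ_val, as in the example below). |
selection_metric |
One of: "euclid", "rmse", "r2", "rpiq". |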
List with best (one-row data frame) and best_score.
```r
tuning_df <- data.frame(
  cfg_id = 1:3,
  RMSE_val = c(0.5, 0.3, 0.4),
  R2_val = c(0.8, 0.9, 0.85),
  RPIQ_val = c(2.0, 3.0, 2.5)
)
best <- select_best_from_grid(tuning_df, selection_metric = "rmse")
best$best
```
Tune VAEReg on a train/validation split
```r
tune_vae_train_val(X_tr, y_tr, X_va, y_va, seed = 123, grid_vae)
```
X_tr |
Train predictors matrix. |
y_tr |
Train response numeric. |
X_va |
Validation predictors matrix. |
y_va |
Validation response numeric. |
seed |
Integer seed. |
grid_vae |
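Data frame with required columns: hidden_enc, hidden_dec, latent_dim, dropout, lr, beta_kl, alpha_y, epochs, batch_size, patience (hidden_enc and hidden_dec are list-columns of integer vectors, as in the example below). |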
A list with grid (input grid) and tuning_df (metrics per config).
```r
## Not run:
vae_configure()

X <- matrix(rnorm(500), nrow = 50, ncol = 10)
y <- rnorm(50)

grid <- data.frame(
  latent_dim = 4L, dropout = 0.1, lr = 0.001,
  beta_kl = 1, alpha_y = 1,
  epochs = 5L, batch_size = 16L, patience = 3L
)
grid$hidden_enc <- list(c(32L, 16L))
grid$hidden_dec <- list(c(16L, 32L))

result <- tune_vae_train_val(
  X_tr = X[1:40, ], y_tr = y[1:40],
  X_va = X[41:50, ], y_va = y[41:50],
  grid_vae = grid
)
## End(Not run)
```
Build a supervised VAE regression model (VAEReg)
```r
vae_build(
  input_dim,
  hidden_enc = c(512L, 256L),
  hidden_dec = c(256L, 512L),
  latent_dim = 32L,
  dropout = 0.1,
  lr = 0.001,
  beta_kl = 1,
  alpha_y = 1
)
```
input_dim |
integer |
hidden_enc |
integer vector |
hidden_dec |
integer vector |
latent_dim |
integer |
dropout |
numeric |
lr |
numeric learning rate |
beta_kl |
numeric |
alpha_y |
numeric |
A 'Python' 'Keras' model object (VAEReg), not yet trained; fit it with vae_fit().
```r
## Not run:
vae_configure()
model <- vae_build(input_dim = 100L)
## End(Not run)
```
Configure Python / reticulate for soilVAE
```r
vae_configure(python = NULL, venv = NULL, conda = NULL)
```
python |
Path to python executable (optional). |
venv |
Name of virtualenv to use (optional). |
conda |
Name of conda env to use (optional). |
TRUE invisibly.
```r
## Not run:
# Let reticulate auto-detect Python
vae_configure()

# Or point to an existing virtual environment
vae_configure(venv = "r-soilvae")
## End(Not run)
```
Extract latent embeddings (z) from VAEReg
```r
vae_encode(model, X)
```
model |
Python VAEReg |
X |
matrix |
Matrix of latent embeddings (samples x latent_dim).
```r
## Not run:
vae_configure()

X <- matrix(rnorm(200), nrow = 20, ncol = 10)
y <- rnorm(20)

model <- vae_build(input_dim = ncol(X))
vae_fit(model, X, y, epochs = 5L)
z <- vae_encode(model, X)
## End(Not run)
```
Fit VAEReg
```r
vae_fit(
  model,
  X,
  y,
  X_val = NULL,
  y_val = NULL,
  epochs = 80L,
  batch_size = 64L,
  patience = 10L,
  verbose = 0L
)
```
model |
Python VAEReg object from vae_build() |
X |
matrix (n x p) |
y |
numeric vector (n) |
X_val |
optional matrix |
y_val |
optional numeric vector |
epochs |
integer |
batch_size |
integer |
patience |
integer for early stopping (only if validation provided) |
verbose |
0/1/2 |
The fitted model, invisibly.
```r
## Not run:
vae_configure()

X <- matrix(rnorm(200), nrow = 20, ncol = 10)
y <- rnorm(20)

model <- vae_build(input_dim = ncol(X))
vae_fit(model, X, y, epochs = 5L)
## End(Not run)
```
Predict y using VAEReg (via latent z -> y_head)
```r
vae_predict(model, X)
```
model |
Python VAEReg |
X |
matrix |
Numeric vector of predicted values.
```r
## Not run:
vae_configure()

X <- matrix(rnorm(200), nrow = 20, ncol = 10)
y <- rnorm(20)

model <- vae_build(input_dim = ncol(X))
vae_fit(model, X, y, epochs = 5L)
preds <- vae_predict(model, X)
## End(Not run)
```