## Problem dimensions and ground-truth LLCP parameters.
n <- 20      # input dimension
m <- 10      # output dimension
N <- 100     # number of training pairs
N_val <- 50  # number of validation pairs
set.seed(243)

## True parameters: exponent matrix A (scaled small so outputs stay
## well-conditioned) and positive coefficients c.
A_true <- matrix(rnorm(m * n), nrow = m, ncol = n) / 10
c_true <- abs(rnorm(m))

## Structured Prediction
Introduction
The derivative functionality (backward, derivative, gradient, delta) is not implemented in CVXR at this time. The gradient descent training loop below illustrates the intended API based on CVXPY but is not executable.
In this example, we fit a regression model to structured data, using a log-log convex program (LLCP). The training dataset consists of pairs $(x_i, y_i)$, $i = 1, \ldots, N$, where each input $x_i \in \mathbf{R}^n_{++}$ is positive and each output $y_i \in \mathbf{R}^m_{++}$ is positive with components sorted in ascending order.
Our regression model $\hat y(x)$ is the solution map of an LLCP, parameterized by a matrix $A \in \mathbf{R}^{m \times n}$ and a positive vector $c \in \mathbf{R}^m_{++}$:
$$\hat y(x) = \operatorname*{argmin}_{y \in \mathbf{R}^m_{++}} \;\; \sum_{i=1}^{m} \left( \frac{c_i \prod_{j} x_j^{A_{ij}}}{y_i} + \frac{y_i}{c_i \prod_{j} x_j^{A_{ij}}} \right) \quad \text{subject to} \quad y_1 \le y_2 \le \cdots \le y_m.$$
Here, the minimization is over the positive vector $y \in \mathbf{R}^m_{++}$, and the monotonicity constraint encodes our prior knowledge that the components of each output are sorted in ascending order.
The training loss is the mean squared error $\frac{1}{N} \sum_{i=1}^{N} \lVert \hat y(x_i) - y_i \rVert_2^2$.
We fit the parameters $A$ and $c$ by projected gradient descent on the training loss, differentiating through the solution map of the LLCP.
This example is described in the paper Differentiating through Log-Log Convex Programs.
Data Generation
We generate synthetic data by choosing a true matrix $A_{\mathrm{true}}$ and vector $c_{\mathrm{true}}$, sampling lognormal inputs, and solving the LLCP under the true parameters to obtain each output.
## Generate `num_points` lognormal inputs and, for each one, the output
## obtained by solving the LLCP under the true parameters (A_true, c_true).
##
## @param num_points Number of input/output pairs to generate.
## @param seed RNG seed, for reproducibility.
## @return A list with `inputs` (num_points x n) and `outputs` (num_points x m).
generate_data <- function(num_points, seed) {
  set.seed(seed)
  ## Generate lognormal inputs
  inputs <- matrix(exp(rnorm(num_points * n)), nrow = num_points, ncol = n)

  ## For each input, solve the LLCP with true parameters to get the output.
  ## The input enters as a Parameter so the same Problem is re-solved in the
  ## loop below without being rebuilt.
  input_p <- Parameter(n, pos = TRUE)
  prediction <- multiply(c_true, gmatmul(A_true, input_p))
  y_var <- Variable(m, pos = TRUE)
  obj_fn <- sum_entries(prediction / y_var + y_var / prediction)
  ## Monotonicity constraint: output components sorted in ascending order
  cons <- lapply(seq_len(m - 1), function(i) y_var[i] <= y_var[i + 1])
  prob <- Problem(Minimize(obj_fn), cons)

  outputs <- matrix(0, nrow = num_points, ncol = m)
  for (i in seq_len(num_points)) {
    value(input_p) <- inputs[i, ]
    psolve(prob, gp = TRUE)  # solve as a geometric (log-log convex) program
    outputs[i, ] <- value(y_var)
  }
  list(inputs = inputs, outputs = outputs)
}

train_data <- generate_data(N, 243)
train_inputs <- train_data$inputs
train_outputs <- train_data$outputs

## Plot the first training output; its components are sorted ascending by
## construction (the LLCP's monotonicity constraint).
plot(train_outputs[1, ], type = "o", pch = 16,
     xlab = "i", ylab = expression(y[i]),
     main = "First Training Output (sorted)")

## Held-out validation set, generated with a different seed
val_data <- generate_data(N_val, 0)
val_inputs <- val_data$inputs
val_outputs <- val_data$outputs

## Monomial Fit to Each Component
We initialize the parameters in our LLCP model by fitting monomials to the training data, without enforcing the monotonicity constraint. This is a standard least squares problem in log space.
## Monomial fit in log space: a least squares initialization for (A, c),
## ignoring the monotonicity constraint.
log_outputs <- t(log(train_outputs))  # m x N
log_inputs <- t(log(train_inputs))    # n x N

## Solve: log_outputs ~ t(theta) %*% log_inputs + log_c %*% t(ones)
## Variables: theta (n x m), log_c (m x 1)
theta <- Variable(c(n, m))
log_c <- Variable(c(m, 1))
## Broadcast log_c across all N columns
offsets <- do.call(cbind, replicate(N, log_c, simplify = FALSE))  # m x N
cp_preds <- t(theta) %*% log_inputs + offsets  # m x N

lstsq_obj <- (1 / N) * sum_squares(cp_preds - log_outputs)
lstsq_problem <- Problem(Minimize(lstsq_obj))
lstsq_result <- psolve(lstsq_problem, verbose = TRUE)
## psolve() returns a result list; the optimal objective is its $value
## component (formatting the list itself with %f would error).
cat(sprintf("Least squares fit: %f\n", lstsq_result$value))

## Extract initialized parameters
## NOTE(review): classic CVXR exposes solved values via
## lstsq_result$getValue(theta); confirm that value() on the variables
## works in this CVXR version.
A_init <- t(value(theta))    # m x n
c_init <- exp(value(log_c))  # m x 1

## Fitting with the LLCP Model
We now set up the parameterized LLCP and iteratively fit the parameters using gradient descent with the backward method.
## Parameterized LLCP: A and c are Parameters so they can be updated each
## gradient step; x enters through a slack variable equated to x_param.
A_param <- Parameter(c(m, n))
c_param <- Parameter(m, pos = TRUE)
x_slack <- Variable(n, pos = TRUE)
x_param <- Parameter(n, pos = TRUE)
y <- Variable(m, pos = TRUE)

prediction <- multiply(c_param, gmatmul(A_param, x_slack))
obj_fn <- sum_entries(prediction / y + y / prediction)
cons <- c(list(x_slack == x_param),
          lapply(seq_len(m - 1), function(i) y[i] <= y[i + 1]))
problem <- Problem(Minimize(obj_fn), cons)
cat(sprintf("Is DGP: %s\n", is_dgp(problem)))

## We run projected gradient descent. In each epoch, for every training
## example, we solve the LLCP, compute the gradient of the squared loss
## with respect to the parameters via backward(), and accumulate the
## gradients. We then update the parameters.
## Initialize parameters from the monomial fit
A_current <- A_init
c_current <- as.vector(c_init)

lr <- 5e-2  # learning rate
n_epochs <- 10
train_losses <- numeric(n_epochs)
val_losses <- numeric(n_epochs)

## NOTE(review): gradient()<- and backward() mirror the CVXPY derivative
## API and are not implemented in CVXR at this time (see the introduction);
## this loop illustrates the intended usage and is not executable as written.
for (epoch in seq_len(n_epochs)) {
  ## -- Training pass --
  A_grad_accum <- matrix(0, m, n)
  c_grad_accum <- numeric(m)
  train_loss <- 0
  for (i in seq_len(N)) {
    value(A_param) <- A_current
    value(c_param) <- c_current
    value(x_param) <- train_inputs[i, ]
    ## requires_grad enables backward() after the solve
    psolve(problem, gp = TRUE, requires_grad = TRUE)
    y_pred <- value(y)
    residual <- y_pred - train_outputs[i, ]
    train_loss <- train_loss + sum(residual^2)
    ## Set gradient of y to d(loss)/d(y) = 2 * residual / N
    gradient(y) <- 2 * residual / N
    backward(problem)
    A_grad_accum <- A_grad_accum + gradient(A_param)
    c_grad_accum <- c_grad_accum + gradient(c_param)
  }
  train_losses[epoch] <- train_loss / N

  ## -- Validation pass (no gradients needed) --
  val_loss <- 0
  for (i in seq_len(N_val)) {
    value(A_param) <- A_current
    value(c_param) <- c_current
    value(x_param) <- val_inputs[i, ]
    psolve(problem, gp = TRUE)
    y_pred <- value(y)
    val_loss <- val_loss + sum((y_pred - val_outputs[i, ])^2)
  }
  val_losses[epoch] <- val_loss / N_val

  cat(sprintf("(epoch %d) train / val (%.4f / %.4f)\n",
              epoch, train_losses[epoch], val_losses[epoch]))

  ## -- Update parameters --
  A_current <- A_current - lr * A_grad_accum
  c_current <- c_current - lr * c_grad_accum
  ## Project c to stay positive
  c_current <- pmax(c_current, 1e-8)
}

## Comparing Predictions
We compare the LLCP model predictions (with fitted parameters) to the true outputs on a validation example.
## Compute LLCP predictions on a validation example
val_idx <- 1
value(A_param) <- A_current
value(c_param) <- c_current
value(x_param) <- val_inputs[val_idx, ]
psolve(problem, gp = TRUE)
llcp_pred <- value(y)

## Compute least-squares monomial predictions (no monotonicity):
## y_hat = c * exp(A %*% log(x))
lstsq_pred <- as.vector(c_init) *
  exp(A_init %*% log(val_inputs[val_idx, ]))

## "teal" is not a named R color (not in colors()); use its hex value.
plot(val_outputs[val_idx, ], type = "o", pch = 16, col = "orange",
     xlab = "i", ylab = expression(y[i]),
     main = "Validation Example: True vs. Predictions",
     ylim = range(c(val_outputs[val_idx, ], llcp_pred, lstsq_pred)))
lines(llcp_pred, type = "o", pch = 17, col = "#008080")
lines(lstsq_pred, type = "o", pch = 15, col = "gray", lty = 2)
legend("topleft",
       legend = c("True", "LLCP", "Least Squares"),
       col = c("orange", "#008080", "gray"),
       pch = c(16, 17, 15), lty = c(1, 1, 2))

## Session Info
R version 4.5.2 (2025-10-31)
Platform: aarch64-apple-darwin20
Running under: macOS Tahoe 26.3
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Los_Angeles
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] CVXR_1.8.1
loaded via a namespace (and not attached):
[1] slam_0.1-55 cli_3.6.5 knitr_1.51 ECOSolveR_0.6.1
[5] rlang_1.1.7 xfun_0.56 clarabel_0.11.2 otel_0.2.0
[9] gurobi_13.0-1 Rglpk_0.6-5.1 highs_1.12.0-3 cccp_0.3-3
[13] scs_3.2.7 S7_0.2.1 jsonlite_2.0.0 Rcplex_0.3-8
[17] backports_1.5.0 htmltools_0.5.9 Rmosek_11.1.1 gmp_0.7-5.1
[21] piqp_0.6.2 rmarkdown_2.30 grid_4.5.2 evaluate_1.0.5
[25] fastmap_1.2.0 yaml_2.3.12 compiler_4.5.2 codetools_0.2-20
[29] htmlwidgets_1.6.4 Rcpp_1.1.1 osqp_1.0.0 lattice_0.22-9
[33] digest_0.6.39 checkmate_2.3.4 Matrix_1.7-4 tools_4.5.2