## Problem data
n <- 30
M <- 50
K <- 200
set.seed(n * M * K)
X <- matrix(stats::rnorm(K * n), nrow = K, ncol = n)
beta_true <- matrix(stats::rnorm(n), nrow = n, ncol = 1)
y <- X %*% beta_true + 0.3 * sqrt(n) * stats::rnorm(K)
## Order variables based on y
idx <- order(y, decreasing = FALSE)
y_ordered <- y[idx]
X_ordered <- X[idx, ]
## Find cutoff and censor
D <- (y_ordered[M] + y_ordered[M + 1]) / 2
censored <- (y_ordered > D)
y_censored <- pmin(y_ordered, D)Censored Regression
Introduction
Data collected from an experimental study is sometimes censored, so that only partial information is known about a subset of observations. For instance, when measuring the lifespan of mice, we may find a number of subjects live beyond the duration of the project. Thus, all we know is the lower bound on their lifespan. This right censoring can be incorporated into a regression model via convex optimization.
Suppose that only \(K\) of our observations \((x_i,y_i)\) are fully observed, and the remaining are censored such that we observe \(x_i\), but only know \(y_i \geq D\) for \(i = K+1,\ldots,m\) and some constant \(D \in {\mathbf R}\). We can build an OLS model using the uncensored data, restricting the fitted values \(\hat y_i = x_i^T\beta\) to lie above \(D\) for the censored observations:
\[ \begin{array}{ll} \underset{\beta}{\mbox{minimize}} & \sum_{i=1}^K (y_i - x_i^T\beta)^2 \\ \mbox{subject to} & x_i^T\beta \geq D, \quad i = K+1,\ldots,m. \end{array} \]
This avoids the bias introduced by standard OLS, while still utilizing all of the data points in the regression. The constraint requires only one more line in CVXR.
Example
We will generate synthetic data for this example, censoring observations beyond a value \(D\).
We now fit regular OLS, OLS using just the censored data and finally the censored regression.
## Regular OLS
beta <- Variable(n)
obj <- sum((y_censored - X_ordered %*% beta)^2)
prob <- Problem(Minimize(obj))
result <- psolve(prob)
check_solver_status(prob)
beta_ols <- value(beta)
## OLS using uncensored data
obj <- sum((y_censored[1:M] - X_ordered[1:M,] %*% beta)^2)
prob <- Problem(Minimize(obj))
result <- psolve(prob)
check_solver_status(prob)
beta_unc <- value(beta)
## Censored regression
obj <- sum((y_censored[1:M] - X_ordered[1:M,] %*% beta)^2)
constr <- list(X_ordered[(M+1):K,] %*% beta >= D)
prob <- Problem(Minimize(obj), constr)
result <- psolve(prob)
check_solver_status(prob)
beta_cens <- value(beta)Here’s are some plots comparing the results. The blue diamond points are estimates.
plot_results <- function(beta_res, title) {
d <- data.frame(index = seq_len(K),
y_ordered = y_ordered,
y_fit = as.numeric(X_ordered %*% beta_res),
censored = as.factor(censored))
ggplot(data = d) +
geom_point(mapping = aes(x = index, y = y_ordered, color = censored)) +
geom_point(mapping = aes(x = index, y = y_fit), color = "blue", shape = 23) +
geom_abline(intercept = D, slope = 0, lty = "dashed") +
labs(x = "Observations", y = "y") +
ggtitle(title)
}plot_results(beta_ols, "Regular OLS.")
plot_results(beta_unc, "OLS using uncensored data.")
plot_results(beta_cens, "Censored Regression.")
Session Info
R version 4.5.3 (2026-03-11)
Platform: aarch64-apple-darwin20
Running under: macOS Tahoe 26.3.1
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: America/Los_Angeles
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] ggplot2_4.0.2 CVXR_1.8.1
loaded via a namespace (and not attached):
[1] gmp_0.7-5.1 generics_0.1.4 clarabel_0.11.2 slam_0.1-55
[5] lattice_0.22-9 digest_0.6.39 magrittr_2.0.4 evaluate_1.0.5
[9] grid_4.5.3 RColorBrewer_1.1-3 fastmap_1.2.0 rprojroot_2.1.1
[13] jsonlite_2.0.0 Matrix_1.7-4 ECOSolveR_0.6.1 backports_1.5.0
[17] scs_3.2.7 Rmosek_11.1.1 scales_1.4.0 codetools_0.2-20
[21] cli_3.6.5 rlang_1.1.7 Rglpk_0.6-5.1 withr_3.0.2
[25] yaml_2.3.12 otel_0.2.0 tools_4.5.3 osqp_1.0.0
[29] checkmate_2.3.4 dplyr_1.2.0 here_1.0.2 gurobi_13.0-1
[33] vctrs_0.7.1 R6_2.6.1 lifecycle_1.0.5 htmlwidgets_1.6.4
[37] pkgconfig_2.0.3 cccp_0.3-3 pillar_1.11.1 gtable_0.3.6
[41] glue_1.8.0 Rcpp_1.1.1 xfun_0.56 tibble_3.3.1
[45] tidyselect_1.2.1 knitr_1.51 dichromat_2.0-0.1 highs_1.12.0-3
[49] farver_2.1.2 htmltools_0.5.9 labeling_0.4.3 rmarkdown_2.30
[53] piqp_0.6.2 compiler_4.5.3 S7_0.2.1