Functions for generating matrices
These functions generate test X and Y vectors and matrices.
make_dense <- function(nrow, ncol){
  rnorm(nrow * ncol, mean = 1, sd = 5) |> matrix(nrow = nrow)
}
make_dense(10, 4)
[,1] [,2] [,3] [,4]
[1,] 1.8892588 0.9030567 0.97493208 2.9510242
[2,] 3.3960281 -0.4714899 -4.93087690 1.5255536
[3,] -1.6270367 -14.6452840 1.63917737 -2.9674811
[4,] 9.8982711 4.5972585 -0.01125264 3.5621743
[5,] -2.3090774 -8.1560351 2.13535775 -9.0829556
[6,] 0.3725077 1.9453953 6.82626032 -9.4719708
[7,] 5.6168550 -4.4956928 5.63168144 -0.5948949
[8,] -3.4289597 -1.6226364 -3.17391649 10.1173155
[9,] 5.4910691 -5.9826822 -4.60582256 3.6239851
[10,] -3.0872638 3.0859606 -2.86287104 -0.7641079
make_sparse <- function(nrow, ncol){
  # logical mask of entries to set to zero (shorter than the matrix, so recycled)
  zeroes <- sample(c(TRUE, FALSE), size = nrow * ncol * 0.9, replace = TRUE)
  `[<-`(make_dense(nrow, ncol), zeroes, 0) |> Matrix::Matrix(sparse = TRUE)
}
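As a rough check (a sketch, not part of the benchmark pipeline), the proportion of zero entries in a generated matrix can be inspected with Matrix::nnzero():
m <- make_sparse(1000, 100)
1 - Matrix::nnzero(m) / length(m)  # share of entries that are exactly zero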
make_y_vector <- function(x){
  beta <- ncol(x) |> runif(max = 100)
  as.numeric(as.matrix(x) %*% beta)
}
make_dense(4, 4) |>
  make_y_vector()
[1] -163.910450 111.611023 -107.326772 -4.166507
make_y_matrix <- function(x, cols = 2){
  beta <- (cols * ncol(x)) |> runif(max = 100) |> matrix(ncol = cols)
  x %*% beta
}
make_dense(4, 4) |>
  make_y_matrix()
[,1] [,2]
[1,] 575.59012 441.7061
[2,] 36.55084 -130.9256
[3,] 342.59766 46.9593
[4,] 854.80379 841.4551
Functions for calculating residuals
These functions take the vector \(\mathbf{y}\) and matrix \(\mathbf{X}\), and return the residuals from estimating \(\mathbf{y}\) from \(\mathbf{X}\) using ordinary least squares regression:
\(\mathbf{y} - \mathbf{X}\left(\mathbf{X}^{\operatorname{T}}\mathbf{X}\right)^{-1}\mathbf{X}^{\operatorname{T}}\mathbf{y}\)
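Equivalently, writing \(\mathbf{H} = \mathbf{X}\left(\mathbf{X}^{\operatorname{T}}\mathbf{X}\right)^{-1}\mathbf{X}^{\operatorname{T}}\) for the hat matrix, the residuals are \(\left(\mathbf{I} - \mathbf{H}\right)\mathbf{y}\).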
Using standard matrix operations in R:
formula_resid <- function(x, y){
  y - x %*% solve(t(x) %*% x) %*% t(x) %*% y
}
Base R’s QR decomposition:
qr_resid <- function(x, y){
  qr(x) |> qr.resid(as.matrix(y))
}
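As a quick sanity check (a minimal sketch reusing the generator functions above), the explicit formula and the QR version should agree up to floating-point error:
x <- make_dense(200, 10)
y <- make_y_vector(x)
all.equal(as.numeric(formula_resid(x, y)), as.numeric(qr_resid(x, y)))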
Base R’s lm function:
lm_resid <- function(x, y){
  # note: lm(y ~ x) also fits an intercept, unlike the other methods here
  lm(y ~ x) |> resid()
}
Base R’s .lm.fit, a lower-level function underlying lm:
lm_fit_resid <- function(x, y){
  .lm.fit(x, y)$residuals
}
Algebraically calculating the residuals, but using Eigen C++ instead of base R:
eigen_resid <- Rcpp::cppFunction("
  SEXP fastResidop(const Eigen::Map<Eigen::MatrixXd> X, Eigen::Map<Eigen::MatrixXd> Y){
    Eigen::MatrixXd T1 = X.transpose() * X;
    Eigen::MatrixXd T2 = T1.inverse();
    Eigen::MatrixXd T3 = X * T2;
    Eigen::MatrixXd T4 = X.transpose() * Y;
    Eigen::MatrixXd T5 = T3 * T4;
    Eigen::MatrixXd T6 = Y - T5;
    return Rcpp::wrap(T6);
  }
", depends = "RcppEigen")
A custom function that uses QR decomposition via Eigen. This relies on some C++ code, with one routine for dense matrices and one for sparse matrices, followed by an R wrapper that dispatches on the matrix type:
#include <RcppEigen.h>
using Eigen::Map;
using Eigen::HouseholderQR;
using Eigen::MatrixXd;
// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::export]]
SEXP qr_dense_residop(Map<MatrixXd> X, Map<MatrixXd> Y){
const HouseholderQR<MatrixXd> QR(X);
return Rcpp::wrap(Y - (X * QR.solve(Y)));
}
#include <RcppEigen.h>
using Eigen::Map;
using Eigen::SparseMatrix;
using Eigen::SparseQR;
using Eigen::NaturalOrdering;
using Eigen::MatrixXd;
// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::export]]
SEXP qr_sparse_residop(Map<SparseMatrix<double>> X, Map<MatrixXd> Y){
const SparseQR<SparseMatrix<double>, NaturalOrdering<int>> QR(X);
return Rcpp::wrap(Y - (X * QR.solve(Y)));
}
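Assuming the C++ code above is saved to a file (the name residops.cpp below is just a placeholder), it can be compiled into the R session with Rcpp::sourceCpp():
Rcpp::sourceCpp("residops.cpp")  # exposes qr_dense_residop() and qr_sparse_residop()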
qr_eigen <- function(x, y){
  if (inherits(x, "sparseMatrix"))
    qr_sparse_residop(x, as.matrix(y))
  else
    qr_dense_residop(x, y)
}
A custom function that uses QR decomposition, using Eigen only when the matrix is dense and falling back to qr() (the Matrix package’s sparse QR) for sparse matrices:
qr_eigen_partial <- function(x, y){
  if (inherits(x, "sparseMatrix"))
    qr(x) |> qr.resid(as.matrix(y))
  else
    qr_dense_residop(x, y)
}
A built-in function that uses C++ and comes with the RcppEigen package:
eigen_fastlm_resid <- function(...){
  RcppEigen::fastLm(...) |> resid()
}
The Rfast package’s version of lm:
rfast_resid <- function(...){
  Rfast::lmfit(...)$residuals
}
Benchmark
This function evaluates a given residual calculation and returns the time and memory usage:
benchmark <- function(x, y, calculate, repetition, ...){
  time <- bench::mark({
    result <- try(calculate(x, y), silent = TRUE)
  }, min_iterations = repetition)
  time |> dplyr::mutate(
    result = list(result),
    ...
  )
}
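For example (a minimal sketch using the generator functions above; the name and x_type arguments are simply passed through ... to label the output):
x <- make_dense(1000, 50)
y <- make_y_vector(x)
benchmark(x, y, calculate = qr_resid, repetition = 2, name = "qr", x_type = "dense")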
Collect all the functions to benchmark into a named list:
all_funcs <- list(
  qr = qr_resid,
  formula = formula_resid,
  lm = lm_resid,
  eigen_cpp = eigen_resid,
  qr_eigen = qr_eigen,
  qr_eigen_partial = qr_eigen_partial,
  .lm.fit = lm_fit_resid,
  # eigen_fastlm has a number of decomposition methods, selected by an integer code
  eigen_fastlm_colpiv_qr = \(x, y) eigen_fastlm_resid(x, y, 0),
  eigen_fastlm_unpiv_qr = \(x, y) eigen_fastlm_resid(x, y, 1),
  eigen_fastlm_llt_chol = \(x, y) eigen_fastlm_resid(x, y, 2),
  eigen_fastlm_ldlt_chol = \(x, y) eigen_fastlm_resid(x, y, 3),
  eigen_fastlm_jacobi_svd = \(x, y) eigen_fastlm_resid(x, y, 4),
  eigen_fastlm_eig = \(x, y) eigen_fastlm_resid(x, y, 5),
  rfast = rfast_resid
)
Run the benchmark:
tibble::tibble(
  name = names(all_funcs),
  calculate = all_funcs
) |>
  dplyr::cross_join(
    tibble::tibble(
      x = list(
        make_dense(nrow = 10000, ncol = 500),
        make_sparse(nrow = 10000, ncol = 500)
      ),
      x_type = c("dense", "sparse")
    )
  ) |>
  dplyr::cross_join(
    tibble::tibble(
      make_y = list(
        make_y_matrix,
        make_y_vector
      ),
      y_type = c("y_matrix", "y_vector")
    )
  ) |>
  dplyr::mutate(
    y = purrr::map2(x, make_y, function(x, make_y) make_y(x)),
    make_y = NULL,
    repetition = 2
  ) |>
  dplyr::filter(
    # eigen_cpp will crash with a sparse matrix
    !(x_type == "sparse" & name == "eigen_cpp")
  ) |>
  purrr::pmap(benchmark) |>
  purrr::list_rbind() |>
  dplyr::mutate(
    success = result |> purrr::map_lgl(function(x){
      !(x[[1]] |> attr("condition") |> inherits("error"))
    })
  ) |>
  dplyr::select(name, success, x_type, y_type, median, mem_alloc)