To install the stable version of mcglm, use
devtools::install_git(). For more information, visit mcglm/README.
library(devtools)
install_git("wbonat/mcglm")library(mcglm)
packageVersion("mcglm")The mcglm package implements the multivariate covariance
generalized linear models (McGLMs) proposed by Bonat and J\(\o\)rgensen (2016). In this introductory
vignette we employed the mcglm package for fitting a set of
generalized linear models and compare our results with the ones obtained
by ordinary R functions like lm and
glm.
Consider the count data obtained in Dobson (1990).
## Dobson (1990) Page 93: Randomized Controlled Trial :
counts <- c(18,17,15,20,10,20,25,13,12)
outcome <- gl(3,1,9)
treatment <- gl(3,3)
print(d.AD <- data.frame(treatment, outcome, counts))##   treatment outcome counts
## 1         1       1     18
## 2         1       2     17
## 3         1       3     15
## 4         2       1     20
## 5         2       2     10
## 6         2       3     20
## 7         3       1     25
## 8         3       2     13
## 9         3       3     12Ordinary analysis using quasi-Poisson model.
fit.glm <- glm(counts ~ outcome + treatment, family = quasipoisson)The orthodox quasi-Poisson model is obtained by specifying the
variance function as tweedie and fix the power parameter at
\(1\). Since, we are dealing with
independent data, the matrix linear predictor is composed of a diagonal
matrix.
require(mcglm)
require(Matrix)
# Matrix linear predictor
Z0 <- mc_id(d.AD)
fit.qglm <- mcglm(linear_pred = c(counts ~ outcome + treatment),
                  matrix_pred = list("resp1" = Z0),
                  link = "log", variance = "tweedie", data = d.AD,
                  control_algorithm = list(verbose = FALSE,
                                           method = "chaser",
                                           tuning = 0.8))## Automatic initial values selected.Comparing regression parameter estimates and standard errors.
cbind("GLM" = coef(fit.glm),
      "McGLM" = coef(fit.qglm, type = "beta")$Estimates)##                       GLM         McGLM
## (Intercept)  3.044522e+00  3.044522e+00
## outcome2    -4.542553e-01 -4.542553e-01
## outcome3    -2.929871e-01 -2.929871e-01
## treatment2   1.337909e-15 -1.751522e-16
## treatment3   1.421085e-15 -6.040927e-17cbind("GLM" = sqrt(diag(vcov(fit.glm))), 
      "McGLM" = coef(fit.qglm, type = "beta", std.error = TRUE)$Std.error)##                   GLM     McGLM
## (Intercept) 0.1943517 0.1943517
## outcome2    0.2299154 0.2299154
## outcome3    0.2191931 0.2191931
## treatment2  0.2274467 0.2274467
## treatment3  0.2274467 0.2274467Consider the example from Venables & Ripley (2002, p.189). The
response variable is continuous for which we can assume the Gaussian
distribution. In this example, we exemplify the use of the
offset argument.
# Loading the data set
utils::data(anorexia, package = "MASS")
# GLM fit
anorex.1 <- glm(Postwt ~ Prewt + Treat + offset(Prewt),
               family = gaussian, data = anorexia)
# McGLM fit
Z0 <- mc_id(anorexia)
fit.anorexia <- mcglm(linear_pred = c(Postwt ~ Prewt + Treat),
                      matrix_pred = list(Z0),
                      offset = list(anorexia$Prewt),
                      power_fixed = TRUE, data = anorexia,
                      control_algorithm = list("correct" = TRUE))## Automatic initial values selected.Comparing parameter estimates and standard errors.
# Estimates
cbind("McGLM" = round(coef(fit.anorexia, type = "beta")$Estimates,5),
      "GLM" = round(coef(anorex.1),5))##                McGLM      GLM
## (Intercept) 49.77111 49.77111
## Prewt       -0.56554 -0.56554
## TreatCont   -4.09707 -4.09707
## TreatFT      4.56306  4.56306# Standard errors
cbind("McGLM" = sqrt(diag(vcov(fit.anorexia))),
      "GLM" = c(sqrt(diag(vcov(anorex.1))),NA))##             McGLM        GLM
## beta10 13.3909581 13.3909581
## beta11  0.1611824  0.1611824
## beta12  1.8934926  1.8934926
## beta13  2.1333359  2.1333359
## tau11   6.1832255         NAConsider the following data set from McCullagh & Nelder (1989, pp.300-2). It is an example of Gamma regression model.
clotting <- data.frame(
  u = c(5,10,15,20,30,40,60,80,100),
  lot1 = c(118,58,42,35,27,25,21,19,18),
  lot2 = c(69,35,26,21,18,16,13,12,12))Analysis using generalized linear models glm function in
R.
fit.lot1 <- glm(lot1 ~ log(u), data = clotting,
                family = Gamma(link = "inverse"))
fit.lot2 <- glm(lot2 ~ log(u), data = clotting,
                family = Gamma(link = "inverse"))The code below exemplify how to use the control_initial
argument for fixing the power parameter at \(p
= 2\).
list_initial = list()
list_initial$regression <- list(coef(fit.lot1))
list_initial$power <- list(c(2))
list_initial$tau <- list(summary(fit.lot1)$dispersion)
list_initial$rho = 0The control_initial argument should be a named list with
initial values for all parameters involved in the model. Note that, in
this example we used the parameter estimates from the glm
fit as initial values for the regression and dispersion parameters. The
power parameter was fixed at \(p = 2\),
since we want to fit Gamma regression models. In that case, we have only
one response variable, but the initial value for correlation parameter
\(\rho\) should be specified.
Z0 <- mc_id(clotting)
fit.lot1.mcglm <- mcglm(linear_pred = c(lot1 ~ log(u)),
                        matrix_pred = list(Z0),
                        link = "inverse", variance = "tweedie",
                        data = clotting,
                        control_initial = list_initial)Comparing parameter estimates and standard errors.
# Estimates
cbind("mcglm" = round(coef(fit.lot1.mcglm, type = "beta")$Estimates,5),
      "glm" = round(coef(fit.lot1),5))##                mcglm      glm
## (Intercept) -0.01655 -0.01655
## log(u)       0.01534  0.01534# Standard errors
cbind("mcglm" = sqrt(diag(vcov(fit.lot1.mcglm))),
      "glm" = c(sqrt(diag(vcov(fit.lot1))),NA))##               mcglm          glm
## beta10 0.0009275501 0.0009275466
## beta11 0.0004149601 0.0004149596
## tau11  0.0005340030           NAInitial values for the response variable lot2
list_initial$regression <- list("resp1" = coef(fit.lot2))
list_initial$tau <- list("resp1" = c(var(1/clotting$lot2)))Note that, since the list_initial object already have
all components required, we just modify the entries
regression and tau.
fit.lot2.mcglm <- mcglm(linear_pred = c(lot2 ~ log(u)),
                        matrix_pred = list(Z0),
                        link = "inverse", variance = "tweedie",
                        data = clotting,
                        control_initial = list_initial)Comparing parameter estimates and standard errors.
# Estimates
cbind("mcglm" = round(coef(fit.lot2.mcglm, type = "beta")$Estimates,5),
      "glm" = round(coef(fit.lot2),5))##                mcglm      glm
## (Intercept) -0.02391 -0.02391
## log(u)       0.02360  0.02360# Standard errors
cbind("mcglm" = sqrt(diag(vcov(fit.lot2.mcglm))),
      "glm" = c(sqrt(diag(vcov(fit.lot2))),NA))##               mcglm          glm
## beta10 0.0013215825 0.0013264571
## beta11 0.0005746644 0.0005767841
## tau11  0.0002649003           NAThe main contribution of the mcglmpackage is that it
easily fits multivariate regression models. For example, for the
clotting data a bivariate Gamma model is a suitable
choice.
# Initial values
list_initial = list()
list_initial$regression <- list(coef(fit.lot1), coef(fit.lot2))
list_initial$power <- list(c(2),c(2))
list_initial$tau <- list(c(0.00149), c(0.001276))
list_initial$rho = 0.80
# Matrix linear predictor
Z0 <- mc_id(clotting)
# Fit bivariate Gamma model
fit.joint.mcglm <- mcglm(linear_pred = c(lot1 ~ log(u), lot2 ~ log(u)),
                         matrix_pred = list(Z0, Z0),
                         link = c("inverse", "inverse"),
                         variance = c("tweedie", "tweedie"),
                         data = clotting,
                         control_initial = list_initial,
                         control_algorithm = list("correct" = TRUE,
                                                 "method" = "chaser",
                                                 "tuning" = 0.1,
                                                 "max_iter" = 1000))
summary(fit.joint.mcglm)## Call: lot1 ~ log(u)
## 
## Link function: inverse
## Variance function: tweedie
## Covariance function: identity
## Regression:
##               Estimates    Std.error   Z value      Pr(>|z|)
## (Intercept) -0.01655946 0.0009226782 -17.94716  5.050529e-72
## log(u)       0.01534500 0.0004128619  37.16739 2.296479e-302
## 
## Dispersion:
##     Estimates    Std.error  Z value     Pr(>|z|)
## 1 0.002422623 0.0005447457 4.447255 8.697457e-06
## 
## Call: lot2 ~ log(u)
## 
## Link function: inverse
## Variance function: tweedie
## Covariance function: identity
## Regression:
##               Estimates    Std.error   Z value     Pr(>|z|)
## (Intercept) -0.02385457 0.0013224002 -18.03884 9.654067e-73
## log(u)       0.02357948 0.0005747444  41.02603 0.000000e+00
## 
## Dispersion:
##     Estimates    Std.error  Z value     Pr(>|z|)
## 1 0.001800727 0.0002773475 6.492676 8.432517e-11
## 
## Correlation matrix:
##   Parameters Estimates Std.error Z value Pr(>|z|)
## 1      rho12 0.7262051       NaN     NaN      NaN
## 
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 47We also can easily change the link function. The code below fit a
bivariate Gamma model using the log link function.
# Initial values
list_initial = list()
list_initial$regression <- list(c(log(mean(clotting$lot1)),0),
                                c(log(mean(clotting$lot2)),0))
list_initial$power <- list(c(2), c(2))
list_initial$tau <- list(c(0.023), c(0.024))
list_initial$rho = 0
# Fit bivariate Gamma model
fit.joint.log <- mcglm(linear_pred = c(lot1 ~ log(u), lot2 ~ log(u)),
                       matrix_pred = list(Z0,Z0),
                       link = c("log", "log"),
                       variance = c("tweedie", "tweedie"),
                       data = clotting,
                       control_initial = list_initial)
summary(fit.joint.log)## Call: lot1 ~ log(u)
## 
## Link function: log
## Variance function: tweedie
## Covariance function: identity
## Regression:
##              Estimates  Std.error   Z value      Pr(>|z|)
## (Intercept)  5.5032302 0.19030106  28.91855 6.979582e-184
## log(u)      -0.6019177 0.05530784 -10.88304  1.388474e-27
## 
## Dispersion:
##    Estimates   Std.error  Z value     Pr(>|z|)
## 1 0.02435442 0.004609548 5.283472 1.267584e-07
## 
## Call: lot2 ~ log(u)
## 
## Link function: log
## Variance function: tweedie
## Covariance function: identity
## Regression:
##              Estimates  Std.error   Z value      Pr(>|z|)
## (Intercept)  4.9187575 0.18554086  26.51037 7.359390e-155
## log(u)      -0.5674356 0.05392437 -10.52280  6.782394e-26
## 
## Dispersion:
##    Estimates  Std.error  Z value     Pr(>|z|)
## 1 0.02315125 0.00491656 4.708832 2.491407e-06
## 
## Correlation matrix:
##   Parameters Estimates Std.error Z value Pr(>|z|)
## 1      rho12 0.9807089       NaN     NaN      NaN
## 
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 11Consider the example menarche from the MASS
R package.
require(MASS)
data(menarche)
data <- data.frame("resp" = menarche$Menarche/menarche$Total,
                   "Ntrial" = menarche$Total,
                   "Age" = menarche$Age)Logistic regression model.
glm.out = glm(cbind(Menarche, Total-Menarche) ~ Age,
              family=binomial(logit), data=menarche)The same fitted by mcglm function.
# Matrix linear predictor
Z0 <- mc_id(data)
fit.logit <- mcglm(linear_pred = c(resp ~ Age),
                   matrix_pred = list(Z0),
                   link = "logit", variance = "binomialP",
                   Ntrial = list(data$Ntrial), data = data)## Automatic initial values selected.Comparing parameter estimates and standard errors.
# Estimates
cbind("GLM" = coef(glm.out),
      "McGLM" = coef(fit.logit, type = "beta")$Estimates)##                    GLM      McGLM
## (Intercept) -21.226395 -21.226395
## Age           1.631968   1.631968# Standard error
cbind("GLM" = c(sqrt(diag(vcov(glm.out))),NA),
      "McGLM" =  sqrt(diag(vcov(fit.logit))))##                    GLM      McGLM
## (Intercept) 0.77068466 0.75151473
## Age         0.05895308 0.05748669
##                     NA 0.09310035We can estimate a more flexible model using the extended binomial variance function.
fit.logit.power <- mcglm(linear_pred = c(resp ~ Age),
                         matrix_pred = list(Z0),
                         link = "logit", variance = "binomialP",
                         Ntrial = list(data$Ntrial),
                         power_fixed = FALSE, data = data)## Automatic initial values selected.summary(fit.logit.power)## Call: resp ~ Age
## 
## Link function: logit
## Variance function: binomialP
## Covariance function: identity
## Regression:
##              Estimates  Std.error   Z value      Pr(>|z|)
## (Intercept) -21.378210 0.75681833 -28.24748 1.528506e-175
## Age           1.643005 0.05784811  28.40205 1.907701e-177
## 
## Power:
##   Estimates Std.error  Z value     Pr(>|z|)
## 1  1.071814 0.0599217 17.88691 1.491508e-71
## 
## Dispersion:
##   Estimates Std.error  Z value     Pr(>|z|)
## 1  1.199726 0.2431969 4.933146 8.091577e-07
## 
## Algorithm: chaser
## Correction: TRUE
## Number iterations: 9