This notebook contains R code supporting the lecture on count (Poisson) regression from STA303, Summer 2018. The Galapagos Islands Species data are analyzed with a discussion on model assumptions, transformations, overdispersion and offsets. A dose-response model is fit to the salmonella dataset, with a discussion of assumptions, predictor transformations, and overdispersion. Both analyses follow but expand upon those from the Count Regression chapter in Faraway: Extending the Linear Model with R
library(tidyverse)
library(faraway)
library(MASS)
# Load the Galapagos Islands data
data(gala)
gala_tbl <- gala %>%
as_data_frame() %>%
dplyr::select(-Endemics) # Alternative response variable; remove from analysis
glimpse(gala_tbl)
## Observations: 30
## Variables: 6
## $ Species <dbl> 58, 31, 3, 25, 2, 18, 24, 10, 8, 2, 97, 93, 58, 5, 4...
## $ Area <dbl> 25.09, 1.24, 0.21, 0.10, 0.05, 0.34, 0.08, 2.33, 0.0...
## $ Elevation <dbl> 346, 109, 114, 46, 77, 119, 93, 168, 71, 112, 198, 1...
## $ Nearest <dbl> 0.6, 0.6, 2.8, 1.9, 1.9, 8.0, 6.0, 34.1, 0.4, 2.6, 1...
## $ Scruz <dbl> 0.6, 26.3, 58.7, 47.4, 1.9, 8.0, 12.0, 290.2, 0.4, 5...
## $ Adjacent <dbl> 1.84, 572.33, 0.78, 0.18, 903.82, 1.84, 0.34, 2.85, ...
# Want to model species as a function of the other variables
# Look at its distribution
gala_tbl %>%
ggplot(aes(x = Species)) +
theme_classic() +
geom_histogram(bins = 10,colour = "black",fill = "orange") +
labs(title = "Histogram of Species",
subtitle = "Galapagos Islands plant species",
x = "Species",
y = "# Islands")
# Definitely not normal
# Some high counts, but a lot of low ones
# What happens if we do a normal linear regression?
gala_lm1 <- lm(Species ~ .,data=gala_tbl)
summary(gala_lm1)
##
## Call:
## lm(formula = Species ~ ., data = gala_tbl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -111.679 -34.898 -7.862 33.460 182.584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.068221 19.154198 0.369 0.715351
## Area -0.023938 0.022422 -1.068 0.296318
## Elevation 0.319465 0.053663 5.953 3.82e-06 ***
## Nearest 0.009144 1.054136 0.009 0.993151
## Scruz -0.240524 0.215402 -1.117 0.275208
## Adjacent -0.074805 0.017700 -4.226 0.000297 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 60.98 on 24 degrees of freedom
## Multiple R-squared: 0.7658, Adjusted R-squared: 0.7171
## F-statistic: 15.7 on 5 and 24 DF, p-value: 6.838e-07
# The fit isn't even that bad
# But the diagnostics...
data_frame(x = residuals(gala_lm1)) %>%
mutate_at("x",funs( (. - mean(.)) / sd(.))) %>%
arrange(x) %>%
mutate(q = qnorm(seq(1,length(residuals(gala_lm1))) / (1+length(residuals(gala_lm1))))) %>%
ggplot(aes(x = q,y = x)) +
theme_classic() +
geom_point() +
geom_abline(slope = 1,intercept = 0) +
labs(title = "Normal QQ-Plot of Residuals",
subtitle = "Normal linear model for Galapagos Islands plant species data",
x = "Theoretical Quantiles",
y = "Sample Quantiles")
# Not actually that bad. A lot of the counts are pretty large
data_frame(resid = residuals(gala_lm1),
fitted = fitted(gala_lm1)) %>%
mutate_at("resid",funs( (. - mean(.)) / sd(.))) %>%
ggplot(aes(x = fitted,y = resid)) +
theme_classic() +
geom_point() +
#geom_smooth() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = -2,linetype="dashed",colour="red") +
geom_hline(yintercept = 2,linetype="dashed",colour="red") +
labs(title = "Residuals vs Fitted Values",
subtitle = "Normal linear model for Galapagos Islands plant species data",
x = "Fitted Values",
y = "Residuals")
# Do you see the problem? This is classic
# Transformation? We have the variance stabilizing transformation. Let's try the boxcox one too and compare them
#
gala_boxcox <- boxcox(gala_lm1)
# 0.5 is within the range of "good" values. So, let's try a square root transformation
# Boxcox actually should be (y^lambda - 1)/lambda
# Try "variance-stabilizing transformation":
gala_lm2 <- lm(sqrt(Species) ~ .,data=gala_tbl)
summary(gala_lm2)
##
## Call:
## lm(formula = sqrt(Species) ~ ., data = gala_tbl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5572 -1.4969 -0.3031 1.3527 5.2110
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.3919243 0.8712678 3.893 0.000690 ***
## Area -0.0019718 0.0010199 -1.933 0.065080 .
## Elevation 0.0164784 0.0024410 6.751 5.55e-07 ***
## Nearest 0.0249326 0.0479495 0.520 0.607844
## Scruz -0.0134826 0.0097980 -1.376 0.181509
## Adjacent -0.0033669 0.0008051 -4.182 0.000333 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.774 on 24 degrees of freedom
## Multiple R-squared: 0.7827, Adjusted R-squared: 0.7374
## F-statistic: 17.29 on 5 and 24 DF, p-value: 2.874e-07
# Fit is reasonable. Diagnostics?
data_frame(x = residuals(gala_lm2)) %>%
mutate_at("x",funs( (. - mean(.)) / sd(.))) %>%
arrange(x) %>%
mutate(q = qnorm(seq(1,length(residuals(gala_lm2))) / (1+length(residuals(gala_lm2))))) %>%
ggplot(aes(x = q,y = x)) +
theme_classic() +
geom_point() +
geom_abline(slope = 1,intercept = 0) +
labs(title = "Normal QQ-Plot of Residuals",
subtitle = "Normal linear model for Galapagos Islands plant species data with square-root transformation",
x = "Theoretical Quantiles",
y = "Sample Quantiles")
data_frame(resid = residuals(gala_lm2),
fitted = fitted(gala_lm2)) %>%
mutate_at("resid",funs( (. - mean(.)) / sd(.))) %>%
ggplot(aes(x = fitted,y = resid)) +
theme_classic() +
geom_point() +
geom_hline(yintercept = 0) +
geom_hline(yintercept = -2,linetype="dashed",colour="red") +
geom_hline(yintercept = 2,linetype="dashed",colour="red") +
labs(title = "Residuals vs Fitted Values",
subtitle = "Normal linear model for Galapagos Islands plant species data with square-root transformation",
x = "Fitted Values",
y = "Residuals")
# Much better.
#
# Before we do any modelling, really, we should at least take a look at the relationship
# between the response and each predictor
plot_gala <- function(x) {
gala_tbl %>%
mutate(Species = log(Species)) %>%
ggplot(aes_string(x = x,y = "Species")) +
theme_classic() +
geom_point() +
labs(subtitle = "Galapagos Island Species Data",
title = stringr::str_c("Species vs ",x),
x = x,
y = "log(Species)") +
scale_x_continuous(labels = scales::comma_format())
}
cowplot::plot_grid(
plot_gala("Area"),
plot_gala("Elevation"),
plot_gala("Nearest"),
plot_gala("Scruz"),
plot_gala("Adjacent"),
ncol = 3
)
# There are several variables that clearly have nonlinear relationships with log(Species)
# Let's investigate the Poisson GLM
### Poisson GLM ###
gala_glm1 <- glm(Species ~ .,data=gala_tbl,family=poisson)
summary(gala_glm1)
##
## Call:
## glm(formula = Species ~ ., family = poisson, data = gala_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.2752 -4.4966 -0.9443 1.9168 10.1849
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.155e+00 5.175e-02 60.963 < 2e-16 ***
## Area -5.799e-04 2.627e-05 -22.074 < 2e-16 ***
## Elevation 3.541e-03 8.741e-05 40.507 < 2e-16 ***
## Nearest 8.826e-03 1.821e-03 4.846 1.26e-06 ***
## Scruz -5.709e-03 6.256e-04 -9.126 < 2e-16 ***
## Adjacent -6.630e-04 2.933e-05 -22.608 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 716.85 on 24 degrees of freedom
## AIC: 889.68
##
## Number of Fisher Scoring iterations: 5
# Coefficients are all small, but remember their effect is multiplicative now, not additive
# That residual deviance is a bit ridiculous. Is the fit really that bad?
# It could be caused by an outlier. But note that the null deviance is also huge.
# What proportion of the null deviance is explained by the model?
(gala_glm1$null.deviance - deviance(gala_glm1)) / gala_glm1$null.deviance
## [1] 0.7958128
# So the model does explain much of the deviance
#
# While in the normal linear model we assumed constant variance,
# here was are assuming that Var(Y) = E(Y)
# This is a model assumption, and needs to be checked
# Note that the graph in Faraway is labelled incorrectly- it's a log-log plot
data_frame(muhat = predict(gala_glm1,type="response"),
varhat = (gala_tbl$Species - muhat)^2) %>%
mutate_all(log) %>%
ggplot(aes(x = muhat,y = varhat)) +
theme_classic() +
geom_point() +
geom_abline(slope = 1,intercept = 0,colour = "red") +
labs(title = "Mean-Variance Relationship",
subtitle = "Poisson GLM, Galapagos Islands species data",
x = "Predicted Mean",
y = "Estimated Variance at Predicted Mean")
# It looks like we have overdispersion
#
# Estimate dispersion
n <- nrow(gala_tbl)
p <- length(coef(gala_glm1))
phi <- data_frame(muhat = predict(gala_glm1,type="response"),
varhat = (gala_tbl$Species - muhat)^2) %>%
summarize(phi = sum(varhat / muhat) / (n - p)) %>%
pull(phi)
phi
## [1] 31.74914
# Yes, lots of overdispersion
# Compute the new standard errors
summary(gala_glm1,dispersion = phi)
##
## Call:
## glm(formula = Species ~ ., family = poisson, data = gala_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.2752 -4.4966 -0.9443 1.9168 10.1849
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.1548079 0.2915897 10.819 < 2e-16 ***
## Area -0.0005799 0.0001480 -3.918 8.95e-05 ***
## Elevation 0.0035406 0.0004925 7.189 6.53e-13 ***
## Nearest 0.0088256 0.0102621 0.860 0.390
## Scruz -0.0057094 0.0035251 -1.620 0.105
## Adjacent -0.0006630 0.0001653 -4.012 6.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 31.74914)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 716.85 on 24 degrees of freedom
## AIC: 889.68
##
## Number of Fisher Scoring iterations: 5
summary(gala_glm1)
##
## Call:
## glm(formula = Species ~ ., family = poisson, data = gala_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.2752 -4.4966 -0.9443 1.9168 10.1849
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.155e+00 5.175e-02 60.963 < 2e-16 ***
## Area -5.799e-04 2.627e-05 -22.074 < 2e-16 ***
## Elevation 3.541e-03 8.741e-05 40.507 < 2e-16 ***
## Nearest 8.826e-03 1.821e-03 4.846 1.26e-06 ***
## Scruz -5.709e-03 6.256e-04 -9.126 < 2e-16 ***
## Adjacent -6.630e-04 2.933e-05 -22.608 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 716.85 on 24 degrees of freedom
## AIC: 889.68
##
## Number of Fisher Scoring iterations: 5
sqrt(diag(vcov(gala_glm1)))
## (Intercept) Area Elevation Nearest Scruz
## 5.174952e-02 2.627298e-05 8.740704e-05 1.821260e-03 6.256200e-04
## Adjacent
## 2.932754e-05
sqrt(diag(vcov(gala_glm1)) * phi)
## (Intercept) Area Elevation Nearest Scruz
## 0.2915897482 0.0001480387 0.0004925069 0.0102621392 0.0035251417
## Adjacent
## 0.0001652500
# Offset. Area is potentially an offset
# First, try a model with log(Area)
#
gala_glm2 <- glm(Species ~ log(Area) + Elevation + Nearest + Scruz + Adjacent,
data = gala_tbl,
family = poisson)
summary(gala_glm2)
##
## Call:
## glm(formula = Species ~ log(Area) + Elevation + Nearest + Scruz +
## Adjacent, family = poisson, data = gala_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.5525 -2.9223 -0.8698 2.8767 7.6345
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.518e+00 5.105e-02 68.900 < 2e-16 ***
## log(Area) 3.994e-01 1.524e-02 26.214 < 2e-16 ***
## Elevation -3.804e-04 8.844e-05 -4.301 1.70e-05 ***
## Nearest -4.152e-03 1.729e-03 -2.402 0.0163 *
## Scruz -3.118e-03 6.050e-04 -5.154 2.55e-07 ***
## Adjacent -1.671e-04 2.964e-05 -5.636 1.74e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 3510.73 on 29 degrees of freedom
## Residual deviance: 411.46 on 24 degrees of freedom
## AIC: 584.29
##
## Number of Fisher Scoring iterations: 5
# Coefficient for log(Area) is about 0.4
# This is probably too small to consider to be "close to 1"
# Especially relative to its own standard error of 0.015
# Probably in practice, would NOT fix it to be 1
# But, let's see what happens when we do
gala_glm3 <- glm(Species ~ offset(log(Area)) + Elevation + Nearest + Scruz + Adjacent,
data = gala_tbl,
family = poisson)
summary(gala_glm3)
##
## Call:
## glm(formula = Species ~ offset(log(Area)) + Elevation + Nearest +
## Scruz + Adjacent, family = poisson, data = gala_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -14.2091 -0.6762 3.4247 6.9750 14.4016
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.155e+00 6.222e-02 34.642 < 2e-16 ***
## Elevation -2.966e-03 5.913e-05 -50.153 < 2e-16 ***
## Nearest -1.674e-02 1.553e-03 -10.780 < 2e-16 ***
## Scruz -1.078e-03 7.384e-04 -1.460 0.144
## Adjacent 1.568e-04 2.808e-05 5.582 2.37e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 5766.9 on 29 degrees of freedom
## Residual deviance: 1565.0 on 25 degrees of freedom
## AIC: 1735.9
##
## Number of Fisher Scoring iterations: 6
# Exercise: can you try transformations (e.g. log) on other predictors
# to get a better model?
# Or interactions?
# If you're super keen, try to get a significantly better model than me,
# and show me!
### Example ###
# Use the salmonella data, exercise 2 Faraway chapter 3 page 73
# Data represents number of colonies (a count) for levels of dose of quinoline
#
# Load the data and do some initial data analysis
data(salmonella)
salmonella_tbl <- as_data_frame(salmonella)
glimpse(salmonella_tbl)
## Observations: 18
## Variables: 2
## $ colonies <int> 15, 21, 29, 16, 18, 21, 16, 26, 33, 27, 41, 60, 33, 3...
## $ dose <int> 0, 0, 0, 10, 10, 10, 33, 33, 33, 100, 100, 100, 333, ...
# Plot it. Technically dose is continuous, but there are only 6 unique values,
# so it would be better to do boxplots. Except for the additional fact that there are only
# 3 datapoints per level.
#
# So let's just plot them.
salmonella_tbl %>%
ggplot(aes(x = dose,y = colonies)) +
theme_classic() +
geom_point() +
labs(title = "Salmonella Data",
subtitle = "# of colonies for each observed dose of quinoline",
x = "Dose",
y = "# colonies")
# That doesn't tell us much. Dose increases exponentially; let's log it
salmonella_tbl %>%
mutate_at("dose",funs(log(. + 1))) %>%
ggplot(aes(x = dose,y = colonies)) +
theme_classic() +
geom_point() +
labs(title = "Salmonella Data",
subtitle = "# of colonies for each observed dose of quinoline",
x = "log(Dose+1)",
y = "# colonies")
# Hmmm... maybe the boxplots were a better idea
# We could treat dose as categorical in the model
salmonella_tbl %>%
mutate_at("dose",as.factor) %>%
ggplot(aes(x = dose,y = colonies)) +
theme_classic() +
geom_boxplot() +
labs(title = "Salmonella Data",
subtitle = "Boxplot of # of colonies for each observed dose of quinoline",
x = "Dose",
y = "# colonies")
# That's better!
#
# Fit an initial glm
dose_glm1 <- glm(colonies ~ dose,data=salmonella_tbl,family = poisson)
summary(dose_glm1)
##
## Call:
## glm(formula = colonies ~ dose, family = poisson, data = salmonella_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6482 -1.8225 -0.2993 1.2917 5.1861
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.3219950 0.0540292 61.485 <2e-16 ***
## dose 0.0001901 0.0001172 1.622 0.105
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 78.358 on 17 degrees of freedom
## Residual deviance: 75.806 on 16 degrees of freedom
## AIC: 172.34
##
## Number of Fisher Scoring iterations: 4
# Not a good fit. Why? There could be reasons other than overdispersion
# Let's look at the fitted model
curve_data <- data_frame(x = seq(1,1000,by=0.1),
y = exp(coef(dose_glm1)[1] + x*coef(dose_glm1)[2]))
salmonella_tbl %>%
ggplot(aes(x = dose,y = colonies)) +
theme_classic() +
geom_point() +
geom_line(data = curve_data,aes(x = x,y = y)) +
labs(title = "Salmonella Data - Fitted Poisson GLM",
subtitle = "# of colonies for each observed dose of quinoline",
x = "Dose",
y = "# colonies")
# It's more likely here that we got the functional form of the model wrong
# We already said that dose was increasing exponentially, so let's log it in the model
dose_glm2 <- glm(colonies ~ log(dose+1),data=salmonella_tbl,family = poisson)
summary(dose_glm2)
##
## Call:
## glm(formula = colonies ~ log(dose + 1), family = poisson, data = salmonella_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0764 -1.4488 -0.2306 0.9259 4.7212
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.01989 0.09712 31.095 < 2e-16 ***
## log(dose + 1) 0.08585 0.02018 4.255 2.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 78.358 on 17 degrees of freedom
## Residual deviance: 59.629 on 16 degrees of freedom
## AIC: 156.17
##
## Number of Fisher Scoring iterations: 4
# Check out the residual deviance- it's not great, but it's better than it was
curve_data <- data_frame(x = seq(1,1000,by=0.1),
y = exp(coef(dose_glm2)[1] + log(x+1)*coef(dose_glm2)[2]))
salmonella_tbl %>%
ggplot(aes(x = dose,y = colonies)) +
theme_classic() +
geom_point() +
geom_line(data = curve_data,aes(x = x,y = y)) +
labs(title = "Salmonella Data - Fitted Poisson GLM",
subtitle = "# of colonies for each observed dose of quinoline",
x = "Dose",
y = "# colonies")
# Better. Can we do any better than this?
# From before, it looked like there might be some sort of a quadratic relationship
# between log dose and colonies
dose_glm3 <- glm(colonies ~ log(dose+1) + I(log(dose+1)^2),data=salmonella_tbl,family = poisson)
summary(dose_glm3)
##
## Call:
## glm(formula = colonies ~ log(dose + 1) + I(log(dose + 1)^2),
## family = poisson, data = salmonella_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7345 -1.2974 -0.3907 1.0539 4.4299
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.922028 0.128564 22.728 <2e-16 ***
## log(dose + 1) 0.169533 0.071061 2.386 0.017 *
## I(log(dose + 1)^2) -0.011367 0.009193 -1.237 0.216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 78.358 on 17 degrees of freedom
## Residual deviance: 58.082 on 15 degrees of freedom
## AIC: 156.62
##
## Number of Fisher Scoring iterations: 4
# Nah, the residual deviance didn't really change
#
# Now estimate the overdispersion from the better model
n <- nrow(salmonella_tbl)
p <- length(coef(dose_glm2))
phi <- data_frame(muhat = predict(dose_glm2,type="response"),
varhat = (salmonella_tbl$colonies - muhat)^2) %>%
summarize(phi = sum(varhat / muhat) / (n - p)) %>%
pull(phi)
phi
## [1] 3.95387
summary(dose_glm2,dispersion = phi)
##
## Call:
## glm(formula = colonies ~ log(dose + 1), family = poisson, data = salmonella_tbl)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.0764 -1.4488 -0.2306 0.9259 4.7212
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.01989 0.19311 15.64 <2e-16 ***
## log(dose + 1) 0.08585 0.04012 2.14 0.0324 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 3.95387)
##
## Null deviance: 78.358 on 17 degrees of freedom
## Residual deviance: 59.629 on 16 degrees of freedom
## AIC: 156.17
##
## Number of Fisher Scoring iterations: 4