---
title: "MLM Chapter 5"
author: "Doug Luke"
date: "12/29/2019"
output: 
  html_document:
    toc: true
    toc_float: true
    number_sections: false
    theme: lumen
    highlight: textmate
---

# Extending the Basic Model
{#chap:extensions} 

```{r setupext, message=FALSE}
library(LukeMLM)
library(lme4)
library(xtable)
library(ggplot2)
```

## The Flexibility of the Mixed-Effects Model
{#sec:flexibility}

## Generalized Models
{#sec:generalized}

### Binary outcomes
{#subsec:binary}


```{r plotlogittransform}

p <- seq(.001,.999,.001)
lp <- qlogis(p)

df <- data.frame(
p = p,
lp = lp
)

ggplot(data=df,aes(x=p, y=lp)) +
  geom_line() +
#  scale_linetype_discrete(name="Acres") +
  xlab(expression(italic("p"))) +
  ylab(expression(paste("logit(",italic("p"),")"))) +
  theme_bw() 
  
```


```{r votebinary, echo=FALSE,results=FALSE}
data(tobvote)
# levels(tobvote$votedpro)
# table(tobvote$votedpro)
# tobvote$votedpro_bin <- as.numeric(tobvote$votedpro)-1
gmod1 <- glmer(votedpro ~ party + pactotal + (1|cmid), data=tobvote, family=binomial)
summary(gmod1)
```


```{r plotbinary1}

# Fixes for later: put into ggplot and don't use hand-coded values
x <- seq(0,113,.5)

ydem <- plogis(-1.86 + x*.035)
yrep <- plogis(-1.86 + 2.72 + x*.035)

plot(x,ydem,type="l",col="black",lty=1,
	xlim=c(0,125),ylim=c(0,1.1),
	ylab="Predicted protobacco voting probabilities",xlab="PAC contributions ($K)")
points(x,yrep,type="l",lty=2,col="gray20")
legend(c(70,108),c(.18,.4), c("Republican","Democrat"),
	col=c("gray20","black"),lty=c(2,1),cex=1.2,box.lty=0)
```

### Count outcomes
{#subsec:counts}


```{r alcsetup}

# Fixes for later: larger sample size, make prettier, use consistent ggplot formatting, select true random subset
data(nlsy)

nwrk <- nlsy[nlsy$alcday>=0 & nlsy$smkday>=0,]
nwrk <- nwrk[1:5000,]
```


```{r alcdist}
ggplot(nwrk, aes(x=alcday)) + 
  scale_x_continuous(name = "Number of alcohol days") +
  scale_y_continuous(name = "Density") +
  geom_density(alpha=0.3,color="gray20",fill="gray80") +
  theme_minimal()

```


```{r countsetup}
library(lme4)
compare <- mean(subset(nwrk, sex97 == "Female" & nonwhite == "Non-white")$alcday)
mod1 <- lmer(alcday ~  sex97 + nonwhite + (1|pubid), data=nwrk)
summary(mod1)

gmod1 <- glmer(alcday ~  sex97 + nonwhite + (1|pubid), data=nwrk, family=poisson)
summary(gmod1)

```

Note: The models in the following code chunk will take some time to run.

```{r countmodels,warning=FALSE,message=FALSE}
# NOte for later: to install glmmADMB I had to choose option 4 on this page:
# https://r-forge.r-project.org/scm/viewvc.php/*checkout*/www/index.html?revision=197&root=glmmadmb
# install.packages("glmmADMB", repos="http://www.math.mcmaster.ca/bolker/R",type="source")
library(glmmADMB)

nwrk$pubid2 <- factor(nwrk$pubid)
cmod_poiss <- glmmadmb(alcday ~  smkday + sex97 + nonwhite + (1|pubid2), data=nwrk, 
               zeroInflation=FALSE,family="poisson")
summary(cmod_poiss)

cmod_zip <- glmmadmb(alcday ~  smkday + sex97 + nonwhite + (1|pubid2), data=nwrk, 
               zeroInflation=TRUE,family="poisson")
summary(cmod_zip)

cmod_zipnbinom <- glmmadmb(alcday ~  smkday + sex97 + nonwhite + (1|pubid2), data=nwrk, 
               zeroInflation=TRUE,family="nbinom")
summary(cmod_zipnbinom)

AIC(cmod_poiss,cmod_zip,cmod_zipnbinom)


```

```{r overdispfun,warning=FALSE,message=FALSE}

overdisp_fun <- function(model) {
  ## number of variance parameters in 
  ##   an n-by-n variance-covariance matrix
  vpars <- function(m) {
    nrow(m)*(nrow(m)+1)/2
  }
  model.df <- sum(sapply(VarCorr(model),vpars))+length(fixef(model))
  rdf <- nrow(model.frame(model))-model.df
  rp <- residuals(model,type="pearson")
  Pearson.chisq <- sum(rp^2)
  prat <- Pearson.chisq/rdf
  pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
  c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}
```


```{r overdispnlsy,warning=FALSE,message=FALSE}
od1 <- overdisp_fun(cmod_poiss)
od2 <- overdisp_fun(cmod_zip)
od3 <- overdisp_fun(cmod_zipnbinom)
od1
od2
od3
```


## Three-level Models
{#sec:3level}

```{r vote3level}
data(tobvote)
# levels(tobvote$votedpro)
# table(tobvote$votedpro)
# tobvote$votedpro_bin <- as.numeric(tobvote$votedpro)-1
gmod2 <- glmer(votedpro ~ party + pactotal + acres + (1|cmid) + (party|state), data=tobvote, family=binomial)
summary(gmod2)
```


```{r plot3lvl1}

# Fixes for later: put into ggplot and don't use hand-coded values
x <- seq(0,113,.5)

ydem_a0 <- plogis(-1.79 + x*.025)
yrep_a0 <- plogis(-1.79 + 2.63 + x*.025)

ydem_a10 <- plogis(-1.79 + x*.025 + 10*.006)
yrep_a10 <- plogis(-1.79 + 2.63 + x*.025 + 10*.006)

ydem_a100 <- plogis(-1.79 + x*.025 + 100*.006)
yrep_a100 <- plogis(-1.79 + 2.63 + x*.025 + 100*.006)

plot(x,ydem_a0,type="l",col="black",lty=1,lwd=2,
	xlim=c(0,125),ylim=c(0,1.1),
	ylab="Predicted protobacco voting probabilities",xlab="PAC contributions ($K)")

points(x,ydem_a10,type="l",lty=2,col="black",lwd=2)
points(x,ydem_a100,type="l",lty=3,col="black",lwd=2)

points(x,yrep_a0,type="l",lty=1,col="gray40",lwd=1.5)
points(x,yrep_a10,type="l",lty=2,col="gray40",lwd=1.5)
points(x,yrep_a100,type="l",lty=3,col="gray40",lwd=1.5)

legend(c(58,105),c(-.02,.40), 
       c("Republican - 100K Acres",
         "Republican - 10K Acres",
         "Republican - 0K Acres",
         "Democrat - 100K Acres",
         "Democrat - 10K Acres",
         "Democrat - 0K Acres"),
	col=c("gray40","gray40","gray40","black","black","black"),
	lty=c(3,2,1),lwd=c(1.5,1.5,1.5,2,2,2),cex=1.1,
	box.lty=0)
```

## Cross-classified Models
{#sec:crossclass}


```{r ctabmodelswarning=FALSE,message=FALSE}
library(lmerTest)
data(pupcross)
m1<-lmer(achiev ~ 1 + (1|sschool) + (1|pschool), 
         data=pupcross, REML=FALSE)
summary(m1)

m2 <-lmer(achiev ~ pupsex  + pupses + 
  (1|sschool) + (1|pschool), data=pupcross, REML=FALSE)
summary(m2)

m3 <-lmer(achiev ~ pupsex  + pupses + pdenom + sdenom +
  (1|sschool) + (1|pschool), data=pupcross, REML=FALSE)
summary(m3)

m4 <-lmer(achiev ~ pupsex  + pupses + pdenom + sdenom +
  (pupses|sschool) + (pupses|pschool), data=pupcross, REML=FALSE)
summary(m4)

anova(m1,m2,m3,m4)
```