Help for package GLMpack

Type:

Package

Title:

Data and Code to Accompany Generalized Linear Models, 2nd Edition

Version:

0.1.0

Description:

Contains all the data and functions used in Generalized Linear Models, 2nd edition, by Jeff Gill and Michelle Torres. Examples to create all models, tables, and plots are included for each data set.

License:

GPL (≥ 3)

Depends:

R (≥ 2.10)

Encoding:

UTF-8

LazyData:

true

RoxygenNote:

6.1.1

Imports:

MASS, pBrackets, nnet, effects, AER, pscl, foreign, Matrix, lme4, lmtest, sandwich, censReg, plm

NeedsCompilation:

Packaged:

2019-07-19 01:05:52 UTC; michelletorres

Author:

Jeff Gill [aut], Michelle Torres [aut, cre], Simon Heuberger [aut]

Maintainer:

Michelle Torres <smtorres@wustl.edu>

Repository:

CRAN

Date/Publication:

2019-07-19 09:40:05 UTC

Data on inflation increase in Africa

Description

Data for the Africa example used in chapter 7

Usage

data(africa)

Format

A data frame with 47 rows and 7 variables:

INFLATN: Inflation rates
DICTATOR: Number of years of personal dictatorship that occurred from independence to 1989
SIZE: Area at the end of the period, in thousand square kilometers
GROWTH: Average annual gross national product (GNP) rate of growth in percent from 1965 to 1989
CHURMED: Number of church-operated hospitals and medical clinics as of 1973
CONSTIT: the constitutional structure when not a dictatorship in ascending centrality (0 = monarchy, 1 = presidential, 2 = presidential/parliamentary mix, 3 = parliamentary)
REPRESS: Violence and threats of violence by the government against opposition political activity from 1990 to 1994

...

Examples

data(africa)
attach(africa)
library(lmtest)
library(plm)

## Table 7.4
y <- (INFLATN/100)[-16]
y[y > 1] <- 1
X <- cbind(DICTATOR,SIZE,GROWTH,CHURMED,CONSTIT,REPRESS)[-16,]
X[,4] <- log(X[,4]+.01)
africa.glm <- glm(y ~ X[,1:6], family=quasibinomial('logit'))
out.mat <- coeftest(africa.glm, vcov.=vcovHC(africa.glm, type="HC0"))
( out.mat <- round(cbind(out.mat[,1:2], out.mat[,1] - 1.96*out.mat[,2],
                         out.mat[,2] + 1.96*out.mat[,2]),4) )

Data on campaign contributions in California in the 2014 House elections

Description

Data for the campaign contributions example used in chapter 6

Usage

data(campaign)

Format

A data frame with 180 rows and 16 variables:

DTRCT: California district
CANDID: FEC ID
CYCLE: Election cycle
NAME: Name of the candidate
INCUMCHALL: Incumbency status
CFSCORE: CFscore
CANDGENDER: Gender of the candidate
PARTY: Party of the candidate
TOTCONTR: Contributions to the 2014 electoral campaigns in the 53 districts of California in the U.S. House of Representatives
TOTPOP: Total state population
FEMALE: Number of female citizens in the state
WHITE: Number of white citizens in the state
HISP: Number of Hispanic citizens in the state
FEMALEPCT: Percentage of female citizens in the state
WHITEPCT: Percentage of white citizens in the state
HISPPCT: Percentage of Hispanic citizens in the state

...

Examples

data(campaign)
attach(campaign)
library(pBrackets)

## Gamma model
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
             family=Gamma(link = 'log'), data=campaign)

## Linear model
cmpgn.out_lm <- lm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT, data=campaign)

## Table 6.8
round(glm.summary(cmpgn.out),4)
cmpgn.out$null.deviance
cmpgn.out$df.null
cmpgn.out$deviance
cmpgn.out$df.residual
logLik(cmpgn.out)
cmpgn.out$aic

## Table 6.9
summary(cmpgn.out_lm)
confint(cmpgn.out_lm)

## Figure 6.4
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
par(mar=c(4,3,3,0),oma=c(1,1,1,1))
hist(campaign$TOTCONTR,xlab="",ylab="", yaxt="n", xaxt="n",
     xlim=c(0,9000), ylim=c(0, 130), main="",
     col = "gray40")
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'Total campaign contributions (thousands of dollars)',
      ylab= "Frequency",
      line = 1.7, cex.lab=1)
title(line = 1, main="Distribution of campaign contributions", font.main=1)
par(opar)

## Figure 6.5
campaign.mu <- predict(cmpgn.out_lm)
campaign.y <- campaign$TOTCONTR
par(mfrow=c(1,3), mar=c(3,3,2,1),oma=c(1,1,1,1))
plot(campaign.mu,campaign.y,xlab="",ylab="", yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Observed values",
      line = 1.7, cex.lab=1.3)
title(main="Model Fit Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(lm(campaign.y~campaign.mu)$coefficients, lwd=2)
plot(fitted(cmpgn.out_lm),resid(cmpgn.out_lm,type="pearson"),xlab="",ylab="",
     yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Pearson Residuals",
      line = 1.7, cex.lab=1.3)
title(main="Residual Dependence Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(0,0, lwd=2)
plot(cmpgn.out_lm,which=2, pch="+",
     sub.caption = "", caption = "", mgp=c(1.5, 0.3, 0),
     tck=0.02, cex.axis=0.9, cex.lab=1.3, lty=1)
title(main="Normal-Quantile Plot",
      line = 1, cex.main=1.7, font.main=1)
par(opar)

## Figure 6.6
mygray = rgb(153, 153, 153, alpha = 200, maxColorValue = 255)
newdat_gender <- data.frame(CANDGENDER = c('F','M'), PARTY= rep('Democrat',2),
                            INCUMCHALL=rep("C", 2), HISPPCT=rep(mean(campaign$HISPPCT),2))
newdat_party <- data.frame(CANDGENDER = rep('M', 3), PARTY= c('Democrat','Republican',
                           'Independent'), INCUMCHALL=rep("C", 3),
                           HISPPCT=rep(mean(campaign$HISPPCT),3))
newdat_incumchall <- data.frame(CANDGENDER = rep('M', 3), PARTY= rep('Democrat',3),
                                INCUMCHALL=c('C', 'I', 'O'),
                                HISPPCT=rep(mean(campaign$HISPPCT),3))
newdat_hisiq <- data.frame(CANDGENDER = rep('M', 2), PARTY= rep('Democrat',2),
                           INCUMCHALL=rep("C", 2),
                           HISPPCT=as.numeric(summary(campaign$HISPPCT)[c(2,5)]))
newdat_hispf <- data.frame(CANDGENDER = rep('M', 200), PARTY= rep('Democrat',200),
                           INCUMCHALL=rep("C", 200), HISPPCT=seq(.1, .9, length.out = 200))
preds_gender <- predict(cmpgn.out, newdata = newdat_gender, se.fit = TRUE)
preds_party <- predict(cmpgn.out, newdata = newdat_party, se.fit = TRUE)
preds_incumchall <- predict(cmpgn.out, newdata = newdat_incumchall, se.fit = TRUE)
preds_hispiq <- predict(cmpgn.out, newdata = newdat_hisiq, se.fit = TRUE)
preds_hispf <- predict(cmpgn.out, newdata = newdat_hispf, se.fit = TRUE)
cis_gender <- round(glm.cis(preds_gender$fit, preds_gender$se.fit, 0.95,cmpgn.out$df.residual),4)
cis_party <- round(glm.cis(preds_party$fit, preds_party$se.fit, 0.95,cmpgn.out$df.residual),4)
cis_incumchall <- round(glm.cis(preds_incumchall$fit, preds_incumchall$se.fit, 0.95,
                                cmpgn.out$df.residual),4)
cis_hispiq <- round(glm.cis(preds_hispiq$fit, preds_hispiq$se.fit, 0.95,cmpgn.out$df.residual),4)
cis_hispf <- round(glm.cis(preds_hispf$fit, preds_hispf$se.fit, 0.95,cmpgn.out$df.residual),4)
iqrange = cbind(summary(campaign$HISPPCT)[c(2,5)],cis_hispiq[2,4] - cis_hispf[1,4])
par(mfrow=c(2,2), mar=c(4,3,3,0),oma=c(1,1,1,1))
plot(1:2, cis_gender[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0,3), ylim=c(100, 700))
segments(1:2, cis_gender[,5], 1:2, cis_gender[,6], lwd=2, col="gray60")
points(1:2, cis_gender[,4], pch=16, cex=2.5)
text(1:2, cis_gender[,4], labels = c("F", "M"), col="white", cex=0.9)
segments(1.05, cis_gender[1,4], 1.95, cis_gender[2,4], lty=2)
brackets(1, cis_gender[1,4]+20, 2, cis_gender[1,4]+20, h = 45,  ticks = 0.5, lwd=2)
text(1.5, cis_gender[1,4]+100, bquote(hat(y)['F']-hat(y)['M'] ~ '='
     ~ .(cis_gender[2,4]-cis_gender[1,4])), cex=0.9)
axis(1, at=1:2, labels = c("Female", "Male"), tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0),
     lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Gender of candidate',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Gender of candidate", font.main=3)
plot(1:3, cis_party[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0.5,3.5), ylim=c(0, 600))
segments(1:3, cis_party[,5], 1:3, cis_party[,6], lwd=2, col="gray60")
points(1:3, cis_party[,4], pch=15:17, cex=2.5)
text(1:3, cis_party[,4], labels = c("D", "R", "I"), col="white", cex=0.8)
segments(c(1.05,2.05), cis_party[1:2,4], c(1.95,2.95), cis_party[2:3,4], lty=2)
brackets(1, cis_party[2,4]+20, 2, cis_party[2,4]+20, h = 45,  ticks = 0.5, lwd=2)
brackets(3, cis_party[3,4]+20, 2, cis_party[3,4]+20, h = 45,  ticks = 0.5, lwd=2)
text(1.5, cis_party[1,4]+160, bquote(hat(y)['R']-hat(y)['D'] ~ '='
     ~ .(cis_party[2,4]-cis_party[1,4])), cex=0.9)
text(2.5, cis_party[3,4]-40, bquote(hat(y)['I']-hat(y)['R'] ~ '='
     ~ .(cis_party[3,4]-cis_party[2,4])), cex=0.9)
axis(1, at=1:3, labels = c("Democrat", "Republican", "Independent"), tck=0.03, cex.axis=0.9,
     mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Party of candidate',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Party of candidate", font.main=3)
plot(1:3, cis_incumchall[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0.5,3.5), ylim=c(0, 3200))
segments(1:3, cis_incumchall[,5], 1:3, cis_incumchall[,6], lwd=2, col="gray60")
points(1:3, cis_incumchall[,4], pch=15:17, cex=1.5)
segments(c(1.05,2.05), cis_incumchall[1:2,4], c(1.95,2.95), cis_incumchall[2:3,4], lty=2)
brackets(1, cis_incumchall[2,4]+20, 2, cis_incumchall[2,4]+20, h = 105,  ticks = 0.5, lwd=2)
brackets(3, cis_incumchall[3,4]+20, 2, cis_incumchall[3,4]+20, h = 105,  ticks = 0.5, lwd=2)
text(1.5, cis_incumchall[2,4]+285, bquote(hat(y)['I']-hat(y)['C'] ~ '='
     ~ .(cis_incumchall[2,4]-cis_incumchall[1,4])), cex=0.9)
text(2.5, cis_incumchall[3,4]-200, bquote(hat(y)['O']-hat(y)['I'] ~ '='
     ~ .(cis_incumchall[3,4]-cis_incumchall[2,4])), cex=0.9)
axis(1, at=1:3, labels = c("Challenger", "Incumbent", "Open seat"), tck=0.03, cex.axis=0.9,
     mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Status of candidate',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Status of candidate", font.main=3)
plot(newdat_hispf$HISPPCT, cis_hispf[,4], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     ylim = c(0,1100))
polygon(x = c(newdat_hispf$HISPPCT, rev(newdat_hispf$HISPPCT)),
        y = c(cis_hispf[,5], rev(cis_hispf[,6])), col = mygray, border = NA)
lines(newdat_hispf$HISPPCT, cis_hispf[,4], lwd=2)
rug(campaign$HISPPCT)
segments(iqrange[,1], cis_hispiq[,4], iqrange[,1], rep(500,2), lty=2)
segments(iqrange[1,1], 500, iqrange[2,1], 500, lty = 2)
brackets(iqrange[1,1], 510, iqrange[2,1], 510, h = 75,  ticks = 0.5, lwd=2)
text(abs((iqrange[2,1]-iqrange[1,1])/2)+iqrange[1,1], 450, 'Interquartile range', cex=0.8)
text(iqrange[,1], cis_hispiq[,4]-50, round(iqrange[,1],3), cex=0.8)
text(abs((iqrange[2,1]-iqrange[1,1])/2)+iqrange[1,1], 655,
          labels=bquote(hat(y)['Q3']-hat(y)['Q1'] ~ '=' ~ .(iqrange[1,2])), cex=0.9)
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = '% Hispanic population in Congressional District',
      ylab="Total campaign contributions",
      line = 1.7, cex.lab=1)
title(line = 1, main="Hispanic constituency", font.main=3)
par(opar)

Data on bills assigned to House committees in the 103rd and 104th Houses of Representatives

Description

Data for the committees example used in chapters 6

Usage

data(committee)

Format

A data frame with 20 rows and 6 variables:

SIZE: Number of members on the committee
SUBS: Number of subcommittees
STAFF: Number of staff assigned to the committee
PRESTIGE: Dummy variable indicating whether or not it is a high-prestige committee
BILLS103: Number of bills in the 103rd House
BILLS104: Number of bills in the 104th House

...

Examples

data(committee)
attach(committee)
library(AER)
library(MASS)
library(pscl)

## Table 6.6
committee

## Table 6.7
committee.out <- glm.nb(BILLS104 ~ SIZE + SUBS * (log(STAFF)) + PRESTIGE + BILLS103)
summary.glm(committee.out)
data.frame(cbind(round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,1],
       round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,2],
       round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,3],
       round(cbind(summary(committee.out)$coef[,1:2], confint(committee.out)),4)[,4]))

## Figure 6.3
z.matrix <- matrix(0,200,200)
for(i in 1:200)  {
        for(j in 1:200)  {
                if(j < 70)    z.matrix[i,j] <- 1
                if(j < 40)    z.matrix[i,j] <- 2
                if(j < 10)    z.matrix[i,j] <- 3
                if(j == 1)    z.matrix[i,j] <- 3.001
                if(j > 130)   z.matrix[i,j] <- 1
                if(j > 160)   z.matrix[i,j] <- 2
                if(j > 190)   z.matrix[i,j] <- 3
                if(j == 200)  z.matrix[i,j] <- 3.001
        }
}
pears <- resid(committee.out,type="pearson")
devs <- resid(committee.out,type="deviance")
x = seq(-2000,2000,length=200)
opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
layout(matrix(c(1,2), ncol = 1), heights = c(0.9,0.1))
par(mar=c(3,4,2,4),oma=c(2,2,1,3))
image(seq(0,51,length=200), seq(-2000,2000,length=200),z.matrix,xlim=c(0,51),ylim=c(-2000,2000),
	xaxt="n",yaxt="n",xlab="",ylab="", col=rev(c("white", "gray40", "gray60", "gray80")))
points(seq(1,50,length=20),(2000/3)*pears[order(BILLS104)],pch=15)
lines(seq(1,50,length=20),(2000/3)*devs[order(BILLS104)],type="h")
abline(0,0, lwd=2)
abline(h=c((x[10]+x[9])/2,(x[40]+x[39])/2,(x[70]+x[69])/2,(x[130]+x[131])/2,
           (x[160]+x[161])/2,(x[191]+x[190])/2), lty=2)
title(xlab = "Order of Fitted Outcome Variable", ylab="Residual Effect",
      line = 1.3, cex.lab=1.3)
title(main="Model Fit Plot",
      line = 1, cex.main=1.7, font.main=1)
par(mar=c(0,1.5,1,1))
plot(0,0, type="n", axes = FALSE, xlab = "", ylab = "")
legend("center", ncol = 2,
       legend = c("Pearson", "Deviances"),
       cex=1, lty=c(0,1), pch = c(15,NA))
par(opar)

Data on censored corruption scale

Description

Data for the corruption example used in chapter 7

Usage

data(corruption)

Format

A data frame with 83 rows and 7 variables:

ticpi85b: Country-level compilation of effects into a 0 to 10 scale of increasing government corruption with an adjustment that modifies this range slightly
MSO: Binary variable indicating whether the government owns a majority of key industries
LOG.PC.GDP: Log of the average per capita GDP from 1975 to 1983
DEMOCRACY: Polity IV democracy score from 1975 to 1983
GOVGDT: Average government spending as a percentage of GDP from 1980 to 1983
ECONFREE: Index of the ability of capitalists to invest and move money
FEDERAL: Binary variable indicating whether the government has a federal system during this period

...

Examples

data(corruption)
attach(corruption)
library(censReg)

## Table 7.5
corruption.tobit.out <- censReg( ticpi85b ~ MSO + LOG.PC.GDP + DEMOCRACY + GOVGDT +
    ECONFREE*FEDERAL,
    left = 2.6, right = 10.70, data=corruption, start = NULL, nGHQ = 8, logLikOnly = FALSE)
summary(corruption.tobit.out)
summary(corruption.tobit.out)$nObs
Coefs <- summary(corruption.tobit.out)$estimate[,1]
SEs <- summary(corruption.tobit.out)$estimate[,2]
CI.low <- Coefs - SEs*1.96
CI.hi <- Coefs + SEs*1.96
( round(out.table <- cbind( Coefs, SEs, CI.low, CI.hi),4) )

Data on capital punishment

Description

Data for the capital punishment example used in chapters 4, 5, and 6

Usage

data(dp)

Format

A data frame with 17 rows and 7 variables:

EXECUTIONS: The number of times that capital punishment is implemented on a state level in the United States for the year 1997
INCOME: Median per capita income in dollars
PERPOVERTY: Percent of the population classified as living in poverty
PERBLACK: Percent of Black citizens in the population
VC100k96: Rate of violent crimes per 100,000 residents for the year before (1996)
SOUTH: Dummy variable to indicate whether the state is in the South
PROPDEGREE: Proportion of the population with a college degree

...

Examples


opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(dp)
attach(dp)

## Table 4.2
dp

## Table 5.1
dp.out <- glm(EXECUTIONS ~ INCOME+PERPOVERTY+PERBLACK+log(VC100k96)+
              SOUTH+PROPDEGREE, family=poisson)
dp.cis <- round(glm.summary(dp.out, alpha = 0.05),4)
round(cbind(summary(dp.out)$coef[,1:2], dp.cis),4)
round(dp.out$null.deviance,4);round(dp.out$df.null,4)
round(dp.out$deviance,4);round(dp.out$df.residual,4)
round(logLik(dp.out),4)
round(dp.out$aic,4)
round(vcov(dp.out),4) # variance covariance matrix

## Table 5.2
k <- 200
b5 <- seq(0.1, 5.4,length=k)
w <- rep(0,k)
for(i in 1:k){
  mm <- glm(EXECUTIONS ~ INCOME+PERPOVERTY+PERBLACK+log(VC100k96)+PROPDEGREE,
            offset=b5[i]*SOUTH,family=poisson)
  w[i] <- logLik(mm)
  }

f <- function(b5,x,y,maxloglik){
  mm <- glm(EXECUTIONS ~ INCOME+PERPOVERTY+PERBLACK+log(VC100k96)+PROPDEGREE,
            offset=b5*x,family=poisson)
  logLik(mm) - maxloglik + qchisq(.95,1)/2
  }
low.pll <- uniroot(f,interval=c(1.5,2), x=SOUTH, y=EXECUTIONS, maxloglik=logLik(dp.out))$root
high.pll <- uniroot(f,interval=c(3,4), x=SOUTH, y=EXECUTIONS, maxloglik=logLik(dp.out))$root
w[which.min(abs(w-low.pll))]
round(c(low.pll, high.pll),4)
cbind(round(dp.cis[,3:4],4),
      round(confint(dp.out),4))

## Table 6.2
resp <- resid(dp.out,type="response")
pears <- resid(dp.out,type="pearson")
working <- resid(dp.out,type="working")
devs <- resid(dp.out,type="deviance")
dp.mat <- cbind(rep(1,nrow(dp)), as.matrix(dp[,2:4]), as.matrix(log(dp[,5])),
                as.matrix(dp[,6]), as.matrix(dp[,7]))
dp.resid.mat <- cbind(resp,pears,working,devs)
dimnames(dp.resid.mat)[[2]] <- c("response","pearson","working","deviance")
dimnames(dp.resid.mat)[[1]] <- rownames(dp)
dp.resid.mat2 <- round(dp.resid.mat,4)
resid.df <- data.frame(cbind(dp.resid.mat2[,1], dp.resid.mat2[,2],
      dp.resid.mat2[,3], dp.resid.mat2[,4]))
colnames(resid.df) <- dimnames(dp.resid.mat)[[2]]
resid.df

## Figure 5.1
dp.mat.0 <- cbind(dp.mat[,1:5],rep(0,length=nrow(dp.mat)),dp.mat[,7])
dimnames(dp.mat.0)[[2]] <- names(dp.out$coefficients)
dp.mat.1 <- cbind(dp.mat[,1:5],rep(1,length=nrow(dp.mat)),dp.mat[,7])
dimnames(dp.mat.1)[[2]] <- names(dp.out$coefficients)
tcks = list(seq(0,140,20), seq(0,12,2), seq(0,30,5), seq(0,10,2), seq(0,30,5))
layout(matrix(c(1,1,2,2,3,3,4,4,5,6,6,7,8,8,8,8), ncol=4, byrow = TRUE),
       heights = c(0.3,0.3,0.3,0.1))
par(mar=c(3,3,2,4),oma=c(2,1,1,3))
for (i in 2:(ncol(dp.mat.0)-1))  {
  j = i-1
  if (i==6){
    i <- i+1
    plot(0,0, type = "n", axes=FALSE, xlab = "", ylab="")
  }
  ruler <- seq(min(dp.mat.0[,i]),max(dp.mat.0[,i]),length=1000)
  xbeta0 <- exp(dp.out$coefficients[-i]%*%apply(dp.mat.0[,-i],2,mean)
                + dp.out$coefficients[i]*ruler)
  xbeta1 <- exp(dp.out$coefficients[-i]%*%apply(dp.mat.1[,-i],2,mean)
                + dp.out$coefficients[i]*ruler)
  plot(ruler,xbeta0,type="l", xlab="",ylab="", yaxt="n", xaxt="n",
       ylim=c(min(xbeta0,xbeta1)-2,max(xbeta0,xbeta1)), lwd=3)
  lines(ruler,xbeta1,lty=4, lwd=2)
  axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
  axis(2, at=tcks[[j]],
       tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
  title(xlab = paste("Levels of",dimnames(dp.mat.0)[[2]][i]), ylab="Expected executions",
        line = 1.7, cex.lab=1.2)
}
plot(0,0, type = "n", axes=FALSE, xlab = "", ylab="")
par(mar=c(0,1.5,1,1))
plot(0,0, type="n", axes = FALSE, xlab = "", ylab = "")
legend("center", ncol = 2,
       legend = c("South State", "Non-South State"),
       cex=1.1, lty=c(2,1), bty="n", lwd=c(2,3))
par(opar)

## Figure 5.2
par(mar=c(3,3,1,0),oma=c(1,1,1,1))
plot(b5,w,type="l",lwd=3, xaxt="n", yaxt="n", xlab="", ylab="")
abline(h=logLik(dp.out)-qchisq(.95,1)/2,lty=3, col="gray40")
segments(dp.cis[6,3], -45, dp.cis[6,4], -45, lty=6, col="black", lwd=2)
segments(dp.cis[6,3:4], c(-45,-45), dp.cis[6,3:4], c(-55,-55), lty=3, col="gray40")
text(3.5, y=-45, "Wald Test", cex=0.9)
segments(low.pll, -42.5, high.pll, -42.5, lty=2, lwd=2, col="black")
segments(c(low.pll, high.pll), c(-55,-55), c(low.pll, high.pll),
         rep(logLik(dp.out)-qchisq(.95,1)/2,2), lty=3, col="gray40")
text(3.25, y=-42.5, "Profile log-likelihood", cex=0.9, pos=4)
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Coefficient of SOUTH', ylab="Profile log-likelihood",
      line = 1.7, cex.lab=1.2)
par(opar)

## Figure 6.1
coef.vector <- NULL
for (i in 1:length(EXECUTIONS))  {
  dp.temp <- glm(EXECUTIONS[-i] ~ INCOME[-i]+PERPOVERTY[-i]+PERBLACK[-i]+log(VC100k96)[-i]+
                   SOUTH[-i]+PROPDEGREE[-i], family=poisson)
  coef.vector <- rbind(coef.vector,dp.temp$coefficients)
}
layout(matrix(c(1,2,3,4,5,6), ncol=2, byrow = TRUE), heights = c(0.33,0.33,0.33))
par(mar=c(3,4.5,2,4),oma=c(2,1,1,3))
for(i in 2:ncol(coef.vector))  {
  x=plot(coef.vector[,i],type="b",xlab="",ylab="",  yaxt="n", xaxt="n", lwd=2,
         ylim=c(min(coef.vector[,i])-abs(min(coef.vector[,i]))*0.25,
         max(coef.vector[,i])+abs(max(coef.vector[,i]))*0.25))
  abline(h=dp.out$coefficients[i])
  axis(1, at =seq(2,16,2), tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
  if(i==2){
    axis(2, at = seq(5,35,5)/100000, labels = as.expression(paste(seq(5,35,5), "e(-5)", sep="")),
         tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
  }
  else{
    axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
  }
  title(xlab = "Index number",
        line = 1.7, cex.lab=1.2)
  title(ylab=dimnames(dp.mat.0)[[2]][i],
        line = 3.25, cex.lab=1.2)
}
par(opar)

Compute confidence intervals for predictions.

Description

Apply an exponential transformation to the confidence intervals and predictions from binomial and Poisson models.

Usage

glm.cis(preds, ses, alpha, df)

Arguments

preds

The predictions based on the additive linear component of the model.

ses

The standard error(s) of the prediction.

alpha

The desired confidence level.

df

The desired degrees of freedom.

Value

The output is a matrix.

Examples

data(campaign)
attach(campaign)
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
                 family=Gamma(link = 'log'), data=campaign)
newdat_gender <- data.frame(CANDGENDER = c('F','M'), PARTY= rep('Democrat',2),
                            INCUMCHALL=rep("C", 2), HISPPCT=rep(mean(campaign$HISPPCT),2))
preds_gender <- predict(cmpgn.out, newdata = newdat_gender, se.fit = TRUE)
glm.cis(preds_gender$fit, preds_gender$se.fit, 0.95,cmpgn.out$df.residual)

Summarize regression output from generalized linear models.

Description

An alternative to the summary() function.

Usage

glm.summary(in.object, alpha = 0.05)

Arguments

in.object

The regression output from glm().

alpha

A parameter defaulted to 0.05.

Value

The output is a matrix.

Examples

data(campaign)
attach(campaign)
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
              family=Gamma(link = 'log'), data=campaign)
glm.summary(cmpgn.out)

Summarize regression output from multinomial generalized linear models.

Description

An alternative to the summary() function.

Usage

glm.summary.multinom(in.object, alpha = 0.05)

Arguments

in.object

The regression output from multinom().

alpha

A parameter defaulted to 0.05.

Value

The output is a list.

Examples

data(primary)
attach(primary)
library(nnet)
primary.out <- multinom(PRIMVOTE ~ AGE + GENDER + EDUCATION + REGION +
                         RELIGIOSITY + IDEOLOGY + RWA + TRUMPWIN, data=primary)
glm.summary.multinom(primary.out)

Compute variance-covariance matrix.

Description

Calculate the (unscaled) variance-covariance matrix from a generalized linear model regression output. Used in 'GLMpack' within the function 'glm.summary()'.

Usage

glm.vc(obj)

Arguments

obj

The regression output from glm().

Value

The output is a matrix.

Examples

data(campaign)
attach(campaign)
cmpgn.out <- glm(TOTCONTR ~ CANDGENDER + PARTY + INCUMCHALL + HISPPCT,
              family=Gamma(link = 'log'), data=campaign)
glm.vc(cmpgn.out)

Data on Mexico from the Mexican Family Life Survey

Description

Data for the Mexico example used in chapter 7

Usage

data(mexico)

Format

A data frame with 644 rows and 11 variables:

SUBJECT: Subject ID
HOUSEHOLD: Household ID
STATE: State indicator, here 5 for the state of Coahuila
MIGR: Dummy variable indicating whether respondents have thought of migrating
NCRIME: Number of crimes of which the female respondent has been the victim
SEV: Mean seriousness of the crime
PAST: Evaluations of past life conditions
FUT: Evaluations of future community conditions
INC: Respondents' income
AGE: Respondents' age
WAVE: Indicator for the wave under analysis

...

Examples

data(mexico)
attach(mexico)

## Table 7.3
library(lme4)
cs <- function(x) scale(x,scale=TRUE,center=TRUE) # Function to re-scale
mexico.out <- glmer(MIGR ~ cs(NCRIME) + cs(SEV) + cs(PAST) +
                cs(FUT) +cs(INC) + cs(AGE) + WAVE
              + (1|SUBJECT), data=mexico, family=binomial("logit"),
              glmerControl(optimizer="bobyqa", tolPwrss=5e-2, optCtrl = list(maxfun=10000)))
summary(mexico.out)
se <- sqrt(diag(vcov(mexico.out)))
(cis.mexico.out <- cbind(Est = fixef(mexico.out),
                        LL = fixef(mexico.out) - 1.96 * se,
                        UL = fixef(mexico.out) + 1.96 * se))

Data on the characteristics of peace agreement outcomes

Description

Data for the Peace example used in chapter 7

Usage

data(peace)

Format

A data frame with 216 rows and 9 variables:

OUTISS: Ordinal variable indicating the scale of outstanding issues that were not resolved during the peace negotiations with 30 percent zero values
PP: Binary variable indicating whether a rebel force is allowed to transform into a legal political party
INTCIV: Binary variable indicating whether members of the rebel group are to be integrated into the civil service
AMN: Binary variable indicating whether there is an amnesty provision in the agreement
PRIS: Binary variable indicating whether prisoners are released
FED: Binary variable indicating whether a federal state solution is included
COMIMP: Binary variable indicating whether the agreement establishes a commission or committee to oversee implementation
REAFFIRM: Binary variable indicating whether the agreement reaffirms earlier peace agreements
PKO: Binary variable indicating whether or not the peace agreement included the deployment of peacekeeping forces

...

Examples

data(peace)
attach(peace)
require(pscl)

## Table 7.6
M3 <- zeroinfl(OUTISS ~ PP + INTCIV + AMN + PRIS + FED + COMIMP + REAFFIRM | PKO, data=peace)
summary(M3)
out.table.count <- cbind(summary(M3)$coef$count[,1:2],
    summary(M3)$coef$count[,1] - 1.96*summary(M3)$coef$count[,2],
    summary(M3)$coef$count[,1] + 1.96*summary(M3)$coef$count[,2])
out.table.zero <- cbind(summary(M3)$coef$zero[,1:2],
    summary(M3)$coef$zero[,1] - 1.96*summary(M3)$coef$zero[,2],
    summary(M3)$coef$zero[,1] + 1.96*summary(M3)$coef$zero[,2])
out.table.count
out.table.zero

Data on the 2016 Republican Presidentical Primaries

Description

Data for the primary example used in chapters 4 and 5

Usage

data(primary)

Format

A data frame with 706 rows and 9 variables:

PRIMVOTE: Vote intention
AGE: Age
GENDER: Gender
EDUCATION: Education
REGION: Region of the country in which the respondent lives
RELIGIOSITY: Religiosity
IDEOLOGY: Ideology
RWA: Right Wing Authoritarianism scale
TRUMPWIN: Perceptions of whether Trump could win

...

Examples

opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(primary)
attach(primary)
library(nnet)
library(pBrackets)
library(effects)

## Model
primary.out <- multinom(PRIMVOTE ~ AGE + GENDER + EDUCATION + REGION +
                        RELIGIOSITY + IDEOLOGY + RWA + TRUMPWIN, data=primary)
summ.primary.out <- glm.summary.multinom(primary.out)

## Figure 4.2
par(mfrow=c(3,3), mar=c(2.5,2,2,1))
# Plot 1: Electoral preference
countsPV0 <- barplot(table(primary$PRIMVOTE), main="Electoral Preference",
        xlab="Candidates", mgp=c(1.1, 0.2, 1))
text(countsPV0[,1], rep(10,4), as.numeric(table(primary$PRIMVOTE)), cex=1.5)
# Plot 2: Age
countsAGE <- barplot(table(primary$AGE), main="Age",
                     xlab="Age categories", mgp=c(1.1, 0.2, 0))
text(countsAGE[,1], rep(10,4), as.numeric(table(primary$AGE)), cex=1.5)
# Plot 3: Gender
countsGENDER <- barplot(table(primary$GENDER), main="Gender",
                     xlab="Gender categories", mgp=c(1.1, 0.2, 0), ylim=c(0,500))
text(countsGENDER[,1], rep(25,2), as.numeric(table(primary$GENDER)), cex=1.5)
# Plot 4: Education
countsEDUC <- barplot(table(primary$EDUCATION), main="Education",
                        xlab="Schooling level", mgp=c(1.1, 0.2, 0))
text(countsEDUC[,1], rep(10,4), as.numeric(table(primary$EDUCATION)), cex=1.5)
# Plot 5: Region
countsREG <- barplot(table(primary$REGION), main="Region",
                      xlab="Region", mgp=c(1.1, 0.2, 0))
text(countsREG[,1], rep(10,4), as.numeric(table(primary$REGION)), cex=1.5)
hist(primary$RELIGIOSITY,xlab="Religiosity Score",ylab="",
     xlim=c(-1.5,2), ylim=c(0, 225), main="Religiosity",
     col = "gray70", mgp=c(1.1, 0.2, 0))
# Plot 7: Ideology
hist(primary$IDEOLOGY,xlab="Ideology Score",ylab="",
     xlim=c(-2,1.5), ylim=c(0, 150), main="Ideology",
     col = "gray70", mgp=c(1.1, 0.2, 0))
# Plot 8: Right Wing Authoritarianism
hist(primary$RWA,xlab="Right Wing Authoritarianism Score",ylab="",
     xlim=c(-2.5,2), ylim=c(0, 200), main="Authoritarianism",
     col = "gray70", mgp=c(1.1, 0.2, 0))
colnames(primary)
# Plot 9: Could Trump Win?
countsWIN <- barplot(table(primary$TRUMPWIN), main="Trump's winnability",
                     xlab="Perceptions of whether Trump could win",
                     mgp=c(1.1, 0.2, 0), ylim=c(0,550))
text(countsWIN[,1], rep(30,3), as.numeric(table(primary$TRUMPWIN)), cex=1.5)
par(opar)

## Figure 5.3
layout(matrix(1:2, ncol = 1), heights = c(0.9,0.1))
par(mar=c(3,4,0,1),oma=c(1,1,1,1))
plot(summ.primary.out[[1]][,1], type = "n", xaxt="n", yaxt="n",
     xlim=c(-10,3), ylim=c(0,60), ylab="", xlab="")
abline(v=-5, h=c(12,16,28,40,44,48,52), lwd=2)
abline(h=c(4,8,20,24,32,36,56), lty=3, col="gray60")
text(rep(-7.5,15), seq(2,58,4),
     labels = c('30-44', '45-59', '60+',
                'Male',
                'High School','Some College','Bachelor\'s degree or higher',
                'Northeast', 'South', 'West',
                'Religiosity',
                'Ideology',
                'Authoritarianism',
                'Yes', 'Don\'t know'))
segments(summ.primary.out[[1]][-1,3], seq(1,57,4), summ.primary.out[[1]][-1,4],
         seq(1,57,4), col="gray30", lwd=2)
points(summ.primary.out[[1]][-1,1], seq(1,57,4), pch=21, cex=1.4, bg="black")
text(summ.primary.out[[1]][-1,1], seq(1,57,4), labels = "C", cex = 0.7, col="white")
segments(summ.primary.out[[2]][-1,3], seq(3,59,4), summ.primary.out[[2]][-1,4],
         seq(3,59,4), col="gray30", lwd=2)
points(summ.primary.out[[2]][-1,1], seq(3,59,4), pch=21, cex=1.4, bg="white")
text(summ.primary.out[[2]][-1,1], seq(3,59,4), labels = "K", cex = 0.7, col="black")
abline(v=0, lty=2)
axis(1, tck=0.01, at = seq(-5,5,0.5),cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0,
     lwd.ticks = 1)
axis(2, tck=0.02, at = c(6,14,22,34,42,46,50,56), labels=c('AGE', 'GENDER',
     'EDUCATION', 'REGION', 'RELIGIOSITY', 'IDEOLOGY', 'RWA', 'TRUMPWIN'),
     cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 0, las=1)
title(xlab = "Coefficient",
      line = 1.7, cex.lab=1.3)
par(mar=c(0,4,0,0))
plot(0,0, type="n", axes = FALSE, xaxt="n", yaxt="n", xlab="", ylab = "")
legend("center", c("Cruz", "Kasich"), ncol=2, pch=c(21,21), pt.bg=c("black", "white"),
       pt.cex=rep(1.4,2), bty = "n")
par(opar)

## Figure 5.4
mygray = rgb(153, 153, 153, alpha = 200, maxColorValue = 255)
mygray2 = rgb(179, 179, 179, alpha = 150, maxColorValue = 255)
mygray3 = rgb(204, 204, 204, alpha = 150, maxColorValue = 255)
preds_win <- Effect("TRUMPWIN", primary.out)
preds_ideol <- Effect("IDEOLOGY", primary.out, xlevels=list(IDEOLOGY=100))
par(mfrow=c(1,2), mar=c(4,3,3,0),oma=c(1,1,1,1))
plot(1:3, preds_win$prob[,1], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(0,4), ylim=c(0, 0.7))
segments(rep(1:3, 3), preds_win$lower.prob[,1:3], rep(1:3, 3), preds_win$upper.prob[,1:3],
         col=rep(c('black', 'black', 'gray60'), each=3), lty = rep(c(1,2,1), each=3))
points(rep(1:3,3), preds_win$prob[,1:3], pch=21, cex = 2,
       bg=rep(c("black", "white", "gray60"),each=3), col=rep(c("black", "black", "gray60"),each=3))
text(rep(1:3,3), preds_win$prob[,1:3], labels=rep(c("T", "C", "K"), each=3), cex = 0.8,
     bg=rep(c("black", "white", "gray60"),each=3), col=rep(c("white", "black", "black"),each=3))
axis(1, at=1:3, labels = c("No", "Yes", "DK"), tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0),
     lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Perceptions of whether Trump could win the election',
      ylab="Probability of voting",
      line = 1.7, cex.lab=1)
title(line = 1, main="Winnability", font.main=3)
plot(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,1], type="n",xlab="",ylab="",  yaxt="n", xaxt="n",
     xlim=c(-2,1.5), ylim=c(0, 0.7))
polygon(c(preds_ideol$x$IDEOLOGY, rev(preds_ideol$x$IDEOLOGY)),
        c(preds_ideol$lower.prob[,2], rev(preds_ideol$upper.prob[,2])), border = NA, col=mygray2)
polygon(c(preds_ideol$x$IDEOLOGY, rev(preds_ideol$x$IDEOLOGY)),
        c(preds_ideol$lower.prob[,1], rev(preds_ideol$upper.prob[,1])), border = NA, col=mygray)
polygon(c(preds_ideol$x$IDEOLOGY, rev(preds_ideol$x$IDEOLOGY)),
        c(preds_ideol$lower.prob[,3], rev(preds_ideol$upper.prob[,3])), border = NA, col=mygray3)
lines(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,1], col="gray20", lwd=2)
lines(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,2], col="gray40", lwd=2, lty=2)
lines(preds_ideol$x$IDEOLOGY, preds_ideol$prob[,3], col="black", lwd=2)
text(rep(1,3), c(min(preds_ideol$prob[,1]), min(preds_ideol$prob[,2]),
     max(preds_ideol$prob[,3]))+.05, labels = c('Trump', 'Cruz', 'Kasich'))
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'Ideology scores',
      ylab="Probability of voting",
      line = 1.7, cex.lab=1)
title(line = 1, main="Ideology", font.main=3)
par(opar)

Data on the Scottish national parliament vote

Description

Data for the Scotland example used in chapters 4, 5, and 6

Usage

data(scotvote)

Format

A data frame with 32 rows and 7 variables:

PerYesTax: Percentage of population who granting Scottish parliament taxation powers
CouncilTax: Council tax collected
PerClaimantFemale: Female percentage of total claims for unemployment benefits as of January 1998
StdMortalityRatio: Standardized mortality rate
Active: Percentage of economically active individuals relative to the population of working age
GDP: GDP per council
Percentage5to15: Percentage of children aged 5 to 15

...

Examples

data(scotvote)
attach(scotvote)

## Table 4.3
scotvote

## Table 5.3
scottish.vote.glm <- glm((PerYesTax) ~ CouncilTax*PerClaimantFemale+
                     StdMortalityRatio+Active+GDP+Percentage5to15,
                     family=Gamma,data=scotvote)
vote.sum <- summary(scottish.vote.glm)
round(cbind(
  vote.sum$coefficients[,1], vote.sum$coefficients[,2],
  confint(scottish.vote.glm)),4)
vote.sum

## Table 6.3
anova(scottish.vote.glm,test="F")

Data on California state data on educational policy and outcomes

Description

Data for the STAR program example used in chapter 6

Usage

data(star)

Format

A data frame with 303 rows and 16 variables:

LOWINC: Proportion of low-income students
PERASIAN: Proportions of Asian students
PERBLACK: Proportions of African-American students
PERHISP: Proportions of Hispanic students
PERMINTE: Percentage of minority teachers
AVYRSEXP: Mean teacher experience in years
AVSAL: Median teacher salary, including benefits, in thousands of dollars
PERSPEN: Per-pupil expenditures in thousands of dollars
PTRATIO: Pupil/teacher ratio in the classroom
PCTAF: Percentage of students taking college credit courses
PCTCHRT: Percentage of schools in the district that are charter schools
PCTYRRND: Percent of schools in the district operating year-round programs
READTOT: Total number of students taking the reading exam in the 9th grade
PR50RD: Proportion of students scoring over the reading median in the 9th grade
MATHTOT: Total number of students taking the math exam in the 9th grade
PR50M: Proportion of students scoring over the math median in the 9th grade

...

Examples

opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(star)
attach(star)

## MATH MODEL
star.logit.fit <- glm(cbind(PR50M,MATHTOT-PR50M) ~ LOWINC + PERASIAN + PERBLACK + PERHISP +
                  PERMINTE * AVYRSEXP * AVSAL + PERSPEN * PTRATIO * PCTAF +
                  PCTCHRT + PCTYRRND, family=binomial(link=logit),data=star)

## READING MODEL
star.logit.fit2 <- glm(cbind(PR50RD,READTOT-PR50RD) ~ LOWINC + PERASIAN + PERBLACK + PERHISP +
                   PERMINTE * AVYRSEXP * AVSAL + PERSPEN * PTRATIO * PCTAF +
                   PCTCHRT + PCTYRRND, family=binomial(link=logit),data=star)

## Table 6.4
star.summ.mat <- round(summary(star.logit.fit)$coef, 4)
data.frame(cbind(star.summ.mat[,1], star.summ.mat[,2], "[", round(confint(star.logit.fit)[,1],4),
      " ~", round(confint(star.logit.fit)[,2],4), "]"))

## Table 6.5
mean.vector <- apply(star,2,mean)
diff.vector <- c(1,mean.vector[1:12],mean.vector[5]*mean.vector[6],mean.vector[5]*mean.vector[7],
                 mean.vector[6]*mean.vector[7],mean.vector[8]*mean.vector[9],
                 mean.vector[8]*mean.vector[10],mean.vector[9]*mean.vector[10],
                 mean.vector[5]*mean.vector[6]*mean.vector[7],
                 mean.vector[8]*mean.vector[9]*mean.vector[10])
names(diff.vector) <- names(summary(star.logit.fit2)$coef[,1])
# PERMINTE FIRST DIFFERENCE ACROSS IQR
logit <- function(vec){return(exp(vec)/(1+exp(vec)))}
logit(c(diff.vector[1:5],6.329,diff.vector[7:13],6.329*mean.vector[6],6.329*mean.vector[7],
        diff.vector[16:19],6.329*mean.vector[6]*mean.vector[7],diff.vector[21])
      %*%summary.glm(star.logit.fit)$coef[,1]) -
logit(c(diff.vector[1:5],19.180,diff.vector[7:13],19.180*mean.vector[6],19.180*mean.vector[7],
          diff.vector[16:19],19.180*mean.vector[6]*mean.vector[7],diff.vector[21])
        %*%summary.glm(star.logit.fit)$coef[,1])
# First quartile information
q1.diff.mat <- q2.diff.mat <- q3.diff.mat <- q4.diff.mat <-
  matrix(rep(diff.vector,length(diff.vector)),
                      nrow=length(diff.vector), ncol=length(diff.vector),
                      dimnames=list(names(diff.vector),names(diff.vector)))
diag(q1.diff.mat)[2:13] <- apply(star,2,summary)[2,1:12]
q1.diff.mat[14,6] <- q1.diff.mat[6,6]*q1.diff.mat[7,6]
q1.diff.mat[15,6] <- q1.diff.mat[6,6]*q1.diff.mat[8,6]
q1.diff.mat[20,6] <- q1.diff.mat[6,6]*q1.diff.mat[7,6]*q1.diff.mat[8,6]
q1.diff.mat[14,7] <- q1.diff.mat[7,7]*q1.diff.mat[6,7]
q1.diff.mat[16,7] <- q1.diff.mat[7,7]*q1.diff.mat[8,7]
q1.diff.mat[20,7] <- q1.diff.mat[6,7]*q1.diff.mat[7,7]*q1.diff.mat[8,7]
q1.diff.mat[15,8] <- q1.diff.mat[8,8]*q1.diff.mat[6,8]
q1.diff.mat[16,8] <- q1.diff.mat[8,8]*q1.diff.mat[7,8]
q1.diff.mat[20,8] <- q1.diff.mat[6,8]*q1.diff.mat[7,8]*q1.diff.mat[8,8]
q1.diff.mat[17,9] <- q1.diff.mat[9,9]*q1.diff.mat[10,9]
q1.diff.mat[18,9] <- q1.diff.mat[9,9]*q1.diff.mat[11,9]
q1.diff.mat[21,9] <- q1.diff.mat[9,9]*q1.diff.mat[10,9]*q1.diff.mat[11,9]
q1.diff.mat[17,10] <- q1.diff.mat[10,10]*q1.diff.mat[9,10]
q1.diff.mat[19,10] <- q1.diff.mat[10,10]*q1.diff.mat[11,10]
q1.diff.mat[21,10] <- q1.diff.mat[9,10]*q1.diff.mat[10,10]*q1.diff.mat[11,10]
q1.diff.mat[18,11] <- q1.diff.mat[11,11]*q1.diff.mat[9,11]
q1.diff.mat[19,11] <- q1.diff.mat[11,11]*q1.diff.mat[10,11]
q1.diff.mat[21,11] <- q1.diff.mat[9,11]*q1.diff.mat[10,11]*q1.diff.mat[11,11]
# Third quartile
diag(q2.diff.mat)[2:13] <- apply(star,2,summary)[5,1:12]
q2.diff.mat[14,6] <- q2.diff.mat[6,6]*q2.diff.mat[7,6]
q2.diff.mat[15,6] <- q2.diff.mat[6,6]*q2.diff.mat[8,6]
q2.diff.mat[20,6] <- q2.diff.mat[6,6]*q2.diff.mat[7,6]*q2.diff.mat[8,6]
q2.diff.mat[14,7] <- q2.diff.mat[7,7]*q2.diff.mat[6,7]
q2.diff.mat[16,7] <- q2.diff.mat[7,7]*q2.diff.mat[8,7]
q2.diff.mat[20,7] <- q2.diff.mat[6,7]*q2.diff.mat[7,7]*q2.diff.mat[8,7]
q2.diff.mat[15,8] <- q2.diff.mat[8,8]*q2.diff.mat[6,8]
q2.diff.mat[16,8] <- q2.diff.mat[8,8]*q2.diff.mat[7,8]
q2.diff.mat[20,8] <- q2.diff.mat[6,8]*q2.diff.mat[7,8]*q2.diff.mat[8,8]
q2.diff.mat[17,9] <- q2.diff.mat[9,9]*q2.diff.mat[10,9]
q2.diff.mat[18,9] <- q2.diff.mat[9,9]*q2.diff.mat[11,9]
q2.diff.mat[21,9] <- q2.diff.mat[9,9]*q2.diff.mat[10,9]*q2.diff.mat[11,9]
q2.diff.mat[17,10] <- q2.diff.mat[10,10]*q2.diff.mat[9,10]
q2.diff.mat[19,10] <- q2.diff.mat[10,10]*q2.diff.mat[11,10]
q2.diff.mat[21,10] <- q2.diff.mat[9,10]*q2.diff.mat[10,10]*q2.diff.mat[11,10]
q2.diff.mat[18,11] <- q2.diff.mat[11,11]*q2.diff.mat[9,11]
q2.diff.mat[19,11] <- q2.diff.mat[11,11]*q2.diff.mat[10,11]
q2.diff.mat[21,11] <- q2.diff.mat[9,11]*q2.diff.mat[10,11]*q2.diff.mat[11,11]
# Minimum
diag(q3.diff.mat)[2:13] <- apply(star,2,summary)[1,1:12]
q3.diff.mat[14,6] <- q3.diff.mat[6,6]*q3.diff.mat[7,6]
q3.diff.mat[15,6] <- q3.diff.mat[6,6]*q3.diff.mat[8,6]
q3.diff.mat[20,6] <- q3.diff.mat[6,6]*q3.diff.mat[7,6]*q3.diff.mat[8,6]
q3.diff.mat[14,7] <- q3.diff.mat[7,7]*q3.diff.mat[6,7]
q3.diff.mat[16,7] <- q3.diff.mat[7,7]*q3.diff.mat[8,7]
q3.diff.mat[20,7] <- q3.diff.mat[6,7]*q3.diff.mat[7,7]*q3.diff.mat[8,7]
q3.diff.mat[15,8] <- q3.diff.mat[8,8]*q3.diff.mat[6,8]
q3.diff.mat[16,8] <- q3.diff.mat[8,8]*q3.diff.mat[7,8]
q3.diff.mat[20,8] <- q3.diff.mat[6,8]*q3.diff.mat[7,8]*q3.diff.mat[8,8]
q3.diff.mat[17,9] <- q3.diff.mat[9,9]*q3.diff.mat[10,9]
q3.diff.mat[18,9] <- q3.diff.mat[9,9]*q3.diff.mat[11,9]
q3.diff.mat[21,9] <- q3.diff.mat[9,9]*q3.diff.mat[10,9]*q3.diff.mat[11,9]
q3.diff.mat[17,10] <- q3.diff.mat[10,10]*q3.diff.mat[9,10]
q3.diff.mat[19,10] <- q3.diff.mat[10,10]*q3.diff.mat[11,10]
q3.diff.mat[21,10] <- q3.diff.mat[9,10]*q3.diff.mat[10,10]*q3.diff.mat[11,10]
q3.diff.mat[18,11] <- q3.diff.mat[11,11]*q3.diff.mat[9,11]
q3.diff.mat[19,11] <- q3.diff.mat[11,11]*q3.diff.mat[10,11]
q3.diff.mat[21,11] <- q3.diff.mat[9,11]*q3.diff.mat[10,11]*q3.diff.mat[11,11]
diag(q4.diff.mat)[2:13] <- apply(star,2,summary)[6,1:12]
q4.diff.mat[14,6] <- q4.diff.mat[6,6]*q4.diff.mat[7,6]
q4.diff.mat[15,6] <- q4.diff.mat[6,6]*q4.diff.mat[8,6]
q4.diff.mat[20,6] <- q4.diff.mat[6,6]*q4.diff.mat[7,6]*q2.diff.mat[8,6]
q4.diff.mat[14,7] <- q4.diff.mat[7,7]*q4.diff.mat[6,7]
q4.diff.mat[16,7] <- q4.diff.mat[7,7]*q4.diff.mat[8,7]
q4.diff.mat[20,7] <- q4.diff.mat[6,7]*q4.diff.mat[7,7]*q4.diff.mat[8,7]
q4.diff.mat[15,8] <- q4.diff.mat[8,8]*q4.diff.mat[6,8]
q4.diff.mat[16,8] <- q4.diff.mat[8,8]*q4.diff.mat[7,8]
q4.diff.mat[20,8] <- q4.diff.mat[6,8]*q4.diff.mat[7,8]*q4.diff.mat[8,8]
q4.diff.mat[17,9] <- q4.diff.mat[9,9]*q4.diff.mat[10,9]
q4.diff.mat[18,9] <- q4.diff.mat[9,9]*q4.diff.mat[11,9]
q4.diff.mat[21,9] <- q4.diff.mat[9,9]*q4.diff.mat[10,9]*q4.diff.mat[11,9]
q4.diff.mat[17,10] <- q4.diff.mat[10,10]*q4.diff.mat[9,10]
q4.diff.mat[19,10] <- q4.diff.mat[10,10]*q4.diff.mat[11,10]
q4.diff.mat[21,10] <- q4.diff.mat[9,10]*q4.diff.mat[10,10]*q4.diff.mat[11,10]
q4.diff.mat[18,11] <- q4.diff.mat[11,11]*q4.diff.mat[9,11]
q4.diff.mat[19,11] <- q4.diff.mat[11,11]*q4.diff.mat[10,11]
q4.diff.mat[21,11] <- q4.diff.mat[9,11]*q4.diff.mat[10,11]*q4.diff.mat[11,11]
first_diffs <- NULL
for (i in 2:13){
        temp1 <- logit(q2.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1]) -
                        logit(q1.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1])
        temp2 <- logit(q4.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1]) -
          logit(q3.diff.mat[,i]%*%summary.glm(star.logit.fit)$coef[,1])
        first_diffs <- rbind(first_diffs, c(temp1,temp2))
}
first_diffs <- round(first_diffs,4)
diffs_mat <- cbind(diag(q1.diff.mat)[2:13], diag(q2.diff.mat)[2:13],
                   first_diffs[,1],
                   diag(q3.diff.mat)[2:13], diag(q4.diff.mat)[2:13],
                   first_diffs[,2])
colnames(diffs_mat) <- c("1st quartile", "3rd quartile", "Interquartile 1st diff",
                         "Min", "Max", "Full range 1st diff")
diffs_mat

star.mu <- predict.glm(star.logit.fit,type="response")
star.y <- PR50M/MATHTOT
star.n <- length(star.y)
PR50M.adj <- PR50M
for (i in 1:length(PR50M.adj))  {
  if (PR50M.adj[i] > mean(PR50M)) PR50M.adj[i] <- PR50M.adj[i] - 0.5
  if (PR50M.adj[i] < mean(PR50M)) PR50M.adj[i] <- PR50M.adj[i] + 0.5
}
par(mfrow=c(1,3), mar=c(6,3,6,2),oma=c(4,1,4,1))
plot(star.mu,star.y,xlab="",ylab="", yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Observed values",
      line = 1.7, cex.lab=1.3)
title(main="Model Fit Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(lm(star.y~star.mu)$coefficients, lwd=2)
plot(fitted(star.logit.fit),resid(star.logit.fit,type="pearson"),xlab="",ylab="",
     yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Fitted values", ylab="Pearson Residuals",
      line = 1.7, cex.lab=1.3)
title(main="Residual Dependence Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(0,0, lwd=2)
qqnorm(resid(star.logit.fit,type="deviance"),main="",xlab="",ylab="",
       yaxt='n', xaxt="n", pch="+")
axis(1, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.02, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = "Quantiles of N(0,1)", ylab="Deviance Residual Quantiles",
      line = 1.7, cex.lab=1.3)
title(main="Normal-Quantile Plot",
      line = 1, cex.main=1.7, font.main=1)
abline(-0.3,3.5, lwd=2)
par(opar)

Data on suicides in 2009 in OECD member states

Description

Data for the suicide example used in chapter 7

Usage

data(suicide)

Format

A data frame with 32 rows and 7 variables:

COUNTRYCODE: Country code
COUNTRYNAME: Name of the country
YEAR: Year
DEATHS: Number of suicides in the country per 100,000 individuals
GDP: GDP in thousands of dollars
SUBABUSE: Share of the population with alcohol or drug use disorder
TEMP: Average temperature

...

Examples

opar = par(mfrow=c(1,1), mar=c(5.1,4.1,4.1,2.1), oma=c(0,0,0,0))
data(suicide)
attach(suicide)

## Table 7.2
# Poisson model
suic.out.p <- glm(DEATHS ~ GDP + TEMP + SUBABUSE, family = poisson)
summary(suic.out.p)
round(confint(suic.out.p),3)
coefs_poisson <- summary(suic.out.p)$coefficients[1:4,]
coefs_poisson
suic.out.qp <- glm(DEATHS ~ GDP + TEMP + SUBABUSE, family = quasipoisson)
summary(suic.out.qp)
round(confint(suic.out.qp),3)
coefs_quasipoisson <- summary(suic.out.qp)$coefficients[1:4,]
coefs_quasipoisson

## Figure 7.1
layout(matrix(c(1,2,3,4), ncol=2, byrow = TRUE))
par(mar=c(4,3,2,0),oma=c(1,1,1,1))
# Histogram #1
hist(TEMP,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray50", border = "gray30",
    ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'Mean temperature (in Celsius)', ylab="",
      line = 1.7, cex.lab=1.2)
title(line = 1, main="Temperature", font.main=3)
# Histogram #2
hist(GDP,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray50", border = "gray30",
     ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'GDP per capita (in thousands of dollars)', ylab="",
      line = 1.7, cex.lab=1.2)
title(line = 1, main="Economic Conditions", font.main=3)
# Histogram #3
hist(SUBABUSE,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray50", border = "gray30",
     ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = '% of population with alcohol or drug use disorders', ylab="",line = 1.7, cex.lab=1.2)
title(line = 1, main="Substance abuse", font.main=3)
# Histogram #4
hist(DEATHS,xlab="",ylab="",  yaxt="n", xaxt="n", main="", col="gray10", border = "gray20",
     ylim=c(0,15))
axis(1, tck=0, mgp=c(0, 0, 0), lty=1, lwd=0, lwd.ticks = 0)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=1, lwd.ticks = 1, las=2)
title(xlab = 'Number of suicides per 100,000 people', ylab="",
      line = 1.7, cex.lab=1.2)
title(line = 1, main="Suicide rate", font.main=3)
par(opar)

## Figure 7.2
newdat1 <- data.frame(GDP=seq(13, 70.5, 1), TEMP=rep(mean(TEMP), 58),
                      SUBABUSE=rep(mean(SUBABUSE), 58))
newdat2 <- data.frame(GDP=rep(mean(GDP), 61), TEMP=rep(mean(TEMP), 61), SUBABUSE=seq(0,6,0.1))
preds.qp.gdp <- predict(suic.out.qp, newdata = newdat1, type = "link", se.fit = TRUE)
preds.qp.subabuse <- predict(suic.out.qp, newdata = newdat2, type = "link", se.fit = TRUE)
ilink.qp <- family(suic.out.qp)$linkinv
cis.p.preds.qp.gdp <- cbind(ilink.qp(preds.qp.gdp$fit - (2 * preds.qp.gdp$se.fit)),
                            ilink.qp(preds.qp.gdp$fit + (2 * preds.qp.gdp$se.fit)))
cis.p.preds.qp.subabuse <- cbind(ilink.qp(preds.qp.subabuse$fit - (2 * preds.qp.subabuse$se.fit)),
                            ilink.qp(preds.qp.subabuse$fit + (2 * preds.qp.subabuse$se.fit)))
mygray = rgb(153, 153, 153, alpha = 200, maxColorValue = 255)
par(mar=c(4,3,1,0),oma=c(1,1,1,1), mfrow=c(1,2))
plot(newdat1$GDP, ilink.qp(preds.qp.gdp$fit), type="n",xlab="",ylab="",  yaxt="n", xaxt="n", lwd=2,
     ylim = c(0,37))
polygon(c(newdat1$GDP,rev(newdat1$GDP)), c(cis.p.preds.qp.gdp[,1],rev(cis.p.preds.qp.gdp[,2])),
        border = NA, col = mygray)
lines(newdat1$GDP, ilink.qp(preds.qp.gdp$fit))
points(GDP, DEATHS, pch="+", col="gray20", cex=0.8)
axis(1, tck=0.03, cex.axis=0.9, at=seq(20,70,10), labels = seq(20,70,10), mgp=c(0.3, 0.3, 0),
     lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = 'GDP per capita (in thousands of dollars)',
      ylab="Number of suicides per 100,000 people", line = 1.7, cex.lab=1.2)
plot(newdat2$SUBABUSE, ilink.qp(preds.qp.subabuse$fit), type="n",xlab="",ylab="",
     yaxt="n", xaxt="n", lwd=2, ylim = c(0,37))
polygon(c(newdat2$SUBABUSE,rev(newdat2$SUBABUSE)), c(cis.p.preds.qp.subabuse[,1],
        rev(cis.p.preds.qp.subabuse[,2])), border = NA, col = mygray)
lines(newdat2$SUBABUSE, ilink.qp(preds.qp.subabuse$fit))
points(SUBABUSE, DEATHS, pch="+", col="gray20", cex=0.8)
axis(1, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1)
axis(2, tck=0.03, cex.axis=0.9, mgp=c(0.3, 0.3, 0), lty=1, lwd=0, lwd.ticks = 1, las=2)
title(xlab = '% of population with alcohol or drug use disorders', ylab="",
      line = 1.7, cex.lab=1.2)
par(opar)