Donner Party

https://en.wikipedia.org/wiki/Donner_Party

install.packages(“vcdExtra”)

install.packages(“mfx”)

install.packages(“erer”) install.packages(“stargazer”)

require(vcdExtra)
require(lmtest)
require(mfx)

library(sandwich)   # Robustní sm. chyby.
require(stargazer)  # Prehledové tabulky.
library(ggplot2)    # Grafy.
library(reshape2)   # Funkce melt.
        # Funkce wald.test.

library(erer)       # Výpocet AME aj.

data(Donner)
attach(Donner)
head(Donner)
##                    family age    sex survived      death
## Antoine             Other  23   Male        0 1846-12-29
## Breen, Edward       Breen  13   Male        1       <NA>
## Breen, Margaret I.  Breen   1 Female        1       <NA>
## Breen, James        Breen   5   Male        1       <NA>
## Breen, John         Breen  14   Male        1       <NA>
## Breen, Mary         Breen  40 Female        1       <NA>

Lineární pravdepodobnostní model

model_LPM=lm(survived ~ age+factor(family)+factor(sex), data = Donner)
coeftest(model_LPM, vcov =vcovHC(model_LPM, vcov="HC1"))  # je nutné pouzít robustní odhad chyb 
## 
## t test of coefficients:
## 
##                           Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)              1.2989263  0.1289439 10.0736 9.112e-16 ***
## age                     -0.0066747  0.0035192 -1.8967 0.0615764 .  
## factor(family)Donner    -0.5590877  0.1381040 -4.0483 0.0001207 ***
## factor(family)Eddy      -0.8302991  0.4016783 -2.0671 0.0420445 *  
## factor(family)FosdWolf  -0.5269018  0.2608306 -2.0201 0.0468074 *  
## factor(family)Graves    -0.3726296  0.1596454 -2.3341 0.0221654 *  
## factor(family)Keseberg  -0.5619438  0.4277074 -1.3139 0.1927475    
## factor(family)McCutchen -0.4275486  0.5642443 -0.7577 0.4508918    
## factor(family)MurFosPik -0.5719558  0.1700431 -3.3636 0.0011950 ** 
## factor(family)Other     -0.6198134  0.1189369 -5.2113 1.493e-06 ***
## factor(family)Reed      -0.1642807  0.1543104 -1.0646 0.2903363    
## factor(sex)Male         -0.2470263  0.1304361 -1.8938 0.0619526 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
OLS.robust.VCE <- vcovHC(model_LPM, type="HC1")  # Robustní odhad matice VCE.
OLS.robust.se <- sqrt(diag(OLS.robust.VCE)) 
yhat <- model_LPM$fitted.values
yhat[yhat < 0] <- 0.01
yhat[yhat > 1] <- 0.99

hhat <- yhat*(1-yhat)


wls.fit=lm(survived ~ age+factor(family)+factor(sex),
            weights=I(1/hhat), data=Donner)
summary(wls.fit)
## 
## Call:
## lm(formula = survived ~ age + factor(family) + factor(sex), data = Donner, 
##     weights = I(1/hhat))
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.83226 -0.77750 -0.09108  0.91185  2.11011 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.141882   0.068502  16.669  < 2e-16 ***
## age                     -0.004852   0.002024  -2.398  0.01889 *  
## factor(family)Donner    -0.540829   0.126593  -4.272 5.42e-05 ***
## factor(family)Eddy      -0.280259   0.161465  -1.736  0.08656 .  
## factor(family)FosdWolf  -0.477891   0.256943  -1.860  0.06667 .  
## factor(family)Graves    -0.242842   0.139216  -1.744  0.08504 .  
## factor(family)Keseberg  -0.500479   0.251026  -1.994  0.04968 *  
## factor(family)McCutchen -0.535053   0.255910  -2.091  0.03980 *  
## factor(family)MurFosPik -0.509276   0.150059  -3.394  0.00109 ** 
## factor(family)Other     -0.673166   0.104347  -6.451 8.54e-09 ***
## factor(family)Reed      -0.085703   0.088959  -0.963  0.33832    
## factor(sex)Male         -0.112004   0.066208  -1.692  0.09470 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.051 on 78 degrees of freedom
## Multiple R-squared:  0.6127, Adjusted R-squared:  0.558 
## F-statistic: 11.22 on 11 and 78 DF,  p-value: 3.826e-12
WLS.robust.VCE = vcovHC(wls.fit, type="HC1")  # Robustní odhad matice VCE.
WLS.robust.se = sqrt(diag(WLS.robust.VCE)) 

Logit

model_Logit=glm(survived ~  age+factor(family)+factor(sex),data=Donner, family = binomial)
summary(model_Logit)
## 
## Call:
## glm(formula = survived ~ age + factor(family) + factor(sex), 
##     family = binomial, data = Donner)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -2.07065  -0.78425   0.00018   0.83084   2.27817  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)  
## (Intercept)               19.48229 1226.69283   0.016   0.9873  
## age                       -0.04161    0.01923  -2.164   0.0304 *
## factor(family)Donner     -18.17917 1226.69277  -0.015   0.9882  
## factor(family)Eddy       -19.50763 1226.69327  -0.016   0.9873  
## factor(family)FosdWolf   -17.89283 1226.69304  -0.015   0.9884  
## factor(family)Graves     -17.16864 1226.69282  -0.014   0.9888  
## factor(family)Keseberg   -18.11527 1226.69308  -0.015   0.9882  
## factor(family)McCutchen  -17.42154 1226.69336  -0.014   0.9887  
## factor(family)MurFosPik  -18.16684 1226.69276  -0.015   0.9882  
## factor(family)Other      -18.16807 1226.69264  -0.015   0.9882  
## factor(family)Reed       -15.69515 1226.69311  -0.013   0.9898  
## factor(sex)Male           -1.32700    0.59592  -2.227   0.0260 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 124.366  on 89  degrees of freedom
## Residual deviance:  88.928  on 78  degrees of freedom
## AIC: 112.93
## 
## Number of Fisher Scoring iterations: 16

Odnadnuté parametry nám ríkají, jak se mení relativní sance na prezití (log-odds ratio) pri zmene regresoru. Takze nelze interpretovat stejne jako pri OLS, jaká dopad má zmena \(x\) na pravdepodobnost výskytu jevu \(y\). Interpretace tak není intuitivní, jako pri OLS. Na druhou stranu, alepon znaménko odhadu nám urcuje vliv dané promenné.

Zároven zde máte místo \(t\)-value, \(z\)-value. Jde o to, ze standard errors jsou odhadnuty pomocí ML, takze jsou pouze asymtoticky validní a proto nemá testová statistika \(t\)-rozdelení, ale asymtotické normované normální rozdelení.

Probit

model_Probit=glm(survived ~ age+factor(family)+factor(sex),data=Donner,family = binomial(link = "probit"))
summary(model_Probit)
## 
## Call:
## glm(formula = survived ~ age + factor(family) + factor(sex), 
##     family = binomial(link = "probit"), data = Donner)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.97304  -0.79647   0.00002   0.85483   2.17995  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)  
## (Intercept)               7.12275  289.65077   0.025   0.9804  
## age                      -0.02439    0.01092  -2.234   0.0255 *
## factor(family)Donner     -6.34701  289.65056  -0.022   0.9825  
## factor(family)Eddy       -6.99469  289.65122  -0.024   0.9807  
## factor(family)FosdWolf   -6.19593  289.65095  -0.021   0.9829  
## factor(family)Graves     -5.73363  289.65061  -0.020   0.9842  
## factor(family)Keseberg   -6.32289  289.65104  -0.022   0.9826  
## factor(family)McCutchen  -6.03046  289.65137  -0.021   0.9834  
## factor(family)MurFosPik  -6.35799  289.65056  -0.022   0.9825  
## factor(family)Other      -6.39906  289.65027  -0.022   0.9824  
## factor(family)Reed       -4.86438  289.65084  -0.017   0.9866  
## factor(sex)Male          -0.76817    0.35266  -2.178   0.0294 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 124.37  on 89  degrees of freedom
## Residual deviance:  89.08  on 78  degrees of freedom
## AIC: 113.08
## 
## Number of Fisher Scoring iterations: 16
stargazer(model_LPM, wls.fit, model_Logit, model_Probit,
                    column.labels = c("LPM","WLS","Logit", "Probit"),
                    se=list(OLS.robust.se, WLS.robust.se,NULL, NULL),
                    keep.stat=c("n"),
                    digits=3,
                    type="text"
)
## 
## =================================================================
##                                    Dependent variable:           
##                         -----------------------------------------
##                                         survived                 
##                                 OLS          logistic    probit  
##                            LPM       WLS       Logit     Probit  
##                            (1)       (2)        (3)        (4)   
## -----------------------------------------------------------------
## age                     -0.007**  -0.005***  -0.042**   -0.024** 
##                          (0.003)   (0.002)    (0.019)    (0.011) 
##                                                                  
## factor(family)Donner    -0.559*** -0.541***   -18.179    -6.347  
##                          (0.130)   (0.112)  (1,226.693) (289.651)
##                                                                  
## factor(family)Eddy      -0.830***  -0.280     -19.508    -6.995  
##                          (0.315)   (0.320)  (1,226.693) (289.651)
##                                                                  
## factor(family)FosdWolf  -0.527**  -0.478**    -17.893    -6.196  
##                          (0.208)   (0.237)  (1,226.693) (289.651)
##                                                                  
## factor(family)Graves    -0.373**  -0.243**    -17.169    -5.734  
##                          (0.147)   (0.112)  (1,226.693) (289.651)
##                                                                  
## factor(family)Keseberg   -0.562*   -0.500     -18.115    -6.323  
##                          (0.334)   (0.318)  (1,226.693) (289.651)
##                                                                  
## factor(family)McCutchen  -0.428    -0.535     -17.422    -6.030  
##                          (0.393)   (0.389)  (1,226.693) (289.651)
##                                                                  
## factor(family)MurFosPik -0.572*** -0.509***   -18.167    -6.358  
##                          (0.162)   (0.157)  (1,226.693) (289.651)
##                                                                  
## factor(family)Other     -0.620*** -0.673***   -18.168    -6.399  
##                          (0.115)   (0.095)  (1,226.693) (289.650)
##                                                                  
## factor(family)Reed       -0.164    -0.086     -15.695    -4.864  
##                          (0.132)   (0.063)  (1,226.693) (289.651)
##                                                                  
## factor(sex)Male         -0.247**   -0.112    -1.327**   -0.768** 
##                          (0.113)   (0.069)    (0.596)    (0.353) 
##                                                                  
## Constant                1.299***  1.142***    19.482      7.123  
##                          (0.110)   (0.079)  (1,226.693) (289.651)
##                                                                  
## -----------------------------------------------------------------
## Observations               90        90         90         90    
## =================================================================
## Note:                                 *p<0.1; **p<0.05; ***p<0.01

Mezní efekty

Mezní efekt nám dává odpoved na to, jak se zmení pravdepodobnost vzniku události (Y=1), kdyz se nekterý z regresoru zmení o 1.

Budeme rozlisovat dva mozné prístupy výpoctu:

MEM mezní efekt pro prumerné pozorování

\(MEM_j=f(\bar{x}\hat{\beta})\hat{\beta_j}\)

OLS <- lm(survived ~ age+factor(family)+factor(sex), data=Donner, x=TRUE)  # Abychom mohli uložit matici X.
X <- OLS$x  # Uložíme matici X.

# MEM
x.bar <- apply(X, 2, mean) # Spoèteme prùmìrné pozorování.
print(x.bar)  # Co že to máme za pohlaví?
##             (Intercept)                     age    factor(family)Donner 
##              1.00000000             20.86666667              0.15555556 
##      factor(family)Eddy  factor(family)FosdWolf    factor(family)Graves 
##              0.04444444              0.04444444              0.11111111 
##  factor(family)Keseberg factor(family)McCutchen factor(family)MurFosPik 
##              0.04444444              0.03333333              0.13333333 
##     factor(family)Other      factor(family)Reed         factor(sex)Male 
##              0.25555556              0.07777778              0.61111111
Logit.MEM <- dlogis(x.bar %*% model_Logit$coef) * model_Logit$coef
Probit.MEM <- dnorm(x.bar %*% model_Probit$coef) * model_Probit$coef
regrese=survived ~ age+factor(family)+factor(sex)
Logit.MEM.mfx <- logitmfx(regrese, data=Donner)
Probit.MEM.mfx <- probitmfx(regrese, data=Donner)
cbind(rucne_logit=Logit.MEM[-1], rucne_probit=Probit.MEM[-1], mfx_Logit=Logit.MEM.mfx$mfxest[, 1],mfx_Probit=Probit.MEM.mfx$mfxest[, 1])
##                          rucne_logit rucne_probit    mfx_Logit
## age                     -0.005309352 -0.008174321 -0.005309352
## factor(family)Donner    -2.319557016 -2.127192030 -0.989659539
## factor(family)Eddy      -2.489061676 -2.344261747 -0.930896998
## factor(family)FosdWolf  -2.283021730 -2.076560787 -0.926134863
## factor(family)Graves    -2.190619824 -1.921620255 -0.974447077
## factor(family)Keseberg  -2.311404422 -2.119109476 -0.926808381
## factor(family)McCutchen -2.222887672 -2.021102399 -0.910051160
## factor(family)MurFosPik -2.317984036 -2.130872686 -0.984567644
## factor(family)Other     -2.318140967 -2.144637129 -0.998294248
## factor(family)Reed      -2.002611434 -1.630293258 -0.950462433
## factor(sex)Male         -0.169317191 -0.257450935 -0.155580650
##                           mfx_Probit
## age                     -0.008174321
## factor(family)Donner    -0.942674545
## factor(family)Eddy      -0.816258971
## factor(family)FosdWolf  -0.806672539
## factor(family)Graves    -0.890159620
## factor(family)Keseberg  -0.808216307
## factor(family)McCutchen -0.785627190
## factor(family)MurFosPik -0.924790566
## factor(family)Other     -0.986965828
## factor(family)Reed      -0.833595097
## factor(sex)Male         -0.240672473

AME prumerný mezní efekt

\(AME_j=\dfrac{1}{n}\sum^{n}_{i=1}f(x\hat{\beta})\hat{\beta_j}\)

Logit.AME <- mean(dlogis(X%*% model_Logit$coef)) * model_Logit$coef
Probit.AME <- mean(dnorm(X%*% model_Probit$coef)) * model_Probit$coef
regrese=survived ~ age+factor(family)+factor(sex)
Logit.AME.mfx <- logitmfx(regrese, data=Donner, atmean = FALSE)
Probit.AME.mfx <- probitmfx(regrese, data=Donner,atmean = FALSE)
cbind(rucne_logit=Logit.AME[-1], rucne_probit=Probit.AME[-1], mfx_Logit=Logit.AME.mfx$mfxest[, 1],mfx_Probit=Probit.AME.mfx$mfxest[, 1])
##                          rucne_logit rucne_probit    mfx_Logit
## age                     -0.006923029 -0.006847397 -0.006923029
## factor(family)Donner    -3.024542706 -1.781888420 -0.491905074
## factor(family)Eddy      -3.245565117 -1.963721564 -0.537855171
## factor(family)FosdWolf  -2.976903208 -1.739476063 -0.485617173
## factor(family)Graves    -2.856417482 -1.609686775 -0.425438853
## factor(family)Keseberg  -3.013912285 -1.775117893 -0.490518014
## factor(family)McCutchen -2.898492535 -1.693020145 -0.464111494
## factor(family)MurFosPik -3.022491649 -1.784971601 -0.491638411
## factor(family)Other     -3.022696275 -1.796501684 -0.613887249
## factor(family)Reed      -2.611267481 -1.365650415 -0.391192068
## factor(sex)Male         -0.220777964 -0.215659345 -0.244983298
##                           mfx_Probit
## age                     -0.006847397
## factor(family)Donner    -0.491996766
## factor(family)Eddy      -0.531466754
## factor(family)FosdWolf  -0.487278516
## factor(family)Graves    -0.424759870
## factor(family)Keseberg  -0.491773462
## factor(family)McCutchen -0.472545883
## factor(family)MurFosPik -0.493143623
## factor(family)Other     -0.618748611
## factor(family)Reed      -0.390659135
## factor(sex)Male         -0.234987571