https://en.wikipedia.org/wiki/Donner_Party
install.packages(“vcdExtra”)
install.packages(“mfx”)
install.packages(“erer”) install.packages(“stargazer”)
require(vcdExtra)
require(lmtest)
require(mfx)
library(sandwich) # Robustní sm. chyby.
require(stargazer) # Prehledové tabulky.
library(ggplot2) # Grafy.
library(reshape2) # Funkce melt.
# Funkce wald.test.
library(erer) # Výpocet AME aj.
data(Donner)
attach(Donner)
head(Donner)
## family age sex survived death
## Antoine Other 23 Male 0 1846-12-29
## Breen, Edward Breen 13 Male 1 <NA>
## Breen, Margaret I. Breen 1 Female 1 <NA>
## Breen, James Breen 5 Male 1 <NA>
## Breen, John Breen 14 Male 1 <NA>
## Breen, Mary Breen 40 Female 1 <NA>
model_LPM=lm(survived ~ age+factor(family)+factor(sex), data = Donner)
coeftest(model_LPM, vcov =vcovHC(model_LPM, vcov="HC1")) # je nutné pouzít robustní odhad chyb
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2989263 0.1289439 10.0736 9.112e-16 ***
## age -0.0066747 0.0035192 -1.8967 0.0615764 .
## factor(family)Donner -0.5590877 0.1381040 -4.0483 0.0001207 ***
## factor(family)Eddy -0.8302991 0.4016783 -2.0671 0.0420445 *
## factor(family)FosdWolf -0.5269018 0.2608306 -2.0201 0.0468074 *
## factor(family)Graves -0.3726296 0.1596454 -2.3341 0.0221654 *
## factor(family)Keseberg -0.5619438 0.4277074 -1.3139 0.1927475
## factor(family)McCutchen -0.4275486 0.5642443 -0.7577 0.4508918
## factor(family)MurFosPik -0.5719558 0.1700431 -3.3636 0.0011950 **
## factor(family)Other -0.6198134 0.1189369 -5.2113 1.493e-06 ***
## factor(family)Reed -0.1642807 0.1543104 -1.0646 0.2903363
## factor(sex)Male -0.2470263 0.1304361 -1.8938 0.0619526 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
OLS.robust.VCE <- vcovHC(model_LPM, type="HC1") # Robustní odhad matice VCE.
OLS.robust.se <- sqrt(diag(OLS.robust.VCE))
yhat <- model_LPM$fitted.values
yhat[yhat < 0] <- 0.01
yhat[yhat > 1] <- 0.99
hhat <- yhat*(1-yhat)
wls.fit=lm(survived ~ age+factor(family)+factor(sex),
weights=I(1/hhat), data=Donner)
summary(wls.fit)
##
## Call:
## lm(formula = survived ~ age + factor(family) + factor(sex), data = Donner,
## weights = I(1/hhat))
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1.83226 -0.77750 -0.09108 0.91185 2.11011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.141882 0.068502 16.669 < 2e-16 ***
## age -0.004852 0.002024 -2.398 0.01889 *
## factor(family)Donner -0.540829 0.126593 -4.272 5.42e-05 ***
## factor(family)Eddy -0.280259 0.161465 -1.736 0.08656 .
## factor(family)FosdWolf -0.477891 0.256943 -1.860 0.06667 .
## factor(family)Graves -0.242842 0.139216 -1.744 0.08504 .
## factor(family)Keseberg -0.500479 0.251026 -1.994 0.04968 *
## factor(family)McCutchen -0.535053 0.255910 -2.091 0.03980 *
## factor(family)MurFosPik -0.509276 0.150059 -3.394 0.00109 **
## factor(family)Other -0.673166 0.104347 -6.451 8.54e-09 ***
## factor(family)Reed -0.085703 0.088959 -0.963 0.33832
## factor(sex)Male -0.112004 0.066208 -1.692 0.09470 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.051 on 78 degrees of freedom
## Multiple R-squared: 0.6127, Adjusted R-squared: 0.558
## F-statistic: 11.22 on 11 and 78 DF, p-value: 3.826e-12
WLS.robust.VCE = vcovHC(wls.fit, type="HC1") # Robustní odhad matice VCE.
WLS.robust.se = sqrt(diag(WLS.robust.VCE))
model_Logit=glm(survived ~ age+factor(family)+factor(sex),data=Donner, family = binomial)
summary(model_Logit)
##
## Call:
## glm(formula = survived ~ age + factor(family) + factor(sex),
## family = binomial, data = Donner)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.07065 -0.78425 0.00018 0.83084 2.27817
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 19.48229 1226.69283 0.016 0.9873
## age -0.04161 0.01923 -2.164 0.0304 *
## factor(family)Donner -18.17917 1226.69277 -0.015 0.9882
## factor(family)Eddy -19.50763 1226.69327 -0.016 0.9873
## factor(family)FosdWolf -17.89283 1226.69304 -0.015 0.9884
## factor(family)Graves -17.16864 1226.69282 -0.014 0.9888
## factor(family)Keseberg -18.11527 1226.69308 -0.015 0.9882
## factor(family)McCutchen -17.42154 1226.69336 -0.014 0.9887
## factor(family)MurFosPik -18.16684 1226.69276 -0.015 0.9882
## factor(family)Other -18.16807 1226.69264 -0.015 0.9882
## factor(family)Reed -15.69515 1226.69311 -0.013 0.9898
## factor(sex)Male -1.32700 0.59592 -2.227 0.0260 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 124.366 on 89 degrees of freedom
## Residual deviance: 88.928 on 78 degrees of freedom
## AIC: 112.93
##
## Number of Fisher Scoring iterations: 16
Odnadnuté parametry nám ríkají, jak se mení relativní sance na prezití (log-odds ratio) pri zmene regresoru. Takze nelze interpretovat stejne jako pri OLS, jaká dopad má zmena \(x\) na pravdepodobnost výskytu jevu \(y\). Interpretace tak není intuitivní, jako pri OLS. Na druhou stranu, alepon znaménko odhadu nám urcuje vliv dané promenné.
Zároven zde máte místo \(t\)-value, \(z\)-value. Jde o to, ze standard errors jsou odhadnuty pomocí ML, takze jsou pouze asymtoticky validní a proto nemá testová statistika \(t\)-rozdelení, ale asymtotické normované normální rozdelení.
model_Probit=glm(survived ~ age+factor(family)+factor(sex),data=Donner,family = binomial(link = "probit"))
summary(model_Probit)
##
## Call:
## glm(formula = survived ~ age + factor(family) + factor(sex),
## family = binomial(link = "probit"), data = Donner)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.97304 -0.79647 0.00002 0.85483 2.17995
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.12275 289.65077 0.025 0.9804
## age -0.02439 0.01092 -2.234 0.0255 *
## factor(family)Donner -6.34701 289.65056 -0.022 0.9825
## factor(family)Eddy -6.99469 289.65122 -0.024 0.9807
## factor(family)FosdWolf -6.19593 289.65095 -0.021 0.9829
## factor(family)Graves -5.73363 289.65061 -0.020 0.9842
## factor(family)Keseberg -6.32289 289.65104 -0.022 0.9826
## factor(family)McCutchen -6.03046 289.65137 -0.021 0.9834
## factor(family)MurFosPik -6.35799 289.65056 -0.022 0.9825
## factor(family)Other -6.39906 289.65027 -0.022 0.9824
## factor(family)Reed -4.86438 289.65084 -0.017 0.9866
## factor(sex)Male -0.76817 0.35266 -2.178 0.0294 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 124.37 on 89 degrees of freedom
## Residual deviance: 89.08 on 78 degrees of freedom
## AIC: 113.08
##
## Number of Fisher Scoring iterations: 16
stargazer(model_LPM, wls.fit, model_Logit, model_Probit,
column.labels = c("LPM","WLS","Logit", "Probit"),
se=list(OLS.robust.se, WLS.robust.se,NULL, NULL),
keep.stat=c("n"),
digits=3,
type="text"
)
##
## =================================================================
## Dependent variable:
## -----------------------------------------
## survived
## OLS logistic probit
## LPM WLS Logit Probit
## (1) (2) (3) (4)
## -----------------------------------------------------------------
## age -0.007** -0.005*** -0.042** -0.024**
## (0.003) (0.002) (0.019) (0.011)
##
## factor(family)Donner -0.559*** -0.541*** -18.179 -6.347
## (0.130) (0.112) (1,226.693) (289.651)
##
## factor(family)Eddy -0.830*** -0.280 -19.508 -6.995
## (0.315) (0.320) (1,226.693) (289.651)
##
## factor(family)FosdWolf -0.527** -0.478** -17.893 -6.196
## (0.208) (0.237) (1,226.693) (289.651)
##
## factor(family)Graves -0.373** -0.243** -17.169 -5.734
## (0.147) (0.112) (1,226.693) (289.651)
##
## factor(family)Keseberg -0.562* -0.500 -18.115 -6.323
## (0.334) (0.318) (1,226.693) (289.651)
##
## factor(family)McCutchen -0.428 -0.535 -17.422 -6.030
## (0.393) (0.389) (1,226.693) (289.651)
##
## factor(family)MurFosPik -0.572*** -0.509*** -18.167 -6.358
## (0.162) (0.157) (1,226.693) (289.651)
##
## factor(family)Other -0.620*** -0.673*** -18.168 -6.399
## (0.115) (0.095) (1,226.693) (289.650)
##
## factor(family)Reed -0.164 -0.086 -15.695 -4.864
## (0.132) (0.063) (1,226.693) (289.651)
##
## factor(sex)Male -0.247** -0.112 -1.327** -0.768**
## (0.113) (0.069) (0.596) (0.353)
##
## Constant 1.299*** 1.142*** 19.482 7.123
## (0.110) (0.079) (1,226.693) (289.651)
##
## -----------------------------------------------------------------
## Observations 90 90 90 90
## =================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Mezní efekt nám dává odpoved na to, jak se zmení pravdepodobnost vzniku události (Y=1), kdyz se nekterý z regresoru zmení o 1.
Budeme rozlisovat dva mozné prístupy výpoctu:
\(MEM_j=f(\bar{x}\hat{\beta})\hat{\beta_j}\)
OLS <- lm(survived ~ age+factor(family)+factor(sex), data=Donner, x=TRUE) # Abychom mohli uloit matici X.
X <- OLS$x # Uloíme matici X.
# MEM
x.bar <- apply(X, 2, mean) # Spoèteme prùmìrné pozorování.
print(x.bar) # Co e to máme za pohlaví?
## (Intercept) age factor(family)Donner
## 1.00000000 20.86666667 0.15555556
## factor(family)Eddy factor(family)FosdWolf factor(family)Graves
## 0.04444444 0.04444444 0.11111111
## factor(family)Keseberg factor(family)McCutchen factor(family)MurFosPik
## 0.04444444 0.03333333 0.13333333
## factor(family)Other factor(family)Reed factor(sex)Male
## 0.25555556 0.07777778 0.61111111
Logit.MEM <- dlogis(x.bar %*% model_Logit$coef) * model_Logit$coef
Probit.MEM <- dnorm(x.bar %*% model_Probit$coef) * model_Probit$coef
regrese=survived ~ age+factor(family)+factor(sex)
Logit.MEM.mfx <- logitmfx(regrese, data=Donner)
Probit.MEM.mfx <- probitmfx(regrese, data=Donner)
cbind(rucne_logit=Logit.MEM[-1], rucne_probit=Probit.MEM[-1], mfx_Logit=Logit.MEM.mfx$mfxest[, 1],mfx_Probit=Probit.MEM.mfx$mfxest[, 1])
## rucne_logit rucne_probit mfx_Logit
## age -0.005309352 -0.008174321 -0.005309352
## factor(family)Donner -2.319557016 -2.127192030 -0.989659539
## factor(family)Eddy -2.489061676 -2.344261747 -0.930896998
## factor(family)FosdWolf -2.283021730 -2.076560787 -0.926134863
## factor(family)Graves -2.190619824 -1.921620255 -0.974447077
## factor(family)Keseberg -2.311404422 -2.119109476 -0.926808381
## factor(family)McCutchen -2.222887672 -2.021102399 -0.910051160
## factor(family)MurFosPik -2.317984036 -2.130872686 -0.984567644
## factor(family)Other -2.318140967 -2.144637129 -0.998294248
## factor(family)Reed -2.002611434 -1.630293258 -0.950462433
## factor(sex)Male -0.169317191 -0.257450935 -0.155580650
## mfx_Probit
## age -0.008174321
## factor(family)Donner -0.942674545
## factor(family)Eddy -0.816258971
## factor(family)FosdWolf -0.806672539
## factor(family)Graves -0.890159620
## factor(family)Keseberg -0.808216307
## factor(family)McCutchen -0.785627190
## factor(family)MurFosPik -0.924790566
## factor(family)Other -0.986965828
## factor(family)Reed -0.833595097
## factor(sex)Male -0.240672473
\(AME_j=\dfrac{1}{n}\sum^{n}_{i=1}f(x\hat{\beta})\hat{\beta_j}\)
Logit.AME <- mean(dlogis(X%*% model_Logit$coef)) * model_Logit$coef
Probit.AME <- mean(dnorm(X%*% model_Probit$coef)) * model_Probit$coef
regrese=survived ~ age+factor(family)+factor(sex)
Logit.AME.mfx <- logitmfx(regrese, data=Donner, atmean = FALSE)
Probit.AME.mfx <- probitmfx(regrese, data=Donner,atmean = FALSE)
cbind(rucne_logit=Logit.AME[-1], rucne_probit=Probit.AME[-1], mfx_Logit=Logit.AME.mfx$mfxest[, 1],mfx_Probit=Probit.AME.mfx$mfxest[, 1])
## rucne_logit rucne_probit mfx_Logit
## age -0.006923029 -0.006847397 -0.006923029
## factor(family)Donner -3.024542706 -1.781888420 -0.491905074
## factor(family)Eddy -3.245565117 -1.963721564 -0.537855171
## factor(family)FosdWolf -2.976903208 -1.739476063 -0.485617173
## factor(family)Graves -2.856417482 -1.609686775 -0.425438853
## factor(family)Keseberg -3.013912285 -1.775117893 -0.490518014
## factor(family)McCutchen -2.898492535 -1.693020145 -0.464111494
## factor(family)MurFosPik -3.022491649 -1.784971601 -0.491638411
## factor(family)Other -3.022696275 -1.796501684 -0.613887249
## factor(family)Reed -2.611267481 -1.365650415 -0.391192068
## factor(sex)Male -0.220777964 -0.215659345 -0.244983298
## mfx_Probit
## age -0.006847397
## factor(family)Donner -0.491996766
## factor(family)Eddy -0.531466754
## factor(family)FosdWolf -0.487278516
## factor(family)Graves -0.424759870
## factor(family)Keseberg -0.491773462
## factor(family)McCutchen -0.472545883
## factor(family)MurFosPik -0.493143623
## factor(family)Other -0.618748611
## factor(family)Reed -0.390659135
## factor(sex)Male -0.234987571