#2 generate dummy variables
titanic<-titanic%>%
mutate(female =ifelse(gender =="female", 1,0),
child = ifelse(age<16,1,0),
survive =ifelse(survived=="Survive", 1,0),
class1 =ifelse(pclass==1,1,0),
class2 =ifelse(pclass==2,1,0),
class3 =ifelse(pclass==3,1,0)
)
#titanic<-titanic%>%
#na.omit()
summary_table<-titanic%>%
group_by(pclass, gender)%>%
summarise(mean_survived = mean(survive, na.rm=TRUE),
)%>%
pivot_wider(names_from = pclass,
values_from = mean_survived)
## `summarise()` has grouped output by 'pclass'. You can override using the
## `.groups` argument.
print(summary_table)
## # A tibble: 2 × 4
## gender `1` `2` `3`
## <chr> <dbl> <dbl> <dbl>
## 1 female 0.965 0.887 0.491
## 2 male 0.341 0.146 0.152
m1<-lm(survive~child+female+class1+class2, data=titanic)
m_se1<-sqrt(diag(vcovHC(m1,"HC1"))) #robust se
summary(m1)
##
## Call:
## lm(formula = survive ~ child + female + class1 + class2, data = titanic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.09083 -0.22360 -0.07999 0.27943 0.92001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07999 0.01991 4.018 6.30e-05 ***
## child 0.19476 0.03966 4.910 1.06e-06 ***
## female 0.49697 0.02557 19.436 < 2e-16 ***
## class1 0.31911 0.03000 10.636 < 2e-16 ***
## class2 0.14361 0.03016 4.762 2.19e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3928 on 1041 degrees of freedom
## (263 observations deleted due to missingness)
## Multiple R-squared: 0.3644, Adjusted R-squared: 0.362
## F-statistic: 149.2 on 4 and 1041 DF, p-value: < 2.2e-16
m2<-lm(survive~child+female+class1+class2, data=titanic, na.action = na.exclude)
m_se2<-sqrt(diag(vcovHC(m2,"HC1"))) #robust se
stargazer(m2,
se =list(m_se2),
type = "text",
out = "LPM.txt",
title = "LPM",
notes.append = TRUE)
##
## LPM
## ===============================================
## Dependent variable:
## ---------------------------
## survive
## -----------------------------------------------
## child 0.195***
## (0.048)
##
## female 0.497***
## (0.027)
##
## class1 0.319***
## (0.032)
##
## class2 0.144***
## (0.029)
##
## Constant 0.080***
## (0.019)
##
## -----------------------------------------------
## Observations 1,046
## R2 0.364
## Adjusted R2 0.362
## Residual Std. Error 0.393 (df = 1041)
## F Statistic 149.226*** (df = 4; 1041)
## ===============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
summary(m2)
##
## Call:
## lm(formula = survive ~ child + female + class1 + class2, data = titanic,
## na.action = na.exclude)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.09083 -0.22360 -0.07999 0.27943 0.92001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07999 0.01991 4.018 6.30e-05 ***
## child 0.19476 0.03966 4.910 1.06e-06 ***
## female 0.49697 0.02557 19.436 < 2e-16 ***
## class1 0.31911 0.03000 10.636 < 2e-16 ***
## class2 0.14361 0.03016 4.762 2.19e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3928 on 1041 degrees of freedom
## (263 observations deleted due to missingness)
## Multiple R-squared: 0.3644, Adjusted R-squared: 0.362
## F-statistic: 149.2 on 4 and 1041 DF, p-value: < 2.2e-16
titanic$yhat<-predict(m2) # predict yhat
nv_sum(titanic, yhat, weight=FALSE) # summarise yhat
## # A tibble: 1 × 7
## variable Obs min mean median st.dev max
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 yhat 1309 0.0800 0.408 0.399 0.297 1.09
p_model<-glm(survive~child+female+class1+class2,family = binomial(link = "probit"), data = titanic)
#summary(p_model)
logLik(p_model) # log Likelihood value
## 'log Lik.' -497.2332 (df=5)
lrtest(p_model) # test of overall significance of the model, including the log likelihood value
## Likelihood ratio test
##
## Model 1: survive ~ child + female + class1 + class2
## Model 2: survive ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 5 -497.23
## 2 1 -707.31 -4 420.15 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
l_model<-glm(survive~child+female+class1+class2,family = binomial(link = "logit"), data = titanic)
#summary(l_model)
logLik(l_model) # log Likelihood value
## 'log Lik.' -495.7903 (df=5)
stargazer(p_model, l_model,
type = "text",
out = "probability.txt",
title = "Probability Model",
notes.append = TRUE
)
##
## Probability Model
## ==============================================
## Dependent variable:
## ----------------------------
## survive
## probit logistic
## (1) (2)
## ----------------------------------------------
## child 0.628*** 1.171***
## (0.144) (0.251)
##
## female 1.490*** 2.508***
## (0.093) (0.165)
##
## class1 1.085*** 1.907***
## (0.112) (0.198)
##
## class2 0.489*** 0.924***
## (0.111) (0.197)
##
## Constant -1.313*** -2.304***
## (0.083) (0.158)
##
## ----------------------------------------------
## Observations 1,046 1,046
## Log Likelihood -497.233 -495.790
## Akaike Inf. Crit. 1,004.466 1,001.581
## ==============================================
## Note: *p<0.1; **p<0.05; ***p<0.01
To calculate the margins, we use the function logitmfx for a logit model and probitmfx for a probit model. Use tidy function from broom package to extract marginal effects for linear probability model
#linear
lpm<-lm(survive~child+female+class1+class2, data=titanic, na.action = na.exclude)
lpm_tidy<-tidy(lpm)
lme <- lpm_tidy$estimate #extract margins (coefficients)
lse <- lpm_tidy$std.error #extract st err
margins(lpm) # coefficients of the lpm
## Average marginal effects
## lm(formula = survive ~ child + female + class1 + class2, data = titanic, na.action = na.exclude)
## child female class1 class2
## 0.1948 0.497 0.3191 0.1436
#probit
pm<-glm(survive~child+female+class1+class2,family = binomial(link = "probit"), data = titanic) # probability model
atmp<-probitmfx(survive~child+female+class1+class2, data = titanic, atmean = TRUE) # Marginal Effect at the average/ Partial Effect Average (PEA)
atmep<-atmp$mfxest[,"dF/dx"] #margins
atse<-atmp$mfxest[,"Std. Err."] #st.errors
avmp<-probitmfx(survive~child+female+class1+class2, data = titanic, atmean = FALSE) # Average Marginal effect /Average Partial Effect (APE)
avmep<-avmp$mfxest[,"dF/dx"] #margins
avse<-avmp$mfxest[,"Std. Err."] #st.error
#logit
lm<-glm(survive~child+female+class1+class2,family = binomial(link = "logit"), data = titanic) # logit model
atml<-logitmfx(survive~child+female+class1+class2, data = titanic, atmean = TRUE) # Marginal Effect at the average/ Partial Effect Average (PEA)
atmel<-atml$mfxest[,"dF/dx"] #margins
atsel<-atml$mfxest[,"Std. Err."] # st.error
avml<-logitmfx(survive~child+female+class1+class2, data = titanic, atmean = FALSE) # Average Marginal effect /Average Partial Effect (APE)
avmel<-avml$mfxest[,"dF/dx"] # margins
avsel<-avml$mfxest[,"Std. Err."] # st.errors
stargazer(lpm,lpm, pm,pm,pm,lm,lm,lm,
coef = list(NULL,lme,NULL, atmep, avmep,NULL, atmel, avmel),
se = list(NULL,lse, NULL, atse,avse,NULL,atsel,avsel),
type = "text",
out = "probability.txt",
title = "Probability Model",
notes.append = TRUE)
##
## Probability Model
## =================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------
## survive
## OLS probit logistic
## (1) (2) (3) (4) (5) (6) (7) (8)
## -----------------------------------------------------------------------------------------------------------------
## child 0.195*** 0.195*** 0.628*** 0.246*** 0.176*** 1.171*** 0.285*** 0.189***
## (0.040) (0.040) (0.144) (0.055) (0.041) (0.251) (0.058) (0.041)
##
## female 0.497*** 0.497*** 1.490*** 0.542*** 0.497*** 2.508*** 0.553*** 0.495***
## (0.026) (0.026) (0.093) (0.029) (0.028) (0.165) (0.029) (0.028)
##
## class1 0.319*** 0.319*** 1.085*** 0.412*** 0.317*** 1.907*** 0.443*** 0.323***
## (0.030) (0.030) (0.112) (0.039) (0.031) (0.198) (0.040) (0.031)
##
## class2 0.144*** 0.144*** 0.489*** 0.191*** 0.132*** 0.924*** 0.223*** 0.144***
## (0.030) (0.030) (0.111) (0.043) (0.030) (0.197) (0.047) (0.030)
##
## Constant 0.080*** 0.080*** -1.313*** -2.304***
## (0.020) (0.020) (0.083) (0.158)
##
## -----------------------------------------------------------------------------------------------------------------
## Observations 1,046 1,046 1,046 1,046 1,046 1,046 1,046 1,046
## R2 0.364 0.364
## Adjusted R2 0.362 0.362
## Log Likelihood -497.233 -497.233 -497.233 -495.790 -495.790 -495.790
## Akaike Inf. Crit. 1,004.466 1,004.466 1,004.466 1,001.581 1,001.581 1,001.581
## Residual Std. Error (df = 1041) 0.393 0.393
## F Statistic (df = 4; 1041) 149.226*** 149.226***
## =================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
lpm <- lm(survive ~ child + female + class1 + class2, data = titanic)
print("LPM")
## [1] "LPM"
summary(lpm)
##
## Call:
## lm(formula = survive ~ child + female + class1 + class2, data = titanic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.09083 -0.22360 -0.07999 0.27943 0.92001
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07999 0.01991 4.018 6.30e-05 ***
## child 0.19476 0.03966 4.910 1.06e-06 ***
## female 0.49697 0.02557 19.436 < 2e-16 ***
## class1 0.31911 0.03000 10.636 < 2e-16 ***
## class2 0.14361 0.03016 4.762 2.19e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3928 on 1041 degrees of freedom
## (263 observations deleted due to missingness)
## Multiple R-squared: 0.3644, Adjusted R-squared: 0.362
## F-statistic: 149.2 on 4 and 1041 DF, p-value: < 2.2e-16
lpm_margins <- margins(lpm, at = list(child = 1, female = 1, class1 = 0, class2 = 0))
print(lpm_margins)
## Average marginal effects at specified values
## lm(formula = survive ~ child + female + class1 + class2, data = titanic)
## at(child) at(female) at(class1) at(class2) child female class1 class2
## 1 1 0 0 0.1948 0.497 0.3191 0.1436
# using probit
summary(pm)
##
## Call:
## glm(formula = survive ~ child + female + class1 + class2, family = binomial(link = "probit"),
## data = titanic)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6562 -0.6769 -0.4457 0.7636 2.1720
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.31328 0.08326 -15.774 < 2e-16 ***
## child 0.62822 0.14393 4.365 1.27e-05 ***
## female 1.49005 0.09326 15.978 < 2e-16 ***
## class1 1.08514 0.11151 9.732 < 2e-16 ***
## class2 0.48858 0.11131 4.389 1.14e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1414.62 on 1045 degrees of freedom
## Residual deviance: 994.47 on 1041 degrees of freedom
## (263 observations deleted due to missingness)
## AIC: 1004.5
##
## Number of Fisher Scoring iterations: 5
probit_prediction <- pnorm(-1.308744 + 0.6310225 + 1.483612)
print(probit_prediction)
## [1] 0.789847
probit_margins <- margins(pm, at = list(child = 1, female = 1, class1 = 0, class2 = 0))
print(probit_margins)
## Average marginal effects at specified values
## glm(formula = survive ~ child + female + class1 + class2, family = binomial(link = "probit"), data = titanic)
## at(child) at(female) at(class1) at(class2) child female class1 class2
## 1 1 0 0 0.1813 0.4299 0.3131 0.141
# using logit model
logit_margins <- margins(lm, at = list(child = 1, female = 1, class1 = 0, class2 = 0))
print(logit_margins)
## Average marginal effects at specified values
## glm(formula = survive ~ child + female + class1 + class2, family = binomial(link = "logit"), data = titanic)
## at(child) at(female) at(class1) at(class2) child female class1 class2
## 1 1 0 0 0.1886 0.4039 0.3072 0.1488