options(repos = c(CRAN = "https://cloud.r-project.org"))
#clear the environment
rm(list = ls())
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.5.1 ✔ purrr 1.0.2
## ✔ tibble 3.2.1 ✔ dplyr 1.1.4
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
if(!require("skimr")) install.packages("skimr")
## Loading required package: skimr
library(skimr)# quickly skim the dataset
if(!require("psych")) install.packages("psych")
## Loading required package: psych
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(psych) # comprehensive summary table
if(!require("sandwich")) install.packages("sandwich")
## Loading required package: sandwich
library(sandwich) # for robust SE
if(!require("plm")) install.packages("plm")
## Loading required package: plm
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(plm) # panel linear model
if(!require("startgazer")) install.packages("stargazer")
## Loading required package: startgazer
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'startgazer'
##
## The downloaded binary packages are in
## /var/folders/hl/9g4zn0y56nj341lbc23brfdh0000gn/T//RtmpITxYIq/downloaded_packages
library(stargazer)
##
## Please cite as:
##
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
if(!require("broom")) install.packages("broom")
## Loading required package: broom
library(broom)
if(!require("car")) install.packages("car")
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:psych':
##
## logit
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(car) # F-test and other model tests
if(!require("lmtest")) install.packages("lmtest")
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lmtest)
inc_dem<-read_csv("income_democracy.csv")
## Rows: 1369 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): country
## dbl (12): country_id, year, dem_ind, log_gdppc, log_pop, age_1, age_2, age_3...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
source("/Volumes/middfiles/Classes/Fall23/ECON0211A/Noe_Labs/NV.R") #load NV.R to access nv_sum() function for quick summary statistic table
##
## Attaching package: 'rlang'
##
## The following objects are masked from 'package:purrr':
##
## %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
## flatten_raw, invoke, splice
skim(inc_dem) # help to quickly skim the dataset
Name | inc_dem |
Number of rows | 1369 |
Number of columns | 13 |
_______________________ | |
Column type frequency: | |
character | 1 |
numeric | 12 |
________________________ | |
Group variables | None |
Variable type: character
skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
---|---|---|---|---|---|---|---|
country | 0 | 1 | 4 | 30 | 0 | 195 | 0 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
country_id | 0 | 1.00 | 97.85 | 54.77 | 1.00 | 52.00 | 98.00 | 142.00 | 195.00 | ▇▇▇▇▇ |
year | 0 | 1.00 | 1982.43 | 12.47 | 1960.00 | 1970.00 | 1985.00 | 1995.00 | 2000.00 | ▅▆▃▇▇ |
dem_ind | 103 | 0.92 | 0.50 | 0.37 | 0.00 | 0.17 | 0.50 | 0.83 | 1.00 | ▇▃▂▂▇ |
log_gdppc | 403 | 0.71 | 8.16 | 1.02 | 5.77 | 7.29 | 8.14 | 8.96 | 10.45 | ▂▇▇▆▃ |
log_pop | 167 | 0.88 | 8.67 | 1.87 | 3.71 | 7.66 | 8.75 | 9.83 | 14.00 | ▁▃▇▃▁ |
age_1 | 101 | 0.93 | 0.37 | 0.09 | 0.15 | 0.29 | 0.41 | 0.45 | 0.52 | ▂▃▃▇▇ |
age_2 | 101 | 0.93 | 0.26 | 0.03 | 0.19 | 0.24 | 0.26 | 0.27 | 0.36 | ▂▆▇▁▁ |
age_3 | 101 | 0.93 | 0.17 | 0.03 | 0.09 | 0.15 | 0.17 | 0.19 | 0.36 | ▂▇▂▁▁ |
age_4 | 101 | 0.93 | 0.11 | 0.04 | 0.06 | 0.09 | 0.10 | 0.15 | 0.22 | ▆▇▂▃▁ |
age_5 | 101 | 0.93 | 0.08 | 0.05 | 0.02 | 0.05 | 0.06 | 0.12 | 0.24 | ▇▃▂▂▁ |
educ | 589 | 0.57 | 4.43 | 2.86 | 0.04 | 2.03 | 4.00 | 6.62 | 12.18 | ▇▇▅▃▁ |
age_median | 155 | 0.89 | 22.40 | 6.47 | 14.40 | 17.50 | 19.30 | 27.30 | 39.70 | ▇▃▂▂▁ |
describe(inc_dem) # provide an comprehensive summary table
## vars n mean sd median trimmed mad min max range
## country* 1 1369 97.86 54.78 98.00 97.77 66.72 1.00 195.00 194.00
## country_id 2 1369 97.85 54.77 98.00 97.77 66.72 1.00 195.00 194.00
## year 3 1369 1982.43 12.47 1985.00 1982.84 14.83 1960.00 2000.00 40.00
## dem_ind 4 1266 0.50 0.37 0.50 0.50 0.49 0.00 1.00 1.00
## log_gdppc 5 966 8.16 1.02 8.14 8.15 1.25 5.77 10.45 4.67
## log_pop 6 1202 8.67 1.87 8.75 8.71 1.61 3.71 14.00 10.29
## age_1 7 1268 0.37 0.09 0.41 0.38 0.08 0.15 0.52 0.37
## age_2 8 1268 0.26 0.03 0.26 0.26 0.02 0.19 0.36 0.18
## age_3 9 1268 0.17 0.03 0.17 0.17 0.03 0.09 0.36 0.27
## age_4 10 1268 0.11 0.04 0.10 0.11 0.02 0.06 0.22 0.16
## age_5 11 1268 0.08 0.05 0.06 0.08 0.02 0.02 0.24 0.22
## educ 12 780 4.43 2.86 4.00 4.25 3.24 0.04 12.18 12.14
## age_median 13 1214 22.40 6.47 19.30 21.56 3.71 14.40 39.70 25.30
## skew kurtosis se
## country* 0.01 -1.13 1.48
## country_id 0.01 -1.13 1.48
## year -0.20 -1.12 0.34
## dem_ind 0.10 -1.52 0.01
## log_gdppc 0.06 -0.99 0.03
## log_pop -0.14 0.05 0.05
## age_1 -0.66 -0.97 0.00
## age_2 -0.06 0.41 0.00
## age_3 1.21 2.93 0.00
## age_4 0.89 -0.49 0.00
## age_5 1.12 -0.03 0.00
## educ 0.47 -0.73 0.10
## age_median 0.96 -0.49 0.19
nv_sum(inc_dem, dem_ind, log_gdppc, log_pop, weight = FALSE)
## # A tibble: 3 × 7
## variable Obs min mean median st.dev max
## <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dem_ind 1369 0 0.499 0.5 0.371 1
## 2 log_gdppc 1369 5.77 8.16 8.14 1.02 10.4
## 3 log_pop 1369 3.71 8.67 8.75 1.87 14.0
sum_dem_ind<-inc_dem%>%
group_by(country)
nv_sum(sum_dem_ind, dem_ind, weight = FALSE)
## Warning: There were 2 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `min = min(dem_ind, na.rm = TRUE)`.
## ℹ In group 93: `country = "Korea"`.
## Caused by warning in `min()`:
## ! no non-missing arguments to min; returning Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 195 × 8
## country variable Obs min mean median st.dev max
## <chr> <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan dem_ind 9 0 0.0967 0 0.173 0.5
## 2 Albania dem_ind 9 0 0.172 0 0.250 0.667
## 3 Algeria dem_ind 7 0 0.190 0.167 0.150 0.5
## 4 Andorra dem_ind 9 0.5 0.833 1 0.289 1
## 5 Angola dem_ind 5 0 0.0667 0 0.0913 0.167
## 6 Antigua dem_ind 3 0.5 0.556 0.5 0.0962 0.667
## 7 Argentina dem_ind 9 0.167 0.666 0.833 0.321 1
## 8 Armenia dem_ind 1 0.5 0.5 0.5 NA 0.5
## 9 Australia dem_ind 9 1 1 1 0 1
## 10 Austria dem_ind 9 0.970 0.993 1 0.0132 1
## # ℹ 185 more rows
# sorting/arranging the data
inc_dem<-inc_dem%>%
arrange(year,country)%>%
arrange(country, year)%>%
mutate(dem_average = mean(dem_ind, na.rm = TRUE))
# generate a variable that is the mean democracy index for each country
inc_dem<-inc_dem%>%
group_by(country)%>%
mutate(dem_c_avg =mean(dem_ind, na.rm = TRUE))
#generate a variable that is the mean gdppc for each country
inc_dem<-inc_dem%>%
group_by(country)%>%
mutate(gdp_c_avg = mean(log_gdppc, na.rm = TRUE))
# quick look at the variables created
inc_dem%>%
dplyr::select(country, dem_average, dem_c_avg,gdp_c_avg)
## # A tibble: 1,369 × 4
## # Groups: country [195]
## country dem_average dem_c_avg gdp_c_avg
## <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan 0.499 0.0967 NaN
## 2 Afghanistan 0.499 0.0967 NaN
## 3 Afghanistan 0.499 0.0967 NaN
## 4 Afghanistan 0.499 0.0967 NaN
## 5 Afghanistan 0.499 0.0967 NaN
## 6 Afghanistan 0.499 0.0967 NaN
## 7 Afghanistan 0.499 0.0967 NaN
## 8 Afghanistan 0.499 0.0967 NaN
## 9 Afghanistan 0.499 0.0967 NaN
## 10 Albania 0.499 0.172 7.95
## # ℹ 1,359 more rows
ggplot(inc_dem, aes(x=gdp_c_avg,y=dem_c_avg))+
geom_point()+
geom_smooth(method = "lm", se=TRUE, color="red")+
labs(
title = "Democracy and Income",
subtitle = "average 1960-2000",
x ="Log GDP per capita",
y = "Democracy Index"
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 296 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 296 rows containing missing values or values outside the scale range
## (`geom_point()`).
filtered_graph<-inc_dem%>%
filter(year==1990)%>%
ggplot(aes(x=log_gdppc,y=dem_ind))+
geom_point()+
geom_smooth(method = "lm", se=TRUE, color="blue")+
labs(
title = "Democracy and Income",
subtitle = "1990",
x ="Log GDP per capita",
y = "Democracy Index"
)
filtered_graph
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 50 rows containing missing values or values outside the scale range
## (`geom_point()`).
rob <- lm(dem_ind ~ log_gdppc, data = inc_dem)
# Summarize the model
summary(rob)
##
## Call:
## lm(formula = dem_ind ~ log_gdppc, data = inc_dem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72854 -0.19534 0.02586 0.19123 0.72698
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.354828 0.070919 -19.10 <2e-16 ***
## log_gdppc 0.235673 0.008626 27.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2719 on 956 degrees of freedom
## (411 observations deleted due to missingness)
## Multiple R-squared: 0.4385, Adjusted R-squared: 0.4379
## F-statistic: 746.5 on 1 and 956 DF, p-value: < 2.2e-16
rob_se<-sqrt(diag(vcovHC(rob,type="HC1"))) #calculate robust standard error
rob_se
## (Intercept) log_gdppc
## 0.060754898 0.007103956
clust_m<-lm(dem_ind~log_gdppc, data = inc_dem)
clu_se<-sqrt(diag(vcovCL(clust_m, cluster = ~country)))
stargazer(rob,clust_m,
se=list(rob_se, clu_se),
type = "text",
out = "robust.html",
title = "Robust Model",
notes.append = TRUE)
##
## Robust Model
## ===========================================================
## Dependent variable:
## ----------------------------
## dem_ind
## (1) (2)
## -----------------------------------------------------------
## log_gdppc 0.236*** 0.236***
## (0.007) (0.012)
##
## Constant -1.355*** -1.355***
## (0.061) (0.100)
##
## -----------------------------------------------------------
## Observations 958 958
## R2 0.438 0.438
## Adjusted R2 0.438 0.438
## Residual Std. Error (df = 956) 0.272 0.272
## F Statistic (df = 1; 956) 746.477*** 746.477***
## ===========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
pinc_dem<-plm::pdata.frame(inc_dem,index= c("country", "year"), drop.index =FALSE, row.names = FALSE) #tell R that we have a panel data; year becomes a factor
#pinc_dem
# nest the variables (country and country id with the numerical series created)
bpinc_dem<-pinc_dem%>%
complete(nesting(country))
bpinc_dem
## # A tibble: 1,369 × 16
## country country_id year dem_ind log_gdppc log_pop age_1 age_2 age_3 age_4
## <fct> <dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanis… 1 1960 0.14 NA NA 0.427 0.266 0.166 0.0966
## 2 Afghanis… 1 1965 0.23 NA 9.21 0.430 0.264 0.165 0.0962
## 3 Afghanis… 1 1970 0.5 NA 9.32 0.434 0.261 0.163 0.0957
## 4 Afghanis… 1 1975 0 NA 9.43 0.437 0.260 0.162 0.0950
## 5 Afghanis… 1 1980 0 NA 9.55 0.438 0.261 0.160 0.0942
## 6 Afghanis… 1 1985 0 NA 9.68 0.439 0.263 0.158 0.0933
## 7 Afghanis… 1 1990 0 NA 9.67 0.440 0.265 0.157 0.0922
## 8 Afghanis… 1 1995 0 NA 9.78 0.440 0.265 0.157 0.0912
## 9 Afghanis… 1 2000 0 NA 10.0 0.438 0.266 0.159 0.0903
## 10 Albania 2 1960 0.2 NA NA 0.392 0.268 0.154 0.100
## # ℹ 1,359 more rows
## # ℹ 6 more variables: age_5 <dbl>, educ <dbl>, age_median <dbl>,
## # dem_average <dbl>, dem_c_avg <dbl>, gdp_c_avg <dbl>
#Here is some nifty code to make new variables that are the difference between dem_ind and log_gdppc between 1970 and 1995
#create lagged values
bpinc_dem<-bpinc_dem %>%
dplyr:: mutate(
dem_lag_7095 = ifelse(year==1995,lag(pinc_dem$dem_ind, 25),NA),
gdp_lag_7095 =ifelse(year==1995, lag(pinc_dem$log_gdppc, 25),NA)
)
## Warning: There were 2 warnings in `dplyr::mutate()`.
## The first warning was:
## ℹ In argument: `dem_lag_7095 = ifelse(year == 1995, lag(pinc_dem$dem_ind, 25),
## NA)`.
## Caused by warning in `flag.pseries()`:
## ! lag-length exceeds average group-size (7). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Create differences
bpinc_dem<-bpinc_dem %>%
dplyr:: mutate(
dem_diff_7095 = (dem_ind - dem_lag_7095),
gdp_diff_7095 = ( log_gdppc - gdp_lag_7095)
)
# view variables created
selected_data <- bpinc_dem %>%
dplyr::select(country, year, dem_ind,dem_lag_7095, log_gdppc, gdp_lag_7095 ) #call select directly from dplyr package to avoid conflict with other packages
# Print the selected data to check the result
print(selected_data)
## # A tibble: 1,369 × 6
## country year dem_ind dem_lag_7095 log_gdppc gdp_lag_7095
## <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan 1960 0.14 NA NA NA
## 2 Afghanistan 1965 0.23 NA NA NA
## 3 Afghanistan 1970 0.5 NA NA NA
## 4 Afghanistan 1975 0 NA NA NA
## 5 Afghanistan 1980 0 NA NA NA
## 6 Afghanistan 1985 0 NA NA NA
## 7 Afghanistan 1990 0 NA NA NA
## 8 Afghanistan 1995 0 0.5 NA NA
## 9 Afghanistan 2000 0 NA NA NA
## 10 Albania 1960 0.2 NA NA NA
## # ℹ 1,359 more rows
pinc_dem_graph <- bpinc_dem %>%
filter(!is.na(gdp_diff_7095) & !is.na(dem_diff_7095))
#str(pinc_dem_graph) #check structure of the entire dataset
ggplot(pinc_dem_graph, aes(x = gdp_diff_7095, y = dem_diff_7095)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE)+
labs(
title = "Changes in Democracy and Income",
subtitle = "1970 to 1995",
x ="Changes Log GDP per capita",
y = "Changes in Democracy Index"
)
## `geom_smooth()` using formula = 'y ~ x'
#pinc_dem_graph
# We can do the same graph using the fitted value
plm_model<-lm(dem_diff_7095~gdp_diff_7095, data = pinc_dem_graph)
summary(plm_model)
##
## Call:
## lm(formula = dem_diff_7095 ~ gdp_diff_7095, data = pinc_dem_graph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9709 -0.1376 -0.1343 0.1961 0.6983
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.138317 0.048124 2.874 0.00504 **
## gdp_diff_7095 -0.004907 0.075110 -0.065 0.94805
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3432 on 91 degrees of freedom
## Multiple R-squared: 4.69e-05, Adjusted R-squared: -0.01094
## F-statistic: 0.004268 on 1 and 91 DF, p-value: 0.9481
pinc_dem_graph$yhat<-fitted(plm_model)
ggplot(pinc_dem_graph, aes(x=gdp_diff_7095, y=dem_diff_7095))+
geom_point()+
geom_line(aes(y=yhat), color="red")+ # add a smoothed conditional mean
#geom_smooth(method = "lm", se=FALSE, color="blue")+
labs(
title = "Changes in Democracy and Income",
subtitle = "1970 to 1995",
x ="Changes Log GDP per capita",
y = "Changes in Democracy Index"
)
# get rid of na's
bpinc_dem<-bpinc_dem%>%
filter(!log_gdppc=="NA")
#pinc_dem
#creating country_id dummies
bpinc_dem <- cbind(bpinc_dem, model.matrix(~ country- 1, data = bpinc_dem)) #creating new country dummies starting or rearranging the country series
#bpinc_dem
# estimating regressions
dum<-lm(dem_ind~log_gdppc + country-1, data = bpinc_dem)
#summary(dum)
cof<-coef(dum)["log_gdppc"]
#cof
du_se<-sqrt(diag(vcovCL(dum, cluster = ~country)))
#du_se
fe<-plm(dem_ind~log_gdppc, model="within",index = "country", data = bpinc_dem)
#summary(fe)
fe_se<-sqrt(diag(vcovSCC(fe, cluster = "group", type = "HC1")))
#fe_se
coeftest(fe, vcov = vcovSCC(fe,cluster = "group", type = "HC1")) #extract clustered robust coefficients.
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## log_gdppc 0.083741 0.030155 2.777 0.005613 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(rob,clust_m, dum,fe,
se=list(rob_se, clu_se, du_se, fe_se),
omit = "^country",
type = "text",
out = "robust.html",
title = "Robust Model",
notes.append = TRUE)
##
## Robust Model
## =======================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------------
## dem_ind
## OLS panel
## linear
## (1) (2) (3) (4)
## -----------------------------------------------------------------------------------------------------------------------
## log_gdppc 0.236*** 0.236*** 0.084** 0.084***
## (0.007) (0.012) (0.034) (0.030)
##
## Constant -1.355*** -1.355***
## (0.061) (0.100)
##
## -----------------------------------------------------------------------------------------------------------------------
## Observations 958 958 958 958
## R2 0.438 0.438 0.923 0.020
## Adjusted R2 0.438 0.438 0.909 -0.163
## Residual Std. Error 0.272 (df = 956) 0.272 (df = 956) 0.204 (df = 807)
## F Statistic 746.477*** (df = 1; 956) 746.477*** (df = 1; 956) 64.180*** (df = 151; 807) 16.202*** (df = 1; 807)
## =======================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
coef
## function (object, ...)
## UseMethod("coef")
## <bytecode: 0x7fd1333baa80>
## <environment: namespace:stats>
Note: For cross-sectional data(many subject at a single period of time) with heteroskedasticity, use vcovHC: Corrects for heteroskedasticity For panel data with clustering (e.g., individuals observed over time), use vcovCL: Corrects for within-cluster correlation For panel data with cross-sectional and serial correlation, use vcovSCC: Corrects for both heteroskedasticity and serial correlation vcovCL: Mostly for clustering with linear model (lm) vcovSCC: clustering with panel linear model (plm)
# creating time dummies
dpinc_dem<-cbind(bpinc_dem, model.matrix(~year-1, data = bpinc_dem))
#dpinc_dem
yfe<-plm(dem_ind~log_gdppc+ year-1, index="country", data = bpinc_dem) # fixed effect regression with time dummies; index: country
#summary(yfe)
yfey<-plm(dem_ind~log_gdppc+ year-1, index="year", data = bpinc_dem) # fixed effect regression with time dummies; index: year
summary(yfey)
## Oneway (individual) effect Within Model
##
## Call:
## plm(formula = dem_ind ~ log_gdppc + year - 1, data = bpinc_dem,
## index = "year")
##
## Unbalanced Panel: n = 9, T = 54-148, N = 958
##
## Residuals:
## Min. 1st Qu. Median 3rd Qu. Max.
## -0.741393 -0.186434 0.029116 0.178868 0.650187
##
## Coefficients:
## Estimate Std. Error t-value Pr(>|t|)
## log_gdppc 0.23511 0.00846 27.79 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Total Sum of Squares: 119.51
## Residual Sum of Squares: 65.858
## R-Squared: 0.44893
## Adj. R-Squared: 0.4437
## F-statistic: 772.299 on 1 and 948 DF, p-value: < 2.22e-16
yfe_se<-sqrt(diag(vcovSCC(yfe, type="HC1",cluster = "group")))
#yfe_se
stargazer(rob,clust_m, dum,fe,yfe,
se=list(rob_se, clu_se, du_se,fe_se, yfe_se),
omit = "^country",
type = "text",
out = "robust.html",
title = "Robust Model",
notes.append = TRUE)
##
## Robust Model
## ===============================================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------------------------------------
## dem_ind
## OLS panel
## linear
## (1) (2) (3) (4) (5)
## -----------------------------------------------------------------------------------------------------------------------------------------------
## log_gdppc 0.236*** 0.236*** 0.084** 0.084*** 0.054
## (0.007) (0.012) (0.034) (0.030) (0.041)
##
## year1960 -0.032
## (0.043)
##
## year1965 -0.032
## (0.040)
##
## year1970 -0.159***
## (0.040)
##
## year1975 -0.180***
## (0.036)
##
## year1980 -0.130***
## (0.034)
##
## year1985 -0.119***
## (0.030)
##
## year1990 -0.074***
## (0.025)
##
## year1995 -0.023*
## (0.014)
##
## Constant -1.355*** -1.355***
## (0.061) (0.100)
##
## -----------------------------------------------------------------------------------------------------------------------------------------------
## Observations 958 958 958 958 958
## R2 0.438 0.438 0.923 0.020 0.118
## Adjusted R2 0.438 0.438 0.909 -0.163 -0.056
## Residual Std. Error 0.272 (df = 956) 0.272 (df = 956) 0.204 (df = 807)
## F Statistic 746.477*** (df = 1; 956) 746.477*** (df = 1; 956) 64.180*** (df = 151; 807) 16.202*** (df = 1; 807) 11.906*** (df = 9; 799)
## ===============================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# f-test
test<-linearHypothesis(yfe, c("year1960","year1965","year1970","year1975","year1980","year1985" ,"year1990","year1995"))
test
##
## Linear hypothesis test:
## year1960 = 0
## year1965 = 0
## year1970 = 0
## year1975 = 0
## year1980 = 0
## year1985 = 0
## year1990 = 0
## year1995 = 0
##
## Model 1: restricted model
## Model 2: dem_ind ~ log_gdppc + year - 1
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 807
## 2 799 8 89.318 6.397e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
yfe2<-plm(dem_ind~log_gdppc+ log_pop+educ+age_2+age_3+age_4+age_5+year-1, index="country", data = bpinc_dem)
#summary(yfe2)
yfe_se2<-sqrt(diag(vcovSCC(yfe2, type="HC1",cluster = "group")))
#yfe_se
stargazer(rob,clust_m, dum,fe,yfe,yfe2,
se=list(rob_se, clu_se, du_se,fe_se, yfe_se, yfe_se2),
omit = "^country",
type = "text",
out = "robust.html",
title = "Robust Model",
column.labels = c("ro", "cl", "du","fe","yfe","y"),
notes.append = TRUE)
##
## Robust Model
## =======================================================================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------------------------------------------------------------
## dem_ind
## OLS panel
## linear
## ro cl du fe yfe y
## (1) (2) (3) (4) (5) (6)
## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
## log_gdppc 0.236*** 0.236*** 0.084** 0.084*** 0.054 0.025
## (0.007) (0.012) (0.034) (0.030) (0.041) (0.049)
##
## year1960 -0.032
## (0.043)
##
## log_pop -0.069
## (0.105)
##
## educ -0.0004
## (0.024)
##
## age_2 -0.526
## (0.631)
##
## age_3 -2.481***
## (0.910)
##
## age_4 0.298
## (1.282)
##
## age_5 0.605
## (1.230)
##
## year1965 -0.032 -0.158
## (0.040) (0.107)
##
## year1970 -0.159*** -0.260***
## (0.040) (0.098)
##
## year1975 -0.180*** -0.282***
## (0.036) (0.088)
##
## year1980 -0.130*** -0.205***
## (0.034) (0.077)
##
## year1985 -0.119*** -0.154***
## (0.030) (0.059)
##
## year1990 -0.074*** -0.101**
## (0.025) (0.044)
##
## year1995 -0.023* -0.040*
## (0.014) (0.022)
##
## Constant -1.355*** -1.355***
## (0.061) (0.100)
##
## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations 958 958 958 958 958 680
## R2 0.438 0.438 0.923 0.020 0.118 0.133
## Adjusted R2 0.438 0.438 0.909 -0.163 -0.056 -0.033
## Residual Std. Error 0.272 (df = 956) 0.272 (df = 956) 0.204 (df = 807)
## F Statistic 746.477*** (df = 1; 956) 746.477*** (df = 1; 956) 64.180*** (df = 151; 807) 16.202*** (df = 1; 807) 11.906*** (df = 9; 799) 6.227*** (df = 14; 570)
## =======================================================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
test1<-linearHypothesis(yfe2, c("age_2","age_3","age_4","age_5"))
test2<-linearHypothesis(yfe2, c("age_2","age_3","age_4","age_5","educ", "log_pop"))
test1
##
## Linear hypothesis test:
## age_2 = 0
## age_3 = 0
## age_4 = 0
## age_5 = 0
##
## Model 1: restricted model
## Model 2: dem_ind ~ log_gdppc + log_pop + educ + age_2 + age_3 + age_4 +
## age_5 + year - 1
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 574
## 2 570 4 14.925 0.00486 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
test2
##
## Linear hypothesis test:
## age_2 = 0
## age_3 = 0
## age_4 = 0
## age_5 = 0
## educ = 0
## log_pop = 0
##
## Model 1: restricted model
## Model 2: dem_ind ~ log_gdppc + log_pop + educ + age_2 + age_3 + age_4 +
## age_5 + year - 1
##
## Res.Df Df Chisq Pr(>Chisq)
## 1 576
## 2 570 6 16.091 0.01327 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#creating a list of our models
M_list<-list(rob,clust_m, dum,fe,yfe,yfe2)
#extract coefficients and CI
coef_extr<-lapply(M_list, function(model){
coefs<-tidy(model) #from the broom library
conf_int<-confint(model)
coefs<-cbind(coefs, conf_int) #combine coefficient and confidence intervals
coefs
})
"
for(i in M_list){
coefs<-tidy(i) #from the broom library
conf_int<-confint(i)
coefs<-cbind(coefs, conf_int)
}
"
## [1] "\nfor(i in M_list){\n coefs<-tidy(i) #from the broom library\n conf_int<-confint(i)\n coefs<-cbind(coefs, conf_int)\n}\n"
#apply that to all the models using the function created
#model_coef<-lapply(M_list, coef_extr)
for (i in seq_along(coef_extr)){
coef_extr[[i]]$model<-paste("Model", i)
}
# combine the coefficients in a single dataframe
coef_df<-bind_rows(coef_extr)
#coef_df
# get the coefficients of interest
coef_df<-coef_df%>%
filter(term== "log_gdppc")
#rename the columns for convenience
colnames(coef_df) <- c("term", "estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high", "model")
models=c("basic", "cluster", "dummies", "fixed_effects", "time_fe", "fe_controls") #rename the models to match the colors
ggplot(coef_df, aes(x=model, y =estimate, color=models))+
geom_point()+
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width=0.2)+
geom_hline(yintercept = 0,linetype ="dashed", color="red")+ # add a horizontal dashed line
labs(title = "Coefficients of log_gdppc Across Models",
x = "Coefficients on Log_gdppc",
y = "Coefficient Estimate",
color="Model") +
scale_color_manual(values = c("basic" = "blue", "cluster" = "green", "dummies" = "red", "fixed_effects"="yellow", "time_fe"="black", "fe_controls"="purple")) +
theme_minimal()