Important packages/libraries for this lab:

  1. tidyverse
  2. skimr
  3. psych
  4. sandwich
  5. plm
  6. stargazer
  7. broom
  8. car 9.lmtest
options(repos = c(CRAN = "https://cloud.r-project.org"))

#clear the environment
rm(list = ls())

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.5.1     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ dplyr   1.1.4
## ✔ tidyr   1.2.1     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
if(!require("skimr")) install.packages("skimr")
## Loading required package: skimr
library(skimr)# quickly skim the dataset

if(!require("psych")) install.packages("psych")
## Loading required package: psych
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(psych) # comprehensive summary table

if(!require("sandwich")) install.packages("sandwich")
## Loading required package: sandwich
library(sandwich) # for robust SE

if(!require("plm")) install.packages("plm")
## Loading required package: plm
## 
## Attaching package: 'plm'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, lag, lead
library(plm) # panel linear model

if(!require("startgazer")) install.packages("stargazer")
## Loading required package: startgazer
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'startgazer'
## 
## The downloaded binary packages are in
##  /var/folders/hl/9g4zn0y56nj341lbc23brfdh0000gn/T//RtmpITxYIq/downloaded_packages
library(stargazer) 
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
if(!require("broom")) install.packages("broom")
## Loading required package: broom
library(broom)

if(!require("car")) install.packages("car")
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:psych':
## 
##     logit
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(car) # F-test and other model tests

if(!require("lmtest")) install.packages("lmtest")
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(lmtest)

Load Data

inc_dem<-read_csv("income_democracy.csv")
## Rows: 1369 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): country
## dbl (12): country_id, year, dem_ind, log_gdppc, log_pop, age_1, age_2, age_3...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
source("/Volumes/middfiles/Classes/Fall23/ECON0211A/Noe_Labs/NV.R") #load NV.R to access nv_sum() function for quick summary statistic table
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice

2. Investigate the Data

skim(inc_dem) # help to quickly skim the dataset
Data summary
Name inc_dem
Number of rows 1369
Number of columns 13
_______________________
Column type frequency:
character 1
numeric 12
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country 0 1 4 30 0 195 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
country_id 0 1.00 97.85 54.77 1.00 52.00 98.00 142.00 195.00 ▇▇▇▇▇
year 0 1.00 1982.43 12.47 1960.00 1970.00 1985.00 1995.00 2000.00 ▅▆▃▇▇
dem_ind 103 0.92 0.50 0.37 0.00 0.17 0.50 0.83 1.00 ▇▃▂▂▇
log_gdppc 403 0.71 8.16 1.02 5.77 7.29 8.14 8.96 10.45 ▂▇▇▆▃
log_pop 167 0.88 8.67 1.87 3.71 7.66 8.75 9.83 14.00 ▁▃▇▃▁
age_1 101 0.93 0.37 0.09 0.15 0.29 0.41 0.45 0.52 ▂▃▃▇▇
age_2 101 0.93 0.26 0.03 0.19 0.24 0.26 0.27 0.36 ▂▆▇▁▁
age_3 101 0.93 0.17 0.03 0.09 0.15 0.17 0.19 0.36 ▂▇▂▁▁
age_4 101 0.93 0.11 0.04 0.06 0.09 0.10 0.15 0.22 ▆▇▂▃▁
age_5 101 0.93 0.08 0.05 0.02 0.05 0.06 0.12 0.24 ▇▃▂▂▁
educ 589 0.57 4.43 2.86 0.04 2.03 4.00 6.62 12.18 ▇▇▅▃▁
age_median 155 0.89 22.40 6.47 14.40 17.50 19.30 27.30 39.70 ▇▃▂▂▁
describe(inc_dem) # provide an comprehensive summary table
##            vars    n    mean    sd  median trimmed   mad     min     max  range
## country*      1 1369   97.86 54.78   98.00   97.77 66.72    1.00  195.00 194.00
## country_id    2 1369   97.85 54.77   98.00   97.77 66.72    1.00  195.00 194.00
## year          3 1369 1982.43 12.47 1985.00 1982.84 14.83 1960.00 2000.00  40.00
## dem_ind       4 1266    0.50  0.37    0.50    0.50  0.49    0.00    1.00   1.00
## log_gdppc     5  966    8.16  1.02    8.14    8.15  1.25    5.77   10.45   4.67
## log_pop       6 1202    8.67  1.87    8.75    8.71  1.61    3.71   14.00  10.29
## age_1         7 1268    0.37  0.09    0.41    0.38  0.08    0.15    0.52   0.37
## age_2         8 1268    0.26  0.03    0.26    0.26  0.02    0.19    0.36   0.18
## age_3         9 1268    0.17  0.03    0.17    0.17  0.03    0.09    0.36   0.27
## age_4        10 1268    0.11  0.04    0.10    0.11  0.02    0.06    0.22   0.16
## age_5        11 1268    0.08  0.05    0.06    0.08  0.02    0.02    0.24   0.22
## educ         12  780    4.43  2.86    4.00    4.25  3.24    0.04   12.18  12.14
## age_median   13 1214   22.40  6.47   19.30   21.56  3.71   14.40   39.70  25.30
##             skew kurtosis   se
## country*    0.01    -1.13 1.48
## country_id  0.01    -1.13 1.48
## year       -0.20    -1.12 0.34
## dem_ind     0.10    -1.52 0.01
## log_gdppc   0.06    -0.99 0.03
## log_pop    -0.14     0.05 0.05
## age_1      -0.66    -0.97 0.00
## age_2      -0.06     0.41 0.00
## age_3       1.21     2.93 0.00
## age_4       0.89    -0.49 0.00
## age_5       1.12    -0.03 0.00
## educ        0.47    -0.73 0.10
## age_median  0.96    -0.49 0.19
nv_sum(inc_dem, dem_ind, log_gdppc, log_pop, weight = FALSE)
## # A tibble: 3 × 7
##   variable    Obs   min  mean median st.dev   max
##   <chr>     <int> <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1 dem_ind    1369  0    0.499   0.5   0.371   1  
## 2 log_gdppc  1369  5.77 8.16    8.14  1.02   10.4
## 3 log_pop    1369  3.71 8.67    8.75  1.87   14.0

3. Investigating dem_ind variable

sum_dem_ind<-inc_dem%>%
  group_by(country)
nv_sum(sum_dem_ind, dem_ind, weight = FALSE)
## Warning: There were 2 warnings in `summarise()`.
## The first warning was:
## ℹ In argument: `min = min(dem_ind, na.rm = TRUE)`.
## ℹ In group 93: `country = "Korea"`.
## Caused by warning in `min()`:
## ! no non-missing arguments to min; returning Inf
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## # A tibble: 195 × 8
##    country     variable   Obs   min   mean median  st.dev   max
##    <chr>       <chr>    <int> <dbl>  <dbl>  <dbl>   <dbl> <dbl>
##  1 Afghanistan dem_ind      9 0     0.0967  0      0.173  0.5  
##  2 Albania     dem_ind      9 0     0.172   0      0.250  0.667
##  3 Algeria     dem_ind      7 0     0.190   0.167  0.150  0.5  
##  4 Andorra     dem_ind      9 0.5   0.833   1      0.289  1    
##  5 Angola      dem_ind      5 0     0.0667  0      0.0913 0.167
##  6 Antigua     dem_ind      3 0.5   0.556   0.5    0.0962 0.667
##  7 Argentina   dem_ind      9 0.167 0.666   0.833  0.321  1    
##  8 Armenia     dem_ind      1 0.5   0.5     0.5   NA      0.5  
##  9 Australia   dem_ind      9 1     1       1      0      1    
## 10 Austria     dem_ind      9 0.970 0.993   1      0.0132 1    
## # ℹ 185 more rows

4. Generate variables for averages by country

# sorting/arranging the data
inc_dem<-inc_dem%>%
  arrange(year,country)%>%
  arrange(country, year)%>%
  mutate(dem_average = mean(dem_ind, na.rm = TRUE))

  
# generate a variable that is the mean democracy index for each country 
inc_dem<-inc_dem%>%
  group_by(country)%>%
  mutate(dem_c_avg =mean(dem_ind, na.rm = TRUE))

#generate a variable that is the mean gdppc for each country 
inc_dem<-inc_dem%>%
  group_by(country)%>%
  mutate(gdp_c_avg = mean(log_gdppc, na.rm = TRUE))
 
# quick look at the variables created
inc_dem%>%
  dplyr::select(country, dem_average, dem_c_avg,gdp_c_avg)
## # A tibble: 1,369 × 4
## # Groups:   country [195]
##    country     dem_average dem_c_avg gdp_c_avg
##    <chr>             <dbl>     <dbl>     <dbl>
##  1 Afghanistan       0.499    0.0967    NaN   
##  2 Afghanistan       0.499    0.0967    NaN   
##  3 Afghanistan       0.499    0.0967    NaN   
##  4 Afghanistan       0.499    0.0967    NaN   
##  5 Afghanistan       0.499    0.0967    NaN   
##  6 Afghanistan       0.499    0.0967    NaN   
##  7 Afghanistan       0.499    0.0967    NaN   
##  8 Afghanistan       0.499    0.0967    NaN   
##  9 Afghanistan       0.499    0.0967    NaN   
## 10 Albania           0.499    0.172       7.95
## # ℹ 1,359 more rows

5. Graph: Relatinship between the natural log of GDP per capita and the democracy index.

ggplot(inc_dem, aes(x=gdp_c_avg,y=dem_c_avg))+
  geom_point()+
  geom_smooth(method = "lm", se=TRUE, color="red")+
  labs(
    title = "Democracy and Income",
    subtitle = "average 1960-2000",
    x ="Log GDP per capita",
    y = "Democracy Index"
  )
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 296 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 296 rows containing missing values or values outside the scale range
## (`geom_point()`).

filtered_graph<-inc_dem%>%
  filter(year==1990)%>%
  ggplot(aes(x=log_gdppc,y=dem_ind))+
    geom_point()+
    geom_smooth(method = "lm", se=TRUE, color="blue")+
    labs(
      title = "Democracy and Income",
      subtitle = "1990",
      x ="Log GDP per capita",
      y = "Democracy Index"
    )
filtered_graph
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 50 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 50 rows containing missing values or values outside the scale range
## (`geom_point()`).

6. Estimating Simple Robust Regression Model

rob <- lm(dem_ind ~ log_gdppc, data = inc_dem)
# Summarize the model
summary(rob)
## 
## Call:
## lm(formula = dem_ind ~ log_gdppc, data = inc_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72854 -0.19534  0.02586  0.19123  0.72698 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.354828   0.070919  -19.10   <2e-16 ***
## log_gdppc    0.235673   0.008626   27.32   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2719 on 956 degrees of freedom
##   (411 observations deleted due to missingness)
## Multiple R-squared:  0.4385, Adjusted R-squared:  0.4379 
## F-statistic: 746.5 on 1 and 956 DF,  p-value: < 2.2e-16
rob_se<-sqrt(diag(vcovHC(rob,type="HC1"))) #calculate robust standard error
rob_se
## (Intercept)   log_gdppc 
## 0.060754898 0.007103956

7. Estimating Clustered Regression Model (to avoid autocorrelation)

clust_m<-lm(dem_ind~log_gdppc, data = inc_dem)

clu_se<-sqrt(diag(vcovCL(clust_m, cluster = ~country)))


stargazer(rob,clust_m,
          se=list(rob_se, clu_se),
          type = "text",
          out = "robust.html",
          title = "Robust Model",
          notes.append = TRUE)
## 
## Robust Model
## ===========================================================
##                                    Dependent variable:     
##                                ----------------------------
##                                          dem_ind           
##                                     (1)            (2)     
## -----------------------------------------------------------
## log_gdppc                         0.236***      0.236***   
##                                   (0.007)        (0.012)   
##                                                            
## Constant                         -1.355***      -1.355***  
##                                   (0.061)        (0.100)   
##                                                            
## -----------------------------------------------------------
## Observations                        958            958     
## R2                                 0.438          0.438    
## Adjusted R2                        0.438          0.438    
## Residual Std. Error (df = 956)     0.272          0.272    
## F Statistic (df = 1; 956)        746.477***    746.477***  
## ===========================================================
## Note:                           *p<0.1; **p<0.05; ***p<0.01

9. Let’s tell R we have panel data!

pinc_dem<-plm::pdata.frame(inc_dem,index= c("country", "year"), drop.index =FALSE, row.names = FALSE) #tell R that we have a panel data; year becomes a factor
#pinc_dem

# nest the variables (country and country id with the numerical series created)
bpinc_dem<-pinc_dem%>%
  complete(nesting(country))
bpinc_dem
## # A tibble: 1,369 × 16
##    country   country_id year  dem_ind log_gdppc log_pop age_1 age_2 age_3  age_4
##    <fct>          <dbl> <fct>   <dbl>     <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
##  1 Afghanis…          1 1960     0.14        NA   NA    0.427 0.266 0.166 0.0966
##  2 Afghanis…          1 1965     0.23        NA    9.21 0.430 0.264 0.165 0.0962
##  3 Afghanis…          1 1970     0.5         NA    9.32 0.434 0.261 0.163 0.0957
##  4 Afghanis…          1 1975     0           NA    9.43 0.437 0.260 0.162 0.0950
##  5 Afghanis…          1 1980     0           NA    9.55 0.438 0.261 0.160 0.0942
##  6 Afghanis…          1 1985     0           NA    9.68 0.439 0.263 0.158 0.0933
##  7 Afghanis…          1 1990     0           NA    9.67 0.440 0.265 0.157 0.0922
##  8 Afghanis…          1 1995     0           NA    9.78 0.440 0.265 0.157 0.0912
##  9 Afghanis…          1 2000     0           NA   10.0  0.438 0.266 0.159 0.0903
## 10 Albania            2 1960     0.2         NA   NA    0.392 0.268 0.154 0.100 
## # ℹ 1,359 more rows
## # ℹ 6 more variables: age_5 <dbl>, educ <dbl>, age_median <dbl>,
## #   dem_average <dbl>, dem_c_avg <dbl>, gdp_c_avg <dbl>
#Here is some nifty code to make new variables that are the difference between dem_ind and log_gdppc between 1970 and 1995
#create lagged values
 bpinc_dem<-bpinc_dem %>%
  dplyr:: mutate(
    dem_lag_7095 = ifelse(year==1995,lag(pinc_dem$dem_ind, 25),NA),
    
    gdp_lag_7095 =ifelse(year==1995, lag(pinc_dem$log_gdppc, 25),NA)
  )
## Warning: There were 2 warnings in `dplyr::mutate()`.
## The first warning was:
## ℹ In argument: `dem_lag_7095 = ifelse(year == 1995, lag(pinc_dem$dem_ind, 25),
##   NA)`.
## Caused by warning in `flag.pseries()`:
## ! lag-length exceeds average group-size (7). This could also be a result of unused factor levels. See #25. Use fdroplevels() to remove unused factor levels from your data.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
# Create differences
bpinc_dem<-bpinc_dem %>%
 dplyr:: mutate(
    dem_diff_7095 = (dem_ind - dem_lag_7095),
    gdp_diff_7095 = ( log_gdppc - gdp_lag_7095)
  ) 
# view variables created
selected_data <- bpinc_dem %>%
  dplyr::select(country, year, dem_ind,dem_lag_7095, log_gdppc, gdp_lag_7095 ) #call select directly from dplyr package to avoid conflict with other packages

# Print the selected data to check the result
print(selected_data)
## # A tibble: 1,369 × 6
##    country     year  dem_ind dem_lag_7095 log_gdppc gdp_lag_7095
##    <fct>       <fct>   <dbl>        <dbl>     <dbl>        <dbl>
##  1 Afghanistan 1960     0.14         NA          NA           NA
##  2 Afghanistan 1965     0.23         NA          NA           NA
##  3 Afghanistan 1970     0.5          NA          NA           NA
##  4 Afghanistan 1975     0            NA          NA           NA
##  5 Afghanistan 1980     0            NA          NA           NA
##  6 Afghanistan 1985     0            NA          NA           NA
##  7 Afghanistan 1990     0            NA          NA           NA
##  8 Afghanistan 1995     0             0.5        NA           NA
##  9 Afghanistan 2000     0            NA          NA           NA
## 10 Albania     1960     0.2          NA          NA           NA
## # ℹ 1,359 more rows

Plot

pinc_dem_graph <- bpinc_dem %>%
  filter(!is.na(gdp_diff_7095) & !is.na(dem_diff_7095))

#str(pinc_dem_graph)  #check structure of the entire dataset


ggplot(pinc_dem_graph, aes(x = gdp_diff_7095, y = dem_diff_7095)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)+
   labs(
    title = "Changes in Democracy and Income",
    subtitle = "1970 to 1995",
    x ="Changes Log GDP per capita",
    y = "Changes in Democracy Index"
  )
## `geom_smooth()` using formula = 'y ~ x'

#pinc_dem_graph

using fitted values

# We can do the same graph using the fitted value
plm_model<-lm(dem_diff_7095~gdp_diff_7095, data = pinc_dem_graph)
summary(plm_model)
## 
## Call:
## lm(formula = dem_diff_7095 ~ gdp_diff_7095, data = pinc_dem_graph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9709 -0.1376 -0.1343  0.1961  0.6983 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)   
## (Intercept)    0.138317   0.048124   2.874  0.00504 **
## gdp_diff_7095 -0.004907   0.075110  -0.065  0.94805   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3432 on 91 degrees of freedom
## Multiple R-squared:  4.69e-05,   Adjusted R-squared:  -0.01094 
## F-statistic: 0.004268 on 1 and 91 DF,  p-value: 0.9481
pinc_dem_graph$yhat<-fitted(plm_model)

ggplot(pinc_dem_graph, aes(x=gdp_diff_7095, y=dem_diff_7095))+
  geom_point()+
  geom_line(aes(y=yhat), color="red")+ # add a smoothed conditional mean
  #geom_smooth(method = "lm", se=FALSE, color="blue")+
  labs(
    title = "Changes in Democracy and Income",
    subtitle = "1970 to 1995",
    x ="Changes Log GDP per capita",
    y = "Changes in Democracy Index"
  )

10. Start Estimating fixed Effect Regression

# get rid of na's
bpinc_dem<-bpinc_dem%>%
  filter(!log_gdppc=="NA")

#pinc_dem

#creating country_id dummies
bpinc_dem <- cbind(bpinc_dem, model.matrix(~ country- 1, data = bpinc_dem)) #creating new country dummies starting or rearranging the country series
#bpinc_dem
# estimating regressions
dum<-lm(dem_ind~log_gdppc + country-1, data = bpinc_dem)
#summary(dum)
cof<-coef(dum)["log_gdppc"]
#cof
du_se<-sqrt(diag(vcovCL(dum, cluster = ~country)))
#du_se

fe<-plm(dem_ind~log_gdppc, model="within",index = "country", data = bpinc_dem)
#summary(fe)
fe_se<-sqrt(diag(vcovSCC(fe, cluster = "group", type = "HC1")))
#fe_se
coeftest(fe, vcov = vcovSCC(fe,cluster = "group", type = "HC1")) #extract clustered robust coefficients.
## 
## t test of coefficients:
## 
##           Estimate Std. Error t value Pr(>|t|)   
## log_gdppc 0.083741   0.030155   2.777 0.005613 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
stargazer(rob,clust_m, dum,fe,
          se=list(rob_se, clu_se, du_se, fe_se),
          omit = "^country",
          type = "text",
          out = "robust.html",
          title = "Robust Model",
          notes.append = TRUE)
## 
## Robust Model
## =======================================================================================================================
##                                                             Dependent variable:                                        
##                     ---------------------------------------------------------------------------------------------------
##                                                                   dem_ind                                              
##                                                         OLS                                               panel          
##                                                                                                          linear          
##                               (1)                      (2)                       (3)                      (4)          
## -----------------------------------------------------------------------------------------------------------------------
## log_gdppc                   0.236***                 0.236***                  0.084**                 0.084***        
##                             (0.007)                  (0.012)                   (0.034)                  (0.030)        
##                                                                                                                        
## Constant                   -1.355***                -1.355***                                                          
##                             (0.061)                  (0.100)                                                           
##                                                                                                                        
## -----------------------------------------------------------------------------------------------------------------------
## Observations                  958                      958                       958                      958          
## R2                           0.438                    0.438                     0.923                    0.020         
## Adjusted R2                  0.438                    0.438                     0.909                   -0.163         
## Residual Std. Error     0.272 (df = 956)         0.272 (df = 956)         0.204 (df = 807)                             
## F Statistic         746.477*** (df = 1; 956) 746.477*** (df = 1; 956) 64.180*** (df = 151; 807) 16.202*** (df = 1; 807)
## =======================================================================================================================
## Note:                                                                                       *p<0.1; **p<0.05; ***p<0.01
coef
## function (object, ...) 
## UseMethod("coef")
## <bytecode: 0x7fd1333baa80>
## <environment: namespace:stats>

11. Including time fix effects

Note: For cross-sectional data(many subject at a single period of time) with heteroskedasticity, use vcovHC: Corrects for heteroskedasticity For panel data with clustering (e.g., individuals observed over time), use vcovCL: Corrects for within-cluster correlation For panel data with cross-sectional and serial correlation, use vcovSCC: Corrects for both heteroskedasticity and serial correlation vcovCL: Mostly for clustering with linear model (lm) vcovSCC: clustering with panel linear model (plm)

# creating time dummies
dpinc_dem<-cbind(bpinc_dem, model.matrix(~year-1, data = bpinc_dem))
#dpinc_dem

yfe<-plm(dem_ind~log_gdppc+ year-1, index="country", data = bpinc_dem) # fixed effect regression with time dummies; index: country
#summary(yfe)

yfey<-plm(dem_ind~log_gdppc+ year-1, index="year", data = bpinc_dem) # fixed effect regression with time dummies; index: year
summary(yfey)
## Oneway (individual) effect Within Model
## 
## Call:
## plm(formula = dem_ind ~ log_gdppc + year - 1, data = bpinc_dem, 
##     index = "year")
## 
## Unbalanced Panel: n = 9, T = 54-148, N = 958
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -0.741393 -0.186434  0.029116  0.178868  0.650187 
## 
## Coefficients:
##           Estimate Std. Error t-value  Pr(>|t|)    
## log_gdppc  0.23511    0.00846   27.79 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    119.51
## Residual Sum of Squares: 65.858
## R-Squared:      0.44893
## Adj. R-Squared: 0.4437
## F-statistic: 772.299 on 1 and 948 DF, p-value: < 2.22e-16
yfe_se<-sqrt(diag(vcovSCC(yfe, type="HC1",cluster = "group")))
#yfe_se

stargazer(rob,clust_m, dum,fe,yfe,
          se=list(rob_se, clu_se, du_se,fe_se, yfe_se),
          omit = "^country",
          type = "text",
          out = "robust.html",
          title = "Robust Model",
          notes.append = TRUE)
## 
## Robust Model
## ===============================================================================================================================================
##                                                                         Dependent variable:                                                    
##                     ---------------------------------------------------------------------------------------------------------------------------
##                                                                               dem_ind                                                          
##                                                         OLS                                                          panel                     
##                                                                                                                     linear                     
##                               (1)                      (2)                       (3)                      (4)                     (5)          
## -----------------------------------------------------------------------------------------------------------------------------------------------
## log_gdppc                   0.236***                 0.236***                  0.084**                 0.084***                  0.054         
##                             (0.007)                  (0.012)                   (0.034)                  (0.030)                 (0.041)        
##                                                                                                                                                
## year1960                                                                                                                        -0.032         
##                                                                                                                                 (0.043)        
##                                                                                                                                                
## year1965                                                                                                                        -0.032         
##                                                                                                                                 (0.040)        
##                                                                                                                                                
## year1970                                                                                                                       -0.159***       
##                                                                                                                                 (0.040)        
##                                                                                                                                                
## year1975                                                                                                                       -0.180***       
##                                                                                                                                 (0.036)        
##                                                                                                                                                
## year1980                                                                                                                       -0.130***       
##                                                                                                                                 (0.034)        
##                                                                                                                                                
## year1985                                                                                                                       -0.119***       
##                                                                                                                                 (0.030)        
##                                                                                                                                                
## year1990                                                                                                                       -0.074***       
##                                                                                                                                 (0.025)        
##                                                                                                                                                
## year1995                                                                                                                        -0.023*        
##                                                                                                                                 (0.014)        
##                                                                                                                                                
## Constant                   -1.355***                -1.355***                                                                                  
##                             (0.061)                  (0.100)                                                                                   
##                                                                                                                                                
## -----------------------------------------------------------------------------------------------------------------------------------------------
## Observations                  958                      958                       958                      958                     958          
## R2                           0.438                    0.438                     0.923                    0.020                   0.118         
## Adjusted R2                  0.438                    0.438                     0.909                   -0.163                  -0.056         
## Residual Std. Error     0.272 (df = 956)         0.272 (df = 956)         0.204 (df = 807)                                                     
## F Statistic         746.477*** (df = 1; 956) 746.477*** (df = 1; 956) 64.180*** (df = 151; 807) 16.202*** (df = 1; 807) 11.906*** (df = 9; 799)
## ===============================================================================================================================================
## Note:                                                                                                               *p<0.1; **p<0.05; ***p<0.01
# f-test

test<-linearHypothesis(yfe, c("year1960","year1965","year1970","year1975","year1980","year1985" ,"year1990","year1995"))
test
## 
## Linear hypothesis test:
## year1960 = 0
## year1965 = 0
## year1970 = 0
## year1975 = 0
## year1980 = 0
## year1985 = 0
## year1990 = 0
## year1995 = 0
## 
## Model 1: restricted model
## Model 2: dem_ind ~ log_gdppc + year - 1
## 
##   Res.Df Df  Chisq Pr(>Chisq)    
## 1    807                         
## 2    799  8 89.318  6.397e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

12. Control for more variables

yfe2<-plm(dem_ind~log_gdppc+ log_pop+educ+age_2+age_3+age_4+age_5+year-1, index="country", data = bpinc_dem)
#summary(yfe2)

yfe_se2<-sqrt(diag(vcovSCC(yfe2, type="HC1",cluster = "group")))
#yfe_se

stargazer(rob,clust_m, dum,fe,yfe,yfe2,
          se=list(rob_se, clu_se, du_se,fe_se, yfe_se, yfe_se2),
          omit = "^country",
          type = "text",
          out = "robust.html",
          title = "Robust Model",
          column.labels = c("ro", "cl", "du","fe","yfe","y"),
          notes.append = TRUE)
## 
## Robust Model
## =======================================================================================================================================================================
##                                                                                     Dependent variable:                                                                
##                     ---------------------------------------------------------------------------------------------------------------------------------------------------
##                                                                                           dem_ind                                                                      
##                                                         OLS                                                                      panel                                 
##                                                                                                                                 linear                                 
##                                ro                       cl                       du                       fe                      yfe                      y           
##                               (1)                      (2)                       (3)                      (4)                     (5)                     (6)          
## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
## log_gdppc                   0.236***                 0.236***                  0.084**                 0.084***                  0.054                   0.025         
##                             (0.007)                  (0.012)                   (0.034)                  (0.030)                 (0.041)                 (0.049)        
##                                                                                                                                                                        
## year1960                                                                                                                        -0.032                                 
##                                                                                                                                 (0.043)                                
##                                                                                                                                                                        
## log_pop                                                                                                                                                 -0.069         
##                                                                                                                                                         (0.105)        
##                                                                                                                                                                        
## educ                                                                                                                                                    -0.0004        
##                                                                                                                                                         (0.024)        
##                                                                                                                                                                        
## age_2                                                                                                                                                   -0.526         
##                                                                                                                                                         (0.631)        
##                                                                                                                                                                        
## age_3                                                                                                                                                  -2.481***       
##                                                                                                                                                         (0.910)        
##                                                                                                                                                                        
## age_4                                                                                                                                                    0.298         
##                                                                                                                                                         (1.282)        
##                                                                                                                                                                        
## age_5                                                                                                                                                    0.605         
##                                                                                                                                                         (1.230)        
##                                                                                                                                                                        
## year1965                                                                                                                        -0.032                  -0.158         
##                                                                                                                                 (0.040)                 (0.107)        
##                                                                                                                                                                        
## year1970                                                                                                                       -0.159***               -0.260***       
##                                                                                                                                 (0.040)                 (0.098)        
##                                                                                                                                                                        
## year1975                                                                                                                       -0.180***               -0.282***       
##                                                                                                                                 (0.036)                 (0.088)        
##                                                                                                                                                                        
## year1980                                                                                                                       -0.130***               -0.205***       
##                                                                                                                                 (0.034)                 (0.077)        
##                                                                                                                                                                        
## year1985                                                                                                                       -0.119***               -0.154***       
##                                                                                                                                 (0.030)                 (0.059)        
##                                                                                                                                                                        
## year1990                                                                                                                       -0.074***               -0.101**        
##                                                                                                                                 (0.025)                 (0.044)        
##                                                                                                                                                                        
## year1995                                                                                                                        -0.023*                 -0.040*        
##                                                                                                                                 (0.014)                 (0.022)        
##                                                                                                                                                                        
## Constant                   -1.355***                -1.355***                                                                                                          
##                             (0.061)                  (0.100)                                                                                                           
##                                                                                                                                                                        
## -----------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                  958                      958                       958                      958                     958                     680          
## R2                           0.438                    0.438                     0.923                    0.020                   0.118                   0.133         
## Adjusted R2                  0.438                    0.438                     0.909                   -0.163                  -0.056                  -0.033         
## Residual Std. Error     0.272 (df = 956)         0.272 (df = 956)         0.204 (df = 807)                                                                             
## F Statistic         746.477*** (df = 1; 956) 746.477*** (df = 1; 956) 64.180*** (df = 151; 807) 16.202*** (df = 1; 807) 11.906*** (df = 9; 799) 6.227*** (df = 14; 570)
## =======================================================================================================================================================================
## Note:                                                                                                                                       *p<0.1; **p<0.05; ***p<0.01
test1<-linearHypothesis(yfe2, c("age_2","age_3","age_4","age_5"))
test2<-linearHypothesis(yfe2, c("age_2","age_3","age_4","age_5","educ", "log_pop"))
test1
## 
## Linear hypothesis test:
## age_2 = 0
## age_3 = 0
## age_4 = 0
## age_5 = 0
## 
## Model 1: restricted model
## Model 2: dem_ind ~ log_gdppc + log_pop + educ + age_2 + age_3 + age_4 + 
##     age_5 + year - 1
## 
##   Res.Df Df  Chisq Pr(>Chisq)   
## 1    574                        
## 2    570  4 14.925    0.00486 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
test2
## 
## Linear hypothesis test:
## age_2 = 0
## age_3 = 0
## age_4 = 0
## age_5 = 0
## educ = 0
## log_pop = 0
## 
## Model 1: restricted model
## Model 2: dem_ind ~ log_gdppc + log_pop + educ + age_2 + age_3 + age_4 + 
##     age_5 + year - 1
## 
##   Res.Df Df  Chisq Pr(>Chisq)  
## 1    576                       
## 2    570  6 16.091    0.01327 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Bonus: Making a visualization of the results

#creating a list of our models
M_list<-list(rob,clust_m, dum,fe,yfe,yfe2)
#extract coefficients and CI
coef_extr<-lapply(M_list, function(model){
  coefs<-tidy(model) #from the broom library
  conf_int<-confint(model)
  coefs<-cbind(coefs, conf_int) #combine coefficient and confidence intervals
  coefs
})
"
for(i in M_list){
  coefs<-tidy(i) #from the broom library
  conf_int<-confint(i)
  coefs<-cbind(coefs, conf_int)
}
"
## [1] "\nfor(i in M_list){\n  coefs<-tidy(i) #from the broom library\n  conf_int<-confint(i)\n  coefs<-cbind(coefs, conf_int)\n}\n"
#apply that to all the models using the function created
#model_coef<-lapply(M_list, coef_extr)
for (i in seq_along(coef_extr)){
  coef_extr[[i]]$model<-paste("Model", i)
}

# combine the coefficients in a single dataframe
coef_df<-bind_rows(coef_extr)
#coef_df
# get the coefficients of interest
coef_df<-coef_df%>%
  filter(term== "log_gdppc")

#rename the columns for convenience
colnames(coef_df) <- c("term", "estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high", "model")

models=c("basic", "cluster", "dummies", "fixed_effects", "time_fe", "fe_controls") #rename the models to match the colors

ggplot(coef_df, aes(x=model, y =estimate, color=models))+
  geom_point()+
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width=0.2)+
  geom_hline(yintercept = 0,linetype ="dashed", color="red")+ # add a horizontal dashed line
  labs(title = "Coefficients of log_gdppc Across Models",
       x = "Coefficients on Log_gdppc",
       y = "Coefficient Estimate",
       color="Model") +
   scale_color_manual(values = c("basic" = "blue", "cluster" = "green", "dummies" = "red", "fixed_effects"="yellow", "time_fe"="black", "fe_controls"="purple")) +
  theme_minimal()