Important Packages/Libraries for this Lab:

options(repos = c(CRAN = "https://cloud.r-project.org"))

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.5.1     ✔ purrr   1.0.2
## ✔ tibble  3.2.1     ✔ dplyr   1.1.4
## ✔ tidyr   1.2.1     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
if(!require("sandwich")) install.packages("sandwich")
## Loading required package: sandwich
library(sandwich)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
if(!require("dotwhisker")) install.packages("dotwhisker")
## Loading required package: dotwhisker
library(dotwhisker) # coefplots

if(!require("broom")) install.packages("broom")
## Loading required package: broom
library(broom) # coefplots

if(!require("margins")) install.packages("margins")
## Loading required package: margins
library(margins) # coefplots

rm(list = ls())

source("/Volumes/middfiles/Classes/Fall23/ECON0211A/Noe_Labs/NV.R") #load NV.R to access nv_sum() function for quick summary statistic tables
## 
## Attaching package: 'rlang'
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice

Read Data

usa77<-read_csv("IPUMS_USA_1970_2017.csv")
## Rows: 945183 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): sample, gq, sex, race, raced, hispan, hispand, educ, educd, empsta...
## dbl  (8): year, serial, cbserial, hhwt, pernum, perwt, age, incwage
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
usa77
## # A tibble: 945,183 × 23
##     year sample serial cbserial  hhwt gq    pernum perwt sex     age race  raced
##    <dbl> <chr>   <dbl>    <dbl> <dbl> <chr>  <dbl> <dbl> <chr> <dbl> <chr> <chr>
##  1  1970 1970 …     14       NA   587 hous…      1   587 male     41 white white
##  2  1970 1970 …     14       NA   587 hous…      2   587 fema…    38 white white
##  3  1970 1970 …     37       NA   587 hous…      1   587 male     30 white white
##  4  1970 1970 …     37       NA   587 hous…      2   587 fema…    27 white white
##  5  1970 1970 …     55       NA   587 hous…      1   587 male     27 white white
##  6  1970 1970 …     61       NA   587 hous…      1   587 male     45 white white
##  7  1970 1970 …     61       NA   587 hous…      2   587 fema…    46 white white
##  8  1970 1970 …     78       NA   587 hous…      1   587 male     51 white white
##  9  1970 1970 …     78       NA   587 hous…      2   587 fema…    49 white white
## 10  1970 1970 …     84       NA   587 hous…      1   587 male     55 white white
## # ℹ 945,173 more rows
## # ℹ 11 more variables: hispan <chr>, hispand <chr>, educ <chr>, educd <chr>,
## #   empstat <chr>, empstatd <chr>, labforce <chr>, wkswork2 <chr>,
## #   hrswork2 <chr>, uhrswork <chr>, incwage <dbl>
usa77 <-usa77%>%
  mutate(codeduc=with(usa77, match(educ, unique(educ))))

usa77%>%
  count(empstat)
## # A tibble: 3 × 2
##   empstat                 n
##   <chr>               <int>
## 1 employed           698967
## 2 not in labor force 209731
## 3 unemployed          36485

Creating education categories using the numerical values in educnum

usa77<-usa77%>%
  mutate(edcat =case_when(codeduc== 1 |codeduc== 3 |codeduc== 4 |codeduc== 5 | codeduc==10 |codeduc== 12 ~"Less than HS",
                          codeduc ==2 ~ "High School",
                          codeduc ==7 |codeduc==8 |codeduc==11 ~"Some College",
                          codeduc ==6~ "Bachelors",
                          codeduc ==9~"Graduate",
                          TRUE~"N/A"),
         inlf=ifelse(empstat!="not in labor force",1,0))
usa77%>%
  count(edcat, educ, edcat, empstat, inlf)
## # A tibble: 36 × 5
##    edcat        educ                empstat             inlf      n
##    <chr>        <chr>               <chr>              <dbl>  <int>
##  1 Bachelors    4 years of college  employed               1 122825
##  2 Bachelors    4 years of college  not in labor force     0  20782
##  3 Bachelors    4 years of college  unemployed             1   3744
##  4 Graduate     5+ years of college employed               1  79768
##  5 Graduate     5+ years of college not in labor force     0   8927
##  6 Graduate     5+ years of college unemployed             1   1705
##  7 High School  grade 12            employed               1 241645
##  8 High School  grade 12            not in labor force     0  83464
##  9 High School  grade 12            unemployed             1  14991
## 10 Less than HS grade 10            employed               1  20341
## # ℹ 26 more rows

Including weights

sex_inlf<- function(data, gender){
  data %>%
  filter(sex=={{gender}})%>%
  group_by(year, edcat)%>%
  summarize(mean_inlf = sum(inlf * perwt) / sum(perwt), .groups = 'drop')%>%  # weighted mean of inlf
  pivot_wider(names_from = edcat,
              values_from = mean_inlf)
}
 
sex_inlf(usa77, "male") # male
## # A tibble: 6 × 6
##    year Bachelors Graduate `High School` `Less than HS` `Some College`
##   <dbl>     <dbl>    <dbl>         <dbl>          <dbl>          <dbl>
## 1  1970     0.967    0.963         0.956          0.887          0.944
## 2  1980     0.960    0.953         0.925          0.822          0.929
## 3  1990     0.952    0.956         0.889          0.768          0.923
## 4  2000     0.924    0.924         0.861          0.774          0.897
## 5  2010     0.931    0.949         0.820          0.712          0.879
## 6  2017     0.929    0.950         0.811          0.711          0.870
sex_inlf(usa77,"female") # female
## # A tibble: 6 × 6
##    year Bachelors Graduate `High School` `Less than HS` `Some College`
##   <dbl>     <dbl>    <dbl>         <dbl>          <dbl>          <dbl>
## 1  1970     0.551    0.750         0.503          0.447          0.513
## 2  1980     0.704    0.812         0.611          0.475          0.673
## 3  1990     0.814    0.869         0.694          0.502          0.779
## 4  2000     0.788    0.836         0.710          0.509          0.771
## 5  2010     0.816    0.869         0.711          0.519          0.785
## 6  2017     0.823    0.867         0.687          0.482          0.777

A Small Summary stats Table

usa77<-usa77%>%
  mutate(LTHS=ifelse(edcat=="Less than HS",1,0),
         HS =ifelse(edcat =="High School",1,0),
         SC =ifelse(edcat=="Some College",1,0 ),
         BA =ifelse(edcat =="Bachelors", 1, 0),
         GRAD =ifelse(edcat =="Graduate", 1, 0)
         )

summary tables

nv_sum(usa77, LTHS, HS, SC, BA, GRAD, inlf, weight = TRUE)
## # A tibble: 6 × 7
##   variable    Obs   min   mean median St.dev   max
##   <chr>     <int> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 LTHS     945183     0 0.165       0  0.371     1
## 2 HS       945183     0 0.357       0  0.479     1
## 3 SC       945183     0 0.220       0  0.415     1
## 4 BA       945183     0 0.162       0  0.368     1
## 5 GRAD     945183     0 0.0950      0  0.293     1
## 6 inlf     945183     0 0.784       1  0.411     1
nv_sum(usa77, LTHS, HS, SC, BA, GRAD, inlf, weight = TRUE, condition = quo(year==1970))
## # A tibble: 6 × 7
##   variable    Obs   min   mean median St.dev   max
##   <chr>     <int> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 LTHS     140910     0 0.403       0  0.491     1
## 2 HS       140910     0 0.361       0  0.480     1
## 3 SC       140910     0 0.114       0  0.318     1
## 4 BA       140910     0 0.0685      0  0.253     1
## 5 GRAD     140910     0 0.0530      0  0.224     1
## 6 inlf     140910     0 0.703       1  0.457     1
nv_sum(usa77, LTHS, HS, SC, BA, GRAD, inlf, weight = TRUE, condition = quo(year==2017))
## # A tibble: 6 × 7
##   variable    Obs   min   mean median St.dev   max
##   <chr>     <int> <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 LTHS     146525     0 0.0923      0  0.289     1
## 2 HS       146525     0 0.333       0  0.471     1
## 3 SC       146525     0 0.239       0  0.427     1
## 4 BA       146525     0 0.213       0  0.409     1
## 5 GRAD     146525     0 0.123       0  0.328     1
## 6 inlf     146525     0 0.800       1  0.400     1

Making summary tables by grouping

nv_sum(usa77%>%group_by(year), LTHS, HS, SC, BA, GRAD, inlf, weight = TRUE) %>%
  dplyr::select(year, variable, mean)%>%
  pivot_wider(names_from =variable ,
              values_from = mean)
## # A tibble: 6 × 7
##    year   LTHS    HS    SC     BA   GRAD  inlf
##   <dbl>  <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl>
## 1  1970 0.403  0.361 0.114 0.0685 0.0530 0.703
## 2  1980 0.253  0.381 0.178 0.0985 0.0890 0.753
## 3  1990 0.148  0.342 0.279 0.150  0.0809 0.806
## 4  2000 0.113  0.391 0.219 0.181  0.0956 0.800
## 5  2010 0.111  0.346 0.243 0.196  0.104  0.801
## 6  2017 0.0923 0.333 0.239 0.213  0.123  0.800
nv_sum(usa77%>%
         group_by(year, sex),
       LTHS, HS, SC, BA, GRAD, inlf, weight = TRUE) %>%
  dplyr::select(year, sex, variable, mean)%>%
  pivot_wider(names_from = variable,
              values_from = mean)
## # A tibble: 12 × 8
##     year sex      LTHS    HS    SC     BA   GRAD  inlf
##    <dbl> <chr>   <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl>
##  1  1970 female 0.398  0.400 0.111 0.0605 0.0302 0.492
##  2  1970 male   0.409  0.319 0.118 0.0771 0.0773 0.928
##  3  1980 female 0.255  0.421 0.174 0.0859 0.0628 0.608
##  4  1980 male   0.250  0.339 0.183 0.112  0.117  0.907
##  5  1990 female 0.144  0.363 0.285 0.141  0.0666 0.719
##  6  1990 male   0.153  0.320 0.273 0.158  0.0955 0.896
##  7  2000 female 0.107  0.396 0.227 0.181  0.0884 0.727
##  8  2000 male   0.119  0.386 0.211 0.181  0.103  0.876
##  9  2010 female 0.0996 0.329 0.257 0.206  0.108  0.749
## 10  2010 male   0.123  0.365 0.227 0.185  0.100  0.853
## 11  2017 female 0.0810 0.305 0.252 0.226  0.135  0.748
## 12  2017 male   0.104  0.361 0.226 0.200  0.110  0.853

Create new Variables

usa77$uhrswork<-as.numeric(usa77$uhrswork)
## Warning: NAs introduced by coercion
usa77$edcat_num<-as.numeric(factor(usa77$edcat, levels = c("Less than HS", "High School", "Some College", "Bachelors", "Graduate"))) # creating edcat_num,  based on edcat
usa77$edcat_num<-as.factor(usa77$edcat_num) # converting it to factor
ed<-as.data.frame(model.matrix(~edcat_num-1, data = usa77))
colnames(ed)<-paste0("ed", seq_along(colnames(ed)))
usa77_2<-cbind(usa77, ed)


usa77_2<-usa77_2%>%
  mutate(codehrs=with(usa77, match(hrswork2, unique(hrswork2))),
         hrover40_2 =ifelse(hrswork2=="40 hours" |hrswork2=="41-48 hours"|hrswork2=="49-59 hours"|hrswork2=="60+ hours",1,0),
         wkover40=ifelse(wkswork2=="40-47 weeks"| wkswork2=="48-49 weeks"|wkswork2=="50-52 weeks",1,0),
         fulltime_year =case_when(uhrswork>=40 & wkover40==1~ "1",
                                  hrover40_2==1 & year=="1970" & wkover40==1~ "1",
                                  hrover40_2==0 & year=="1970" & wkover40==1~ "0",
                                  TRUE~"0"),
         ln_wage =ifelse(incwage>0, log(incwage),NA),
         male =ifelse(sex=="male",1,0),
         age2 = age^2
         
  )
 
#usa77_2%>%
 # count( edcat_num, ln_wage)

try loops

for(i in 1:10){
   y<-i*2
   print(y)
}
## [1] 2
## [1] 4
## [1] 6
## [1] 8
## [1] 10
## [1] 12
## [1] 14
## [1] 16
## [1] 18
## [1] 20
# alternatives of the loops: apply family functions:
# apply(), lapply(), tapply(), sapply(), vapply(), and mapply()

y<-lapply(1:10, function(x){
  x*2
})
y
## [[1]]
## [1] 2
## 
## [[2]]
## [1] 4
## 
## [[3]]
## [1] 6
## 
## [[4]]
## [1] 8
## 
## [[5]]
## [1] 10
## 
## [[6]]
## [1] 12
## 
## [[7]]
## [1] 14
## 
## [[8]]
## [1] 16
## 
## [[9]]
## [1] 18
## 
## [[10]]
## [1] 20
# to get results as a list
y<-sapply(1:10, function(x){
  x*2
})
y
##  [1]  2  4  6  8 10 12 14 16 18 20

regression results

list_year<-c(1970, 1980, 1990, 2000, 2010, 2017)
for(i in list_year)(
  print(i)
)
## [1] 1970
## [1] 1980
## [1] 1990
## [1] 2000
## [1] 2010
## [1] 2017
list_year<-c(1970, 1980, 1990, 2000, 2010, 2017)
#ed_list<-c("ed2", "ed3", "ed4","ed5")
results<-list()
results2<-list()
graph<-list()
graph2<-list()

for(i in list_year){
  m<-lm(ln_wage~male, subset = (fulltime_year ==1 & year==i), weights = perwt, data = usa77_2)
  rob_se <- coeftest(m, vcov = vcovHC(m, type = "HC1")) #extract st.error 
  results[[as.character(i)]] <- list(model = m, robust_se = rob_se) #put the models and their robust St.Errors in the list results

  graph[[as.character(i)]]<-m # extract coefficient to plot later
  
  m2<-lm(ln_wage~male+ed2+ed3+ed4+ed5+age+age2, subset = (fulltime_year==1 & year ==i), weights = perwt, data = usa77_2)
  rob_se2 <- coeftest(m2, vcov = vcovHC(m2, type = "HC1")) #extract st.error 
  results2[[as.character(i)]]<-list(model=m2, robust_se = rob_se2)
  
   graph2[[as.character(i)]]<-m2 # extract coefficient to plot later

}
all_models <- c(lapply(results, function(x) x$model), lapply(results2, function(x) x$model)) #combine the models from the model results using x$models (model =key used to store models in the list)
all_se <- c(lapply(results, function(x) x$robust_se[, "Std. Error"]), lapply(results2, function(x) x$robust_se[, "Std. Error"])) #combine rob_se from the model results using x$robust_se (robust_se =key used to store the rob_se in the list)


 stargazer::stargazer(all_models,
                       se=all_se,
                       type = "text",
                       out = "table1.text",
                       title = "Wages",
                       column.labels = rep(as.character(list_year),2), #duplicate labels
                       notes.append = TRUE)
## 
## Wages
## ==================================================================================================================================================================================================================================================================================================================================================================================
##                                                                                                                                                                                          Dependent variable:                                                                                                                                                                      
##                     --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##                                                                                                                                                                                                ln_wage                                                                                                                                                                            
##                                 1970                          1980                         1990                         2000                          2010                         2017                         1970                         1980                         1990                         2000                          2010                         2017            
##                                  (1)                          (2)                          (3)                           (4)                          (5)                          (6)                          (7)                          (8)                          (9)                          (10)                          (11)                         (12)            
## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## male                          0.603***                      0.501***                     0.382***                     0.299***                      0.228***                     0.198***                     0.589***                     0.477***                     0.374***                     0.315***                      0.278***                     0.279***          
##                                (0.006)                      (0.005)                      (0.004)                       (0.005)                      (0.006)                      (0.006)                      (0.005)                      (0.005)                      (0.004)                       (0.004)                      (0.005)                      (0.005)           
##                                                                                                                                                                                                                                                                                                                                                                                   
## ed2                                                                                                                                                                                                           0.254***                     0.260***                     0.266***                     0.300***                      0.339***                     0.299***          
##                                                                                                                                                                                                               (0.006)                      (0.007)                      (0.007)                       (0.009)                      (0.010)                      (0.012)           
##                                                                                                                                                                                                                                                                                                                                                                                   
## ed3                                                                                                                                                                                                           0.389***                     0.374***                     0.451***                     0.489***                      0.548***                     0.491***          
##                                                                                                                                                                                                               (0.008)                      (0.008)                      (0.007)                       (0.009)                      (0.011)                      (0.012)           
##                                                                                                                                                                                                                                                                                                                                                                                   
## ed4                                                                                                                                                                                                           0.632***                     0.569***                     0.713***                     0.801***                      0.901***                     0.891***          
##                                                                                                                                                                                                               (0.009)                      (0.009)                      (0.008)                       (0.010)                      (0.011)                      (0.012)           
##                                                                                                                                                                                                                                                                                                                                                                                   
## ed5                                                                                                                                                                                                           0.644***                     0.632***                     0.898***                     1.015***                      1.139***                     1.133***          
##                                                                                                                                                                                                               (0.011)                      (0.009)                      (0.010)                       (0.011)                      (0.012)                      (0.013)           
##                                                                                                                                                                                                                                                                                                                                                                                   
## age                                                                                                                                                                                                           0.048***                     0.060***                     0.060***                     0.057***                      0.073***                     0.067***          
##                                                                                                                                                                                                               (0.002)                      (0.002)                      (0.002)                       (0.002)                      (0.002)                      (0.002)           
##                                                                                                                                                                                                                                                                                                                                                                                   
## age2                                                                                                                                                                                                         -0.001***                    -0.001***                    -0.001***                     -0.001***                    -0.001***                    -0.001***          
##                                                                                                                                                                                                              (0.00002)                    (0.00002)                    (0.00002)                     (0.00003)                    (0.00003)                    (0.00003)          
##                                                                                                                                                                                                                                                                                                                                                                                   
## Constant                      8.412***                      9.203***                     9.853***                     10.240***                    10.561***                    10.696***                     7.122***                     7.542***                     8.031***                     8.391***                      8.203***                     8.396***          
##                                (0.005)                      (0.004)                      (0.003)                       (0.004)                      (0.004)                      (0.005)                      (0.042)                      (0.040)                      (0.036)                       (0.041)                      (0.048)                      (0.050)           
##                                                                                                                                                                                                                                                                                                                                                                                   
## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                   59,417                        73,491                       95,194                       107,582                       75,684                       82,439                       59,417                       73,491                       95,194                       107,582                       75,684                       82,439           
## R2                              0.159                        0.116                        0.083                         0.045                        0.027                        0.018                        0.268                        0.215                        0.266                         0.248                        0.282                        0.278            
## Adjusted R2                     0.159                        0.116                        0.083                         0.045                        0.027                        0.018                        0.268                        0.215                        0.266                         0.248                        0.282                        0.278            
## Residual Std. Error      14.714 (df = 59415)          16.691 (df = 73489)          16.051 (df = 95179)          18.185 (df = 107580)          21.803 (df = 75682)          23.365 (df = 82437)          13.733 (df = 59409)          15.728 (df = 73483)          14.358 (df = 95173)          16.140 (df = 107574)          18.724 (df = 75676)          20.038 (df = 82431)     
## F Statistic         11,251.820*** (df = 1; 59415) 9,629.924*** (df = 1; 73489) 8,571.346*** (df = 1; 95179) 5,124.615*** (df = 1; 107580) 2,070.375*** (df = 1; 75682) 1,544.091*** (df = 1; 82437) 3,101.840*** (df = 7; 59409) 2,875.181*** (df = 7; 73483) 4,928.811*** (df = 7; 95173) 5,073.222*** (df = 7; 107574) 4,250.480*** (df = 7; 75676) 4,536.482*** (df = 7; 82431)
## ==================================================================================================================================================================================================================================================================================================================================================================================
## Note:                                                                                                                                                                                                                                                                                                                                                  *p<0.1; **p<0.05; ***p<0.01

ploting coefficients

# 1st way: Using dwplot function
dwplot(graph)+
  labs(
     title="Evolution of the US Gender Gap",
     x="Coefficients"
  )

# 2nd method: using ggplot, which allows including more feature and chart manipulation
coef_data <- bind_rows(
  lapply(names(graph2), function(year) {
    tidy(graph2[[year]], conf.int = TRUE) %>%
      filter(term == "male") %>%
      mutate(model = paste0("Model_", year),  # Add a model identifier
             year = year)
  })
)

ggplot(coef_data, 
       aes( x = year,y = estimate, color = year)) +
  geom_point(size =1)+
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high), width=0.2)+
  theme_minimal() +
  labs(title = "Evolution of the US Gender Gap",
       x= "Male",
       y = "Coefficient Estimate") 

creating race dummies

usa77_2%>%
  group_by(hispan)%>%
  summarise(mean = mean(ln_wage, na.rm = TRUE))
## # A tibble: 5 × 2
##   hispan        mean
##   <chr>        <dbl>
## 1 cuban         9.78
## 2 mexican       9.70
## 3 not hispanic  9.77
## 4 other         9.83
## 5 puerto rican  9.77
usa77_2<-usa77_2%>%
  mutate(white_nh=ifelse(race=="white" & hispan=="not hispanic", 1,0),
         black_nh=ifelse(race=="black/african american/negro" & hispan =="not hispanic",1, 0),
         hispan_nh=ifelse(hispan!="not hispanic", 1, 0),
         other_nh=ifelse(race!="white" & race !="black/african american/negro" & hispan =="not hispanic",1,0))

#check the proportion of each group
nv_sum(usa77_2, white_nh, black_nh, hispan_nh, other_nh, weight = FALSE)
##    variable    Obs min       mean median    st.dev max
## 1  white_nh 945183   0 0.76344898      1 0.4249645   1
## 2  black_nh 945183   0 0.09663525      0 0.2954606   1
## 3 hispan_nh 945183   0 0.08921870      0 0.2850593   1
## 4  other_nh 945183   0 0.05069706      0 0.2193785   1
usa77_2<-usa77_2%>%
  mutate(racecat=case_when(white_nh==1~"white_nh",
                           black_nh==1~"black_nh",
                           other_nh==1~"other_nh",
                           hispan_nh==1~"hispanic"),
         college =ifelse(edcat=="Bachelors" | edcat=="Graduate", 1,0))
usa77_2%>%
  count(edcat)

usa77_2%>%
  dplyr::select(college ,edcat)

Ploting predicted lines for each race category

mod<-lm(ln_wage~college*racecat, subset = (year==2017), data=usa77_2)
summary(mod)
## 
## Call:
## lm(formula = ln_wage ~ college * racecat, data = usa77_2, subset = (year == 
##     2017))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5749 -0.3772  0.1362  0.5967  3.3217 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              9.97464    0.01182 844.091  < 2e-16 ***
## college                  0.72292    0.02255  32.066  < 2e-16 ***
## racecathispanic          0.14085    0.01482   9.503  < 2e-16 ***
## racecatother_nh          0.14199    0.01894   7.496 6.64e-14 ***
## racecatwhite_nh          0.35482    0.01277  27.788  < 2e-16 ***
## college:racecathispanic -0.12543    0.02969  -4.224 2.40e-05 ***
## college:racecatother_nh  0.11193    0.03012   3.716 0.000202 ***
## college:racecatwhite_nh -0.09119    0.02372  -3.844 0.000121 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.005 on 111941 degrees of freedom
##   (34576 observations deleted due to missingness)
## Multiple R-squared:  0.1111, Adjusted R-squared:  0.111 
## F-statistic:  1998 on 7 and 111941 DF,  p-value: < 2.2e-16
margins_mod <- margins(mod, variables = "college", at = list(racecat = unique(usa77_2$racecat)))
margins_df <- as.data.frame(margins_mod)

ggplot(margins_df, aes(x = factor(college), y = fitted, color = factor(racecat))) +
  geom_point() +
  #geom_line(aes(group = racecat)) +
  labs(x = "college", y = "Linear Prediction",
       color = "Treated",
       title = "Predicted Ln(wages) by BA status and Race",
       subtitle = "US ACS 2017") +
  theme_minimal()